1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4 * 5 * Test code for seccomp bpf. 6 */ 7 8 #define _GNU_SOURCE 9 #include <sys/types.h> 10 11 /* 12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 13 * we need to use the kernel's siginfo.h file and trick glibc 14 * into accepting it. 15 */ 16 #if !__GLIBC_PREREQ(2, 26) 17 # include <asm/siginfo.h> 18 # define __have_siginfo_t 1 19 # define __have_sigval_t 1 20 # define __have_sigevent_t 1 21 #endif 22 23 #include <errno.h> 24 #include <linux/filter.h> 25 #include <sys/prctl.h> 26 #include <sys/ptrace.h> 27 #include <sys/user.h> 28 #include <linux/prctl.h> 29 #include <linux/ptrace.h> 30 #include <linux/seccomp.h> 31 #include <pthread.h> 32 #include <semaphore.h> 33 #include <signal.h> 34 #include <stddef.h> 35 #include <stdbool.h> 36 #include <string.h> 37 #include <time.h> 38 #include <linux/elf.h> 39 #include <sys/uio.h> 40 #include <sys/utsname.h> 41 #include <sys/fcntl.h> 42 #include <sys/mman.h> 43 #include <sys/times.h> 44 #include <sys/socket.h> 45 #include <sys/ioctl.h> 46 47 #include <unistd.h> 48 #include <sys/syscall.h> 49 #include <poll.h> 50 51 #include "../kselftest_harness.h" 52 53 #ifndef PR_SET_PTRACER 54 # define PR_SET_PTRACER 0x59616d61 55 #endif 56 57 #ifndef PR_SET_NO_NEW_PRIVS 58 #define PR_SET_NO_NEW_PRIVS 38 59 #define PR_GET_NO_NEW_PRIVS 39 60 #endif 61 62 #ifndef PR_SECCOMP_EXT 63 #define PR_SECCOMP_EXT 43 64 #endif 65 66 #ifndef SECCOMP_EXT_ACT 67 #define SECCOMP_EXT_ACT 1 68 #endif 69 70 #ifndef SECCOMP_EXT_ACT_TSYNC 71 #define SECCOMP_EXT_ACT_TSYNC 1 72 #endif 73 74 #ifndef SECCOMP_MODE_STRICT 75 #define SECCOMP_MODE_STRICT 1 76 #endif 77 78 #ifndef SECCOMP_MODE_FILTER 79 #define SECCOMP_MODE_FILTER 2 80 #endif 81 82 #ifndef SECCOMP_RET_ALLOW 83 struct seccomp_data { 84 int nr; 85 __u32 arch; 86 __u64 instruction_pointer; 87 __u64 args[6]; 88 }; 89 #endif 90 91 #ifndef SECCOMP_RET_KILL_PROCESS 92 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 93 #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 94 #endif 95 #ifndef SECCOMP_RET_KILL 96 #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 97 #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 98 #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 99 #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 100 #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 101 #endif 102 #ifndef SECCOMP_RET_LOG 103 #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 104 #endif 105 106 #ifndef __NR_seccomp 107 # if defined(__i386__) 108 # define __NR_seccomp 354 109 # elif defined(__x86_64__) 110 # define __NR_seccomp 317 111 # elif defined(__arm__) 112 # define __NR_seccomp 383 113 # elif defined(__aarch64__) 114 # define __NR_seccomp 277 115 # elif defined(__riscv) 116 # define __NR_seccomp 277 117 # elif defined(__hppa__) 118 # define __NR_seccomp 338 119 # elif defined(__powerpc__) 120 # define __NR_seccomp 358 121 # elif defined(__s390__) 122 # define __NR_seccomp 348 123 # else 124 # warning "seccomp syscall number unknown for this architecture" 125 # define __NR_seccomp 0xffff 126 # endif 127 #endif 128 129 #ifndef SECCOMP_SET_MODE_STRICT 130 #define SECCOMP_SET_MODE_STRICT 0 131 #endif 132 133 #ifndef SECCOMP_SET_MODE_FILTER 134 #define SECCOMP_SET_MODE_FILTER 1 135 #endif 136 137 #ifndef SECCOMP_GET_ACTION_AVAIL 138 #define SECCOMP_GET_ACTION_AVAIL 2 139 #endif 140 141 #ifndef SECCOMP_GET_NOTIF_SIZES 142 #define SECCOMP_GET_NOTIF_SIZES 3 143 #endif 144 145 #ifndef SECCOMP_FILTER_FLAG_TSYNC 146 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 147 #endif 148 149 #ifndef SECCOMP_FILTER_FLAG_LOG 150 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 151 #endif 152 153 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW 154 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 155 #endif 156 157 #ifndef PTRACE_SECCOMP_GET_METADATA 158 #define PTRACE_SECCOMP_GET_METADATA 0x420d 159 160 struct seccomp_metadata { 161 __u64 filter_off; /* Input: which filter */ 162 __u64 flags; /* Output: filter's flags */ 163 }; 164 #endif 165 166 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER 167 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 168 169 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U 170 171 #define SECCOMP_IOC_MAGIC '!' 172 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 173 #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 174 #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 175 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 176 177 /* Flags for seccomp notification fd ioctl. */ 178 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 179 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 180 struct seccomp_notif_resp) 181 #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) 182 183 struct seccomp_notif { 184 __u64 id; 185 __u32 pid; 186 __u32 flags; 187 struct seccomp_data data; 188 }; 189 190 struct seccomp_notif_resp { 191 __u64 id; 192 __s64 val; 193 __s32 error; 194 __u32 flags; 195 }; 196 197 struct seccomp_notif_sizes { 198 __u16 seccomp_notif; 199 __u16 seccomp_notif_resp; 200 __u16 seccomp_data; 201 }; 202 #endif 203 204 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY 205 #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 206 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 207 #endif 208 209 #ifndef seccomp 210 int seccomp(unsigned int op, unsigned int flags, void *args) 211 { 212 errno = 0; 213 return syscall(__NR_seccomp, op, flags, args); 214 } 215 #endif 216 217 #if __BYTE_ORDER == __LITTLE_ENDIAN 218 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) 219 #elif __BYTE_ORDER == __BIG_ENDIAN 220 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) 221 #else 222 #error "wut? Unknown __BYTE_ORDER?!" 223 #endif 224 225 #define SIBLING_EXIT_UNKILLED 0xbadbeef 226 #define SIBLING_EXIT_FAILURE 0xbadface 227 #define SIBLING_EXIT_NEWPRIVS 0xbadfeed 228 229 TEST(mode_strict_support) 230 { 231 long ret; 232 233 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 234 ASSERT_EQ(0, ret) { 235 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 236 } 237 syscall(__NR_exit, 0); 238 } 239 240 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) 241 { 242 long ret; 243 244 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); 245 ASSERT_EQ(0, ret) { 246 TH_LOG("Kernel does not support CONFIG_SECCOMP"); 247 } 248 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 249 NULL, NULL, NULL); 250 EXPECT_FALSE(true) { 251 TH_LOG("Unreachable!"); 252 } 253 } 254 255 /* Note! This doesn't test no new privs behavior */ 256 TEST(no_new_privs_support) 257 { 258 long ret; 259 260 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 261 EXPECT_EQ(0, ret) { 262 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 263 } 264 } 265 266 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */ 267 TEST(mode_filter_support) 268 { 269 long ret; 270 271 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 272 ASSERT_EQ(0, ret) { 273 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 274 } 275 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); 276 EXPECT_EQ(-1, ret); 277 EXPECT_EQ(EFAULT, errno) { 278 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); 279 } 280 } 281 282 TEST(mode_filter_without_nnp) 283 { 284 struct sock_filter filter[] = { 285 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 286 }; 287 struct sock_fprog prog = { 288 .len = (unsigned short)ARRAY_SIZE(filter), 289 .filter = filter, 290 }; 291 long ret; 292 293 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); 294 ASSERT_LE(0, ret) { 295 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); 296 } 297 errno = 0; 298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 299 /* Succeeds with CAP_SYS_ADMIN, fails without */ 300 /* TODO(wad) check caps not euid */ 301 if (geteuid()) { 302 EXPECT_EQ(-1, ret); 303 EXPECT_EQ(EACCES, errno); 304 } else { 305 EXPECT_EQ(0, ret); 306 } 307 } 308 309 #define MAX_INSNS_PER_PATH 32768 310 311 TEST(filter_size_limits) 312 { 313 int i; 314 int count = BPF_MAXINSNS + 1; 315 struct sock_filter allow[] = { 316 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 317 }; 318 struct sock_filter *filter; 319 struct sock_fprog prog = { }; 320 long ret; 321 322 filter = calloc(count, sizeof(*filter)); 323 ASSERT_NE(NULL, filter); 324 325 for (i = 0; i < count; i++) 326 filter[i] = allow[0]; 327 328 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 329 ASSERT_EQ(0, ret); 330 331 prog.filter = filter; 332 prog.len = count; 333 334 /* Too many filter instructions in a single filter. */ 335 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 336 ASSERT_NE(0, ret) { 337 TH_LOG("Installing %d insn filter was allowed", prog.len); 338 } 339 340 /* One less is okay, though. */ 341 prog.len -= 1; 342 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 343 ASSERT_EQ(0, ret) { 344 TH_LOG("Installing %d insn filter wasn't allowed", prog.len); 345 } 346 } 347 348 TEST(filter_chain_limits) 349 { 350 int i; 351 int count = BPF_MAXINSNS; 352 struct sock_filter allow[] = { 353 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 354 }; 355 struct sock_filter *filter; 356 struct sock_fprog prog = { }; 357 long ret; 358 359 filter = calloc(count, sizeof(*filter)); 360 ASSERT_NE(NULL, filter); 361 362 for (i = 0; i < count; i++) 363 filter[i] = allow[0]; 364 365 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 366 ASSERT_EQ(0, ret); 367 368 prog.filter = filter; 369 prog.len = 1; 370 371 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 372 ASSERT_EQ(0, ret); 373 374 prog.len = count; 375 376 /* Too many total filter instructions. */ 377 for (i = 0; i < MAX_INSNS_PER_PATH; i++) { 378 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 379 if (ret != 0) 380 break; 381 } 382 ASSERT_NE(0, ret) { 383 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", 384 i, count, i * (count + 4)); 385 } 386 } 387 388 TEST(mode_filter_cannot_move_to_strict) 389 { 390 struct sock_filter filter[] = { 391 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 392 }; 393 struct sock_fprog prog = { 394 .len = (unsigned short)ARRAY_SIZE(filter), 395 .filter = filter, 396 }; 397 long ret; 398 399 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 400 ASSERT_EQ(0, ret); 401 402 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 403 ASSERT_EQ(0, ret); 404 405 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); 406 EXPECT_EQ(-1, ret); 407 EXPECT_EQ(EINVAL, errno); 408 } 409 410 411 TEST(mode_filter_get_seccomp) 412 { 413 struct sock_filter filter[] = { 414 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 415 }; 416 struct sock_fprog prog = { 417 .len = (unsigned short)ARRAY_SIZE(filter), 418 .filter = filter, 419 }; 420 long ret; 421 422 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 423 ASSERT_EQ(0, ret); 424 425 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 426 EXPECT_EQ(0, ret); 427 428 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 429 ASSERT_EQ(0, ret); 430 431 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 432 EXPECT_EQ(2, ret); 433 } 434 435 436 TEST(ALLOW_all) 437 { 438 struct sock_filter filter[] = { 439 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 440 }; 441 struct sock_fprog prog = { 442 .len = (unsigned short)ARRAY_SIZE(filter), 443 .filter = filter, 444 }; 445 long ret; 446 447 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 448 ASSERT_EQ(0, ret); 449 450 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 451 ASSERT_EQ(0, ret); 452 } 453 454 TEST(empty_prog) 455 { 456 struct sock_filter filter[] = { 457 }; 458 struct sock_fprog prog = { 459 .len = (unsigned short)ARRAY_SIZE(filter), 460 .filter = filter, 461 }; 462 long ret; 463 464 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 465 ASSERT_EQ(0, ret); 466 467 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 468 EXPECT_EQ(-1, ret); 469 EXPECT_EQ(EINVAL, errno); 470 } 471 472 TEST(log_all) 473 { 474 struct sock_filter filter[] = { 475 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 476 }; 477 struct sock_fprog prog = { 478 .len = (unsigned short)ARRAY_SIZE(filter), 479 .filter = filter, 480 }; 481 long ret; 482 pid_t parent = getppid(); 483 484 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 485 ASSERT_EQ(0, ret); 486 487 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 488 ASSERT_EQ(0, ret); 489 490 /* getppid() should succeed and be logged (no check for logging) */ 491 EXPECT_EQ(parent, syscall(__NR_getppid)); 492 } 493 494 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) 495 { 496 struct sock_filter filter[] = { 497 BPF_STMT(BPF_RET|BPF_K, 0x10000000U), 498 }; 499 struct sock_fprog prog = { 500 .len = (unsigned short)ARRAY_SIZE(filter), 501 .filter = filter, 502 }; 503 long ret; 504 505 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 506 ASSERT_EQ(0, ret); 507 508 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 509 ASSERT_EQ(0, ret); 510 EXPECT_EQ(0, syscall(__NR_getpid)) { 511 TH_LOG("getpid() shouldn't ever return"); 512 } 513 } 514 515 /* return code >= 0x80000000 is unused. */ 516 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) 517 { 518 struct sock_filter filter[] = { 519 BPF_STMT(BPF_RET|BPF_K, 0x90000000U), 520 }; 521 struct sock_fprog prog = { 522 .len = (unsigned short)ARRAY_SIZE(filter), 523 .filter = filter, 524 }; 525 long ret; 526 527 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 528 ASSERT_EQ(0, ret); 529 530 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 531 ASSERT_EQ(0, ret); 532 EXPECT_EQ(0, syscall(__NR_getpid)) { 533 TH_LOG("getpid() shouldn't ever return"); 534 } 535 } 536 537 TEST_SIGNAL(KILL_all, SIGSYS) 538 { 539 struct sock_filter filter[] = { 540 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 541 }; 542 struct sock_fprog prog = { 543 .len = (unsigned short)ARRAY_SIZE(filter), 544 .filter = filter, 545 }; 546 long ret; 547 548 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 549 ASSERT_EQ(0, ret); 550 551 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 552 ASSERT_EQ(0, ret); 553 } 554 555 TEST_SIGNAL(KILL_one, SIGSYS) 556 { 557 struct sock_filter filter[] = { 558 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 559 offsetof(struct seccomp_data, nr)), 560 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 561 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 562 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 563 }; 564 struct sock_fprog prog = { 565 .len = (unsigned short)ARRAY_SIZE(filter), 566 .filter = filter, 567 }; 568 long ret; 569 pid_t parent = getppid(); 570 571 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 572 ASSERT_EQ(0, ret); 573 574 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 575 ASSERT_EQ(0, ret); 576 577 EXPECT_EQ(parent, syscall(__NR_getppid)); 578 /* getpid() should never return. */ 579 EXPECT_EQ(0, syscall(__NR_getpid)); 580 } 581 582 TEST_SIGNAL(KILL_one_arg_one, SIGSYS) 583 { 584 void *fatal_address; 585 struct sock_filter filter[] = { 586 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 587 offsetof(struct seccomp_data, nr)), 588 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), 589 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 590 /* Only both with lower 32-bit for now. */ 591 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), 592 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 593 (unsigned long)&fatal_address, 0, 1), 594 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 595 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 596 }; 597 struct sock_fprog prog = { 598 .len = (unsigned short)ARRAY_SIZE(filter), 599 .filter = filter, 600 }; 601 long ret; 602 pid_t parent = getppid(); 603 struct tms timebuf; 604 clock_t clock = times(&timebuf); 605 606 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 607 ASSERT_EQ(0, ret); 608 609 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 610 ASSERT_EQ(0, ret); 611 612 EXPECT_EQ(parent, syscall(__NR_getppid)); 613 EXPECT_LE(clock, syscall(__NR_times, &timebuf)); 614 /* times() should never return. */ 615 EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); 616 } 617 618 TEST_SIGNAL(KILL_one_arg_six, SIGSYS) 619 { 620 #ifndef __NR_mmap2 621 int sysno = __NR_mmap; 622 #else 623 int sysno = __NR_mmap2; 624 #endif 625 struct sock_filter filter[] = { 626 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 627 offsetof(struct seccomp_data, nr)), 628 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), 629 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 630 /* Only both with lower 32-bit for now. */ 631 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), 632 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), 633 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 634 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 635 }; 636 struct sock_fprog prog = { 637 .len = (unsigned short)ARRAY_SIZE(filter), 638 .filter = filter, 639 }; 640 long ret; 641 pid_t parent = getppid(); 642 int fd; 643 void *map1, *map2; 644 int page_size = sysconf(_SC_PAGESIZE); 645 646 ASSERT_LT(0, page_size); 647 648 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 649 ASSERT_EQ(0, ret); 650 651 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 652 ASSERT_EQ(0, ret); 653 654 fd = open("/dev/zero", O_RDONLY); 655 ASSERT_NE(-1, fd); 656 657 EXPECT_EQ(parent, syscall(__NR_getppid)); 658 map1 = (void *)syscall(sysno, 659 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); 660 EXPECT_NE(MAP_FAILED, map1); 661 /* mmap2() should never return. */ 662 map2 = (void *)syscall(sysno, 663 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); 664 EXPECT_EQ(MAP_FAILED, map2); 665 666 /* The test failed, so clean up the resources. */ 667 munmap(map1, page_size); 668 munmap(map2, page_size); 669 close(fd); 670 } 671 672 /* This is a thread task to die via seccomp filter violation. */ 673 void *kill_thread(void *data) 674 { 675 bool die = (bool)data; 676 677 if (die) { 678 prctl(PR_GET_SECCOMP, 0, 0, 0, 0); 679 return (void *)SIBLING_EXIT_FAILURE; 680 } 681 682 return (void *)SIBLING_EXIT_UNKILLED; 683 } 684 685 /* Prepare a thread that will kill itself or both of us. */ 686 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) 687 { 688 pthread_t thread; 689 void *status; 690 /* Kill only when calling __NR_prctl. */ 691 struct sock_filter filter_thread[] = { 692 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 693 offsetof(struct seccomp_data, nr)), 694 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 695 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), 696 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 697 }; 698 struct sock_fprog prog_thread = { 699 .len = (unsigned short)ARRAY_SIZE(filter_thread), 700 .filter = filter_thread, 701 }; 702 struct sock_filter filter_process[] = { 703 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 704 offsetof(struct seccomp_data, nr)), 705 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 706 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), 707 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 708 }; 709 struct sock_fprog prog_process = { 710 .len = (unsigned short)ARRAY_SIZE(filter_process), 711 .filter = filter_process, 712 }; 713 714 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 715 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 716 } 717 718 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, 719 kill_process ? &prog_process : &prog_thread)); 720 721 /* 722 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS 723 * flag cannot be downgraded by a new filter. 724 */ 725 ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); 726 727 /* Start a thread that will exit immediately. */ 728 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); 729 ASSERT_EQ(0, pthread_join(thread, &status)); 730 ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); 731 732 /* Start a thread that will die immediately. */ 733 ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); 734 ASSERT_EQ(0, pthread_join(thread, &status)); 735 ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); 736 737 /* 738 * If we get here, only the spawned thread died. Let the parent know 739 * the whole process didn't die (i.e. this thread, the spawner, 740 * stayed running). 741 */ 742 exit(42); 743 } 744 745 TEST(KILL_thread) 746 { 747 int status; 748 pid_t child_pid; 749 750 child_pid = fork(); 751 ASSERT_LE(0, child_pid); 752 if (child_pid == 0) { 753 kill_thread_or_group(_metadata, false); 754 _exit(38); 755 } 756 757 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 758 759 /* If only the thread was killed, we'll see exit 42. */ 760 ASSERT_TRUE(WIFEXITED(status)); 761 ASSERT_EQ(42, WEXITSTATUS(status)); 762 } 763 764 TEST(KILL_process) 765 { 766 int status; 767 pid_t child_pid; 768 769 child_pid = fork(); 770 ASSERT_LE(0, child_pid); 771 if (child_pid == 0) { 772 kill_thread_or_group(_metadata, true); 773 _exit(38); 774 } 775 776 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 777 778 /* If the entire process was killed, we'll see SIGSYS. */ 779 ASSERT_TRUE(WIFSIGNALED(status)); 780 ASSERT_EQ(SIGSYS, WTERMSIG(status)); 781 } 782 783 /* TODO(wad) add 64-bit versus 32-bit arg tests. */ 784 TEST(arg_out_of_range) 785 { 786 struct sock_filter filter[] = { 787 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), 788 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 789 }; 790 struct sock_fprog prog = { 791 .len = (unsigned short)ARRAY_SIZE(filter), 792 .filter = filter, 793 }; 794 long ret; 795 796 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 797 ASSERT_EQ(0, ret); 798 799 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); 800 EXPECT_EQ(-1, ret); 801 EXPECT_EQ(EINVAL, errno); 802 } 803 804 #define ERRNO_FILTER(name, errno) \ 805 struct sock_filter _read_filter_##name[] = { \ 806 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ 807 offsetof(struct seccomp_data, nr)), \ 808 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ 809 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ 810 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ 811 }; \ 812 struct sock_fprog prog_##name = { \ 813 .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ 814 .filter = _read_filter_##name, \ 815 } 816 817 /* Make sure basic errno values are correctly passed through a filter. */ 818 TEST(ERRNO_valid) 819 { 820 ERRNO_FILTER(valid, E2BIG); 821 long ret; 822 pid_t parent = getppid(); 823 824 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 825 ASSERT_EQ(0, ret); 826 827 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); 828 ASSERT_EQ(0, ret); 829 830 EXPECT_EQ(parent, syscall(__NR_getppid)); 831 EXPECT_EQ(-1, read(0, NULL, 0)); 832 EXPECT_EQ(E2BIG, errno); 833 } 834 835 /* Make sure an errno of zero is correctly handled by the arch code. */ 836 TEST(ERRNO_zero) 837 { 838 ERRNO_FILTER(zero, 0); 839 long ret; 840 pid_t parent = getppid(); 841 842 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 843 ASSERT_EQ(0, ret); 844 845 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); 846 ASSERT_EQ(0, ret); 847 848 EXPECT_EQ(parent, syscall(__NR_getppid)); 849 /* "errno" of 0 is ok. */ 850 EXPECT_EQ(0, read(0, NULL, 0)); 851 } 852 853 /* 854 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. 855 * This tests that the errno value gets capped correctly, fixed by 856 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). 857 */ 858 TEST(ERRNO_capped) 859 { 860 ERRNO_FILTER(capped, 4096); 861 long ret; 862 pid_t parent = getppid(); 863 864 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 865 ASSERT_EQ(0, ret); 866 867 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); 868 ASSERT_EQ(0, ret); 869 870 EXPECT_EQ(parent, syscall(__NR_getppid)); 871 EXPECT_EQ(-1, read(0, NULL, 0)); 872 EXPECT_EQ(4095, errno); 873 } 874 875 /* 876 * Filters are processed in reverse order: last applied is executed first. 877 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the 878 * SECCOMP_RET_DATA mask results will follow the most recently applied 879 * matching filter return (and not the lowest or highest value). 880 */ 881 TEST(ERRNO_order) 882 { 883 ERRNO_FILTER(first, 11); 884 ERRNO_FILTER(second, 13); 885 ERRNO_FILTER(third, 12); 886 long ret; 887 pid_t parent = getppid(); 888 889 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 890 ASSERT_EQ(0, ret); 891 892 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); 893 ASSERT_EQ(0, ret); 894 895 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); 896 ASSERT_EQ(0, ret); 897 898 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); 899 ASSERT_EQ(0, ret); 900 901 EXPECT_EQ(parent, syscall(__NR_getppid)); 902 EXPECT_EQ(-1, read(0, NULL, 0)); 903 EXPECT_EQ(12, errno); 904 } 905 906 FIXTURE_DATA(TRAP) { 907 struct sock_fprog prog; 908 }; 909 910 FIXTURE_SETUP(TRAP) 911 { 912 struct sock_filter filter[] = { 913 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 914 offsetof(struct seccomp_data, nr)), 915 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 916 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 917 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 918 }; 919 920 memset(&self->prog, 0, sizeof(self->prog)); 921 self->prog.filter = malloc(sizeof(filter)); 922 ASSERT_NE(NULL, self->prog.filter); 923 memcpy(self->prog.filter, filter, sizeof(filter)); 924 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 925 } 926 927 FIXTURE_TEARDOWN(TRAP) 928 { 929 if (self->prog.filter) 930 free(self->prog.filter); 931 } 932 933 TEST_F_SIGNAL(TRAP, dfl, SIGSYS) 934 { 935 long ret; 936 937 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 938 ASSERT_EQ(0, ret); 939 940 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 941 ASSERT_EQ(0, ret); 942 syscall(__NR_getpid); 943 } 944 945 /* Ensure that SIGSYS overrides SIG_IGN */ 946 TEST_F_SIGNAL(TRAP, ign, SIGSYS) 947 { 948 long ret; 949 950 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 951 ASSERT_EQ(0, ret); 952 953 signal(SIGSYS, SIG_IGN); 954 955 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 956 ASSERT_EQ(0, ret); 957 syscall(__NR_getpid); 958 } 959 960 static siginfo_t TRAP_info; 961 static volatile int TRAP_nr; 962 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 963 { 964 memcpy(&TRAP_info, info, sizeof(TRAP_info)); 965 TRAP_nr = nr; 966 } 967 968 TEST_F(TRAP, handler) 969 { 970 int ret, test; 971 struct sigaction act; 972 sigset_t mask; 973 974 memset(&act, 0, sizeof(act)); 975 sigemptyset(&mask); 976 sigaddset(&mask, SIGSYS); 977 978 act.sa_sigaction = &TRAP_action; 979 act.sa_flags = SA_SIGINFO; 980 ret = sigaction(SIGSYS, &act, NULL); 981 ASSERT_EQ(0, ret) { 982 TH_LOG("sigaction failed"); 983 } 984 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); 985 ASSERT_EQ(0, ret) { 986 TH_LOG("sigprocmask failed"); 987 } 988 989 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 990 ASSERT_EQ(0, ret); 991 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); 992 ASSERT_EQ(0, ret); 993 TRAP_nr = 0; 994 memset(&TRAP_info, 0, sizeof(TRAP_info)); 995 /* Expect the registers to be rolled back. (nr = error) may vary 996 * based on arch. */ 997 ret = syscall(__NR_getpid); 998 /* Silence gcc warning about volatile. */ 999 test = TRAP_nr; 1000 EXPECT_EQ(SIGSYS, test); 1001 struct local_sigsys { 1002 void *_call_addr; /* calling user insn */ 1003 int _syscall; /* triggering system call number */ 1004 unsigned int _arch; /* AUDIT_ARCH_* of syscall */ 1005 } *sigsys = (struct local_sigsys *) 1006 #ifdef si_syscall 1007 &(TRAP_info.si_call_addr); 1008 #else 1009 &TRAP_info.si_pid; 1010 #endif 1011 EXPECT_EQ(__NR_getpid, sigsys->_syscall); 1012 /* Make sure arch is non-zero. */ 1013 EXPECT_NE(0, sigsys->_arch); 1014 EXPECT_NE(0, (unsigned long)sigsys->_call_addr); 1015 } 1016 1017 FIXTURE_DATA(precedence) { 1018 struct sock_fprog allow; 1019 struct sock_fprog log; 1020 struct sock_fprog trace; 1021 struct sock_fprog error; 1022 struct sock_fprog trap; 1023 struct sock_fprog kill; 1024 }; 1025 1026 FIXTURE_SETUP(precedence) 1027 { 1028 struct sock_filter allow_insns[] = { 1029 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1030 }; 1031 struct sock_filter log_insns[] = { 1032 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1033 offsetof(struct seccomp_data, nr)), 1034 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1035 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1036 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), 1037 }; 1038 struct sock_filter trace_insns[] = { 1039 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1040 offsetof(struct seccomp_data, nr)), 1041 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1042 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1043 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), 1044 }; 1045 struct sock_filter error_insns[] = { 1046 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1047 offsetof(struct seccomp_data, nr)), 1048 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1049 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1050 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), 1051 }; 1052 struct sock_filter trap_insns[] = { 1053 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1054 offsetof(struct seccomp_data, nr)), 1055 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1056 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1057 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), 1058 }; 1059 struct sock_filter kill_insns[] = { 1060 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1061 offsetof(struct seccomp_data, nr)), 1062 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), 1063 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1064 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1065 }; 1066 1067 memset(self, 0, sizeof(*self)); 1068 #define FILTER_ALLOC(_x) \ 1069 self->_x.filter = malloc(sizeof(_x##_insns)); \ 1070 ASSERT_NE(NULL, self->_x.filter); \ 1071 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ 1072 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) 1073 FILTER_ALLOC(allow); 1074 FILTER_ALLOC(log); 1075 FILTER_ALLOC(trace); 1076 FILTER_ALLOC(error); 1077 FILTER_ALLOC(trap); 1078 FILTER_ALLOC(kill); 1079 } 1080 1081 FIXTURE_TEARDOWN(precedence) 1082 { 1083 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) 1084 FILTER_FREE(allow); 1085 FILTER_FREE(log); 1086 FILTER_FREE(trace); 1087 FILTER_FREE(error); 1088 FILTER_FREE(trap); 1089 FILTER_FREE(kill); 1090 } 1091 1092 TEST_F(precedence, allow_ok) 1093 { 1094 pid_t parent, res = 0; 1095 long ret; 1096 1097 parent = getppid(); 1098 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1099 ASSERT_EQ(0, ret); 1100 1101 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1102 ASSERT_EQ(0, ret); 1103 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1104 ASSERT_EQ(0, ret); 1105 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1106 ASSERT_EQ(0, ret); 1107 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1108 ASSERT_EQ(0, ret); 1109 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1110 ASSERT_EQ(0, ret); 1111 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1112 ASSERT_EQ(0, ret); 1113 /* Should work just fine. */ 1114 res = syscall(__NR_getppid); 1115 EXPECT_EQ(parent, res); 1116 } 1117 1118 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) 1119 { 1120 pid_t parent, res = 0; 1121 long ret; 1122 1123 parent = getppid(); 1124 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1125 ASSERT_EQ(0, ret); 1126 1127 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1128 ASSERT_EQ(0, ret); 1129 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1130 ASSERT_EQ(0, ret); 1131 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1132 ASSERT_EQ(0, ret); 1133 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1134 ASSERT_EQ(0, ret); 1135 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1136 ASSERT_EQ(0, ret); 1137 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1138 ASSERT_EQ(0, ret); 1139 /* Should work just fine. */ 1140 res = syscall(__NR_getppid); 1141 EXPECT_EQ(parent, res); 1142 /* getpid() should never return. */ 1143 res = syscall(__NR_getpid); 1144 EXPECT_EQ(0, res); 1145 } 1146 1147 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) 1148 { 1149 pid_t parent; 1150 long ret; 1151 1152 parent = getppid(); 1153 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1154 ASSERT_EQ(0, ret); 1155 1156 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1157 ASSERT_EQ(0, ret); 1158 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); 1159 ASSERT_EQ(0, ret); 1160 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1161 ASSERT_EQ(0, ret); 1162 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1163 ASSERT_EQ(0, ret); 1164 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1165 ASSERT_EQ(0, ret); 1166 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1167 ASSERT_EQ(0, ret); 1168 /* Should work just fine. */ 1169 EXPECT_EQ(parent, syscall(__NR_getppid)); 1170 /* getpid() should never return. */ 1171 EXPECT_EQ(0, syscall(__NR_getpid)); 1172 } 1173 1174 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) 1175 { 1176 pid_t parent; 1177 long ret; 1178 1179 parent = getppid(); 1180 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1181 ASSERT_EQ(0, ret); 1182 1183 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1184 ASSERT_EQ(0, ret); 1185 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1186 ASSERT_EQ(0, ret); 1187 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1188 ASSERT_EQ(0, ret); 1189 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1190 ASSERT_EQ(0, ret); 1191 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1192 ASSERT_EQ(0, ret); 1193 /* Should work just fine. */ 1194 EXPECT_EQ(parent, syscall(__NR_getppid)); 1195 /* getpid() should never return. */ 1196 EXPECT_EQ(0, syscall(__NR_getpid)); 1197 } 1198 1199 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) 1200 { 1201 pid_t parent; 1202 long ret; 1203 1204 parent = getppid(); 1205 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1206 ASSERT_EQ(0, ret); 1207 1208 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1209 ASSERT_EQ(0, ret); 1210 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); 1211 ASSERT_EQ(0, ret); 1212 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1213 ASSERT_EQ(0, ret); 1214 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1215 ASSERT_EQ(0, ret); 1216 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1217 ASSERT_EQ(0, ret); 1218 /* Should work just fine. */ 1219 EXPECT_EQ(parent, syscall(__NR_getppid)); 1220 /* getpid() should never return. */ 1221 EXPECT_EQ(0, syscall(__NR_getpid)); 1222 } 1223 1224 TEST_F(precedence, errno_is_third) 1225 { 1226 pid_t parent; 1227 long ret; 1228 1229 parent = getppid(); 1230 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1231 ASSERT_EQ(0, ret); 1232 1233 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1234 ASSERT_EQ(0, ret); 1235 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1236 ASSERT_EQ(0, ret); 1237 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1238 ASSERT_EQ(0, ret); 1239 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1240 ASSERT_EQ(0, ret); 1241 /* Should work just fine. */ 1242 EXPECT_EQ(parent, syscall(__NR_getppid)); 1243 EXPECT_EQ(0, syscall(__NR_getpid)); 1244 } 1245 1246 TEST_F(precedence, errno_is_third_in_any_order) 1247 { 1248 pid_t parent; 1249 long ret; 1250 1251 parent = getppid(); 1252 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1253 ASSERT_EQ(0, ret); 1254 1255 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1256 ASSERT_EQ(0, ret); 1257 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); 1258 ASSERT_EQ(0, ret); 1259 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1260 ASSERT_EQ(0, ret); 1261 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1262 ASSERT_EQ(0, ret); 1263 /* Should work just fine. */ 1264 EXPECT_EQ(parent, syscall(__NR_getppid)); 1265 EXPECT_EQ(0, syscall(__NR_getpid)); 1266 } 1267 1268 TEST_F(precedence, trace_is_fourth) 1269 { 1270 pid_t parent; 1271 long ret; 1272 1273 parent = getppid(); 1274 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1275 ASSERT_EQ(0, ret); 1276 1277 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1278 ASSERT_EQ(0, ret); 1279 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1280 ASSERT_EQ(0, ret); 1281 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1282 ASSERT_EQ(0, ret); 1283 /* Should work just fine. */ 1284 EXPECT_EQ(parent, syscall(__NR_getppid)); 1285 /* No ptracer */ 1286 EXPECT_EQ(-1, syscall(__NR_getpid)); 1287 } 1288 1289 TEST_F(precedence, trace_is_fourth_in_any_order) 1290 { 1291 pid_t parent; 1292 long ret; 1293 1294 parent = getppid(); 1295 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1296 ASSERT_EQ(0, ret); 1297 1298 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); 1299 ASSERT_EQ(0, ret); 1300 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1301 ASSERT_EQ(0, ret); 1302 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1303 ASSERT_EQ(0, ret); 1304 /* Should work just fine. */ 1305 EXPECT_EQ(parent, syscall(__NR_getppid)); 1306 /* No ptracer */ 1307 EXPECT_EQ(-1, syscall(__NR_getpid)); 1308 } 1309 1310 TEST_F(precedence, log_is_fifth) 1311 { 1312 pid_t mypid, parent; 1313 long ret; 1314 1315 mypid = getpid(); 1316 parent = getppid(); 1317 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1318 ASSERT_EQ(0, ret); 1319 1320 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1321 ASSERT_EQ(0, ret); 1322 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1323 ASSERT_EQ(0, ret); 1324 /* Should work just fine. */ 1325 EXPECT_EQ(parent, syscall(__NR_getppid)); 1326 /* Should also work just fine */ 1327 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1328 } 1329 1330 TEST_F(precedence, log_is_fifth_in_any_order) 1331 { 1332 pid_t mypid, parent; 1333 long ret; 1334 1335 mypid = getpid(); 1336 parent = getppid(); 1337 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1338 ASSERT_EQ(0, ret); 1339 1340 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); 1341 ASSERT_EQ(0, ret); 1342 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); 1343 ASSERT_EQ(0, ret); 1344 /* Should work just fine. */ 1345 EXPECT_EQ(parent, syscall(__NR_getppid)); 1346 /* Should also work just fine */ 1347 EXPECT_EQ(mypid, syscall(__NR_getpid)); 1348 } 1349 1350 #ifndef PTRACE_O_TRACESECCOMP 1351 #define PTRACE_O_TRACESECCOMP 0x00000080 1352 #endif 1353 1354 /* Catch the Ubuntu 12.04 value error. */ 1355 #if PTRACE_EVENT_SECCOMP != 7 1356 #undef PTRACE_EVENT_SECCOMP 1357 #endif 1358 1359 #ifndef PTRACE_EVENT_SECCOMP 1360 #define PTRACE_EVENT_SECCOMP 7 1361 #endif 1362 1363 #define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) 1364 bool tracer_running; 1365 void tracer_stop(int sig) 1366 { 1367 tracer_running = false; 1368 } 1369 1370 typedef void tracer_func_t(struct __test_metadata *_metadata, 1371 pid_t tracee, int status, void *args); 1372 1373 void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, 1374 tracer_func_t tracer_func, void *args, bool ptrace_syscall) 1375 { 1376 int ret = -1; 1377 struct sigaction action = { 1378 .sa_handler = tracer_stop, 1379 }; 1380 1381 /* Allow external shutdown. */ 1382 tracer_running = true; 1383 ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); 1384 1385 errno = 0; 1386 while (ret == -1 && errno != EINVAL) 1387 ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); 1388 ASSERT_EQ(0, ret) { 1389 kill(tracee, SIGKILL); 1390 } 1391 /* Wait for attach stop */ 1392 wait(NULL); 1393 1394 ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ? 1395 PTRACE_O_TRACESYSGOOD : 1396 PTRACE_O_TRACESECCOMP); 1397 ASSERT_EQ(0, ret) { 1398 TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); 1399 kill(tracee, SIGKILL); 1400 } 1401 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1402 tracee, NULL, 0); 1403 ASSERT_EQ(0, ret); 1404 1405 /* Unblock the tracee */ 1406 ASSERT_EQ(1, write(fd, "A", 1)); 1407 ASSERT_EQ(0, close(fd)); 1408 1409 /* Run until we're shut down. Must assert to stop execution. */ 1410 while (tracer_running) { 1411 int status; 1412 1413 if (wait(&status) != tracee) 1414 continue; 1415 if (WIFSIGNALED(status) || WIFEXITED(status)) 1416 /* Child is dead. Time to go. */ 1417 return; 1418 1419 /* Check if this is a seccomp event. */ 1420 ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); 1421 1422 tracer_func(_metadata, tracee, status, args); 1423 1424 ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT, 1425 tracee, NULL, 0); 1426 ASSERT_EQ(0, ret); 1427 } 1428 /* Directly report the status of our test harness results. */ 1429 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); 1430 } 1431 1432 /* Common tracer setup/teardown functions. */ 1433 void cont_handler(int num) 1434 { } 1435 pid_t setup_trace_fixture(struct __test_metadata *_metadata, 1436 tracer_func_t func, void *args, bool ptrace_syscall) 1437 { 1438 char sync; 1439 int pipefd[2]; 1440 pid_t tracer_pid; 1441 pid_t tracee = getpid(); 1442 1443 /* Setup a pipe for clean synchronization. */ 1444 ASSERT_EQ(0, pipe(pipefd)); 1445 1446 /* Fork a child which we'll promote to tracer */ 1447 tracer_pid = fork(); 1448 ASSERT_LE(0, tracer_pid); 1449 signal(SIGALRM, cont_handler); 1450 if (tracer_pid == 0) { 1451 close(pipefd[0]); 1452 start_tracer(_metadata, pipefd[1], tracee, func, args, 1453 ptrace_syscall); 1454 syscall(__NR_exit, 0); 1455 } 1456 close(pipefd[1]); 1457 prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); 1458 read(pipefd[0], &sync, 1); 1459 close(pipefd[0]); 1460 1461 return tracer_pid; 1462 } 1463 void teardown_trace_fixture(struct __test_metadata *_metadata, 1464 pid_t tracer) 1465 { 1466 if (tracer) { 1467 int status; 1468 /* 1469 * Extract the exit code from the other process and 1470 * adopt it for ourselves in case its asserts failed. 1471 */ 1472 ASSERT_EQ(0, kill(tracer, SIGUSR1)); 1473 ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); 1474 if (WEXITSTATUS(status)) 1475 _metadata->passed = 0; 1476 } 1477 } 1478 1479 /* "poke" tracer arguments and function. */ 1480 struct tracer_args_poke_t { 1481 unsigned long poke_addr; 1482 }; 1483 1484 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, 1485 void *args) 1486 { 1487 int ret; 1488 unsigned long msg; 1489 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; 1490 1491 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1492 EXPECT_EQ(0, ret); 1493 /* If this fails, don't try to recover. */ 1494 ASSERT_EQ(0x1001, msg) { 1495 kill(tracee, SIGKILL); 1496 } 1497 /* 1498 * Poke in the message. 1499 * Registers are not touched to try to keep this relatively arch 1500 * agnostic. 1501 */ 1502 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); 1503 EXPECT_EQ(0, ret); 1504 } 1505 1506 FIXTURE_DATA(TRACE_poke) { 1507 struct sock_fprog prog; 1508 pid_t tracer; 1509 long poked; 1510 struct tracer_args_poke_t tracer_args; 1511 }; 1512 1513 FIXTURE_SETUP(TRACE_poke) 1514 { 1515 struct sock_filter filter[] = { 1516 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1517 offsetof(struct seccomp_data, nr)), 1518 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 1519 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), 1520 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1521 }; 1522 1523 self->poked = 0; 1524 memset(&self->prog, 0, sizeof(self->prog)); 1525 self->prog.filter = malloc(sizeof(filter)); 1526 ASSERT_NE(NULL, self->prog.filter); 1527 memcpy(self->prog.filter, filter, sizeof(filter)); 1528 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1529 1530 /* Set up tracer args. */ 1531 self->tracer_args.poke_addr = (unsigned long)&self->poked; 1532 1533 /* Launch tracer. */ 1534 self->tracer = setup_trace_fixture(_metadata, tracer_poke, 1535 &self->tracer_args, false); 1536 } 1537 1538 FIXTURE_TEARDOWN(TRACE_poke) 1539 { 1540 teardown_trace_fixture(_metadata, self->tracer); 1541 if (self->prog.filter) 1542 free(self->prog.filter); 1543 } 1544 1545 TEST_F(TRACE_poke, read_has_side_effects) 1546 { 1547 ssize_t ret; 1548 1549 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1550 ASSERT_EQ(0, ret); 1551 1552 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1553 ASSERT_EQ(0, ret); 1554 1555 EXPECT_EQ(0, self->poked); 1556 ret = read(-1, NULL, 0); 1557 EXPECT_EQ(-1, ret); 1558 EXPECT_EQ(0x1001, self->poked); 1559 } 1560 1561 TEST_F(TRACE_poke, getpid_runs_normally) 1562 { 1563 long ret; 1564 1565 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1566 ASSERT_EQ(0, ret); 1567 1568 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1569 ASSERT_EQ(0, ret); 1570 1571 EXPECT_EQ(0, self->poked); 1572 EXPECT_NE(0, syscall(__NR_getpid)); 1573 EXPECT_EQ(0, self->poked); 1574 } 1575 1576 #if defined(__x86_64__) 1577 # define ARCH_REGS struct user_regs_struct 1578 # define SYSCALL_NUM orig_rax 1579 # define SYSCALL_RET rax 1580 #elif defined(__i386__) 1581 # define ARCH_REGS struct user_regs_struct 1582 # define SYSCALL_NUM orig_eax 1583 # define SYSCALL_RET eax 1584 #elif defined(__arm__) 1585 # define ARCH_REGS struct pt_regs 1586 # define SYSCALL_NUM ARM_r7 1587 # define SYSCALL_RET ARM_r0 1588 #elif defined(__aarch64__) 1589 # define ARCH_REGS struct user_pt_regs 1590 # define SYSCALL_NUM regs[8] 1591 # define SYSCALL_RET regs[0] 1592 #elif defined(__riscv) && __riscv_xlen == 64 1593 # define ARCH_REGS struct user_regs_struct 1594 # define SYSCALL_NUM a7 1595 # define SYSCALL_RET a0 1596 #elif defined(__hppa__) 1597 # define ARCH_REGS struct user_regs_struct 1598 # define SYSCALL_NUM gr[20] 1599 # define SYSCALL_RET gr[28] 1600 #elif defined(__powerpc__) 1601 # define ARCH_REGS struct pt_regs 1602 # define SYSCALL_NUM gpr[0] 1603 # define SYSCALL_RET gpr[3] 1604 #elif defined(__s390__) 1605 # define ARCH_REGS s390_regs 1606 # define SYSCALL_NUM gprs[2] 1607 # define SYSCALL_RET gprs[2] 1608 #elif defined(__mips__) 1609 # define ARCH_REGS struct pt_regs 1610 # define SYSCALL_NUM regs[2] 1611 # define SYSCALL_SYSCALL_NUM regs[4] 1612 # define SYSCALL_RET regs[2] 1613 # define SYSCALL_NUM_RET_SHARE_REG 1614 #else 1615 # error "Do not know how to find your architecture's registers and syscalls" 1616 #endif 1617 1618 /* When the syscall return can't be changed, stub out the tests for it. */ 1619 #ifdef SYSCALL_NUM_RET_SHARE_REG 1620 # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) 1621 #else 1622 # define EXPECT_SYSCALL_RETURN(val, action) \ 1623 do { \ 1624 errno = 0; \ 1625 if (val < 0) { \ 1626 EXPECT_EQ(-1, action); \ 1627 EXPECT_EQ(-(val), errno); \ 1628 } else { \ 1629 EXPECT_EQ(val, action); \ 1630 } \ 1631 } while (0) 1632 #endif 1633 1634 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for 1635 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 1636 */ 1637 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) 1638 #define HAVE_GETREGS 1639 #endif 1640 1641 /* Architecture-specific syscall fetching routine. */ 1642 int get_syscall(struct __test_metadata *_metadata, pid_t tracee) 1643 { 1644 ARCH_REGS regs; 1645 #ifdef HAVE_GETREGS 1646 EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { 1647 TH_LOG("PTRACE_GETREGS failed"); 1648 return -1; 1649 } 1650 #else 1651 struct iovec iov; 1652 1653 iov.iov_base = ®s; 1654 iov.iov_len = sizeof(regs); 1655 EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { 1656 TH_LOG("PTRACE_GETREGSET failed"); 1657 return -1; 1658 } 1659 #endif 1660 1661 #if defined(__mips__) 1662 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1663 return regs.SYSCALL_SYSCALL_NUM; 1664 #endif 1665 return regs.SYSCALL_NUM; 1666 } 1667 1668 /* Architecture-specific syscall changing routine. */ 1669 void change_syscall(struct __test_metadata *_metadata, 1670 pid_t tracee, int syscall, int result) 1671 { 1672 int ret; 1673 ARCH_REGS regs; 1674 #ifdef HAVE_GETREGS 1675 ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); 1676 #else 1677 struct iovec iov; 1678 iov.iov_base = ®s; 1679 iov.iov_len = sizeof(regs); 1680 ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); 1681 #endif 1682 EXPECT_EQ(0, ret) {} 1683 1684 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ 1685 defined(__s390__) || defined(__hppa__) || defined(__riscv) 1686 { 1687 regs.SYSCALL_NUM = syscall; 1688 } 1689 #elif defined(__mips__) 1690 { 1691 if (regs.SYSCALL_NUM == __NR_O32_Linux) 1692 regs.SYSCALL_SYSCALL_NUM = syscall; 1693 else 1694 regs.SYSCALL_NUM = syscall; 1695 } 1696 1697 #elif defined(__arm__) 1698 # ifndef PTRACE_SET_SYSCALL 1699 # define PTRACE_SET_SYSCALL 23 1700 # endif 1701 { 1702 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); 1703 EXPECT_EQ(0, ret); 1704 } 1705 1706 #elif defined(__aarch64__) 1707 # ifndef NT_ARM_SYSTEM_CALL 1708 # define NT_ARM_SYSTEM_CALL 0x404 1709 # endif 1710 { 1711 iov.iov_base = &syscall; 1712 iov.iov_len = sizeof(syscall); 1713 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, 1714 &iov); 1715 EXPECT_EQ(0, ret); 1716 } 1717 1718 #else 1719 ASSERT_EQ(1, 0) { 1720 TH_LOG("How is the syscall changed on this architecture?"); 1721 } 1722 #endif 1723 1724 /* If syscall is skipped, change return value. */ 1725 if (syscall == -1) 1726 #ifdef SYSCALL_NUM_RET_SHARE_REG 1727 TH_LOG("Can't modify syscall return on this architecture"); 1728 #else 1729 regs.SYSCALL_RET = result; 1730 #endif 1731 1732 #ifdef HAVE_GETREGS 1733 ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); 1734 #else 1735 iov.iov_base = ®s; 1736 iov.iov_len = sizeof(regs); 1737 ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); 1738 #endif 1739 EXPECT_EQ(0, ret); 1740 } 1741 1742 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, 1743 int status, void *args) 1744 { 1745 int ret; 1746 unsigned long msg; 1747 1748 /* Make sure we got the right message. */ 1749 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1750 EXPECT_EQ(0, ret); 1751 1752 /* Validate and take action on expected syscalls. */ 1753 switch (msg) { 1754 case 0x1002: 1755 /* change getpid to getppid. */ 1756 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); 1757 change_syscall(_metadata, tracee, __NR_getppid, 0); 1758 break; 1759 case 0x1003: 1760 /* skip gettid with valid return code. */ 1761 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); 1762 change_syscall(_metadata, tracee, -1, 45000); 1763 break; 1764 case 0x1004: 1765 /* skip openat with error. */ 1766 EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); 1767 change_syscall(_metadata, tracee, -1, -ESRCH); 1768 break; 1769 case 0x1005: 1770 /* do nothing (allow getppid) */ 1771 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); 1772 break; 1773 default: 1774 EXPECT_EQ(0, msg) { 1775 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); 1776 kill(tracee, SIGKILL); 1777 } 1778 } 1779 1780 } 1781 1782 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, 1783 int status, void *args) 1784 { 1785 int ret, nr; 1786 unsigned long msg; 1787 static bool entry; 1788 1789 /* 1790 * The traditional way to tell PTRACE_SYSCALL entry/exit 1791 * is by counting. 1792 */ 1793 entry = !entry; 1794 1795 /* Make sure we got an appropriate message. */ 1796 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); 1797 EXPECT_EQ(0, ret); 1798 EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY 1799 : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); 1800 1801 if (!entry) 1802 return; 1803 1804 nr = get_syscall(_metadata, tracee); 1805 1806 if (nr == __NR_getpid) 1807 change_syscall(_metadata, tracee, __NR_getppid, 0); 1808 if (nr == __NR_gettid) 1809 change_syscall(_metadata, tracee, -1, 45000); 1810 if (nr == __NR_openat) 1811 change_syscall(_metadata, tracee, -1, -ESRCH); 1812 } 1813 1814 FIXTURE_DATA(TRACE_syscall) { 1815 struct sock_fprog prog; 1816 pid_t tracer, mytid, mypid, parent; 1817 }; 1818 1819 FIXTURE_SETUP(TRACE_syscall) 1820 { 1821 struct sock_filter filter[] = { 1822 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1823 offsetof(struct seccomp_data, nr)), 1824 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 1825 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), 1826 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), 1827 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), 1828 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), 1829 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), 1830 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1831 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), 1832 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1833 }; 1834 1835 memset(&self->prog, 0, sizeof(self->prog)); 1836 self->prog.filter = malloc(sizeof(filter)); 1837 ASSERT_NE(NULL, self->prog.filter); 1838 memcpy(self->prog.filter, filter, sizeof(filter)); 1839 self->prog.len = (unsigned short)ARRAY_SIZE(filter); 1840 1841 /* Prepare some testable syscall results. */ 1842 self->mytid = syscall(__NR_gettid); 1843 ASSERT_GT(self->mytid, 0); 1844 ASSERT_NE(self->mytid, 1) { 1845 TH_LOG("Running this test as init is not supported. :)"); 1846 } 1847 1848 self->mypid = getpid(); 1849 ASSERT_GT(self->mypid, 0); 1850 ASSERT_EQ(self->mytid, self->mypid); 1851 1852 self->parent = getppid(); 1853 ASSERT_GT(self->parent, 0); 1854 ASSERT_NE(self->parent, self->mypid); 1855 1856 /* Launch tracer. */ 1857 self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, 1858 false); 1859 } 1860 1861 FIXTURE_TEARDOWN(TRACE_syscall) 1862 { 1863 teardown_trace_fixture(_metadata, self->tracer); 1864 if (self->prog.filter) 1865 free(self->prog.filter); 1866 } 1867 1868 TEST_F(TRACE_syscall, ptrace_syscall_redirected) 1869 { 1870 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 1871 teardown_trace_fixture(_metadata, self->tracer); 1872 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 1873 true); 1874 1875 /* Tracer will redirect getpid to getppid. */ 1876 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 1877 } 1878 1879 TEST_F(TRACE_syscall, ptrace_syscall_errno) 1880 { 1881 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 1882 teardown_trace_fixture(_metadata, self->tracer); 1883 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 1884 true); 1885 1886 /* Tracer should skip the open syscall, resulting in ESRCH. */ 1887 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 1888 } 1889 1890 TEST_F(TRACE_syscall, ptrace_syscall_faked) 1891 { 1892 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 1893 teardown_trace_fixture(_metadata, self->tracer); 1894 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 1895 true); 1896 1897 /* Tracer should skip the gettid syscall, resulting fake pid. */ 1898 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 1899 } 1900 1901 TEST_F(TRACE_syscall, syscall_allowed) 1902 { 1903 long ret; 1904 1905 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1906 ASSERT_EQ(0, ret); 1907 1908 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1909 ASSERT_EQ(0, ret); 1910 1911 /* getppid works as expected (no changes). */ 1912 EXPECT_EQ(self->parent, syscall(__NR_getppid)); 1913 EXPECT_NE(self->mypid, syscall(__NR_getppid)); 1914 } 1915 1916 TEST_F(TRACE_syscall, syscall_redirected) 1917 { 1918 long ret; 1919 1920 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1921 ASSERT_EQ(0, ret); 1922 1923 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1924 ASSERT_EQ(0, ret); 1925 1926 /* getpid has been redirected to getppid as expected. */ 1927 EXPECT_EQ(self->parent, syscall(__NR_getpid)); 1928 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 1929 } 1930 1931 TEST_F(TRACE_syscall, syscall_errno) 1932 { 1933 long ret; 1934 1935 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1936 ASSERT_EQ(0, ret); 1937 1938 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1939 ASSERT_EQ(0, ret); 1940 1941 /* openat has been skipped and an errno return. */ 1942 EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); 1943 } 1944 1945 TEST_F(TRACE_syscall, syscall_faked) 1946 { 1947 long ret; 1948 1949 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1950 ASSERT_EQ(0, ret); 1951 1952 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1953 ASSERT_EQ(0, ret); 1954 1955 /* gettid has been skipped and an altered return value stored. */ 1956 EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); 1957 } 1958 1959 TEST_F(TRACE_syscall, skip_after_RET_TRACE) 1960 { 1961 struct sock_filter filter[] = { 1962 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1963 offsetof(struct seccomp_data, nr)), 1964 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1965 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 1966 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1967 }; 1968 struct sock_fprog prog = { 1969 .len = (unsigned short)ARRAY_SIZE(filter), 1970 .filter = filter, 1971 }; 1972 long ret; 1973 1974 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 1975 ASSERT_EQ(0, ret); 1976 1977 /* Install fixture filter. */ 1978 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 1979 ASSERT_EQ(0, ret); 1980 1981 /* Install "errno on getppid" filter. */ 1982 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 1983 ASSERT_EQ(0, ret); 1984 1985 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 1986 errno = 0; 1987 EXPECT_EQ(-1, syscall(__NR_getpid)); 1988 EXPECT_EQ(EPERM, errno); 1989 } 1990 1991 TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) 1992 { 1993 struct sock_filter filter[] = { 1994 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 1995 offsetof(struct seccomp_data, nr)), 1996 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 1997 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 1998 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 1999 }; 2000 struct sock_fprog prog = { 2001 .len = (unsigned short)ARRAY_SIZE(filter), 2002 .filter = filter, 2003 }; 2004 long ret; 2005 2006 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2007 ASSERT_EQ(0, ret); 2008 2009 /* Install fixture filter. */ 2010 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); 2011 ASSERT_EQ(0, ret); 2012 2013 /* Install "death on getppid" filter. */ 2014 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2015 ASSERT_EQ(0, ret); 2016 2017 /* Tracer will redirect getpid to getppid, and we should die. */ 2018 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2019 } 2020 2021 TEST_F(TRACE_syscall, skip_after_ptrace) 2022 { 2023 struct sock_filter filter[] = { 2024 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2025 offsetof(struct seccomp_data, nr)), 2026 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2027 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), 2028 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2029 }; 2030 struct sock_fprog prog = { 2031 .len = (unsigned short)ARRAY_SIZE(filter), 2032 .filter = filter, 2033 }; 2034 long ret; 2035 2036 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2037 teardown_trace_fixture(_metadata, self->tracer); 2038 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2039 true); 2040 2041 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2042 ASSERT_EQ(0, ret); 2043 2044 /* Install "errno on getppid" filter. */ 2045 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2046 ASSERT_EQ(0, ret); 2047 2048 /* Tracer will redirect getpid to getppid, and we should see EPERM. */ 2049 EXPECT_EQ(-1, syscall(__NR_getpid)); 2050 EXPECT_EQ(EPERM, errno); 2051 } 2052 2053 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) 2054 { 2055 struct sock_filter filter[] = { 2056 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2057 offsetof(struct seccomp_data, nr)), 2058 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), 2059 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2060 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2061 }; 2062 struct sock_fprog prog = { 2063 .len = (unsigned short)ARRAY_SIZE(filter), 2064 .filter = filter, 2065 }; 2066 long ret; 2067 2068 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ 2069 teardown_trace_fixture(_metadata, self->tracer); 2070 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, 2071 true); 2072 2073 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2074 ASSERT_EQ(0, ret); 2075 2076 /* Install "death on getppid" filter. */ 2077 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2078 ASSERT_EQ(0, ret); 2079 2080 /* Tracer will redirect getpid to getppid, and we should die. */ 2081 EXPECT_NE(self->mypid, syscall(__NR_getpid)); 2082 } 2083 2084 TEST(seccomp_syscall) 2085 { 2086 struct sock_filter filter[] = { 2087 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2088 }; 2089 struct sock_fprog prog = { 2090 .len = (unsigned short)ARRAY_SIZE(filter), 2091 .filter = filter, 2092 }; 2093 long ret; 2094 2095 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2096 ASSERT_EQ(0, ret) { 2097 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2098 } 2099 2100 /* Reject insane operation. */ 2101 ret = seccomp(-1, 0, &prog); 2102 ASSERT_NE(ENOSYS, errno) { 2103 TH_LOG("Kernel does not support seccomp syscall!"); 2104 } 2105 EXPECT_EQ(EINVAL, errno) { 2106 TH_LOG("Did not reject crazy op value!"); 2107 } 2108 2109 /* Reject strict with flags or pointer. */ 2110 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); 2111 EXPECT_EQ(EINVAL, errno) { 2112 TH_LOG("Did not reject mode strict with flags!"); 2113 } 2114 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); 2115 EXPECT_EQ(EINVAL, errno) { 2116 TH_LOG("Did not reject mode strict with uargs!"); 2117 } 2118 2119 /* Reject insane args for filter. */ 2120 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); 2121 EXPECT_EQ(EINVAL, errno) { 2122 TH_LOG("Did not reject crazy filter flags!"); 2123 } 2124 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); 2125 EXPECT_EQ(EFAULT, errno) { 2126 TH_LOG("Did not reject NULL filter!"); 2127 } 2128 2129 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2130 EXPECT_EQ(0, errno) { 2131 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", 2132 strerror(errno)); 2133 } 2134 } 2135 2136 TEST(seccomp_syscall_mode_lock) 2137 { 2138 struct sock_filter filter[] = { 2139 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2140 }; 2141 struct sock_fprog prog = { 2142 .len = (unsigned short)ARRAY_SIZE(filter), 2143 .filter = filter, 2144 }; 2145 long ret; 2146 2147 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2148 ASSERT_EQ(0, ret) { 2149 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2150 } 2151 2152 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2153 ASSERT_NE(ENOSYS, errno) { 2154 TH_LOG("Kernel does not support seccomp syscall!"); 2155 } 2156 EXPECT_EQ(0, ret) { 2157 TH_LOG("Could not install filter!"); 2158 } 2159 2160 /* Make sure neither entry point will switch to strict. */ 2161 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); 2162 EXPECT_EQ(EINVAL, errno) { 2163 TH_LOG("Switched to mode strict!"); 2164 } 2165 2166 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); 2167 EXPECT_EQ(EINVAL, errno) { 2168 TH_LOG("Switched to mode strict!"); 2169 } 2170 } 2171 2172 /* 2173 * Test detection of known and unknown filter flags. Userspace needs to be able 2174 * to check if a filter flag is supported by the current kernel and a good way 2175 * of doing that is by attempting to enter filter mode, with the flag bit in 2176 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates 2177 * that the flag is valid and EINVAL indicates that the flag is invalid. 2178 */ 2179 TEST(detect_seccomp_filter_flags) 2180 { 2181 unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, 2182 SECCOMP_FILTER_FLAG_LOG, 2183 SECCOMP_FILTER_FLAG_SPEC_ALLOW, 2184 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2185 unsigned int exclusive[] = { 2186 SECCOMP_FILTER_FLAG_TSYNC, 2187 SECCOMP_FILTER_FLAG_NEW_LISTENER }; 2188 unsigned int flag, all_flags, exclusive_mask; 2189 int i; 2190 long ret; 2191 2192 /* Test detection of individual known-good filter flags */ 2193 for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { 2194 int bits = 0; 2195 2196 flag = flags[i]; 2197 /* Make sure the flag is a single bit! */ 2198 while (flag) { 2199 if (flag & 0x1) 2200 bits ++; 2201 flag >>= 1; 2202 } 2203 ASSERT_EQ(1, bits); 2204 flag = flags[i]; 2205 2206 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2207 ASSERT_NE(ENOSYS, errno) { 2208 TH_LOG("Kernel does not support seccomp syscall!"); 2209 } 2210 EXPECT_EQ(-1, ret); 2211 EXPECT_EQ(EFAULT, errno) { 2212 TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", 2213 flag); 2214 } 2215 2216 all_flags |= flag; 2217 } 2218 2219 /* 2220 * Test detection of all known-good filter flags combined. But 2221 * for the exclusive flags we need to mask them out and try them 2222 * individually for the "all flags" testing. 2223 */ 2224 exclusive_mask = 0; 2225 for (i = 0; i < ARRAY_SIZE(exclusive); i++) 2226 exclusive_mask |= exclusive[i]; 2227 for (i = 0; i < ARRAY_SIZE(exclusive); i++) { 2228 flag = all_flags & ~exclusive_mask; 2229 flag |= exclusive[i]; 2230 2231 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2232 EXPECT_EQ(-1, ret); 2233 EXPECT_EQ(EFAULT, errno) { 2234 TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", 2235 flag); 2236 } 2237 } 2238 2239 /* Test detection of an unknown filter flags, without exclusives. */ 2240 flag = -1; 2241 flag &= ~exclusive_mask; 2242 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2243 EXPECT_EQ(-1, ret); 2244 EXPECT_EQ(EINVAL, errno) { 2245 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", 2246 flag); 2247 } 2248 2249 /* 2250 * Test detection of an unknown filter flag that may simply need to be 2251 * added to this test 2252 */ 2253 flag = flags[ARRAY_SIZE(flags) - 1] << 1; 2254 ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); 2255 EXPECT_EQ(-1, ret); 2256 EXPECT_EQ(EINVAL, errno) { 2257 TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", 2258 flag); 2259 } 2260 } 2261 2262 TEST(TSYNC_first) 2263 { 2264 struct sock_filter filter[] = { 2265 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2266 }; 2267 struct sock_fprog prog = { 2268 .len = (unsigned short)ARRAY_SIZE(filter), 2269 .filter = filter, 2270 }; 2271 long ret; 2272 2273 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); 2274 ASSERT_EQ(0, ret) { 2275 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2276 } 2277 2278 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2279 &prog); 2280 ASSERT_NE(ENOSYS, errno) { 2281 TH_LOG("Kernel does not support seccomp syscall!"); 2282 } 2283 EXPECT_EQ(0, ret) { 2284 TH_LOG("Could not install initial filter with TSYNC!"); 2285 } 2286 } 2287 2288 #define TSYNC_SIBLINGS 2 2289 struct tsync_sibling { 2290 pthread_t tid; 2291 pid_t system_tid; 2292 sem_t *started; 2293 pthread_cond_t *cond; 2294 pthread_mutex_t *mutex; 2295 int diverge; 2296 int num_waits; 2297 struct sock_fprog *prog; 2298 struct __test_metadata *metadata; 2299 }; 2300 2301 /* 2302 * To avoid joining joined threads (which is not allowed by Bionic), 2303 * make sure we both successfully join and clear the tid to skip a 2304 * later join attempt during fixture teardown. Any remaining threads 2305 * will be directly killed during teardown. 2306 */ 2307 #define PTHREAD_JOIN(tid, status) \ 2308 do { \ 2309 int _rc = pthread_join(tid, status); \ 2310 if (_rc) { \ 2311 TH_LOG("pthread_join of tid %u failed: %d\n", \ 2312 (unsigned int)tid, _rc); \ 2313 } else { \ 2314 tid = 0; \ 2315 } \ 2316 } while (0) 2317 2318 FIXTURE_DATA(TSYNC) { 2319 struct sock_fprog root_prog, apply_prog; 2320 struct tsync_sibling sibling[TSYNC_SIBLINGS]; 2321 sem_t started; 2322 pthread_cond_t cond; 2323 pthread_mutex_t mutex; 2324 int sibling_count; 2325 }; 2326 2327 FIXTURE_SETUP(TSYNC) 2328 { 2329 struct sock_filter root_filter[] = { 2330 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2331 }; 2332 struct sock_filter apply_filter[] = { 2333 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2334 offsetof(struct seccomp_data, nr)), 2335 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), 2336 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2337 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2338 }; 2339 2340 memset(&self->root_prog, 0, sizeof(self->root_prog)); 2341 memset(&self->apply_prog, 0, sizeof(self->apply_prog)); 2342 memset(&self->sibling, 0, sizeof(self->sibling)); 2343 self->root_prog.filter = malloc(sizeof(root_filter)); 2344 ASSERT_NE(NULL, self->root_prog.filter); 2345 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); 2346 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); 2347 2348 self->apply_prog.filter = malloc(sizeof(apply_filter)); 2349 ASSERT_NE(NULL, self->apply_prog.filter); 2350 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); 2351 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); 2352 2353 self->sibling_count = 0; 2354 pthread_mutex_init(&self->mutex, NULL); 2355 pthread_cond_init(&self->cond, NULL); 2356 sem_init(&self->started, 0, 0); 2357 self->sibling[0].tid = 0; 2358 self->sibling[0].cond = &self->cond; 2359 self->sibling[0].started = &self->started; 2360 self->sibling[0].mutex = &self->mutex; 2361 self->sibling[0].diverge = 0; 2362 self->sibling[0].num_waits = 1; 2363 self->sibling[0].prog = &self->root_prog; 2364 self->sibling[0].metadata = _metadata; 2365 self->sibling[1].tid = 0; 2366 self->sibling[1].cond = &self->cond; 2367 self->sibling[1].started = &self->started; 2368 self->sibling[1].mutex = &self->mutex; 2369 self->sibling[1].diverge = 0; 2370 self->sibling[1].prog = &self->root_prog; 2371 self->sibling[1].num_waits = 1; 2372 self->sibling[1].metadata = _metadata; 2373 } 2374 2375 FIXTURE_TEARDOWN(TSYNC) 2376 { 2377 int sib = 0; 2378 2379 if (self->root_prog.filter) 2380 free(self->root_prog.filter); 2381 if (self->apply_prog.filter) 2382 free(self->apply_prog.filter); 2383 2384 for ( ; sib < self->sibling_count; ++sib) { 2385 struct tsync_sibling *s = &self->sibling[sib]; 2386 2387 if (!s->tid) 2388 continue; 2389 /* 2390 * If a thread is still running, it may be stuck, so hit 2391 * it over the head really hard. 2392 */ 2393 pthread_kill(s->tid, 9); 2394 } 2395 pthread_mutex_destroy(&self->mutex); 2396 pthread_cond_destroy(&self->cond); 2397 sem_destroy(&self->started); 2398 } 2399 2400 void *tsync_sibling(void *data) 2401 { 2402 long ret = 0; 2403 struct tsync_sibling *me = data; 2404 2405 me->system_tid = syscall(__NR_gettid); 2406 2407 pthread_mutex_lock(me->mutex); 2408 if (me->diverge) { 2409 /* Just re-apply the root prog to fork the tree */ 2410 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 2411 me->prog, 0, 0); 2412 } 2413 sem_post(me->started); 2414 /* Return outside of started so parent notices failures. */ 2415 if (ret) { 2416 pthread_mutex_unlock(me->mutex); 2417 return (void *)SIBLING_EXIT_FAILURE; 2418 } 2419 do { 2420 pthread_cond_wait(me->cond, me->mutex); 2421 me->num_waits = me->num_waits - 1; 2422 } while (me->num_waits); 2423 pthread_mutex_unlock(me->mutex); 2424 2425 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 2426 if (!ret) 2427 return (void *)SIBLING_EXIT_NEWPRIVS; 2428 read(0, NULL, 0); 2429 return (void *)SIBLING_EXIT_UNKILLED; 2430 } 2431 2432 void tsync_start_sibling(struct tsync_sibling *sibling) 2433 { 2434 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); 2435 } 2436 2437 TEST_F(TSYNC, siblings_fail_prctl) 2438 { 2439 long ret; 2440 void *status; 2441 struct sock_filter filter[] = { 2442 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2443 offsetof(struct seccomp_data, nr)), 2444 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), 2445 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), 2446 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2447 }; 2448 struct sock_fprog prog = { 2449 .len = (unsigned short)ARRAY_SIZE(filter), 2450 .filter = filter, 2451 }; 2452 2453 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2454 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2455 } 2456 2457 /* Check prctl failure detection by requesting sib 0 diverge. */ 2458 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); 2459 ASSERT_NE(ENOSYS, errno) { 2460 TH_LOG("Kernel does not support seccomp syscall!"); 2461 } 2462 ASSERT_EQ(0, ret) { 2463 TH_LOG("setting filter failed"); 2464 } 2465 2466 self->sibling[0].diverge = 1; 2467 tsync_start_sibling(&self->sibling[0]); 2468 tsync_start_sibling(&self->sibling[1]); 2469 2470 while (self->sibling_count < TSYNC_SIBLINGS) { 2471 sem_wait(&self->started); 2472 self->sibling_count++; 2473 } 2474 2475 /* Signal the threads to clean up*/ 2476 pthread_mutex_lock(&self->mutex); 2477 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2478 TH_LOG("cond broadcast non-zero"); 2479 } 2480 pthread_mutex_unlock(&self->mutex); 2481 2482 /* Ensure diverging sibling failed to call prctl. */ 2483 PTHREAD_JOIN(self->sibling[0].tid, &status); 2484 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); 2485 PTHREAD_JOIN(self->sibling[1].tid, &status); 2486 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2487 } 2488 2489 TEST_F(TSYNC, two_siblings_with_ancestor) 2490 { 2491 long ret; 2492 void *status; 2493 2494 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2495 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2496 } 2497 2498 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2499 ASSERT_NE(ENOSYS, errno) { 2500 TH_LOG("Kernel does not support seccomp syscall!"); 2501 } 2502 ASSERT_EQ(0, ret) { 2503 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2504 } 2505 tsync_start_sibling(&self->sibling[0]); 2506 tsync_start_sibling(&self->sibling[1]); 2507 2508 while (self->sibling_count < TSYNC_SIBLINGS) { 2509 sem_wait(&self->started); 2510 self->sibling_count++; 2511 } 2512 2513 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2514 &self->apply_prog); 2515 ASSERT_EQ(0, ret) { 2516 TH_LOG("Could install filter on all threads!"); 2517 } 2518 /* Tell the siblings to test the policy */ 2519 pthread_mutex_lock(&self->mutex); 2520 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2521 TH_LOG("cond broadcast non-zero"); 2522 } 2523 pthread_mutex_unlock(&self->mutex); 2524 /* Ensure they are both killed and don't exit cleanly. */ 2525 PTHREAD_JOIN(self->sibling[0].tid, &status); 2526 EXPECT_EQ(0x0, (long)status); 2527 PTHREAD_JOIN(self->sibling[1].tid, &status); 2528 EXPECT_EQ(0x0, (long)status); 2529 } 2530 2531 TEST_F(TSYNC, two_sibling_want_nnp) 2532 { 2533 void *status; 2534 2535 /* start siblings before any prctl() operations */ 2536 tsync_start_sibling(&self->sibling[0]); 2537 tsync_start_sibling(&self->sibling[1]); 2538 while (self->sibling_count < TSYNC_SIBLINGS) { 2539 sem_wait(&self->started); 2540 self->sibling_count++; 2541 } 2542 2543 /* Tell the siblings to test no policy */ 2544 pthread_mutex_lock(&self->mutex); 2545 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2546 TH_LOG("cond broadcast non-zero"); 2547 } 2548 pthread_mutex_unlock(&self->mutex); 2549 2550 /* Ensure they are both upset about lacking nnp. */ 2551 PTHREAD_JOIN(self->sibling[0].tid, &status); 2552 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2553 PTHREAD_JOIN(self->sibling[1].tid, &status); 2554 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); 2555 } 2556 2557 TEST_F(TSYNC, two_siblings_with_no_filter) 2558 { 2559 long ret; 2560 void *status; 2561 2562 /* start siblings before any prctl() operations */ 2563 tsync_start_sibling(&self->sibling[0]); 2564 tsync_start_sibling(&self->sibling[1]); 2565 while (self->sibling_count < TSYNC_SIBLINGS) { 2566 sem_wait(&self->started); 2567 self->sibling_count++; 2568 } 2569 2570 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2571 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2572 } 2573 2574 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2575 &self->apply_prog); 2576 ASSERT_NE(ENOSYS, errno) { 2577 TH_LOG("Kernel does not support seccomp syscall!"); 2578 } 2579 ASSERT_EQ(0, ret) { 2580 TH_LOG("Could install filter on all threads!"); 2581 } 2582 2583 /* Tell the siblings to test the policy */ 2584 pthread_mutex_lock(&self->mutex); 2585 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2586 TH_LOG("cond broadcast non-zero"); 2587 } 2588 pthread_mutex_unlock(&self->mutex); 2589 2590 /* Ensure they are both killed and don't exit cleanly. */ 2591 PTHREAD_JOIN(self->sibling[0].tid, &status); 2592 EXPECT_EQ(0x0, (long)status); 2593 PTHREAD_JOIN(self->sibling[1].tid, &status); 2594 EXPECT_EQ(0x0, (long)status); 2595 } 2596 2597 TEST_F(TSYNC, two_siblings_with_one_divergence) 2598 { 2599 long ret; 2600 void *status; 2601 2602 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2603 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2604 } 2605 2606 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2607 ASSERT_NE(ENOSYS, errno) { 2608 TH_LOG("Kernel does not support seccomp syscall!"); 2609 } 2610 ASSERT_EQ(0, ret) { 2611 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2612 } 2613 self->sibling[0].diverge = 1; 2614 tsync_start_sibling(&self->sibling[0]); 2615 tsync_start_sibling(&self->sibling[1]); 2616 2617 while (self->sibling_count < TSYNC_SIBLINGS) { 2618 sem_wait(&self->started); 2619 self->sibling_count++; 2620 } 2621 2622 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2623 &self->apply_prog); 2624 ASSERT_EQ(self->sibling[0].system_tid, ret) { 2625 TH_LOG("Did not fail on diverged sibling."); 2626 } 2627 2628 /* Wake the threads */ 2629 pthread_mutex_lock(&self->mutex); 2630 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2631 TH_LOG("cond broadcast non-zero"); 2632 } 2633 pthread_mutex_unlock(&self->mutex); 2634 2635 /* Ensure they are both unkilled. */ 2636 PTHREAD_JOIN(self->sibling[0].tid, &status); 2637 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2638 PTHREAD_JOIN(self->sibling[1].tid, &status); 2639 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2640 } 2641 2642 TEST_F(TSYNC, two_siblings_not_under_filter) 2643 { 2644 long ret, sib; 2645 void *status; 2646 struct timespec delay = { .tv_nsec = 100000000 }; 2647 2648 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2649 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2650 } 2651 2652 /* 2653 * Sibling 0 will have its own seccomp policy 2654 * and Sibling 1 will not be under seccomp at 2655 * all. Sibling 1 will enter seccomp and 0 2656 * will cause failure. 2657 */ 2658 self->sibling[0].diverge = 1; 2659 tsync_start_sibling(&self->sibling[0]); 2660 tsync_start_sibling(&self->sibling[1]); 2661 2662 while (self->sibling_count < TSYNC_SIBLINGS) { 2663 sem_wait(&self->started); 2664 self->sibling_count++; 2665 } 2666 2667 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); 2668 ASSERT_NE(ENOSYS, errno) { 2669 TH_LOG("Kernel does not support seccomp syscall!"); 2670 } 2671 ASSERT_EQ(0, ret) { 2672 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); 2673 } 2674 2675 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2676 &self->apply_prog); 2677 ASSERT_EQ(ret, self->sibling[0].system_tid) { 2678 TH_LOG("Did not fail on diverged sibling."); 2679 } 2680 sib = 1; 2681 if (ret == self->sibling[0].system_tid) 2682 sib = 0; 2683 2684 pthread_mutex_lock(&self->mutex); 2685 2686 /* Increment the other siblings num_waits so we can clean up 2687 * the one we just saw. 2688 */ 2689 self->sibling[!sib].num_waits += 1; 2690 2691 /* Signal the thread to clean up*/ 2692 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2693 TH_LOG("cond broadcast non-zero"); 2694 } 2695 pthread_mutex_unlock(&self->mutex); 2696 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2697 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); 2698 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2699 while (!kill(self->sibling[sib].system_tid, 0)) 2700 nanosleep(&delay, NULL); 2701 /* Switch to the remaining sibling */ 2702 sib = !sib; 2703 2704 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2705 &self->apply_prog); 2706 ASSERT_EQ(0, ret) { 2707 TH_LOG("Expected the remaining sibling to sync"); 2708 }; 2709 2710 pthread_mutex_lock(&self->mutex); 2711 2712 /* If remaining sibling didn't have a chance to wake up during 2713 * the first broadcast, manually reduce the num_waits now. 2714 */ 2715 if (self->sibling[sib].num_waits > 1) 2716 self->sibling[sib].num_waits = 1; 2717 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { 2718 TH_LOG("cond broadcast non-zero"); 2719 } 2720 pthread_mutex_unlock(&self->mutex); 2721 PTHREAD_JOIN(self->sibling[sib].tid, &status); 2722 EXPECT_EQ(0, (long)status); 2723 /* Poll for actual task death. pthread_join doesn't guarantee it. */ 2724 while (!kill(self->sibling[sib].system_tid, 0)) 2725 nanosleep(&delay, NULL); 2726 2727 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, 2728 &self->apply_prog); 2729 ASSERT_EQ(0, ret); /* just us chickens */ 2730 } 2731 2732 /* Make sure restarted syscalls are seen directly as "restart_syscall". */ 2733 TEST(syscall_restart) 2734 { 2735 long ret; 2736 unsigned long msg; 2737 pid_t child_pid; 2738 int pipefd[2]; 2739 int status; 2740 siginfo_t info = { }; 2741 struct sock_filter filter[] = { 2742 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2743 offsetof(struct seccomp_data, nr)), 2744 2745 #ifdef __NR_sigreturn 2746 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), 2747 #endif 2748 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), 2749 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), 2750 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), 2751 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0), 2752 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), 2753 2754 /* Allow __NR_write for easy logging. */ 2755 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), 2756 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2757 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2758 /* The nanosleep jump target. */ 2759 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), 2760 /* The restart_syscall jump target. */ 2761 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), 2762 }; 2763 struct sock_fprog prog = { 2764 .len = (unsigned short)ARRAY_SIZE(filter), 2765 .filter = filter, 2766 }; 2767 #if defined(__arm__) 2768 struct utsname utsbuf; 2769 #endif 2770 2771 ASSERT_EQ(0, pipe(pipefd)); 2772 2773 child_pid = fork(); 2774 ASSERT_LE(0, child_pid); 2775 if (child_pid == 0) { 2776 /* Child uses EXPECT not ASSERT to deliver status correctly. */ 2777 char buf = ' '; 2778 struct timespec timeout = { }; 2779 2780 /* Attach parent as tracer and stop. */ 2781 EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); 2782 EXPECT_EQ(0, raise(SIGSTOP)); 2783 2784 EXPECT_EQ(0, close(pipefd[1])); 2785 2786 EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 2787 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 2788 } 2789 2790 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); 2791 EXPECT_EQ(0, ret) { 2792 TH_LOG("Failed to install filter!"); 2793 } 2794 2795 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2796 TH_LOG("Failed to read() sync from parent"); 2797 } 2798 EXPECT_EQ('.', buf) { 2799 TH_LOG("Failed to get sync data from read()"); 2800 } 2801 2802 /* Start nanosleep to be interrupted. */ 2803 timeout.tv_sec = 1; 2804 errno = 0; 2805 EXPECT_EQ(0, nanosleep(&timeout, NULL)) { 2806 TH_LOG("Call to nanosleep() failed (errno %d)", errno); 2807 } 2808 2809 /* Read final sync from parent. */ 2810 EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { 2811 TH_LOG("Failed final read() from parent"); 2812 } 2813 EXPECT_EQ('!', buf) { 2814 TH_LOG("Failed to get final data from read()"); 2815 } 2816 2817 /* Directly report the status of our test harness results. */ 2818 syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS 2819 : EXIT_FAILURE); 2820 } 2821 EXPECT_EQ(0, close(pipefd[0])); 2822 2823 /* Attach to child, setup options, and release. */ 2824 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2825 ASSERT_EQ(true, WIFSTOPPED(status)); 2826 ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, 2827 PTRACE_O_TRACESECCOMP)); 2828 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2829 ASSERT_EQ(1, write(pipefd[1], ".", 1)); 2830 2831 /* Wait for nanosleep() to start. */ 2832 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2833 ASSERT_EQ(true, WIFSTOPPED(status)); 2834 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2835 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2836 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2837 ASSERT_EQ(0x100, msg); 2838 EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid)); 2839 2840 /* Might as well check siginfo for sanity while we're here. */ 2841 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2842 ASSERT_EQ(SIGTRAP, info.si_signo); 2843 ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); 2844 EXPECT_EQ(0, info.si_errno); 2845 EXPECT_EQ(getuid(), info.si_uid); 2846 /* Verify signal delivery came from child (seccomp-triggered). */ 2847 EXPECT_EQ(child_pid, info.si_pid); 2848 2849 /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ 2850 ASSERT_EQ(0, kill(child_pid, SIGSTOP)); 2851 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2852 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2853 ASSERT_EQ(true, WIFSTOPPED(status)); 2854 ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); 2855 ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); 2856 /* 2857 * There is no siginfo on SIGSTOP any more, so we can't verify 2858 * signal delivery came from parent now (getpid() == info.si_pid). 2859 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com 2860 * At least verify the SIGSTOP via PTRACE_GETSIGINFO. 2861 */ 2862 EXPECT_EQ(SIGSTOP, info.si_signo); 2863 2864 /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ 2865 ASSERT_EQ(0, kill(child_pid, SIGCONT)); 2866 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2867 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2868 ASSERT_EQ(true, WIFSTOPPED(status)); 2869 ASSERT_EQ(SIGCONT, WSTOPSIG(status)); 2870 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2871 2872 /* Wait for restart_syscall() to start. */ 2873 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2874 ASSERT_EQ(true, WIFSTOPPED(status)); 2875 ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); 2876 ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); 2877 ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); 2878 2879 ASSERT_EQ(0x200, msg); 2880 ret = get_syscall(_metadata, child_pid); 2881 #if defined(__arm__) 2882 /* 2883 * FIXME: 2884 * - native ARM registers do NOT expose true syscall. 2885 * - compat ARM registers on ARM64 DO expose true syscall. 2886 */ 2887 ASSERT_EQ(0, uname(&utsbuf)); 2888 if (strncmp(utsbuf.machine, "arm", 3) == 0) { 2889 EXPECT_EQ(__NR_nanosleep, ret); 2890 } else 2891 #endif 2892 { 2893 EXPECT_EQ(__NR_restart_syscall, ret); 2894 } 2895 2896 /* Write again to end test. */ 2897 ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); 2898 ASSERT_EQ(1, write(pipefd[1], "!", 1)); 2899 EXPECT_EQ(0, close(pipefd[1])); 2900 2901 ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); 2902 if (WIFSIGNALED(status) || WEXITSTATUS(status)) 2903 _metadata->passed = 0; 2904 } 2905 2906 TEST_SIGNAL(filter_flag_log, SIGSYS) 2907 { 2908 struct sock_filter allow_filter[] = { 2909 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2910 }; 2911 struct sock_filter kill_filter[] = { 2912 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, 2913 offsetof(struct seccomp_data, nr)), 2914 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), 2915 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), 2916 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 2917 }; 2918 struct sock_fprog allow_prog = { 2919 .len = (unsigned short)ARRAY_SIZE(allow_filter), 2920 .filter = allow_filter, 2921 }; 2922 struct sock_fprog kill_prog = { 2923 .len = (unsigned short)ARRAY_SIZE(kill_filter), 2924 .filter = kill_filter, 2925 }; 2926 long ret; 2927 pid_t parent = getppid(); 2928 2929 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 2930 ASSERT_EQ(0, ret); 2931 2932 /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ 2933 ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, 2934 &allow_prog); 2935 ASSERT_NE(ENOSYS, errno) { 2936 TH_LOG("Kernel does not support seccomp syscall!"); 2937 } 2938 EXPECT_NE(0, ret) { 2939 TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); 2940 } 2941 EXPECT_EQ(EINVAL, errno) { 2942 TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); 2943 } 2944 2945 /* Verify that a simple, permissive filter can be added with no flags */ 2946 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); 2947 EXPECT_EQ(0, ret); 2948 2949 /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ 2950 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 2951 &allow_prog); 2952 ASSERT_NE(EINVAL, errno) { 2953 TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); 2954 } 2955 EXPECT_EQ(0, ret); 2956 2957 /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ 2958 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, 2959 &kill_prog); 2960 EXPECT_EQ(0, ret); 2961 2962 EXPECT_EQ(parent, syscall(__NR_getppid)); 2963 /* getpid() should never return. */ 2964 EXPECT_EQ(0, syscall(__NR_getpid)); 2965 } 2966 2967 TEST(get_action_avail) 2968 { 2969 __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, 2970 SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, 2971 SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; 2972 __u32 unknown_action = 0x10000000U; 2973 int i; 2974 long ret; 2975 2976 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); 2977 ASSERT_NE(ENOSYS, errno) { 2978 TH_LOG("Kernel does not support seccomp syscall!"); 2979 } 2980 ASSERT_NE(EINVAL, errno) { 2981 TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); 2982 } 2983 EXPECT_EQ(ret, 0); 2984 2985 for (i = 0; i < ARRAY_SIZE(actions); i++) { 2986 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); 2987 EXPECT_EQ(ret, 0) { 2988 TH_LOG("Expected action (0x%X) not available!", 2989 actions[i]); 2990 } 2991 } 2992 2993 /* Check that an unknown action is handled properly (EOPNOTSUPP) */ 2994 ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); 2995 EXPECT_EQ(ret, -1); 2996 EXPECT_EQ(errno, EOPNOTSUPP); 2997 } 2998 2999 TEST(get_metadata) 3000 { 3001 pid_t pid; 3002 int pipefd[2]; 3003 char buf; 3004 struct seccomp_metadata md; 3005 long ret; 3006 3007 /* Only real root can get metadata. */ 3008 if (geteuid()) { 3009 XFAIL(return, "get_metadata requires real root"); 3010 return; 3011 } 3012 3013 ASSERT_EQ(0, pipe(pipefd)); 3014 3015 pid = fork(); 3016 ASSERT_GE(pid, 0); 3017 if (pid == 0) { 3018 struct sock_filter filter[] = { 3019 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3020 }; 3021 struct sock_fprog prog = { 3022 .len = (unsigned short)ARRAY_SIZE(filter), 3023 .filter = filter, 3024 }; 3025 3026 /* one with log, one without */ 3027 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 3028 SECCOMP_FILTER_FLAG_LOG, &prog)); 3029 EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog)); 3030 3031 EXPECT_EQ(0, close(pipefd[0])); 3032 ASSERT_EQ(1, write(pipefd[1], "1", 1)); 3033 ASSERT_EQ(0, close(pipefd[1])); 3034 3035 while (1) 3036 sleep(100); 3037 } 3038 3039 ASSERT_EQ(0, close(pipefd[1])); 3040 ASSERT_EQ(1, read(pipefd[0], &buf, 1)); 3041 3042 ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); 3043 ASSERT_EQ(pid, waitpid(pid, NULL, 0)); 3044 3045 /* Past here must not use ASSERT or child process is never killed. */ 3046 3047 md.filter_off = 0; 3048 errno = 0; 3049 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3050 EXPECT_EQ(sizeof(md), ret) { 3051 if (errno == EINVAL) 3052 XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); 3053 } 3054 3055 EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); 3056 EXPECT_EQ(md.filter_off, 0); 3057 3058 md.filter_off = 1; 3059 ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); 3060 EXPECT_EQ(sizeof(md), ret); 3061 EXPECT_EQ(md.flags, 0); 3062 EXPECT_EQ(md.filter_off, 1); 3063 3064 skip: 3065 ASSERT_EQ(0, kill(pid, SIGKILL)); 3066 } 3067 3068 static int user_trap_syscall(int nr, unsigned int flags) 3069 { 3070 struct sock_filter filter[] = { 3071 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 3072 offsetof(struct seccomp_data, nr)), 3073 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), 3074 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), 3075 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), 3076 }; 3077 3078 struct sock_fprog prog = { 3079 .len = (unsigned short)ARRAY_SIZE(filter), 3080 .filter = filter, 3081 }; 3082 3083 return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); 3084 } 3085 3086 #define USER_NOTIF_MAGIC 116983961184613L 3087 TEST(user_notification_basic) 3088 { 3089 pid_t pid; 3090 long ret; 3091 int status, listener; 3092 struct seccomp_notif req = {}; 3093 struct seccomp_notif_resp resp = {}; 3094 struct pollfd pollfd; 3095 3096 struct sock_filter filter[] = { 3097 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), 3098 }; 3099 struct sock_fprog prog = { 3100 .len = (unsigned short)ARRAY_SIZE(filter), 3101 .filter = filter, 3102 }; 3103 3104 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3105 ASSERT_EQ(0, ret) { 3106 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3107 } 3108 3109 pid = fork(); 3110 ASSERT_GE(pid, 0); 3111 3112 /* Check that we get -ENOSYS with no listener attached */ 3113 if (pid == 0) { 3114 if (user_trap_syscall(__NR_getppid, 0) < 0) 3115 exit(1); 3116 ret = syscall(__NR_getppid); 3117 exit(ret >= 0 || errno != ENOSYS); 3118 } 3119 3120 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3121 EXPECT_EQ(true, WIFEXITED(status)); 3122 EXPECT_EQ(0, WEXITSTATUS(status)); 3123 3124 /* Add some no-op filters for grins. */ 3125 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3126 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3127 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3128 EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); 3129 3130 /* Check that the basic notification machinery works */ 3131 listener = user_trap_syscall(__NR_getppid, 3132 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3133 ASSERT_GE(listener, 0); 3134 3135 /* Installing a second listener in the chain should EBUSY */ 3136 EXPECT_EQ(user_trap_syscall(__NR_getppid, 3137 SECCOMP_FILTER_FLAG_NEW_LISTENER), 3138 -1); 3139 EXPECT_EQ(errno, EBUSY); 3140 3141 pid = fork(); 3142 ASSERT_GE(pid, 0); 3143 3144 if (pid == 0) { 3145 ret = syscall(__NR_getppid); 3146 exit(ret != USER_NOTIF_MAGIC); 3147 } 3148 3149 pollfd.fd = listener; 3150 pollfd.events = POLLIN | POLLOUT; 3151 3152 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3153 EXPECT_EQ(pollfd.revents, POLLIN); 3154 3155 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3156 3157 pollfd.fd = listener; 3158 pollfd.events = POLLIN | POLLOUT; 3159 3160 EXPECT_GT(poll(&pollfd, 1, -1), 0); 3161 EXPECT_EQ(pollfd.revents, POLLOUT); 3162 3163 EXPECT_EQ(req.data.nr, __NR_getppid); 3164 3165 resp.id = req.id; 3166 resp.error = 0; 3167 resp.val = USER_NOTIF_MAGIC; 3168 3169 /* check that we make sure flags == 0 */ 3170 resp.flags = 1; 3171 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3172 EXPECT_EQ(errno, EINVAL); 3173 3174 resp.flags = 0; 3175 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3176 3177 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3178 EXPECT_EQ(true, WIFEXITED(status)); 3179 EXPECT_EQ(0, WEXITSTATUS(status)); 3180 } 3181 3182 TEST(user_notification_kill_in_middle) 3183 { 3184 pid_t pid; 3185 long ret; 3186 int listener; 3187 struct seccomp_notif req = {}; 3188 struct seccomp_notif_resp resp = {}; 3189 3190 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3191 ASSERT_EQ(0, ret) { 3192 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3193 } 3194 3195 listener = user_trap_syscall(__NR_getppid, 3196 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3197 ASSERT_GE(listener, 0); 3198 3199 /* 3200 * Check that nothing bad happens when we kill the task in the middle 3201 * of a syscall. 3202 */ 3203 pid = fork(); 3204 ASSERT_GE(pid, 0); 3205 3206 if (pid == 0) { 3207 ret = syscall(__NR_getppid); 3208 exit(ret != USER_NOTIF_MAGIC); 3209 } 3210 3211 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3212 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); 3213 3214 EXPECT_EQ(kill(pid, SIGKILL), 0); 3215 EXPECT_EQ(waitpid(pid, NULL, 0), pid); 3216 3217 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); 3218 3219 resp.id = req.id; 3220 ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); 3221 EXPECT_EQ(ret, -1); 3222 EXPECT_EQ(errno, ENOENT); 3223 } 3224 3225 static int handled = -1; 3226 3227 static void signal_handler(int signal) 3228 { 3229 if (write(handled, "c", 1) != 1) 3230 perror("write from signal"); 3231 } 3232 3233 TEST(user_notification_signal) 3234 { 3235 pid_t pid; 3236 long ret; 3237 int status, listener, sk_pair[2]; 3238 struct seccomp_notif req = {}; 3239 struct seccomp_notif_resp resp = {}; 3240 char c; 3241 3242 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3243 ASSERT_EQ(0, ret) { 3244 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3245 } 3246 3247 ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); 3248 3249 listener = user_trap_syscall(__NR_gettid, 3250 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3251 ASSERT_GE(listener, 0); 3252 3253 pid = fork(); 3254 ASSERT_GE(pid, 0); 3255 3256 if (pid == 0) { 3257 close(sk_pair[0]); 3258 handled = sk_pair[1]; 3259 if (signal(SIGUSR1, signal_handler) == SIG_ERR) { 3260 perror("signal"); 3261 exit(1); 3262 } 3263 /* 3264 * ERESTARTSYS behavior is a bit hard to test, because we need 3265 * to rely on a signal that has not yet been handled. Let's at 3266 * least check that the error code gets propagated through, and 3267 * hope that it doesn't break when there is actually a signal :) 3268 */ 3269 ret = syscall(__NR_gettid); 3270 exit(!(ret == -1 && errno == 512)); 3271 } 3272 3273 close(sk_pair[1]); 3274 3275 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3276 3277 EXPECT_EQ(kill(pid, SIGUSR1), 0); 3278 3279 /* 3280 * Make sure the signal really is delivered, which means we're not 3281 * stuck in the user notification code any more and the notification 3282 * should be dead. 3283 */ 3284 EXPECT_EQ(read(sk_pair[0], &c, 1), 1); 3285 3286 resp.id = req.id; 3287 resp.error = -EPERM; 3288 resp.val = 0; 3289 3290 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); 3291 EXPECT_EQ(errno, ENOENT); 3292 3293 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3294 3295 resp.id = req.id; 3296 resp.error = -512; /* -ERESTARTSYS */ 3297 resp.val = 0; 3298 3299 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3300 3301 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3302 EXPECT_EQ(true, WIFEXITED(status)); 3303 EXPECT_EQ(0, WEXITSTATUS(status)); 3304 } 3305 3306 TEST(user_notification_closed_listener) 3307 { 3308 pid_t pid; 3309 long ret; 3310 int status, listener; 3311 3312 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 3313 ASSERT_EQ(0, ret) { 3314 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3315 } 3316 3317 listener = user_trap_syscall(__NR_getppid, 3318 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3319 ASSERT_GE(listener, 0); 3320 3321 /* 3322 * Check that we get an ENOSYS when the listener is closed. 3323 */ 3324 pid = fork(); 3325 ASSERT_GE(pid, 0); 3326 if (pid == 0) { 3327 close(listener); 3328 ret = syscall(__NR_getppid); 3329 exit(ret != -1 && errno != ENOSYS); 3330 } 3331 3332 close(listener); 3333 3334 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3335 EXPECT_EQ(true, WIFEXITED(status)); 3336 EXPECT_EQ(0, WEXITSTATUS(status)); 3337 } 3338 3339 /* 3340 * Check that a pid in a child namespace still shows up as valid in ours. 3341 */ 3342 TEST(user_notification_child_pid_ns) 3343 { 3344 pid_t pid; 3345 int status, listener; 3346 struct seccomp_notif req = {}; 3347 struct seccomp_notif_resp resp = {}; 3348 3349 ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); 3350 3351 listener = user_trap_syscall(__NR_getppid, 3352 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3353 ASSERT_GE(listener, 0); 3354 3355 pid = fork(); 3356 ASSERT_GE(pid, 0); 3357 3358 if (pid == 0) 3359 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3360 3361 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3362 EXPECT_EQ(req.pid, pid); 3363 3364 resp.id = req.id; 3365 resp.error = 0; 3366 resp.val = USER_NOTIF_MAGIC; 3367 3368 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3369 3370 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3371 EXPECT_EQ(true, WIFEXITED(status)); 3372 EXPECT_EQ(0, WEXITSTATUS(status)); 3373 close(listener); 3374 } 3375 3376 /* 3377 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. 3378 * invalid. 3379 */ 3380 TEST(user_notification_sibling_pid_ns) 3381 { 3382 pid_t pid, pid2; 3383 int status, listener; 3384 struct seccomp_notif req = {}; 3385 struct seccomp_notif_resp resp = {}; 3386 3387 ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) { 3388 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); 3389 } 3390 3391 listener = user_trap_syscall(__NR_getppid, 3392 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3393 ASSERT_GE(listener, 0); 3394 3395 pid = fork(); 3396 ASSERT_GE(pid, 0); 3397 3398 if (pid == 0) { 3399 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3400 3401 pid2 = fork(); 3402 ASSERT_GE(pid2, 0); 3403 3404 if (pid2 == 0) 3405 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3406 3407 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3408 EXPECT_EQ(true, WIFEXITED(status)); 3409 EXPECT_EQ(0, WEXITSTATUS(status)); 3410 exit(WEXITSTATUS(status)); 3411 } 3412 3413 /* Create the sibling ns, and sibling in it. */ 3414 ASSERT_EQ(unshare(CLONE_NEWPID), 0); 3415 ASSERT_EQ(errno, 0); 3416 3417 pid2 = fork(); 3418 ASSERT_GE(pid2, 0); 3419 3420 if (pid2 == 0) { 3421 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3422 /* 3423 * The pid should be 0, i.e. the task is in some namespace that 3424 * we can't "see". 3425 */ 3426 EXPECT_EQ(req.pid, 0); 3427 3428 resp.id = req.id; 3429 resp.error = 0; 3430 resp.val = USER_NOTIF_MAGIC; 3431 3432 ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3433 exit(0); 3434 } 3435 3436 close(listener); 3437 3438 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3439 EXPECT_EQ(true, WIFEXITED(status)); 3440 EXPECT_EQ(0, WEXITSTATUS(status)); 3441 3442 EXPECT_EQ(waitpid(pid2, &status, 0), pid2); 3443 EXPECT_EQ(true, WIFEXITED(status)); 3444 EXPECT_EQ(0, WEXITSTATUS(status)); 3445 } 3446 3447 TEST(user_notification_fault_recv) 3448 { 3449 pid_t pid; 3450 int status, listener; 3451 struct seccomp_notif req = {}; 3452 struct seccomp_notif_resp resp = {}; 3453 3454 ASSERT_EQ(unshare(CLONE_NEWUSER), 0); 3455 3456 listener = user_trap_syscall(__NR_getppid, 3457 SECCOMP_FILTER_FLAG_NEW_LISTENER); 3458 ASSERT_GE(listener, 0); 3459 3460 pid = fork(); 3461 ASSERT_GE(pid, 0); 3462 3463 if (pid == 0) 3464 exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); 3465 3466 /* Do a bad recv() */ 3467 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); 3468 EXPECT_EQ(errno, EFAULT); 3469 3470 /* We should still be able to receive this notification, though. */ 3471 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); 3472 EXPECT_EQ(req.pid, pid); 3473 3474 resp.id = req.id; 3475 resp.error = 0; 3476 resp.val = USER_NOTIF_MAGIC; 3477 3478 EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); 3479 3480 EXPECT_EQ(waitpid(pid, &status, 0), pid); 3481 EXPECT_EQ(true, WIFEXITED(status)); 3482 EXPECT_EQ(0, WEXITSTATUS(status)); 3483 } 3484 3485 TEST(seccomp_get_notif_sizes) 3486 { 3487 struct seccomp_notif_sizes sizes; 3488 3489 ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); 3490 EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); 3491 EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); 3492 } 3493 3494 /* 3495 * TODO: 3496 * - add microbenchmarks 3497 * - expand NNP testing 3498 * - better arch-specific TRACE and TRAP handlers. 3499 * - endianness checking when appropriate 3500 * - 64-bit arg prodding 3501 * - arch value testing (x86 modes especially) 3502 * - verify that FILTER_FLAG_LOG filters generate log messages 3503 * - verify that RET_LOG generates log messages 3504 * - ... 3505 */ 3506 3507 TEST_HARNESS_MAIN 3508