1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2026 Christian Brauner <brauner@kernel.org> 3 4 #define _GNU_SOURCE 5 #include <errno.h> 6 #include <linux/types.h> 7 #include <poll.h> 8 #include <pthread.h> 9 #include <sched.h> 10 #include <signal.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 #include <syscall.h> 15 #include <sys/ioctl.h> 16 #include <sys/prctl.h> 17 #include <sys/socket.h> 18 #include <sys/types.h> 19 #include <sys/wait.h> 20 #include <unistd.h> 21 22 #include "pidfd.h" 23 #include "kselftest_harness.h" 24 25 #ifndef CLONE_AUTOREAP 26 #define CLONE_AUTOREAP (1ULL << 34) 27 #endif 28 29 #ifndef CLONE_NNP 30 #define CLONE_NNP (1ULL << 35) 31 #endif 32 33 #ifndef CLONE_PIDFD_AUTOKILL 34 #define CLONE_PIDFD_AUTOKILL (1ULL << 36) 35 #endif 36 37 #ifndef _LINUX_CAPABILITY_VERSION_3 38 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 39 #endif 40 41 struct cap_header { 42 __u32 version; 43 int pid; 44 }; 45 46 struct cap_data { 47 __u32 effective; 48 __u32 permitted; 49 __u32 inheritable; 50 }; 51 52 static int drop_all_caps(void) 53 { 54 struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 }; 55 struct cap_data data[2] = {}; 56 57 return syscall(__NR_capset, &hdr, data); 58 } 59 60 static pid_t create_autoreap_child(int *pidfd) 61 { 62 struct __clone_args args = { 63 .flags = CLONE_PIDFD | CLONE_AUTOREAP, 64 .exit_signal = 0, 65 .pidfd = ptr_to_u64(pidfd), 66 }; 67 68 return sys_clone3(&args, sizeof(args)); 69 } 70 71 /* 72 * Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget). 73 */ 74 TEST(autoreap_without_pidfd) 75 { 76 struct __clone_args args = { 77 .flags = CLONE_AUTOREAP, 78 .exit_signal = 0, 79 }; 80 pid_t pid; 81 int ret; 82 83 pid = sys_clone3(&args, sizeof(args)); 84 if (pid < 0 && errno == EINVAL) 85 SKIP(return, "CLONE_AUTOREAP not supported"); 86 ASSERT_GE(pid, 0); 87 88 if (pid == 0) 89 _exit(0); 90 91 /* 92 * Give the child a moment to exit and be autoreaped. 93 * Then verify no zombie remains. 94 */ 95 usleep(200000); 96 ret = waitpid(pid, NULL, WNOHANG); 97 ASSERT_EQ(ret, -1); 98 ASSERT_EQ(errno, ECHILD); 99 } 100 101 /* 102 * Test that CLONE_AUTOREAP with a non-zero exit_signal fails. 103 */ 104 TEST(autoreap_rejects_exit_signal) 105 { 106 struct __clone_args args = { 107 .flags = CLONE_AUTOREAP, 108 .exit_signal = SIGCHLD, 109 }; 110 pid_t pid; 111 112 pid = sys_clone3(&args, sizeof(args)); 113 ASSERT_EQ(pid, -1); 114 ASSERT_EQ(errno, EINVAL); 115 } 116 117 /* 118 * Test that CLONE_AUTOREAP with CLONE_PARENT fails. 119 */ 120 TEST(autoreap_rejects_parent) 121 { 122 struct __clone_args args = { 123 .flags = CLONE_AUTOREAP | CLONE_PARENT, 124 .exit_signal = 0, 125 }; 126 pid_t pid; 127 128 pid = sys_clone3(&args, sizeof(args)); 129 ASSERT_EQ(pid, -1); 130 ASSERT_EQ(errno, EINVAL); 131 } 132 133 /* 134 * Test that CLONE_AUTOREAP with CLONE_THREAD fails. 135 */ 136 TEST(autoreap_rejects_thread) 137 { 138 struct __clone_args args = { 139 .flags = CLONE_AUTOREAP | CLONE_THREAD | 140 CLONE_SIGHAND | CLONE_VM, 141 .exit_signal = 0, 142 }; 143 pid_t pid; 144 145 pid = sys_clone3(&args, sizeof(args)); 146 ASSERT_EQ(pid, -1); 147 ASSERT_EQ(errno, EINVAL); 148 } 149 150 /* 151 * Basic test: create an autoreap child, let it exit, verify: 152 * - pidfd becomes readable (poll returns POLLIN) 153 * - PIDFD_GET_INFO returns the correct exit code 154 * - waitpid() returns -1/ECHILD (no zombie) 155 */ 156 TEST(autoreap_basic) 157 { 158 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 159 int pidfd = -1, ret; 160 struct pollfd pfd; 161 pid_t pid; 162 163 pid = create_autoreap_child(&pidfd); 164 if (pid < 0 && errno == EINVAL) 165 SKIP(return, "CLONE_AUTOREAP not supported"); 166 ASSERT_GE(pid, 0); 167 168 if (pid == 0) 169 _exit(42); 170 171 ASSERT_GE(pidfd, 0); 172 173 /* Wait for the child to exit via pidfd poll. */ 174 pfd.fd = pidfd; 175 pfd.events = POLLIN; 176 ret = poll(&pfd, 1, 5000); 177 ASSERT_EQ(ret, 1); 178 ASSERT_TRUE(pfd.revents & POLLIN); 179 180 /* Verify exit info via PIDFD_GET_INFO. */ 181 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 182 ASSERT_EQ(ret, 0); 183 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 184 /* 185 * exit_code is in waitpid format: for _exit(42), 186 * WIFEXITED is true and WEXITSTATUS is 42. 187 */ 188 ASSERT_TRUE(WIFEXITED(info.exit_code)); 189 ASSERT_EQ(WEXITSTATUS(info.exit_code), 42); 190 191 /* Verify no zombie: waitpid should fail with ECHILD. */ 192 ret = waitpid(pid, NULL, WNOHANG); 193 ASSERT_EQ(ret, -1); 194 ASSERT_EQ(errno, ECHILD); 195 196 close(pidfd); 197 } 198 199 /* 200 * Test that an autoreap child killed by a signal reports 201 * the correct exit info. 202 */ 203 TEST(autoreap_signaled) 204 { 205 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 206 int pidfd = -1, ret; 207 struct pollfd pfd; 208 pid_t pid; 209 210 pid = create_autoreap_child(&pidfd); 211 if (pid < 0 && errno == EINVAL) 212 SKIP(return, "CLONE_AUTOREAP not supported"); 213 ASSERT_GE(pid, 0); 214 215 if (pid == 0) { 216 pause(); 217 _exit(1); 218 } 219 220 ASSERT_GE(pidfd, 0); 221 222 /* Kill the child. */ 223 ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); 224 ASSERT_EQ(ret, 0); 225 226 /* Wait for exit via pidfd. */ 227 pfd.fd = pidfd; 228 pfd.events = POLLIN; 229 ret = poll(&pfd, 1, 5000); 230 ASSERT_EQ(ret, 1); 231 ASSERT_TRUE(pfd.revents & POLLIN); 232 233 /* Verify signal info. */ 234 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 235 ASSERT_EQ(ret, 0); 236 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 237 ASSERT_TRUE(WIFSIGNALED(info.exit_code)); 238 ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); 239 240 /* No zombie. */ 241 ret = waitpid(pid, NULL, WNOHANG); 242 ASSERT_EQ(ret, -1); 243 ASSERT_EQ(errno, ECHILD); 244 245 close(pidfd); 246 } 247 248 /* 249 * Test autoreap survives reparenting: middle process creates an 250 * autoreap grandchild, then exits. The grandchild gets reparented 251 * to us (the grandparent, which is a subreaper). When the grandchild 252 * exits, it should still be autoreaped - no zombie under us. 253 */ 254 TEST(autoreap_reparent) 255 { 256 int ipc_sockets[2], ret; 257 int pidfd = -1; 258 struct pollfd pfd; 259 pid_t mid_pid, grandchild_pid; 260 char buf[32] = {}; 261 262 /* Make ourselves a subreaper so reparented children come to us. */ 263 ret = prctl(PR_SET_CHILD_SUBREAPER, 1); 264 ASSERT_EQ(ret, 0); 265 266 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 267 ASSERT_EQ(ret, 0); 268 269 mid_pid = fork(); 270 ASSERT_GE(mid_pid, 0); 271 272 if (mid_pid == 0) { 273 /* Middle child: create an autoreap grandchild. */ 274 int gc_pidfd = -1; 275 276 close(ipc_sockets[0]); 277 278 grandchild_pid = create_autoreap_child(&gc_pidfd); 279 if (grandchild_pid < 0) { 280 write_nointr(ipc_sockets[1], "E", 1); 281 close(ipc_sockets[1]); 282 _exit(1); 283 } 284 285 if (grandchild_pid == 0) { 286 /* Grandchild: wait for signal to exit. */ 287 close(ipc_sockets[1]); 288 if (gc_pidfd >= 0) 289 close(gc_pidfd); 290 pause(); 291 _exit(0); 292 } 293 294 /* Send grandchild PID to grandparent. */ 295 snprintf(buf, sizeof(buf), "%d", grandchild_pid); 296 write_nointr(ipc_sockets[1], buf, strlen(buf)); 297 close(ipc_sockets[1]); 298 if (gc_pidfd >= 0) 299 close(gc_pidfd); 300 301 /* Middle child exits, grandchild gets reparented. */ 302 _exit(0); 303 } 304 305 close(ipc_sockets[1]); 306 307 /* Read grandchild's PID. */ 308 ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1); 309 close(ipc_sockets[0]); 310 ASSERT_GT(ret, 0); 311 312 if (buf[0] == 'E') { 313 waitpid(mid_pid, NULL, 0); 314 prctl(PR_SET_CHILD_SUBREAPER, 0); 315 SKIP(return, "CLONE_AUTOREAP not supported"); 316 } 317 318 grandchild_pid = atoi(buf); 319 ASSERT_GT(grandchild_pid, 0); 320 321 /* Wait for the middle child to exit. */ 322 ret = waitpid(mid_pid, NULL, 0); 323 ASSERT_EQ(ret, mid_pid); 324 325 /* 326 * Now the grandchild is reparented to us (subreaper). 327 * Open a pidfd for the grandchild and kill it. 328 */ 329 pidfd = sys_pidfd_open(grandchild_pid, 0); 330 ASSERT_GE(pidfd, 0); 331 332 ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); 333 ASSERT_EQ(ret, 0); 334 335 /* Wait for it to exit via pidfd poll. */ 336 pfd.fd = pidfd; 337 pfd.events = POLLIN; 338 ret = poll(&pfd, 1, 5000); 339 ASSERT_EQ(ret, 1); 340 ASSERT_TRUE(pfd.revents & POLLIN); 341 342 /* 343 * The grandchild should have been autoreaped even though 344 * we (the new parent) haven't set SA_NOCLDWAIT. 345 * waitpid should return -1/ECHILD. 346 */ 347 ret = waitpid(grandchild_pid, NULL, WNOHANG); 348 EXPECT_EQ(ret, -1); 349 EXPECT_EQ(errno, ECHILD); 350 351 close(pidfd); 352 353 /* Clean up subreaper status. */ 354 prctl(PR_SET_CHILD_SUBREAPER, 0); 355 } 356 357 static int thread_sock_fd; 358 359 static void *thread_func(void *arg) 360 { 361 /* Signal parent we're running. */ 362 write_nointr(thread_sock_fd, "1", 1); 363 364 /* Give main thread time to call _exit() first. */ 365 usleep(200000); 366 367 return NULL; 368 } 369 370 /* 371 * Test that an autoreap child with multiple threads is properly 372 * autoreaped only after all threads have exited. 373 */ 374 TEST(autoreap_multithreaded) 375 { 376 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 377 int ipc_sockets[2], ret; 378 int pidfd = -1; 379 struct pollfd pfd; 380 pid_t pid; 381 char c; 382 383 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 384 ASSERT_EQ(ret, 0); 385 386 pid = create_autoreap_child(&pidfd); 387 if (pid < 0 && errno == EINVAL) { 388 close(ipc_sockets[0]); 389 close(ipc_sockets[1]); 390 SKIP(return, "CLONE_AUTOREAP not supported"); 391 } 392 ASSERT_GE(pid, 0); 393 394 if (pid == 0) { 395 pthread_t thread; 396 397 close(ipc_sockets[0]); 398 399 /* 400 * Create a sub-thread that outlives the main thread. 401 * The thread signals readiness, then sleeps. 402 * The main thread waits briefly, then calls _exit(). 403 */ 404 thread_sock_fd = ipc_sockets[1]; 405 pthread_create(&thread, NULL, thread_func, NULL); 406 pthread_detach(thread); 407 408 /* Wait for thread to be running. */ 409 usleep(100000); 410 411 /* Main thread exits; sub-thread is still alive. */ 412 _exit(99); 413 } 414 415 close(ipc_sockets[1]); 416 417 /* Wait for the sub-thread to signal readiness. */ 418 ret = read_nointr(ipc_sockets[0], &c, 1); 419 close(ipc_sockets[0]); 420 ASSERT_EQ(ret, 1); 421 422 /* Wait for the process to fully exit via pidfd poll. */ 423 pfd.fd = pidfd; 424 pfd.events = POLLIN; 425 ret = poll(&pfd, 1, 5000); 426 ASSERT_EQ(ret, 1); 427 ASSERT_TRUE(pfd.revents & POLLIN); 428 429 /* Verify exit info. */ 430 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 431 ASSERT_EQ(ret, 0); 432 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 433 ASSERT_TRUE(WIFEXITED(info.exit_code)); 434 ASSERT_EQ(WEXITSTATUS(info.exit_code), 99); 435 436 /* No zombie. */ 437 ret = waitpid(pid, NULL, WNOHANG); 438 ASSERT_EQ(ret, -1); 439 ASSERT_EQ(errno, ECHILD); 440 441 close(pidfd); 442 } 443 444 /* 445 * Test that autoreap is NOT inherited by grandchildren. 446 */ 447 TEST(autoreap_no_inherit) 448 { 449 int ipc_sockets[2], ret; 450 int pidfd = -1; 451 pid_t pid; 452 char buf[2] = {}; 453 struct pollfd pfd; 454 455 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 456 ASSERT_EQ(ret, 0); 457 458 pid = create_autoreap_child(&pidfd); 459 if (pid < 0 && errno == EINVAL) { 460 close(ipc_sockets[0]); 461 close(ipc_sockets[1]); 462 SKIP(return, "CLONE_AUTOREAP not supported"); 463 } 464 ASSERT_GE(pid, 0); 465 466 if (pid == 0) { 467 pid_t gc; 468 int status; 469 470 close(ipc_sockets[0]); 471 472 /* Autoreap child forks a grandchild (without autoreap). */ 473 gc = fork(); 474 if (gc < 0) { 475 write_nointr(ipc_sockets[1], "E", 1); 476 _exit(1); 477 } 478 if (gc == 0) { 479 /* Grandchild: exit immediately. */ 480 close(ipc_sockets[1]); 481 _exit(77); 482 } 483 484 /* 485 * The grandchild should become a regular zombie 486 * since it was NOT created with CLONE_AUTOREAP. 487 * Wait for it to verify. 488 */ 489 ret = waitpid(gc, &status, 0); 490 if (ret == gc && WIFEXITED(status) && 491 WEXITSTATUS(status) == 77) { 492 write_nointr(ipc_sockets[1], "P", 1); 493 } else { 494 write_nointr(ipc_sockets[1], "F", 1); 495 } 496 close(ipc_sockets[1]); 497 _exit(0); 498 } 499 500 close(ipc_sockets[1]); 501 502 ret = read_nointr(ipc_sockets[0], buf, 1); 503 close(ipc_sockets[0]); 504 ASSERT_EQ(ret, 1); 505 506 /* 507 * 'P' means the autoreap child was able to waitpid() its 508 * grandchild (correct - grandchild should be a normal zombie, 509 * not autoreaped). 510 */ 511 ASSERT_EQ(buf[0], 'P'); 512 513 /* Wait for the autoreap child to exit. */ 514 pfd.fd = pidfd; 515 pfd.events = POLLIN; 516 ret = poll(&pfd, 1, 5000); 517 ASSERT_EQ(ret, 1); 518 519 /* Autoreap child itself should be autoreaped. */ 520 ret = waitpid(pid, NULL, WNOHANG); 521 ASSERT_EQ(ret, -1); 522 ASSERT_EQ(errno, ECHILD); 523 524 close(pidfd); 525 } 526 527 /* 528 * Test that CLONE_NNP sets no_new_privs on the child. 529 * The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back. 530 * The parent must NOT have no_new_privs set afterwards. 531 */ 532 TEST(nnp_sets_no_new_privs) 533 { 534 struct __clone_args args = { 535 .flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP, 536 .exit_signal = 0, 537 }; 538 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 539 int pidfd = -1, ret; 540 struct pollfd pfd; 541 pid_t pid; 542 543 /* Ensure parent does not already have no_new_privs. */ 544 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 545 ASSERT_EQ(ret, 0) { 546 TH_LOG("Parent already has no_new_privs set, cannot run test"); 547 } 548 549 args.pidfd = ptr_to_u64(&pidfd); 550 551 pid = sys_clone3(&args, sizeof(args)); 552 if (pid < 0 && errno == EINVAL) 553 SKIP(return, "CLONE_NNP not supported"); 554 ASSERT_GE(pid, 0); 555 556 if (pid == 0) { 557 /* 558 * Child: check no_new_privs. Exit 0 if set, 1 if not. 559 */ 560 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 561 _exit(ret == 1 ? 0 : 1); 562 } 563 564 ASSERT_GE(pidfd, 0); 565 566 /* Parent must still NOT have no_new_privs. */ 567 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 568 ASSERT_EQ(ret, 0) { 569 TH_LOG("Parent got no_new_privs after creating CLONE_NNP child"); 570 } 571 572 /* Wait for child to exit. */ 573 pfd.fd = pidfd; 574 pfd.events = POLLIN; 575 ret = poll(&pfd, 1, 5000); 576 ASSERT_EQ(ret, 1); 577 578 /* Verify child exited with 0 (no_new_privs was set). */ 579 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 580 ASSERT_EQ(ret, 0); 581 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 582 ASSERT_TRUE(WIFEXITED(info.exit_code)); 583 ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { 584 TH_LOG("Child did not have no_new_privs set"); 585 } 586 587 close(pidfd); 588 } 589 590 /* 591 * Test that CLONE_NNP with CLONE_THREAD fails with EINVAL. 592 */ 593 TEST(nnp_rejects_thread) 594 { 595 struct __clone_args args = { 596 .flags = CLONE_NNP | CLONE_THREAD | 597 CLONE_SIGHAND | CLONE_VM, 598 .exit_signal = 0, 599 }; 600 pid_t pid; 601 602 pid = sys_clone3(&args, sizeof(args)); 603 ASSERT_EQ(pid, -1); 604 ASSERT_EQ(errno, EINVAL); 605 } 606 607 /* 608 * Test that a plain CLONE_AUTOREAP child does NOT get no_new_privs. 609 * Only CLONE_NNP should set it. 610 */ 611 TEST(autoreap_no_new_privs_unset) 612 { 613 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 614 int pidfd = -1, ret; 615 struct pollfd pfd; 616 pid_t pid; 617 618 pid = create_autoreap_child(&pidfd); 619 if (pid < 0 && errno == EINVAL) 620 SKIP(return, "CLONE_AUTOREAP not supported"); 621 ASSERT_GE(pid, 0); 622 623 if (pid == 0) { 624 /* 625 * Child: check no_new_privs. Exit 0 if NOT set, 1 if set. 626 */ 627 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 628 _exit(ret == 0 ? 0 : 1); 629 } 630 631 ASSERT_GE(pidfd, 0); 632 633 pfd.fd = pidfd; 634 pfd.events = POLLIN; 635 ret = poll(&pfd, 1, 5000); 636 ASSERT_EQ(ret, 1); 637 638 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 639 ASSERT_EQ(ret, 0); 640 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 641 ASSERT_TRUE(WIFEXITED(info.exit_code)); 642 ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { 643 TH_LOG("Plain autoreap child unexpectedly has no_new_privs"); 644 } 645 646 close(pidfd); 647 } 648 649 /* 650 * Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP. 651 */ 652 static pid_t create_autokill_child(int *pidfd) 653 { 654 struct __clone_args args = { 655 .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 656 CLONE_AUTOREAP | CLONE_NNP, 657 .exit_signal = 0, 658 .pidfd = ptr_to_u64(pidfd), 659 }; 660 661 return sys_clone3(&args, sizeof(args)); 662 } 663 664 /* 665 * Basic autokill test: child blocks in pause(), parent closes the 666 * clone3 pidfd, child should be killed and autoreaped. 667 */ 668 TEST(autokill_basic) 669 { 670 int pidfd = -1, pollfd_fd = -1, ret; 671 struct pollfd pfd; 672 pid_t pid; 673 674 pid = create_autokill_child(&pidfd); 675 if (pid < 0 && errno == EINVAL) 676 SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 677 ASSERT_GE(pid, 0); 678 679 if (pid == 0) { 680 pause(); 681 _exit(1); 682 } 683 684 ASSERT_GE(pidfd, 0); 685 686 /* 687 * Open a second pidfd via pidfd_open() so we can observe the 688 * child's death after closing the clone3 pidfd. 689 */ 690 pollfd_fd = sys_pidfd_open(pid, 0); 691 ASSERT_GE(pollfd_fd, 0); 692 693 /* Close the clone3 pidfd — this should trigger autokill. */ 694 close(pidfd); 695 696 /* Wait for the child to die via the pidfd_open'd fd. */ 697 pfd.fd = pollfd_fd; 698 pfd.events = POLLIN; 699 ret = poll(&pfd, 1, 5000); 700 ASSERT_EQ(ret, 1); 701 ASSERT_TRUE(pfd.revents & POLLIN); 702 703 /* Child should be autoreaped — no zombie. */ 704 usleep(100000); 705 ret = waitpid(pid, NULL, WNOHANG); 706 ASSERT_EQ(ret, -1); 707 ASSERT_EQ(errno, ECHILD); 708 709 close(pollfd_fd); 710 } 711 712 /* 713 * CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL. 714 */ 715 TEST(autokill_requires_pidfd) 716 { 717 struct __clone_args args = { 718 .flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP, 719 .exit_signal = 0, 720 }; 721 pid_t pid; 722 723 pid = sys_clone3(&args, sizeof(args)); 724 ASSERT_EQ(pid, -1); 725 ASSERT_EQ(errno, EINVAL); 726 } 727 728 /* 729 * CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL. 730 */ 731 TEST(autokill_requires_autoreap) 732 { 733 int pidfd = -1; 734 struct __clone_args args = { 735 .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL, 736 .exit_signal = 0, 737 .pidfd = ptr_to_u64(&pidfd), 738 }; 739 pid_t pid; 740 741 pid = sys_clone3(&args, sizeof(args)); 742 ASSERT_EQ(pid, -1); 743 ASSERT_EQ(errno, EINVAL); 744 } 745 746 /* 747 * CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL. 748 */ 749 TEST(autokill_rejects_thread) 750 { 751 int pidfd = -1; 752 struct __clone_args args = { 753 .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 754 CLONE_AUTOREAP | CLONE_THREAD | 755 CLONE_SIGHAND | CLONE_VM, 756 .exit_signal = 0, 757 .pidfd = ptr_to_u64(&pidfd), 758 }; 759 pid_t pid; 760 761 pid = sys_clone3(&args, sizeof(args)); 762 ASSERT_EQ(pid, -1); 763 ASSERT_EQ(errno, EINVAL); 764 } 765 766 /* 767 * Test that only the clone3 pidfd triggers autokill, not pidfd_open(). 768 * Close the pidfd_open'd fd first — child should survive. 769 * Then close the clone3 pidfd — child should be killed and autoreaped. 770 */ 771 TEST(autokill_pidfd_open_no_effect) 772 { 773 int pidfd = -1, open_fd = -1, ret; 774 struct pollfd pfd; 775 pid_t pid; 776 777 pid = create_autokill_child(&pidfd); 778 if (pid < 0 && errno == EINVAL) 779 SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 780 ASSERT_GE(pid, 0); 781 782 if (pid == 0) { 783 pause(); 784 _exit(1); 785 } 786 787 ASSERT_GE(pidfd, 0); 788 789 /* Open a second pidfd via pidfd_open(). */ 790 open_fd = sys_pidfd_open(pid, 0); 791 ASSERT_GE(open_fd, 0); 792 793 /* 794 * Close the pidfd_open'd fd — child should survive because 795 * only the clone3 pidfd has autokill. 796 */ 797 close(open_fd); 798 usleep(200000); 799 800 /* Verify child is still alive by polling the clone3 pidfd. */ 801 pfd.fd = pidfd; 802 pfd.events = POLLIN; 803 ret = poll(&pfd, 1, 0); 804 ASSERT_EQ(ret, 0) { 805 TH_LOG("Child died after closing pidfd_open fd — should still be alive"); 806 } 807 808 /* Open another observation fd before triggering autokill. */ 809 open_fd = sys_pidfd_open(pid, 0); 810 ASSERT_GE(open_fd, 0); 811 812 /* Now close the clone3 pidfd — this triggers autokill. */ 813 close(pidfd); 814 815 pfd.fd = open_fd; 816 pfd.events = POLLIN; 817 ret = poll(&pfd, 1, 5000); 818 ASSERT_EQ(ret, 1); 819 ASSERT_TRUE(pfd.revents & POLLIN); 820 821 /* Child should be autoreaped — no zombie. */ 822 usleep(100000); 823 ret = waitpid(pid, NULL, WNOHANG); 824 ASSERT_EQ(ret, -1); 825 ASSERT_EQ(errno, ECHILD); 826 827 close(open_fd); 828 } 829 830 /* 831 * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM 832 * for an unprivileged caller. 833 */ 834 TEST(autokill_requires_cap_sys_admin) 835 { 836 int pidfd = -1, ret; 837 struct __clone_args args = { 838 .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 839 CLONE_AUTOREAP, 840 .exit_signal = 0, 841 .pidfd = ptr_to_u64(&pidfd), 842 }; 843 pid_t pid; 844 845 /* Drop all capabilities so we lack CAP_SYS_ADMIN. */ 846 ret = drop_all_caps(); 847 ASSERT_EQ(ret, 0); 848 849 pid = sys_clone3(&args, sizeof(args)); 850 ASSERT_EQ(pid, -1); 851 ASSERT_EQ(errno, EPERM); 852 } 853 854 /* 855 * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with 856 * CAP_SYS_ADMIN. 857 */ 858 TEST(autokill_without_nnp_with_cap) 859 { 860 struct __clone_args args = { 861 .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 862 CLONE_AUTOREAP, 863 .exit_signal = 0, 864 }; 865 struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 866 int pidfd = -1, ret; 867 struct pollfd pfd; 868 pid_t pid; 869 870 if (geteuid() != 0) 871 SKIP(return, "Need root/CAP_SYS_ADMIN"); 872 873 args.pidfd = ptr_to_u64(&pidfd); 874 875 pid = sys_clone3(&args, sizeof(args)); 876 if (pid < 0 && errno == EINVAL) 877 SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 878 ASSERT_GE(pid, 0); 879 880 if (pid == 0) 881 _exit(0); 882 883 ASSERT_GE(pidfd, 0); 884 885 /* Wait for child to exit. */ 886 pfd.fd = pidfd; 887 pfd.events = POLLIN; 888 ret = poll(&pfd, 1, 5000); 889 ASSERT_EQ(ret, 1); 890 891 ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 892 ASSERT_EQ(ret, 0); 893 ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 894 ASSERT_TRUE(WIFEXITED(info.exit_code)); 895 ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); 896 897 close(pidfd); 898 } 899 900 TEST_HARNESS_MAIN 901