176d46ad2SChristian Brauner // SPDX-License-Identifier: GPL-2.0 276d46ad2SChristian Brauner // Copyright (c) 2026 Christian Brauner <brauner@kernel.org> 376d46ad2SChristian Brauner 476d46ad2SChristian Brauner #define _GNU_SOURCE 576d46ad2SChristian Brauner #include <errno.h> 676d46ad2SChristian Brauner #include <linux/types.h> 776d46ad2SChristian Brauner #include <poll.h> 876d46ad2SChristian Brauner #include <pthread.h> 976d46ad2SChristian Brauner #include <sched.h> 1076d46ad2SChristian Brauner #include <signal.h> 1176d46ad2SChristian Brauner #include <stdio.h> 1276d46ad2SChristian Brauner #include <stdlib.h> 1376d46ad2SChristian Brauner #include <string.h> 1476d46ad2SChristian Brauner #include <syscall.h> 1576d46ad2SChristian Brauner #include <sys/ioctl.h> 1676d46ad2SChristian Brauner #include <sys/prctl.h> 1776d46ad2SChristian Brauner #include <sys/socket.h> 1876d46ad2SChristian Brauner #include <sys/types.h> 1976d46ad2SChristian Brauner #include <sys/wait.h> 2076d46ad2SChristian Brauner #include <unistd.h> 2176d46ad2SChristian Brauner 2276d46ad2SChristian Brauner #include "pidfd.h" 2376d46ad2SChristian Brauner #include "kselftest_harness.h" 2476d46ad2SChristian Brauner 2576d46ad2SChristian Brauner #ifndef CLONE_AUTOREAP 2676d46ad2SChristian Brauner #define CLONE_AUTOREAP (1ULL << 34) 2776d46ad2SChristian Brauner #endif 2876d46ad2SChristian Brauner 292a4d85aaSChristian Brauner #ifndef CLONE_NNP 302a4d85aaSChristian Brauner #define CLONE_NNP (1ULL << 35) 312a4d85aaSChristian Brauner #endif 322a4d85aaSChristian Brauner 33*ec26879eSChristian Brauner #ifndef CLONE_PIDFD_AUTOKILL 34*ec26879eSChristian Brauner #define CLONE_PIDFD_AUTOKILL (1ULL << 36) 35*ec26879eSChristian Brauner #endif 36*ec26879eSChristian Brauner 37*ec26879eSChristian Brauner #ifndef _LINUX_CAPABILITY_VERSION_3 38*ec26879eSChristian Brauner #define _LINUX_CAPABILITY_VERSION_3 0x20080522 39*ec26879eSChristian Brauner #endif 40*ec26879eSChristian Brauner 41*ec26879eSChristian Brauner struct cap_header { 42*ec26879eSChristian Brauner __u32 version; 43*ec26879eSChristian Brauner int pid; 44*ec26879eSChristian Brauner }; 45*ec26879eSChristian Brauner 46*ec26879eSChristian Brauner struct cap_data { 47*ec26879eSChristian Brauner __u32 effective; 48*ec26879eSChristian Brauner __u32 permitted; 49*ec26879eSChristian Brauner __u32 inheritable; 50*ec26879eSChristian Brauner }; 51*ec26879eSChristian Brauner 52*ec26879eSChristian Brauner static int drop_all_caps(void) 53*ec26879eSChristian Brauner { 54*ec26879eSChristian Brauner struct cap_header hdr = { .version = _LINUX_CAPABILITY_VERSION_3 }; 55*ec26879eSChristian Brauner struct cap_data data[2] = {}; 56*ec26879eSChristian Brauner 57*ec26879eSChristian Brauner return syscall(__NR_capset, &hdr, data); 58*ec26879eSChristian Brauner } 59*ec26879eSChristian Brauner 6076d46ad2SChristian Brauner static pid_t create_autoreap_child(int *pidfd) 6176d46ad2SChristian Brauner { 6276d46ad2SChristian Brauner struct __clone_args args = { 6376d46ad2SChristian Brauner .flags = CLONE_PIDFD | CLONE_AUTOREAP, 6476d46ad2SChristian Brauner .exit_signal = 0, 6576d46ad2SChristian Brauner .pidfd = ptr_to_u64(pidfd), 6676d46ad2SChristian Brauner }; 6776d46ad2SChristian Brauner 6876d46ad2SChristian Brauner return sys_clone3(&args, sizeof(args)); 6976d46ad2SChristian Brauner } 7076d46ad2SChristian Brauner 7176d46ad2SChristian Brauner /* 7276d46ad2SChristian Brauner * Test that CLONE_AUTOREAP works without CLONE_PIDFD (fire-and-forget). 7376d46ad2SChristian Brauner */ 7476d46ad2SChristian Brauner TEST(autoreap_without_pidfd) 7576d46ad2SChristian Brauner { 7676d46ad2SChristian Brauner struct __clone_args args = { 7776d46ad2SChristian Brauner .flags = CLONE_AUTOREAP, 7876d46ad2SChristian Brauner .exit_signal = 0, 7976d46ad2SChristian Brauner }; 8076d46ad2SChristian Brauner pid_t pid; 8176d46ad2SChristian Brauner int ret; 8276d46ad2SChristian Brauner 8376d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args)); 8476d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) 8576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 8676d46ad2SChristian Brauner ASSERT_GE(pid, 0); 8776d46ad2SChristian Brauner 8876d46ad2SChristian Brauner if (pid == 0) 8976d46ad2SChristian Brauner _exit(0); 9076d46ad2SChristian Brauner 9176d46ad2SChristian Brauner /* 9276d46ad2SChristian Brauner * Give the child a moment to exit and be autoreaped. 9376d46ad2SChristian Brauner * Then verify no zombie remains. 9476d46ad2SChristian Brauner */ 9576d46ad2SChristian Brauner usleep(200000); 9676d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 9776d46ad2SChristian Brauner ASSERT_EQ(ret, -1); 9876d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD); 9976d46ad2SChristian Brauner } 10076d46ad2SChristian Brauner 10176d46ad2SChristian Brauner /* 10276d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with a non-zero exit_signal fails. 10376d46ad2SChristian Brauner */ 10476d46ad2SChristian Brauner TEST(autoreap_rejects_exit_signal) 10576d46ad2SChristian Brauner { 10676d46ad2SChristian Brauner struct __clone_args args = { 10776d46ad2SChristian Brauner .flags = CLONE_AUTOREAP, 10876d46ad2SChristian Brauner .exit_signal = SIGCHLD, 10976d46ad2SChristian Brauner }; 11076d46ad2SChristian Brauner pid_t pid; 11176d46ad2SChristian Brauner 11276d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args)); 11376d46ad2SChristian Brauner ASSERT_EQ(pid, -1); 11476d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL); 11576d46ad2SChristian Brauner } 11676d46ad2SChristian Brauner 11776d46ad2SChristian Brauner /* 11876d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with CLONE_PARENT fails. 11976d46ad2SChristian Brauner */ 12076d46ad2SChristian Brauner TEST(autoreap_rejects_parent) 12176d46ad2SChristian Brauner { 12276d46ad2SChristian Brauner struct __clone_args args = { 12376d46ad2SChristian Brauner .flags = CLONE_AUTOREAP | CLONE_PARENT, 12476d46ad2SChristian Brauner .exit_signal = 0, 12576d46ad2SChristian Brauner }; 12676d46ad2SChristian Brauner pid_t pid; 12776d46ad2SChristian Brauner 12876d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args)); 12976d46ad2SChristian Brauner ASSERT_EQ(pid, -1); 13076d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL); 13176d46ad2SChristian Brauner } 13276d46ad2SChristian Brauner 13376d46ad2SChristian Brauner /* 13476d46ad2SChristian Brauner * Test that CLONE_AUTOREAP with CLONE_THREAD fails. 13576d46ad2SChristian Brauner */ 13676d46ad2SChristian Brauner TEST(autoreap_rejects_thread) 13776d46ad2SChristian Brauner { 13876d46ad2SChristian Brauner struct __clone_args args = { 13976d46ad2SChristian Brauner .flags = CLONE_AUTOREAP | CLONE_THREAD | 14076d46ad2SChristian Brauner CLONE_SIGHAND | CLONE_VM, 14176d46ad2SChristian Brauner .exit_signal = 0, 14276d46ad2SChristian Brauner }; 14376d46ad2SChristian Brauner pid_t pid; 14476d46ad2SChristian Brauner 14576d46ad2SChristian Brauner pid = sys_clone3(&args, sizeof(args)); 14676d46ad2SChristian Brauner ASSERT_EQ(pid, -1); 14776d46ad2SChristian Brauner ASSERT_EQ(errno, EINVAL); 14876d46ad2SChristian Brauner } 14976d46ad2SChristian Brauner 15076d46ad2SChristian Brauner /* 15176d46ad2SChristian Brauner * Basic test: create an autoreap child, let it exit, verify: 15276d46ad2SChristian Brauner * - pidfd becomes readable (poll returns POLLIN) 15376d46ad2SChristian Brauner * - PIDFD_GET_INFO returns the correct exit code 15476d46ad2SChristian Brauner * - waitpid() returns -1/ECHILD (no zombie) 15576d46ad2SChristian Brauner */ 15676d46ad2SChristian Brauner TEST(autoreap_basic) 15776d46ad2SChristian Brauner { 15876d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 15976d46ad2SChristian Brauner int pidfd = -1, ret; 16076d46ad2SChristian Brauner struct pollfd pfd; 16176d46ad2SChristian Brauner pid_t pid; 16276d46ad2SChristian Brauner 16376d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd); 16476d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) 16576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 16676d46ad2SChristian Brauner ASSERT_GE(pid, 0); 16776d46ad2SChristian Brauner 16876d46ad2SChristian Brauner if (pid == 0) 16976d46ad2SChristian Brauner _exit(42); 17076d46ad2SChristian Brauner 17176d46ad2SChristian Brauner ASSERT_GE(pidfd, 0); 17276d46ad2SChristian Brauner 17376d46ad2SChristian Brauner /* Wait for the child to exit via pidfd poll. */ 17476d46ad2SChristian Brauner pfd.fd = pidfd; 17576d46ad2SChristian Brauner pfd.events = POLLIN; 17676d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000); 17776d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 17876d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 17976d46ad2SChristian Brauner 18076d46ad2SChristian Brauner /* Verify exit info via PIDFD_GET_INFO. */ 18176d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 18276d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 18376d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 18476d46ad2SChristian Brauner /* 18576d46ad2SChristian Brauner * exit_code is in waitpid format: for _exit(42), 18676d46ad2SChristian Brauner * WIFEXITED is true and WEXITSTATUS is 42. 18776d46ad2SChristian Brauner */ 18876d46ad2SChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code)); 18976d46ad2SChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 42); 19076d46ad2SChristian Brauner 19176d46ad2SChristian Brauner /* Verify no zombie: waitpid should fail with ECHILD. */ 19276d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 19376d46ad2SChristian Brauner ASSERT_EQ(ret, -1); 19476d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD); 19576d46ad2SChristian Brauner 19676d46ad2SChristian Brauner close(pidfd); 19776d46ad2SChristian Brauner } 19876d46ad2SChristian Brauner 19976d46ad2SChristian Brauner /* 20076d46ad2SChristian Brauner * Test that an autoreap child killed by a signal reports 20176d46ad2SChristian Brauner * the correct exit info. 20276d46ad2SChristian Brauner */ 20376d46ad2SChristian Brauner TEST(autoreap_signaled) 20476d46ad2SChristian Brauner { 20576d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 20676d46ad2SChristian Brauner int pidfd = -1, ret; 20776d46ad2SChristian Brauner struct pollfd pfd; 20876d46ad2SChristian Brauner pid_t pid; 20976d46ad2SChristian Brauner 21076d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd); 21176d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) 21276d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 21376d46ad2SChristian Brauner ASSERT_GE(pid, 0); 21476d46ad2SChristian Brauner 21576d46ad2SChristian Brauner if (pid == 0) { 21676d46ad2SChristian Brauner pause(); 21776d46ad2SChristian Brauner _exit(1); 21876d46ad2SChristian Brauner } 21976d46ad2SChristian Brauner 22076d46ad2SChristian Brauner ASSERT_GE(pidfd, 0); 22176d46ad2SChristian Brauner 22276d46ad2SChristian Brauner /* Kill the child. */ 22376d46ad2SChristian Brauner ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); 22476d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 22576d46ad2SChristian Brauner 22676d46ad2SChristian Brauner /* Wait for exit via pidfd. */ 22776d46ad2SChristian Brauner pfd.fd = pidfd; 22876d46ad2SChristian Brauner pfd.events = POLLIN; 22976d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000); 23076d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 23176d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 23276d46ad2SChristian Brauner 23376d46ad2SChristian Brauner /* Verify signal info. */ 23476d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 23576d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 23676d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 23776d46ad2SChristian Brauner ASSERT_TRUE(WIFSIGNALED(info.exit_code)); 23876d46ad2SChristian Brauner ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); 23976d46ad2SChristian Brauner 24076d46ad2SChristian Brauner /* No zombie. */ 24176d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 24276d46ad2SChristian Brauner ASSERT_EQ(ret, -1); 24376d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD); 24476d46ad2SChristian Brauner 24576d46ad2SChristian Brauner close(pidfd); 24676d46ad2SChristian Brauner } 24776d46ad2SChristian Brauner 24876d46ad2SChristian Brauner /* 24976d46ad2SChristian Brauner * Test autoreap survives reparenting: middle process creates an 25076d46ad2SChristian Brauner * autoreap grandchild, then exits. The grandchild gets reparented 25176d46ad2SChristian Brauner * to us (the grandparent, which is a subreaper). When the grandchild 25276d46ad2SChristian Brauner * exits, it should still be autoreaped - no zombie under us. 25376d46ad2SChristian Brauner */ 25476d46ad2SChristian Brauner TEST(autoreap_reparent) 25576d46ad2SChristian Brauner { 25676d46ad2SChristian Brauner int ipc_sockets[2], ret; 25776d46ad2SChristian Brauner int pidfd = -1; 25876d46ad2SChristian Brauner struct pollfd pfd; 25976d46ad2SChristian Brauner pid_t mid_pid, grandchild_pid; 26076d46ad2SChristian Brauner char buf[32] = {}; 26176d46ad2SChristian Brauner 26276d46ad2SChristian Brauner /* Make ourselves a subreaper so reparented children come to us. */ 26376d46ad2SChristian Brauner ret = prctl(PR_SET_CHILD_SUBREAPER, 1); 26476d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 26576d46ad2SChristian Brauner 26676d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 26776d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 26876d46ad2SChristian Brauner 26976d46ad2SChristian Brauner mid_pid = fork(); 27076d46ad2SChristian Brauner ASSERT_GE(mid_pid, 0); 27176d46ad2SChristian Brauner 27276d46ad2SChristian Brauner if (mid_pid == 0) { 27376d46ad2SChristian Brauner /* Middle child: create an autoreap grandchild. */ 27476d46ad2SChristian Brauner int gc_pidfd = -1; 27576d46ad2SChristian Brauner 27676d46ad2SChristian Brauner close(ipc_sockets[0]); 27776d46ad2SChristian Brauner 27876d46ad2SChristian Brauner grandchild_pid = create_autoreap_child(&gc_pidfd); 27976d46ad2SChristian Brauner if (grandchild_pid < 0) { 28076d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "E", 1); 28176d46ad2SChristian Brauner close(ipc_sockets[1]); 28276d46ad2SChristian Brauner _exit(1); 28376d46ad2SChristian Brauner } 28476d46ad2SChristian Brauner 28576d46ad2SChristian Brauner if (grandchild_pid == 0) { 28676d46ad2SChristian Brauner /* Grandchild: wait for signal to exit. */ 28776d46ad2SChristian Brauner close(ipc_sockets[1]); 28876d46ad2SChristian Brauner if (gc_pidfd >= 0) 28976d46ad2SChristian Brauner close(gc_pidfd); 29076d46ad2SChristian Brauner pause(); 29176d46ad2SChristian Brauner _exit(0); 29276d46ad2SChristian Brauner } 29376d46ad2SChristian Brauner 29476d46ad2SChristian Brauner /* Send grandchild PID to grandparent. */ 29576d46ad2SChristian Brauner snprintf(buf, sizeof(buf), "%d", grandchild_pid); 29676d46ad2SChristian Brauner write_nointr(ipc_sockets[1], buf, strlen(buf)); 29776d46ad2SChristian Brauner close(ipc_sockets[1]); 29876d46ad2SChristian Brauner if (gc_pidfd >= 0) 29976d46ad2SChristian Brauner close(gc_pidfd); 30076d46ad2SChristian Brauner 30176d46ad2SChristian Brauner /* Middle child exits, grandchild gets reparented. */ 30276d46ad2SChristian Brauner _exit(0); 30376d46ad2SChristian Brauner } 30476d46ad2SChristian Brauner 30576d46ad2SChristian Brauner close(ipc_sockets[1]); 30676d46ad2SChristian Brauner 30776d46ad2SChristian Brauner /* Read grandchild's PID. */ 30876d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], buf, sizeof(buf) - 1); 30976d46ad2SChristian Brauner close(ipc_sockets[0]); 31076d46ad2SChristian Brauner ASSERT_GT(ret, 0); 31176d46ad2SChristian Brauner 31276d46ad2SChristian Brauner if (buf[0] == 'E') { 31376d46ad2SChristian Brauner waitpid(mid_pid, NULL, 0); 31476d46ad2SChristian Brauner prctl(PR_SET_CHILD_SUBREAPER, 0); 31576d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 31676d46ad2SChristian Brauner } 31776d46ad2SChristian Brauner 31876d46ad2SChristian Brauner grandchild_pid = atoi(buf); 31976d46ad2SChristian Brauner ASSERT_GT(grandchild_pid, 0); 32076d46ad2SChristian Brauner 32176d46ad2SChristian Brauner /* Wait for the middle child to exit. */ 32276d46ad2SChristian Brauner ret = waitpid(mid_pid, NULL, 0); 32376d46ad2SChristian Brauner ASSERT_EQ(ret, mid_pid); 32476d46ad2SChristian Brauner 32576d46ad2SChristian Brauner /* 32676d46ad2SChristian Brauner * Now the grandchild is reparented to us (subreaper). 32776d46ad2SChristian Brauner * Open a pidfd for the grandchild and kill it. 32876d46ad2SChristian Brauner */ 32976d46ad2SChristian Brauner pidfd = sys_pidfd_open(grandchild_pid, 0); 33076d46ad2SChristian Brauner ASSERT_GE(pidfd, 0); 33176d46ad2SChristian Brauner 33276d46ad2SChristian Brauner ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); 33376d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 33476d46ad2SChristian Brauner 33576d46ad2SChristian Brauner /* Wait for it to exit via pidfd poll. */ 33676d46ad2SChristian Brauner pfd.fd = pidfd; 33776d46ad2SChristian Brauner pfd.events = POLLIN; 33876d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000); 33976d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 34076d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 34176d46ad2SChristian Brauner 34276d46ad2SChristian Brauner /* 34376d46ad2SChristian Brauner * The grandchild should have been autoreaped even though 34476d46ad2SChristian Brauner * we (the new parent) haven't set SA_NOCLDWAIT. 34576d46ad2SChristian Brauner * waitpid should return -1/ECHILD. 34676d46ad2SChristian Brauner */ 34776d46ad2SChristian Brauner ret = waitpid(grandchild_pid, NULL, WNOHANG); 34876d46ad2SChristian Brauner EXPECT_EQ(ret, -1); 34976d46ad2SChristian Brauner EXPECT_EQ(errno, ECHILD); 35076d46ad2SChristian Brauner 35176d46ad2SChristian Brauner close(pidfd); 35276d46ad2SChristian Brauner 35376d46ad2SChristian Brauner /* Clean up subreaper status. */ 35476d46ad2SChristian Brauner prctl(PR_SET_CHILD_SUBREAPER, 0); 35576d46ad2SChristian Brauner } 35676d46ad2SChristian Brauner 35776d46ad2SChristian Brauner static int thread_sock_fd; 35876d46ad2SChristian Brauner 35976d46ad2SChristian Brauner static void *thread_func(void *arg) 36076d46ad2SChristian Brauner { 36176d46ad2SChristian Brauner /* Signal parent we're running. */ 36276d46ad2SChristian Brauner write_nointr(thread_sock_fd, "1", 1); 36376d46ad2SChristian Brauner 36476d46ad2SChristian Brauner /* Give main thread time to call _exit() first. */ 36576d46ad2SChristian Brauner usleep(200000); 36676d46ad2SChristian Brauner 36776d46ad2SChristian Brauner return NULL; 36876d46ad2SChristian Brauner } 36976d46ad2SChristian Brauner 37076d46ad2SChristian Brauner /* 37176d46ad2SChristian Brauner * Test that an autoreap child with multiple threads is properly 37276d46ad2SChristian Brauner * autoreaped only after all threads have exited. 37376d46ad2SChristian Brauner */ 37476d46ad2SChristian Brauner TEST(autoreap_multithreaded) 37576d46ad2SChristian Brauner { 37676d46ad2SChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 37776d46ad2SChristian Brauner int ipc_sockets[2], ret; 37876d46ad2SChristian Brauner int pidfd = -1; 37976d46ad2SChristian Brauner struct pollfd pfd; 38076d46ad2SChristian Brauner pid_t pid; 38176d46ad2SChristian Brauner char c; 38276d46ad2SChristian Brauner 38376d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 38476d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 38576d46ad2SChristian Brauner 38676d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd); 38776d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) { 38876d46ad2SChristian Brauner close(ipc_sockets[0]); 38976d46ad2SChristian Brauner close(ipc_sockets[1]); 39076d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 39176d46ad2SChristian Brauner } 39276d46ad2SChristian Brauner ASSERT_GE(pid, 0); 39376d46ad2SChristian Brauner 39476d46ad2SChristian Brauner if (pid == 0) { 39576d46ad2SChristian Brauner pthread_t thread; 39676d46ad2SChristian Brauner 39776d46ad2SChristian Brauner close(ipc_sockets[0]); 39876d46ad2SChristian Brauner 39976d46ad2SChristian Brauner /* 40076d46ad2SChristian Brauner * Create a sub-thread that outlives the main thread. 40176d46ad2SChristian Brauner * The thread signals readiness, then sleeps. 40276d46ad2SChristian Brauner * The main thread waits briefly, then calls _exit(). 40376d46ad2SChristian Brauner */ 40476d46ad2SChristian Brauner thread_sock_fd = ipc_sockets[1]; 40576d46ad2SChristian Brauner pthread_create(&thread, NULL, thread_func, NULL); 40676d46ad2SChristian Brauner pthread_detach(thread); 40776d46ad2SChristian Brauner 40876d46ad2SChristian Brauner /* Wait for thread to be running. */ 40976d46ad2SChristian Brauner usleep(100000); 41076d46ad2SChristian Brauner 41176d46ad2SChristian Brauner /* Main thread exits; sub-thread is still alive. */ 41276d46ad2SChristian Brauner _exit(99); 41376d46ad2SChristian Brauner } 41476d46ad2SChristian Brauner 41576d46ad2SChristian Brauner close(ipc_sockets[1]); 41676d46ad2SChristian Brauner 41776d46ad2SChristian Brauner /* Wait for the sub-thread to signal readiness. */ 41876d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], &c, 1); 41976d46ad2SChristian Brauner close(ipc_sockets[0]); 42076d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 42176d46ad2SChristian Brauner 42276d46ad2SChristian Brauner /* Wait for the process to fully exit via pidfd poll. */ 42376d46ad2SChristian Brauner pfd.fd = pidfd; 42476d46ad2SChristian Brauner pfd.events = POLLIN; 42576d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000); 42676d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 42776d46ad2SChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 42876d46ad2SChristian Brauner 42976d46ad2SChristian Brauner /* Verify exit info. */ 43076d46ad2SChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 43176d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 43276d46ad2SChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 43376d46ad2SChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code)); 43476d46ad2SChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 99); 43576d46ad2SChristian Brauner 43676d46ad2SChristian Brauner /* No zombie. */ 43776d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 43876d46ad2SChristian Brauner ASSERT_EQ(ret, -1); 43976d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD); 44076d46ad2SChristian Brauner 44176d46ad2SChristian Brauner close(pidfd); 44276d46ad2SChristian Brauner } 44376d46ad2SChristian Brauner 44476d46ad2SChristian Brauner /* 44576d46ad2SChristian Brauner * Test that autoreap is NOT inherited by grandchildren. 44676d46ad2SChristian Brauner */ 44776d46ad2SChristian Brauner TEST(autoreap_no_inherit) 44876d46ad2SChristian Brauner { 44976d46ad2SChristian Brauner int ipc_sockets[2], ret; 45076d46ad2SChristian Brauner int pidfd = -1; 45176d46ad2SChristian Brauner pid_t pid; 45276d46ad2SChristian Brauner char buf[2] = {}; 45376d46ad2SChristian Brauner struct pollfd pfd; 45476d46ad2SChristian Brauner 45576d46ad2SChristian Brauner ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); 45676d46ad2SChristian Brauner ASSERT_EQ(ret, 0); 45776d46ad2SChristian Brauner 45876d46ad2SChristian Brauner pid = create_autoreap_child(&pidfd); 45976d46ad2SChristian Brauner if (pid < 0 && errno == EINVAL) { 46076d46ad2SChristian Brauner close(ipc_sockets[0]); 46176d46ad2SChristian Brauner close(ipc_sockets[1]); 46276d46ad2SChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 46376d46ad2SChristian Brauner } 46476d46ad2SChristian Brauner ASSERT_GE(pid, 0); 46576d46ad2SChristian Brauner 46676d46ad2SChristian Brauner if (pid == 0) { 46776d46ad2SChristian Brauner pid_t gc; 46876d46ad2SChristian Brauner int status; 46976d46ad2SChristian Brauner 47076d46ad2SChristian Brauner close(ipc_sockets[0]); 47176d46ad2SChristian Brauner 47276d46ad2SChristian Brauner /* Autoreap child forks a grandchild (without autoreap). */ 47376d46ad2SChristian Brauner gc = fork(); 47476d46ad2SChristian Brauner if (gc < 0) { 47576d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "E", 1); 47676d46ad2SChristian Brauner _exit(1); 47776d46ad2SChristian Brauner } 47876d46ad2SChristian Brauner if (gc == 0) { 47976d46ad2SChristian Brauner /* Grandchild: exit immediately. */ 48076d46ad2SChristian Brauner close(ipc_sockets[1]); 48176d46ad2SChristian Brauner _exit(77); 48276d46ad2SChristian Brauner } 48376d46ad2SChristian Brauner 48476d46ad2SChristian Brauner /* 48576d46ad2SChristian Brauner * The grandchild should become a regular zombie 48676d46ad2SChristian Brauner * since it was NOT created with CLONE_AUTOREAP. 48776d46ad2SChristian Brauner * Wait for it to verify. 48876d46ad2SChristian Brauner */ 48976d46ad2SChristian Brauner ret = waitpid(gc, &status, 0); 49076d46ad2SChristian Brauner if (ret == gc && WIFEXITED(status) && 49176d46ad2SChristian Brauner WEXITSTATUS(status) == 77) { 49276d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "P", 1); 49376d46ad2SChristian Brauner } else { 49476d46ad2SChristian Brauner write_nointr(ipc_sockets[1], "F", 1); 49576d46ad2SChristian Brauner } 49676d46ad2SChristian Brauner close(ipc_sockets[1]); 49776d46ad2SChristian Brauner _exit(0); 49876d46ad2SChristian Brauner } 49976d46ad2SChristian Brauner 50076d46ad2SChristian Brauner close(ipc_sockets[1]); 50176d46ad2SChristian Brauner 50276d46ad2SChristian Brauner ret = read_nointr(ipc_sockets[0], buf, 1); 50376d46ad2SChristian Brauner close(ipc_sockets[0]); 50476d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 50576d46ad2SChristian Brauner 50676d46ad2SChristian Brauner /* 50776d46ad2SChristian Brauner * 'P' means the autoreap child was able to waitpid() its 50876d46ad2SChristian Brauner * grandchild (correct - grandchild should be a normal zombie, 50976d46ad2SChristian Brauner * not autoreaped). 51076d46ad2SChristian Brauner */ 51176d46ad2SChristian Brauner ASSERT_EQ(buf[0], 'P'); 51276d46ad2SChristian Brauner 51376d46ad2SChristian Brauner /* Wait for the autoreap child to exit. */ 51476d46ad2SChristian Brauner pfd.fd = pidfd; 51576d46ad2SChristian Brauner pfd.events = POLLIN; 51676d46ad2SChristian Brauner ret = poll(&pfd, 1, 5000); 51776d46ad2SChristian Brauner ASSERT_EQ(ret, 1); 51876d46ad2SChristian Brauner 51976d46ad2SChristian Brauner /* Autoreap child itself should be autoreaped. */ 52076d46ad2SChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 52176d46ad2SChristian Brauner ASSERT_EQ(ret, -1); 52276d46ad2SChristian Brauner ASSERT_EQ(errno, ECHILD); 52376d46ad2SChristian Brauner 52476d46ad2SChristian Brauner close(pidfd); 52576d46ad2SChristian Brauner } 52676d46ad2SChristian Brauner 5272a4d85aaSChristian Brauner /* 5282a4d85aaSChristian Brauner * Test that CLONE_NNP sets no_new_privs on the child. 5292a4d85aaSChristian Brauner * The child checks via prctl(PR_GET_NO_NEW_PRIVS) and reports back. 5302a4d85aaSChristian Brauner * The parent must NOT have no_new_privs set afterwards. 5312a4d85aaSChristian Brauner */ 5322a4d85aaSChristian Brauner TEST(nnp_sets_no_new_privs) 5332a4d85aaSChristian Brauner { 5342a4d85aaSChristian Brauner struct __clone_args args = { 5352a4d85aaSChristian Brauner .flags = CLONE_PIDFD | CLONE_AUTOREAP | CLONE_NNP, 5362a4d85aaSChristian Brauner .exit_signal = 0, 5372a4d85aaSChristian Brauner }; 5382a4d85aaSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 5392a4d85aaSChristian Brauner int pidfd = -1, ret; 5402a4d85aaSChristian Brauner struct pollfd pfd; 5412a4d85aaSChristian Brauner pid_t pid; 5422a4d85aaSChristian Brauner 5432a4d85aaSChristian Brauner /* Ensure parent does not already have no_new_privs. */ 5442a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 5452a4d85aaSChristian Brauner ASSERT_EQ(ret, 0) { 5462a4d85aaSChristian Brauner TH_LOG("Parent already has no_new_privs set, cannot run test"); 5472a4d85aaSChristian Brauner } 5482a4d85aaSChristian Brauner 5492a4d85aaSChristian Brauner args.pidfd = ptr_to_u64(&pidfd); 5502a4d85aaSChristian Brauner 5512a4d85aaSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 5522a4d85aaSChristian Brauner if (pid < 0 && errno == EINVAL) 5532a4d85aaSChristian Brauner SKIP(return, "CLONE_NNP not supported"); 5542a4d85aaSChristian Brauner ASSERT_GE(pid, 0); 5552a4d85aaSChristian Brauner 5562a4d85aaSChristian Brauner if (pid == 0) { 5572a4d85aaSChristian Brauner /* 5582a4d85aaSChristian Brauner * Child: check no_new_privs. Exit 0 if set, 1 if not. 5592a4d85aaSChristian Brauner */ 5602a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 5612a4d85aaSChristian Brauner _exit(ret == 1 ? 0 : 1); 5622a4d85aaSChristian Brauner } 5632a4d85aaSChristian Brauner 5642a4d85aaSChristian Brauner ASSERT_GE(pidfd, 0); 5652a4d85aaSChristian Brauner 5662a4d85aaSChristian Brauner /* Parent must still NOT have no_new_privs. */ 5672a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 5682a4d85aaSChristian Brauner ASSERT_EQ(ret, 0) { 5692a4d85aaSChristian Brauner TH_LOG("Parent got no_new_privs after creating CLONE_NNP child"); 5702a4d85aaSChristian Brauner } 5712a4d85aaSChristian Brauner 5722a4d85aaSChristian Brauner /* Wait for child to exit. */ 5732a4d85aaSChristian Brauner pfd.fd = pidfd; 5742a4d85aaSChristian Brauner pfd.events = POLLIN; 5752a4d85aaSChristian Brauner ret = poll(&pfd, 1, 5000); 5762a4d85aaSChristian Brauner ASSERT_EQ(ret, 1); 5772a4d85aaSChristian Brauner 5782a4d85aaSChristian Brauner /* Verify child exited with 0 (no_new_privs was set). */ 5792a4d85aaSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 5802a4d85aaSChristian Brauner ASSERT_EQ(ret, 0); 5812a4d85aaSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 5822a4d85aaSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code)); 5832a4d85aaSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { 5842a4d85aaSChristian Brauner TH_LOG("Child did not have no_new_privs set"); 5852a4d85aaSChristian Brauner } 5862a4d85aaSChristian Brauner 5872a4d85aaSChristian Brauner close(pidfd); 5882a4d85aaSChristian Brauner } 5892a4d85aaSChristian Brauner 5902a4d85aaSChristian Brauner /* 5912a4d85aaSChristian Brauner * Test that CLONE_NNP with CLONE_THREAD fails with EINVAL. 5922a4d85aaSChristian Brauner */ 5932a4d85aaSChristian Brauner TEST(nnp_rejects_thread) 5942a4d85aaSChristian Brauner { 5952a4d85aaSChristian Brauner struct __clone_args args = { 5962a4d85aaSChristian Brauner .flags = CLONE_NNP | CLONE_THREAD | 5972a4d85aaSChristian Brauner CLONE_SIGHAND | CLONE_VM, 5982a4d85aaSChristian Brauner .exit_signal = 0, 5992a4d85aaSChristian Brauner }; 6002a4d85aaSChristian Brauner pid_t pid; 6012a4d85aaSChristian Brauner 6022a4d85aaSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 6032a4d85aaSChristian Brauner ASSERT_EQ(pid, -1); 6042a4d85aaSChristian Brauner ASSERT_EQ(errno, EINVAL); 6052a4d85aaSChristian Brauner } 6062a4d85aaSChristian Brauner 6072a4d85aaSChristian Brauner /* 6082a4d85aaSChristian Brauner * Test that a plain CLONE_AUTOREAP child does NOT get no_new_privs. 6092a4d85aaSChristian Brauner * Only CLONE_NNP should set it. 6102a4d85aaSChristian Brauner */ 6112a4d85aaSChristian Brauner TEST(autoreap_no_new_privs_unset) 6122a4d85aaSChristian Brauner { 6132a4d85aaSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 6142a4d85aaSChristian Brauner int pidfd = -1, ret; 6152a4d85aaSChristian Brauner struct pollfd pfd; 6162a4d85aaSChristian Brauner pid_t pid; 6172a4d85aaSChristian Brauner 6182a4d85aaSChristian Brauner pid = create_autoreap_child(&pidfd); 6192a4d85aaSChristian Brauner if (pid < 0 && errno == EINVAL) 6202a4d85aaSChristian Brauner SKIP(return, "CLONE_AUTOREAP not supported"); 6212a4d85aaSChristian Brauner ASSERT_GE(pid, 0); 6222a4d85aaSChristian Brauner 6232a4d85aaSChristian Brauner if (pid == 0) { 6242a4d85aaSChristian Brauner /* 6252a4d85aaSChristian Brauner * Child: check no_new_privs. Exit 0 if NOT set, 1 if set. 6262a4d85aaSChristian Brauner */ 6272a4d85aaSChristian Brauner ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); 6282a4d85aaSChristian Brauner _exit(ret == 0 ? 0 : 1); 6292a4d85aaSChristian Brauner } 6302a4d85aaSChristian Brauner 6312a4d85aaSChristian Brauner ASSERT_GE(pidfd, 0); 6322a4d85aaSChristian Brauner 6332a4d85aaSChristian Brauner pfd.fd = pidfd; 6342a4d85aaSChristian Brauner pfd.events = POLLIN; 6352a4d85aaSChristian Brauner ret = poll(&pfd, 1, 5000); 6362a4d85aaSChristian Brauner ASSERT_EQ(ret, 1); 6372a4d85aaSChristian Brauner 6382a4d85aaSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 6392a4d85aaSChristian Brauner ASSERT_EQ(ret, 0); 6402a4d85aaSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 6412a4d85aaSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code)); 6422a4d85aaSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0) { 6432a4d85aaSChristian Brauner TH_LOG("Plain autoreap child unexpectedly has no_new_privs"); 6442a4d85aaSChristian Brauner } 6452a4d85aaSChristian Brauner 6462a4d85aaSChristian Brauner close(pidfd); 6472a4d85aaSChristian Brauner } 6482a4d85aaSChristian Brauner 649*ec26879eSChristian Brauner /* 650*ec26879eSChristian Brauner * Helper: create a child with CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP | CLONE_NNP. 651*ec26879eSChristian Brauner */ 652*ec26879eSChristian Brauner static pid_t create_autokill_child(int *pidfd) 653*ec26879eSChristian Brauner { 654*ec26879eSChristian Brauner struct __clone_args args = { 655*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 656*ec26879eSChristian Brauner CLONE_AUTOREAP | CLONE_NNP, 657*ec26879eSChristian Brauner .exit_signal = 0, 658*ec26879eSChristian Brauner .pidfd = ptr_to_u64(pidfd), 659*ec26879eSChristian Brauner }; 660*ec26879eSChristian Brauner 661*ec26879eSChristian Brauner return sys_clone3(&args, sizeof(args)); 662*ec26879eSChristian Brauner } 663*ec26879eSChristian Brauner 664*ec26879eSChristian Brauner /* 665*ec26879eSChristian Brauner * Basic autokill test: child blocks in pause(), parent closes the 666*ec26879eSChristian Brauner * clone3 pidfd, child should be killed and autoreaped. 667*ec26879eSChristian Brauner */ 668*ec26879eSChristian Brauner TEST(autokill_basic) 669*ec26879eSChristian Brauner { 670*ec26879eSChristian Brauner int pidfd = -1, pollfd_fd = -1, ret; 671*ec26879eSChristian Brauner struct pollfd pfd; 672*ec26879eSChristian Brauner pid_t pid; 673*ec26879eSChristian Brauner 674*ec26879eSChristian Brauner pid = create_autokill_child(&pidfd); 675*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL) 676*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 677*ec26879eSChristian Brauner ASSERT_GE(pid, 0); 678*ec26879eSChristian Brauner 679*ec26879eSChristian Brauner if (pid == 0) { 680*ec26879eSChristian Brauner pause(); 681*ec26879eSChristian Brauner _exit(1); 682*ec26879eSChristian Brauner } 683*ec26879eSChristian Brauner 684*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0); 685*ec26879eSChristian Brauner 686*ec26879eSChristian Brauner /* 687*ec26879eSChristian Brauner * Open a second pidfd via pidfd_open() so we can observe the 688*ec26879eSChristian Brauner * child's death after closing the clone3 pidfd. 689*ec26879eSChristian Brauner */ 690*ec26879eSChristian Brauner pollfd_fd = sys_pidfd_open(pid, 0); 691*ec26879eSChristian Brauner ASSERT_GE(pollfd_fd, 0); 692*ec26879eSChristian Brauner 693*ec26879eSChristian Brauner /* Close the clone3 pidfd — this should trigger autokill. */ 694*ec26879eSChristian Brauner close(pidfd); 695*ec26879eSChristian Brauner 696*ec26879eSChristian Brauner /* Wait for the child to die via the pidfd_open'd fd. */ 697*ec26879eSChristian Brauner pfd.fd = pollfd_fd; 698*ec26879eSChristian Brauner pfd.events = POLLIN; 699*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000); 700*ec26879eSChristian Brauner ASSERT_EQ(ret, 1); 701*ec26879eSChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 702*ec26879eSChristian Brauner 703*ec26879eSChristian Brauner /* Child should be autoreaped — no zombie. */ 704*ec26879eSChristian Brauner usleep(100000); 705*ec26879eSChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 706*ec26879eSChristian Brauner ASSERT_EQ(ret, -1); 707*ec26879eSChristian Brauner ASSERT_EQ(errno, ECHILD); 708*ec26879eSChristian Brauner 709*ec26879eSChristian Brauner close(pollfd_fd); 710*ec26879eSChristian Brauner } 711*ec26879eSChristian Brauner 712*ec26879eSChristian Brauner /* 713*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL without CLONE_PIDFD must fail with EINVAL. 714*ec26879eSChristian Brauner */ 715*ec26879eSChristian Brauner TEST(autokill_requires_pidfd) 716*ec26879eSChristian Brauner { 717*ec26879eSChristian Brauner struct __clone_args args = { 718*ec26879eSChristian Brauner .flags = CLONE_PIDFD_AUTOKILL | CLONE_AUTOREAP, 719*ec26879eSChristian Brauner .exit_signal = 0, 720*ec26879eSChristian Brauner }; 721*ec26879eSChristian Brauner pid_t pid; 722*ec26879eSChristian Brauner 723*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 724*ec26879eSChristian Brauner ASSERT_EQ(pid, -1); 725*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL); 726*ec26879eSChristian Brauner } 727*ec26879eSChristian Brauner 728*ec26879eSChristian Brauner /* 729*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL without CLONE_AUTOREAP must fail with EINVAL. 730*ec26879eSChristian Brauner */ 731*ec26879eSChristian Brauner TEST(autokill_requires_autoreap) 732*ec26879eSChristian Brauner { 733*ec26879eSChristian Brauner int pidfd = -1; 734*ec26879eSChristian Brauner struct __clone_args args = { 735*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL, 736*ec26879eSChristian Brauner .exit_signal = 0, 737*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd), 738*ec26879eSChristian Brauner }; 739*ec26879eSChristian Brauner pid_t pid; 740*ec26879eSChristian Brauner 741*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 742*ec26879eSChristian Brauner ASSERT_EQ(pid, -1); 743*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL); 744*ec26879eSChristian Brauner } 745*ec26879eSChristian Brauner 746*ec26879eSChristian Brauner /* 747*ec26879eSChristian Brauner * CLONE_PIDFD_AUTOKILL with CLONE_THREAD must fail with EINVAL. 748*ec26879eSChristian Brauner */ 749*ec26879eSChristian Brauner TEST(autokill_rejects_thread) 750*ec26879eSChristian Brauner { 751*ec26879eSChristian Brauner int pidfd = -1; 752*ec26879eSChristian Brauner struct __clone_args args = { 753*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 754*ec26879eSChristian Brauner CLONE_AUTOREAP | CLONE_THREAD | 755*ec26879eSChristian Brauner CLONE_SIGHAND | CLONE_VM, 756*ec26879eSChristian Brauner .exit_signal = 0, 757*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd), 758*ec26879eSChristian Brauner }; 759*ec26879eSChristian Brauner pid_t pid; 760*ec26879eSChristian Brauner 761*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 762*ec26879eSChristian Brauner ASSERT_EQ(pid, -1); 763*ec26879eSChristian Brauner ASSERT_EQ(errno, EINVAL); 764*ec26879eSChristian Brauner } 765*ec26879eSChristian Brauner 766*ec26879eSChristian Brauner /* 767*ec26879eSChristian Brauner * Test that only the clone3 pidfd triggers autokill, not pidfd_open(). 768*ec26879eSChristian Brauner * Close the pidfd_open'd fd first — child should survive. 769*ec26879eSChristian Brauner * Then close the clone3 pidfd — child should be killed and autoreaped. 770*ec26879eSChristian Brauner */ 771*ec26879eSChristian Brauner TEST(autokill_pidfd_open_no_effect) 772*ec26879eSChristian Brauner { 773*ec26879eSChristian Brauner int pidfd = -1, open_fd = -1, ret; 774*ec26879eSChristian Brauner struct pollfd pfd; 775*ec26879eSChristian Brauner pid_t pid; 776*ec26879eSChristian Brauner 777*ec26879eSChristian Brauner pid = create_autokill_child(&pidfd); 778*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL) 779*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 780*ec26879eSChristian Brauner ASSERT_GE(pid, 0); 781*ec26879eSChristian Brauner 782*ec26879eSChristian Brauner if (pid == 0) { 783*ec26879eSChristian Brauner pause(); 784*ec26879eSChristian Brauner _exit(1); 785*ec26879eSChristian Brauner } 786*ec26879eSChristian Brauner 787*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0); 788*ec26879eSChristian Brauner 789*ec26879eSChristian Brauner /* Open a second pidfd via pidfd_open(). */ 790*ec26879eSChristian Brauner open_fd = sys_pidfd_open(pid, 0); 791*ec26879eSChristian Brauner ASSERT_GE(open_fd, 0); 792*ec26879eSChristian Brauner 793*ec26879eSChristian Brauner /* 794*ec26879eSChristian Brauner * Close the pidfd_open'd fd — child should survive because 795*ec26879eSChristian Brauner * only the clone3 pidfd has autokill. 796*ec26879eSChristian Brauner */ 797*ec26879eSChristian Brauner close(open_fd); 798*ec26879eSChristian Brauner usleep(200000); 799*ec26879eSChristian Brauner 800*ec26879eSChristian Brauner /* Verify child is still alive by polling the clone3 pidfd. */ 801*ec26879eSChristian Brauner pfd.fd = pidfd; 802*ec26879eSChristian Brauner pfd.events = POLLIN; 803*ec26879eSChristian Brauner ret = poll(&pfd, 1, 0); 804*ec26879eSChristian Brauner ASSERT_EQ(ret, 0) { 805*ec26879eSChristian Brauner TH_LOG("Child died after closing pidfd_open fd — should still be alive"); 806*ec26879eSChristian Brauner } 807*ec26879eSChristian Brauner 808*ec26879eSChristian Brauner /* Open another observation fd before triggering autokill. */ 809*ec26879eSChristian Brauner open_fd = sys_pidfd_open(pid, 0); 810*ec26879eSChristian Brauner ASSERT_GE(open_fd, 0); 811*ec26879eSChristian Brauner 812*ec26879eSChristian Brauner /* Now close the clone3 pidfd — this triggers autokill. */ 813*ec26879eSChristian Brauner close(pidfd); 814*ec26879eSChristian Brauner 815*ec26879eSChristian Brauner pfd.fd = open_fd; 816*ec26879eSChristian Brauner pfd.events = POLLIN; 817*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000); 818*ec26879eSChristian Brauner ASSERT_EQ(ret, 1); 819*ec26879eSChristian Brauner ASSERT_TRUE(pfd.revents & POLLIN); 820*ec26879eSChristian Brauner 821*ec26879eSChristian Brauner /* Child should be autoreaped — no zombie. */ 822*ec26879eSChristian Brauner usleep(100000); 823*ec26879eSChristian Brauner ret = waitpid(pid, NULL, WNOHANG); 824*ec26879eSChristian Brauner ASSERT_EQ(ret, -1); 825*ec26879eSChristian Brauner ASSERT_EQ(errno, ECHILD); 826*ec26879eSChristian Brauner 827*ec26879eSChristian Brauner close(open_fd); 828*ec26879eSChristian Brauner } 829*ec26879eSChristian Brauner 830*ec26879eSChristian Brauner /* 831*ec26879eSChristian Brauner * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP fails with EPERM 832*ec26879eSChristian Brauner * for an unprivileged caller. 833*ec26879eSChristian Brauner */ 834*ec26879eSChristian Brauner TEST(autokill_requires_cap_sys_admin) 835*ec26879eSChristian Brauner { 836*ec26879eSChristian Brauner int pidfd = -1, ret; 837*ec26879eSChristian Brauner struct __clone_args args = { 838*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 839*ec26879eSChristian Brauner CLONE_AUTOREAP, 840*ec26879eSChristian Brauner .exit_signal = 0, 841*ec26879eSChristian Brauner .pidfd = ptr_to_u64(&pidfd), 842*ec26879eSChristian Brauner }; 843*ec26879eSChristian Brauner pid_t pid; 844*ec26879eSChristian Brauner 845*ec26879eSChristian Brauner /* Drop all capabilities so we lack CAP_SYS_ADMIN. */ 846*ec26879eSChristian Brauner ret = drop_all_caps(); 847*ec26879eSChristian Brauner ASSERT_EQ(ret, 0); 848*ec26879eSChristian Brauner 849*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 850*ec26879eSChristian Brauner ASSERT_EQ(pid, -1); 851*ec26879eSChristian Brauner ASSERT_EQ(errno, EPERM); 852*ec26879eSChristian Brauner } 853*ec26879eSChristian Brauner 854*ec26879eSChristian Brauner /* 855*ec26879eSChristian Brauner * Test that CLONE_PIDFD_AUTOKILL without CLONE_NNP succeeds with 856*ec26879eSChristian Brauner * CAP_SYS_ADMIN. 857*ec26879eSChristian Brauner */ 858*ec26879eSChristian Brauner TEST(autokill_without_nnp_with_cap) 859*ec26879eSChristian Brauner { 860*ec26879eSChristian Brauner struct __clone_args args = { 861*ec26879eSChristian Brauner .flags = CLONE_PIDFD | CLONE_PIDFD_AUTOKILL | 862*ec26879eSChristian Brauner CLONE_AUTOREAP, 863*ec26879eSChristian Brauner .exit_signal = 0, 864*ec26879eSChristian Brauner }; 865*ec26879eSChristian Brauner struct pidfd_info info = { .mask = PIDFD_INFO_EXIT }; 866*ec26879eSChristian Brauner int pidfd = -1, ret; 867*ec26879eSChristian Brauner struct pollfd pfd; 868*ec26879eSChristian Brauner pid_t pid; 869*ec26879eSChristian Brauner 870*ec26879eSChristian Brauner if (geteuid() != 0) 871*ec26879eSChristian Brauner SKIP(return, "Need root/CAP_SYS_ADMIN"); 872*ec26879eSChristian Brauner 873*ec26879eSChristian Brauner args.pidfd = ptr_to_u64(&pidfd); 874*ec26879eSChristian Brauner 875*ec26879eSChristian Brauner pid = sys_clone3(&args, sizeof(args)); 876*ec26879eSChristian Brauner if (pid < 0 && errno == EINVAL) 877*ec26879eSChristian Brauner SKIP(return, "CLONE_PIDFD_AUTOKILL not supported"); 878*ec26879eSChristian Brauner ASSERT_GE(pid, 0); 879*ec26879eSChristian Brauner 880*ec26879eSChristian Brauner if (pid == 0) 881*ec26879eSChristian Brauner _exit(0); 882*ec26879eSChristian Brauner 883*ec26879eSChristian Brauner ASSERT_GE(pidfd, 0); 884*ec26879eSChristian Brauner 885*ec26879eSChristian Brauner /* Wait for child to exit. */ 886*ec26879eSChristian Brauner pfd.fd = pidfd; 887*ec26879eSChristian Brauner pfd.events = POLLIN; 888*ec26879eSChristian Brauner ret = poll(&pfd, 1, 5000); 889*ec26879eSChristian Brauner ASSERT_EQ(ret, 1); 890*ec26879eSChristian Brauner 891*ec26879eSChristian Brauner ret = ioctl(pidfd, PIDFD_GET_INFO, &info); 892*ec26879eSChristian Brauner ASSERT_EQ(ret, 0); 893*ec26879eSChristian Brauner ASSERT_TRUE(info.mask & PIDFD_INFO_EXIT); 894*ec26879eSChristian Brauner ASSERT_TRUE(WIFEXITED(info.exit_code)); 895*ec26879eSChristian Brauner ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); 896*ec26879eSChristian Brauner 897*ec26879eSChristian Brauner close(pidfd); 898*ec26879eSChristian Brauner } 899*ec26879eSChristian Brauner 90076d46ad2SChristian Brauner TEST_HARNESS_MAIN 901