xref: /linux/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <errno.h>
4 #include <sched.h>
5 #include <signal.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <sys/socket.h>
10 #include <unistd.h>
11 #include "../pidfd/pidfd.h"
12 #include "../kselftest_harness.h"
13 
14 /*
15  * Regression tests for the setns(pidfd) active reference counting bug.
16  *
17  * These tests are based on the reproducers that triggered the race condition
18  * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly").
19  *
20  * The bug: When using setns() with a pidfd, if the target task exits between
21  * prepare_nsset() and commit_nsset(), the namespaces would become inactive.
22  * Then ns_ref_active_get() would increment from 0 without properly resurrecting
23  * the owner chain, causing active reference count underflows.
24  */
25 
26 /*
27  * Simple pidfd setns test using create_child()+unshare().
28  *
29  * Without the fix, this would trigger active refcount warnings when the
30  * parent exits after doing setns(pidfd) on a child that has already exited.
31  */
32 TEST(simple_pidfd_setns)
33 {
34 	pid_t child_pid;
35 	int pidfd = -1;
36 	int ret;
37 	int sv[2];
38 	char c;
39 
40 	/* Ignore SIGCHLD for autoreap */
41 	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
42 
43 	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
44 
45 	/* Create a child process without namespaces initially */
46 	child_pid = create_child(&pidfd, 0);
47 	ASSERT_GE(child_pid, 0);
48 
49 	if (child_pid == 0) {
50 		close(sv[0]);
51 
52 		if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) < 0) {
53 			close(sv[1]);
54 			_exit(1);
55 		}
56 
57 		/* Signal parent that namespaces are ready */
58 		if (write_nointr(sv[1], "1", 1) < 0) {
59 			close(sv[1]);
60 			_exit(1);
61 		}
62 
63 		close(sv[1]);
64 		_exit(0);
65 	}
66 	ASSERT_GE(pidfd, 0);
67 	EXPECT_EQ(close(sv[1]), 0);
68 
69 	ret = read_nointr(sv[0], &c, 1);
70 	ASSERT_EQ(ret, 1);
71 	EXPECT_EQ(close(sv[0]), 0);
72 
73 	/* Set to child's namespaces via pidfd */
74 	ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
75 	TH_LOG("setns() returned %d", ret);
76 	close(pidfd);
77 }
78 
79 /*
80  * Simple pidfd setns test using create_child().
81  *
82  * This variation uses create_child() with namespace flags directly.
83  * Namespaces are created immediately at clone time.
84  */
85 TEST(simple_pidfd_setns_clone)
86 {
87 	pid_t child_pid;
88 	int pidfd = -1;
89 	int ret;
90 
91 	/* Ignore SIGCHLD for autoreap */
92 	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
93 
94 	/* Create a child process with new namespaces using create_child() */
95 	child_pid = create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET);
96 	ASSERT_GE(child_pid, 0);
97 
98 	if (child_pid == 0) {
99 		/* Child: sleep for a while so parent can setns to us */
100 		sleep(2);
101 		_exit(0);
102 	}
103 
104 	/* Parent: pidfd was already created by create_child() */
105 	ASSERT_GE(pidfd, 0);
106 
107 	/* Set to child's namespaces via pidfd */
108 	ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
109 	close(pidfd);
110 	TH_LOG("setns() returned %d", ret);
111 }
112 
/*
 * Entry point supplied by ../kselftest_harness.h. Presumably expands to the
 * main() that runs the TEST() cases defined in this file -- the macro body is
 * not visible here, confirm against the harness header.
 */
113 TEST_HARNESS_MAIN
114