xref: /linux/tools/testing/selftests/pidfd/pidfd_setns_test.c (revision ce79097a8f8391fdec835d1deaa112fba4b18362)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <linux/types.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <syscall.h>
14 #include <sys/prctl.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19 #include <linux/kcmp.h>
20 
21 #include "pidfd.h"
22 #include "../clone3/clone3_selftests.h"
23 #include "../kselftest_harness.h"
24 
/*
 * One index per namespace file handled by these tests. The values are used
 * as indices into ns_info[] and into the namespace fd arrays of the fixture.
 */
enum {
	PIDFD_NS_USER = 0,
	PIDFD_NS_MNT,
	PIDFD_NS_PID,
	PIDFD_NS_UTS,
	PIDFD_NS_IPC,
	PIDFD_NS_NET,
	PIDFD_NS_CGROUP,
	PIDFD_NS_PIDCLD,	/* the "pid_for_children" entry */
	PIDFD_NS_MAX,		/* number of entries, must stay last */
};
36 
37 const struct ns_info {
38 	const char *name;
39 	int flag;
40 } ns_info[] = {
41 	[PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
42 	[PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
43 	[PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
44 	[PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
45 	[PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
46 	[PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
47 	[PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
48 	[PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
49 };
50 
51 FIXTURE(current_nsset)
52 {
53 	pid_t pid;
54 	int pidfd;
55 	int nsfds[PIDFD_NS_MAX];
56 
57 	pid_t child_pid_exited;
58 	int child_pidfd_exited;
59 
60 	pid_t child_pid1;
61 	int child_pidfd1;
62 	int child_nsfds1[PIDFD_NS_MAX];
63 
64 	pid_t child_pid2;
65 	int child_pidfd2;
66 	int child_nsfds2[PIDFD_NS_MAX];
67 };
68 
/*
 * Thin wrapper around the raw waitid system call. Both pointer arguments of
 * the system call are passed as NULL, so the caller only gets the return
 * value of syscall().
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return rc;
}
73 
74 pid_t create_child(int *pidfd, unsigned flags)
75 {
76 	struct clone_args args = {
77 		.flags		= CLONE_PIDFD | flags,
78 		.exit_signal	= SIGCHLD,
79 		.pidfd		= ptr_to_u64(pidfd),
80 	};
81 
82 	return sys_clone3(&args, sizeof(struct clone_args));
83 }
84 
85 FIXTURE_SETUP(current_nsset)
86 {
87 	int i, proc_fd, ret;
88 
89 	for (i = 0; i < PIDFD_NS_MAX; i++) {
90 		self->nsfds[i]		= -EBADF;
91 		self->child_nsfds1[i]	= -EBADF;
92 		self->child_nsfds2[i]	= -EBADF;
93 	}
94 
95 	proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
96 	ASSERT_GE(proc_fd, 0) {
97 		TH_LOG("%m - Failed to open /proc/self/ns");
98 	}
99 
100 	self->pid = getpid();
101 	for (i = 0; i < PIDFD_NS_MAX; i++) {
102 		const struct ns_info *info = &ns_info[i];
103 		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
104 		if (self->nsfds[i] < 0) {
105 			EXPECT_EQ(errno, ENOENT) {
106 				TH_LOG("%m - Failed to open %s namespace for process %d",
107 				       info->name, self->pid);
108 			}
109 		}
110 	}
111 
112 	self->pidfd = sys_pidfd_open(self->pid, 0);
113 	EXPECT_GT(self->pidfd, 0) {
114 		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
115 	}
116 
117 	/* Create task that exits right away. */
118 	self->child_pid_exited = create_child(&self->child_pidfd_exited,
119 					      CLONE_NEWUSER | CLONE_NEWNET);
120 	EXPECT_GT(self->child_pid_exited, 0);
121 
122 	if (self->child_pid_exited == 0)
123 		_exit(EXIT_SUCCESS);
124 
125 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
126 
127 	self->pidfd = sys_pidfd_open(self->pid, 0);
128 	EXPECT_GE(self->pidfd, 0) {
129 		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
130 	}
131 
132 	/* Create tasks that will be stopped. */
133 	self->child_pid1 = create_child(&self->child_pidfd1,
134 					CLONE_NEWUSER | CLONE_NEWNS |
135 					CLONE_NEWCGROUP | CLONE_NEWIPC |
136 					CLONE_NEWUTS | CLONE_NEWPID |
137 					CLONE_NEWNET);
138 	EXPECT_GE(self->child_pid1, 0);
139 
140 	if (self->child_pid1 == 0) {
141 		pause();
142 		_exit(EXIT_SUCCESS);
143 	}
144 
145 	self->child_pid2 = create_child(&self->child_pidfd2,
146 					CLONE_NEWUSER | CLONE_NEWNS |
147 					CLONE_NEWCGROUP | CLONE_NEWIPC |
148 					CLONE_NEWUTS | CLONE_NEWPID |
149 					CLONE_NEWNET);
150 	EXPECT_GE(self->child_pid2, 0);
151 
152 	if (self->child_pid2 == 0) {
153 		pause();
154 		_exit(EXIT_SUCCESS);
155 	}
156 
157 	for (i = 0; i < PIDFD_NS_MAX; i++) {
158 		char p[100];
159 
160 		const struct ns_info *info = &ns_info[i];
161 
162 		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
163 		if (self->nsfds[i] < 0) {
164 			EXPECT_EQ(errno, ENOENT) {
165 				TH_LOG("%m - Failed to open %s namespace for process %d",
166 				       info->name, self->pid);
167 			}
168 		}
169 
170 		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
171 			       self->child_pid1, info->name);
172 		EXPECT_GT(ret, 0);
173 		EXPECT_LT(ret, sizeof(p));
174 
175 		self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
176 		if (self->child_nsfds1[i] < 0) {
177 			EXPECT_EQ(errno, ENOENT) {
178 				TH_LOG("%m - Failed to open %s namespace for process %d",
179 				       info->name, self->child_pid1);
180 			}
181 		}
182 
183 		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
184 			       self->child_pid2, info->name);
185 		EXPECT_GT(ret, 0);
186 		EXPECT_LT(ret, sizeof(p));
187 
188 		self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
189 		if (self->child_nsfds2[i] < 0) {
190 			EXPECT_EQ(errno, ENOENT) {
191 				TH_LOG("%m - Failed to open %s namespace for process %d",
192 				       info->name, self->child_pid1);
193 			}
194 		}
195 	}
196 
197 	close(proc_fd);
198 }
199 
200 FIXTURE_TEARDOWN(current_nsset)
201 {
202 	int i;
203 
204 	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
205 					SIGKILL, NULL, 0), 0);
206 	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
207 					SIGKILL, NULL, 0), 0);
208 
209 	for (i = 0; i < PIDFD_NS_MAX; i++) {
210 		if (self->nsfds[i] >= 0)
211 			close(self->nsfds[i]);
212 		if (self->child_nsfds1[i] >= 0)
213 			close(self->child_nsfds1[i]);
214 		if (self->child_nsfds2[i] >= 0)
215 			close(self->child_nsfds2[i]);
216 	}
217 
218 	if (self->child_pidfd1 >= 0)
219 		EXPECT_EQ(0, close(self->child_pidfd1));
220 	if (self->child_pidfd2 >= 0)
221 		EXPECT_EQ(0, close(self->child_pidfd2));
222 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
223 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
224 	ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
225 }
226 
/*
 * Open the namespace file /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the result of open(), or -EIO when the path could not be
 * formatted into the local buffer.
 */
static int preserve_ns(const int pid, const char *ns)
{
	char path[50];
	int len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);

	if (len >= 0 && (size_t)len < sizeof(path))
		return open(path, O_RDONLY | O_CLOEXEC);

	return -EIO;
}
238 
/*
 * Compare the namespace behind @ns_fd1 with the @ns namespace file of
 * process @pid2 by looking at the device and inode numbers of both files.
 *
 * Returns 1 when both numbers match, 0 when they differ and -1 when either
 * file could not be stat'ed or the file of @pid2 could not be opened.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat st_ref, st_task;
	int task_fd;
	int err;

	if (fstat(ns_fd1, &st_ref) < 0)
		return -1;

	task_fd = preserve_ns(pid2, ns);
	if (task_fd < 0)
		return -1;

	/* The fd is only needed for this one fstat() call. */
	err = fstat(task_fd, &st_task);
	close(task_fd);
	if (err < 0)
		return -1;

	return st_ref.st_dev == st_task.st_dev &&
	       st_ref.st_ino == st_task.st_ino;
}
266 
267 /* Test that we can't pass garbage to the kernel. */
268 TEST_F(current_nsset, invalid_flags)
269 {
270 	ASSERT_NE(setns(self->pidfd, 0), 0);
271 	EXPECT_EQ(errno, EINVAL);
272 
273 	ASSERT_NE(setns(self->pidfd, -1), 0);
274 	EXPECT_EQ(errno, EINVAL);
275 
276 	ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
277 	EXPECT_EQ(errno, EINVAL);
278 
279 	ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
280 	EXPECT_EQ(errno, EINVAL);
281 }
282 
283 /* Test that we can't attach to a task that has already exited. */
284 TEST_F(current_nsset, pidfd_exited_child)
285 {
286 	int i;
287 	pid_t pid;
288 
289 	ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
290 		  0);
291 	EXPECT_EQ(errno, ESRCH);
292 
293 	pid = getpid();
294 	for (i = 0; i < PIDFD_NS_MAX; i++) {
295 		const struct ns_info *info = &ns_info[i];
296 		/* Verify that we haven't changed any namespaces. */
297 		if (self->nsfds[i] >= 0)
298 			ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
299 	}
300 }
301 
302 TEST_F(current_nsset, pidfd_incremental_setns)
303 {
304 	int i;
305 	pid_t pid;
306 
307 	pid = getpid();
308 	for (i = 0; i < PIDFD_NS_MAX; i++) {
309 		const struct ns_info *info = &ns_info[i];
310 		int nsfd;
311 
312 		if (self->child_nsfds1[i] < 0)
313 			continue;
314 
315 		if (info->flag) {
316 			ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
317 				TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
318 				       info->name, self->child_pid1,
319 				       self->child_pidfd1);
320 			}
321 		}
322 
323 		/* Verify that we have changed to the correct namespaces. */
324 		if (info->flag == CLONE_NEWPID)
325 			nsfd = self->nsfds[i];
326 		else
327 			nsfd = self->child_nsfds1[i];
328 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
329 			TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
330 			       info->name, self->child_pid1,
331 			       self->child_pidfd1);
332 		}
333 		TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
334 		       info->name, self->child_pid1, self->child_pidfd1);
335 	}
336 }
337 
338 TEST_F(current_nsset, nsfd_incremental_setns)
339 {
340 	int i;
341 	pid_t pid;
342 
343 	pid = getpid();
344 	for (i = 0; i < PIDFD_NS_MAX; i++) {
345 		const struct ns_info *info = &ns_info[i];
346 		int nsfd;
347 
348 		if (self->child_nsfds1[i] < 0)
349 			continue;
350 
351 		if (info->flag) {
352 			ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
353 				TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
354 				       info->name, self->child_pid1,
355 				       self->child_nsfds1[i]);
356 			}
357 		}
358 
359 		/* Verify that we have changed to the correct namespaces. */
360 		if (info->flag == CLONE_NEWPID)
361 			nsfd = self->nsfds[i];
362 		else
363 			nsfd = self->child_nsfds1[i];
364 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
365 			TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
366 			       info->name, self->child_pid1,
367 			       self->child_nsfds1[i]);
368 		}
369 		TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
370 		       info->name, self->child_pid1, self->child_nsfds1[i]);
371 	}
372 }
373 
374 TEST_F(current_nsset, pidfd_one_shot_setns)
375 {
376 	unsigned flags = 0;
377 	int i;
378 	pid_t pid;
379 
380 	for (i = 0; i < PIDFD_NS_MAX; i++) {
381 		const struct ns_info *info = &ns_info[i];
382 
383 		if (self->child_nsfds1[i] < 0)
384 			continue;
385 
386 		flags |= info->flag;
387 		TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
388 		       info->name, self->child_pid1);
389 	}
390 
391 	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
392 		TH_LOG("%m - Failed to setns to namespaces of %d",
393 		       self->child_pid1);
394 	}
395 
396 	pid = getpid();
397 	for (i = 0; i < PIDFD_NS_MAX; i++) {
398 		const struct ns_info *info = &ns_info[i];
399 		int nsfd;
400 
401 		if (self->child_nsfds1[i] < 0)
402 			continue;
403 
404 		/* Verify that we have changed to the correct namespaces. */
405 		if (info->flag == CLONE_NEWPID)
406 			nsfd = self->nsfds[i];
407 		else
408 			nsfd = self->child_nsfds1[i];
409 		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
410 			TH_LOG("setns failed to place us correctly into %s namespace of %d",
411 			       info->name, self->child_pid1);
412 		}
413 		TH_LOG("Managed to correctly setns to %s namespace of %d",
414 		       info->name, self->child_pid1);
415 	}
416 }
417 
418 TEST_F(current_nsset, no_foul_play)
419 {
420 	unsigned flags = 0;
421 	int i;
422 
423 	for (i = 0; i < PIDFD_NS_MAX; i++) {
424 		const struct ns_info *info = &ns_info[i];
425 
426 		if (self->child_nsfds1[i] < 0)
427 			continue;
428 
429 		flags |= info->flag;
430 		if (info->flag) /* No use logging pid_for_children. */
431 			TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
432 			       info->name, self->child_pid1);
433 	}
434 
435 	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
436 		TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
437 		       self->child_pid1, self->child_pidfd1);
438 	}
439 
440 	/*
441 	 * Can't setns to a user namespace outside of our hierarchy since we
442 	 * don't have caps in there and didn't create it. That means that under
443 	 * no circumstances should we be able to setns to any of the other
444 	 * ones since they aren't owned by our user namespace.
445 	 */
446 	for (i = 0; i < PIDFD_NS_MAX; i++) {
447 		const struct ns_info *info = &ns_info[i];
448 
449 		if (self->child_nsfds2[i] < 0 || !info->flag)
450 			continue;
451 
452 		ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
453 			TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
454 			       info->name, self->child_pid2,
455 			       self->child_pidfd2);
456 		}
457 		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
458 		       info->name, self->child_pid2,
459 		       self->child_pidfd2);
460 
461 		ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
462 			TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
463 			       info->name, self->child_pid2,
464 			       self->child_nsfds2[i]);
465 		}
466 		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
467 		       info->name, self->child_pid2,
468 		       self->child_nsfds2[i]);
469 	}
470 }
471 
472 TEST(setns_einval)
473 {
474 	int fd;
475 
476 	fd = sys_memfd_create("rostock", 0);
477 	EXPECT_GT(fd, 0);
478 
479 	ASSERT_NE(setns(fd, 0), 0);
480 	EXPECT_EQ(errno, EINVAL);
481 	close(fd);
482 }
483 
484 TEST_HARNESS_MAIN
485