xref: /linux/tools/testing/selftests/namespaces/ns_active_ref_test.c (revision 47a5fd8ce18bee0b08aac119ab81dc4de3b40f3f)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <linux/nsfs.h>
11 #include <sys/mount.h>
12 #include <sys/stat.h>
13 #include <sys/types.h>
14 #include <sys/wait.h>
15 #include <unistd.h>
16 #include "../kselftest_harness.h"
17 #include "../filesystems/utils.h"
18 
/* Fallback used when none of the included headers provides FD_NSFS_ROOT. */
#if !defined(FD_NSFS_ROOT)
#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
#endif
22 
23 /*
24  * Test that initial namespaces can be reopened via file handle.
25  * Initial namespaces should have active ref count of 1 from boot.
26  */
27 TEST(init_ns_always_active)
28 {
29 	struct file_handle *handle;
30 	int mount_id;
31 	int ret;
32 	int fd1, fd2;
33 	struct stat st1, st2;
34 
35 	handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
36 	ASSERT_NE(handle, NULL);
37 
38 	/* Open initial network namespace */
39 	fd1 = open("/proc/1/ns/net", O_RDONLY);
40 	ASSERT_GE(fd1, 0);
41 
42 	/* Get file handle for initial namespace */
43 	handle->handle_bytes = MAX_HANDLE_SZ;
44 	ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH);
45 	if (ret < 0 && errno == EOPNOTSUPP) {
46 		SKIP(free(handle); close(fd1);
47 		     return, "nsfs doesn't support file handles");
48 	}
49 	ASSERT_EQ(ret, 0);
50 
51 	/* Close the namespace fd */
52 	close(fd1);
53 
54 	/* Try to reopen via file handle - should succeed since init ns is always active */
55 	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
56 	if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
57 		SKIP(free(handle);
58 		     return, "open_by_handle_at with FD_NSFS_ROOT not supported");
59 	}
60 	ASSERT_GE(fd2, 0);
61 
62 	/* Verify we opened the same namespace */
63 	fd1 = open("/proc/1/ns/net", O_RDONLY);
64 	ASSERT_GE(fd1, 0);
65 	ASSERT_EQ(fstat(fd1, &st1), 0);
66 	ASSERT_EQ(fstat(fd2, &st2), 0);
67 	ASSERT_EQ(st1.st_ino, st2.st_ino);
68 
69 	close(fd1);
70 	close(fd2);
71 	free(handle);
72 }
73 
74 /*
75  * Test namespace lifecycle: create a namespace in a child process,
76  * get a file handle while it's active, then try to reopen after
77  * the process exits (namespace becomes inactive).
78  */
79 TEST(ns_inactive_after_exit)
80 {
81 	struct file_handle *handle;
82 	int mount_id;
83 	int ret;
84 	int fd;
85 	int pipefd[2];
86 	pid_t pid;
87 	int status;
88 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
89 
90 	/* Create pipe for passing file handle from child */
91 	ASSERT_EQ(pipe(pipefd), 0);
92 
93 	pid = fork();
94 	ASSERT_GE(pid, 0);
95 
96 	if (pid == 0) {
97 		/* Child process */
98 		close(pipefd[0]);
99 
100 		/* Create new network namespace */
101 		ret = unshare(CLONE_NEWNET);
102 		if (ret < 0) {
103 			close(pipefd[1]);
104 			exit(1);
105 		}
106 
107 		/* Open our new namespace */
108 		fd = open("/proc/self/ns/net", O_RDONLY);
109 		if (fd < 0) {
110 			close(pipefd[1]);
111 			exit(1);
112 		}
113 
114 		/* Get file handle for the namespace */
115 		handle = (struct file_handle *)buf;
116 		handle->handle_bytes = MAX_HANDLE_SZ;
117 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
118 		close(fd);
119 
120 		if (ret < 0) {
121 			close(pipefd[1]);
122 			exit(1);
123 		}
124 
125 		/* Send handle to parent */
126 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
127 		close(pipefd[1]);
128 
129 		/* Exit - namespace should become inactive */
130 		exit(0);
131 	}
132 
133 	/* Parent process */
134 	close(pipefd[1]);
135 
136 	/* Read file handle from child */
137 	ret = read(pipefd[0], buf, sizeof(buf));
138 	close(pipefd[0]);
139 
140 	/* Wait for child to exit */
141 	waitpid(pid, &status, 0);
142 	ASSERT_TRUE(WIFEXITED(status));
143 	ASSERT_EQ(WEXITSTATUS(status), 0);
144 
145 	ASSERT_GT(ret, 0);
146 	handle = (struct file_handle *)buf;
147 
148 	/* Try to reopen namespace - should fail with ENOENT since it's inactive */
149 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
150 	ASSERT_LT(fd, 0);
151 	/* Should fail with ENOENT (namespace inactive) or ESTALE */
152 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
153 }
154 
155 /*
156  * Test that a namespace remains active while a process is using it,
157  * even after the creating process exits.
158  */
159 TEST(ns_active_with_multiple_processes)
160 {
161 	struct file_handle *handle;
162 	int mount_id;
163 	int ret;
164 	int fd;
165 	int pipefd[2];
166 	int syncpipe[2];
167 	pid_t pid1, pid2;
168 	int status;
169 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
170 	char sync_byte;
171 
172 	/* Create pipes for communication */
173 	ASSERT_EQ(pipe(pipefd), 0);
174 	ASSERT_EQ(pipe(syncpipe), 0);
175 
176 	pid1 = fork();
177 	ASSERT_GE(pid1, 0);
178 
179 	if (pid1 == 0) {
180 		/* First child - creates namespace */
181 		close(pipefd[0]);
182 		close(syncpipe[1]);
183 
184 		/* Create new network namespace */
185 		ret = unshare(CLONE_NEWNET);
186 		if (ret < 0) {
187 			close(pipefd[1]);
188 			close(syncpipe[0]);
189 			exit(1);
190 		}
191 
192 		/* Open and get handle */
193 		fd = open("/proc/self/ns/net", O_RDONLY);
194 		if (fd < 0) {
195 			close(pipefd[1]);
196 			close(syncpipe[0]);
197 			exit(1);
198 		}
199 
200 		handle = (struct file_handle *)buf;
201 		handle->handle_bytes = MAX_HANDLE_SZ;
202 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
203 		close(fd);
204 
205 		if (ret < 0) {
206 			close(pipefd[1]);
207 			close(syncpipe[0]);
208 			exit(1);
209 		}
210 
211 		/* Send handle to parent */
212 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
213 		close(pipefd[1]);
214 
215 		/* Wait for signal before exiting */
216 		read(syncpipe[0], &sync_byte, 1);
217 		close(syncpipe[0]);
218 		exit(0);
219 	}
220 
221 	/* Parent reads handle */
222 	close(pipefd[1]);
223 	ret = read(pipefd[0], buf, sizeof(buf));
224 	close(pipefd[0]);
225 	ASSERT_GT(ret, 0);
226 
227 	handle = (struct file_handle *)buf;
228 
229 	/* Create second child that will keep namespace active */
230 	pid2 = fork();
231 	ASSERT_GE(pid2, 0);
232 
233 	if (pid2 == 0) {
234 		/* Second child - reopens the namespace */
235 		close(syncpipe[0]);
236 		close(syncpipe[1]);
237 
238 		/* Open the namespace via handle */
239 		fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
240 		if (fd < 0) {
241 			exit(1);
242 		}
243 
244 		/* Join the namespace */
245 		ret = setns(fd, CLONE_NEWNET);
246 		close(fd);
247 		if (ret < 0) {
248 			exit(1);
249 		}
250 
251 		/* Sleep to keep namespace active */
252 		sleep(1);
253 		exit(0);
254 	}
255 
256 	/* Let second child enter the namespace */
257 	usleep(100000); /* 100ms */
258 
259 	/* Signal first child to exit */
260 	close(syncpipe[0]);
261 	sync_byte = 'X';
262 	write(syncpipe[1], &sync_byte, 1);
263 	close(syncpipe[1]);
264 
265 	/* Wait for first child */
266 	waitpid(pid1, &status, 0);
267 	ASSERT_TRUE(WIFEXITED(status));
268 
269 	/* Namespace should still be active because second child is using it */
270 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
271 	ASSERT_GE(fd, 0);
272 	close(fd);
273 
274 	/* Wait for second child */
275 	waitpid(pid2, &status, 0);
276 	ASSERT_TRUE(WIFEXITED(status));
277 }
278 
279 /*
280  * Test user namespace active ref tracking via credential lifecycle
281  */
282 TEST(userns_active_ref_lifecycle)
283 {
284 	struct file_handle *handle;
285 	int mount_id;
286 	int ret;
287 	int fd;
288 	int pipefd[2];
289 	pid_t pid;
290 	int status;
291 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
292 
293 	ASSERT_EQ(pipe(pipefd), 0);
294 
295 	pid = fork();
296 	ASSERT_GE(pid, 0);
297 
298 	if (pid == 0) {
299 		/* Child process */
300 		close(pipefd[0]);
301 
302 		/* Create new user namespace */
303 		ret = unshare(CLONE_NEWUSER);
304 		if (ret < 0) {
305 			close(pipefd[1]);
306 			exit(1);
307 		}
308 
309 		/* Set up uid/gid mappings */
310 		int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
311 		int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
312 		int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
313 
314 		if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) {
315 			write(setgroups_fd, "deny", 4);
316 			close(setgroups_fd);
317 
318 			char mapping[64];
319 			snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
320 			write(uid_map_fd, mapping, strlen(mapping));
321 			close(uid_map_fd);
322 
323 			snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
324 			write(gid_map_fd, mapping, strlen(mapping));
325 			close(gid_map_fd);
326 		}
327 
328 		/* Get file handle */
329 		fd = open("/proc/self/ns/user", O_RDONLY);
330 		if (fd < 0) {
331 			close(pipefd[1]);
332 			exit(1);
333 		}
334 
335 		handle = (struct file_handle *)buf;
336 		handle->handle_bytes = MAX_HANDLE_SZ;
337 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
338 		close(fd);
339 
340 		if (ret < 0) {
341 			close(pipefd[1]);
342 			exit(1);
343 		}
344 
345 		/* Send handle to parent */
346 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
347 		close(pipefd[1]);
348 		exit(0);
349 	}
350 
351 	/* Parent */
352 	close(pipefd[1]);
353 	ret = read(pipefd[0], buf, sizeof(buf));
354 	close(pipefd[0]);
355 
356 	waitpid(pid, &status, 0);
357 	ASSERT_TRUE(WIFEXITED(status));
358 	ASSERT_EQ(WEXITSTATUS(status), 0);
359 
360 	ASSERT_GT(ret, 0);
361 	handle = (struct file_handle *)buf;
362 
363 	/* Namespace should be inactive after all tasks exit */
364 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
365 	ASSERT_LT(fd, 0);
366 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
367 }
368 
369 /*
370  * Test PID namespace active ref tracking
371  */
372 TEST(pidns_active_ref_lifecycle)
373 {
374 	struct file_handle *handle;
375 	int mount_id;
376 	int ret;
377 	int fd;
378 	int pipefd[2];
379 	pid_t pid;
380 	int status;
381 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
382 
383 	ASSERT_EQ(pipe(pipefd), 0);
384 
385 	pid = fork();
386 	ASSERT_GE(pid, 0);
387 
388 	if (pid == 0) {
389 		/* Child process */
390 		close(pipefd[0]);
391 
392 		/* Create new PID namespace */
393 		ret = unshare(CLONE_NEWPID);
394 		if (ret < 0) {
395 			close(pipefd[1]);
396 			exit(1);
397 		}
398 
399 		/* Fork to actually enter the PID namespace */
400 		pid_t child = fork();
401 		if (child < 0) {
402 			close(pipefd[1]);
403 			exit(1);
404 		}
405 
406 		if (child == 0) {
407 			/* Grandchild - in new PID namespace */
408 			fd = open("/proc/self/ns/pid", O_RDONLY);
409 			if (fd < 0) {
410 				exit(1);
411 			}
412 
413 			handle = (struct file_handle *)buf;
414 			handle->handle_bytes = MAX_HANDLE_SZ;
415 			ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
416 			close(fd);
417 
418 			if (ret < 0) {
419 				exit(1);
420 			}
421 
422 			/* Send handle to grandparent */
423 			write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
424 			close(pipefd[1]);
425 			exit(0);
426 		}
427 
428 		/* Wait for grandchild */
429 		waitpid(child, NULL, 0);
430 		exit(0);
431 	}
432 
433 	/* Parent */
434 	close(pipefd[1]);
435 	ret = read(pipefd[0], buf, sizeof(buf));
436 	close(pipefd[0]);
437 
438 	waitpid(pid, &status, 0);
439 	ASSERT_TRUE(WIFEXITED(status));
440 	ASSERT_EQ(WEXITSTATUS(status), 0);
441 
442 	ASSERT_GT(ret, 0);
443 	handle = (struct file_handle *)buf;
444 
445 	/* Namespace should be inactive after all processes exit */
446 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
447 	ASSERT_LT(fd, 0);
448 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
449 }
450 
451 /*
452  * Test that an open file descriptor keeps a namespace active.
453  * Even after the creating process exits, the namespace should remain
454  * active as long as an fd is held open.
455  */
456 TEST(ns_fd_keeps_active)
457 {
458 	struct file_handle *handle;
459 	int mount_id;
460 	int ret;
461 	int nsfd;
462 	int pipe_child_ready[2];
463 	int pipe_parent_ready[2];
464 	pid_t pid;
465 	int status;
466 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
467 	char sync_byte;
468 	char proc_path[64];
469 
470 	ASSERT_EQ(pipe(pipe_child_ready), 0);
471 	ASSERT_EQ(pipe(pipe_parent_ready), 0);
472 
473 	pid = fork();
474 	ASSERT_GE(pid, 0);
475 
476 	if (pid == 0) {
477 		/* Child process */
478 		close(pipe_child_ready[0]);
479 		close(pipe_parent_ready[1]);
480 
481 		TH_LOG("Child: creating new network namespace");
482 
483 		/* Create new network namespace */
484 		ret = unshare(CLONE_NEWNET);
485 		if (ret < 0) {
486 			TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno));
487 			close(pipe_child_ready[1]);
488 			close(pipe_parent_ready[0]);
489 			exit(1);
490 		}
491 
492 		TH_LOG("Child: network namespace created successfully");
493 
494 		/* Get file handle for the namespace */
495 		nsfd = open("/proc/self/ns/net", O_RDONLY);
496 		if (nsfd < 0) {
497 			TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno));
498 			close(pipe_child_ready[1]);
499 			close(pipe_parent_ready[0]);
500 			exit(1);
501 		}
502 
503 		TH_LOG("Child: opened namespace fd %d", nsfd);
504 
505 		handle = (struct file_handle *)buf;
506 		handle->handle_bytes = MAX_HANDLE_SZ;
507 		ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH);
508 		close(nsfd);
509 
510 		if (ret < 0) {
511 			TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno));
512 			close(pipe_child_ready[1]);
513 			close(pipe_parent_ready[0]);
514 			exit(1);
515 		}
516 
517 		TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes);
518 
519 		/* Send file handle to parent */
520 		ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes);
521 		TH_LOG("Child: sent %d bytes of file handle to parent", ret);
522 		close(pipe_child_ready[1]);
523 
524 		/* Wait for parent to open the fd */
525 		TH_LOG("Child: waiting for parent to open fd");
526 		ret = read(pipe_parent_ready[0], &sync_byte, 1);
527 		close(pipe_parent_ready[0]);
528 
529 		TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret);
530 		/* Exit - namespace should stay active because parent holds fd */
531 		exit(0);
532 	}
533 
534 	/* Parent process */
535 	close(pipe_child_ready[1]);
536 	close(pipe_parent_ready[0]);
537 
538 	TH_LOG("Parent: reading file handle from child");
539 
540 	/* Read file handle from child */
541 	ret = read(pipe_child_ready[0], buf, sizeof(buf));
542 	close(pipe_child_ready[0]);
543 	ASSERT_GT(ret, 0);
544 	handle = (struct file_handle *)buf;
545 
546 	TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes);
547 
548 	/* Open the child's namespace while it's still alive */
549 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
550 	TH_LOG("Parent: opening child's namespace at %s", proc_path);
551 	nsfd = open(proc_path, O_RDONLY);
552 	if (nsfd < 0) {
553 		TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno));
554 		close(pipe_parent_ready[1]);
555 		kill(pid, SIGKILL);
556 		waitpid(pid, NULL, 0);
557 		SKIP(return, "Failed to open child's namespace");
558 	}
559 
560 	TH_LOG("Parent: opened child's namespace, got fd %d", nsfd);
561 
562 	/* Signal child that we have the fd */
563 	sync_byte = 'G';
564 	write(pipe_parent_ready[1], &sync_byte, 1);
565 	close(pipe_parent_ready[1]);
566 	TH_LOG("Parent: signaled child that we have the fd");
567 
568 	/* Wait for child to exit */
569 	waitpid(pid, &status, 0);
570 	ASSERT_TRUE(WIFEXITED(status));
571 	ASSERT_EQ(WEXITSTATUS(status), 0);
572 
573 	TH_LOG("Child exited, parent holds fd %d to namespace", nsfd);
574 
575 	/*
576 	 * Namespace should still be ACTIVE because we hold an fd.
577 	 * We should be able to reopen it via file handle.
578 	 */
579 	TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)");
580 	int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
581 	ASSERT_GE(fd2, 0);
582 
583 	TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2);
584 
585 	/* Verify it's the same namespace */
586 	struct stat st1, st2;
587 	ASSERT_EQ(fstat(nsfd, &st1), 0);
588 	ASSERT_EQ(fstat(fd2, &st2), 0);
589 	TH_LOG("Namespace inodes: nsfd=%lu, fd2=%lu", st1.st_ino, st2.st_ino);
590 	ASSERT_EQ(st1.st_ino, st2.st_ino);
591 	close(fd2);
592 
593 	/* Now close the fd - namespace should become inactive */
594 	TH_LOG("Closing fd %d - namespace should become inactive", nsfd);
595 	close(nsfd);
596 
597 	/* Now reopening should fail - namespace is inactive */
598 	TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)");
599 	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
600 	ASSERT_LT(fd2, 0);
601 	/* Should fail with ENOENT (inactive) or ESTALE (gone) */
602 	TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno);
603 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
604 }
605 
606 TEST_HARNESS_MAIN
607