xref: /linux/tools/testing/selftests/namespaces/ns_active_ref_test.c (revision a8ce47a1ac4b1fad5038663c62223490c0666135)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <limits.h>
6 #include <sched.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <linux/nsfs.h>
11 #include <sys/mount.h>
12 #include <sys/stat.h>
13 #include <sys/types.h>
14 #include <sys/wait.h>
15 #include <unistd.h>
16 #include "../kselftest_harness.h"
17 #include "../filesystems/utils.h"
18 
/*
 * Fallback definitions for toolchains whose installed headers do not
 * provide these constants yet.
 */
#ifndef FD_NSFS_ROOT
/* Special dirfd value naming the root of the nsfs filesystem. */
#define FD_NSFS_ROOT (-10003)
#endif

#ifndef FILEID_NSFS
/* handle_type value used for file handles that refer to nsfs. */
#define FILEID_NSFS 0xf1
#endif
26 
27 /*
28  * Test that initial namespaces can be reopened via file handle.
29  * Initial namespaces should have active ref count of 1 from boot.
30  */
31 TEST(init_ns_always_active)
32 {
33 	struct file_handle *handle;
34 	int mount_id;
35 	int ret;
36 	int fd1, fd2;
37 	struct stat st1, st2;
38 
39 	handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
40 	ASSERT_NE(handle, NULL);
41 
42 	/* Open initial network namespace */
43 	fd1 = open("/proc/1/ns/net", O_RDONLY);
44 	ASSERT_GE(fd1, 0);
45 
46 	/* Get file handle for initial namespace */
47 	handle->handle_bytes = MAX_HANDLE_SZ;
48 	ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH);
49 	if (ret < 0 && errno == EOPNOTSUPP) {
50 		SKIP(free(handle); close(fd1);
51 		     return, "nsfs doesn't support file handles");
52 	}
53 	ASSERT_EQ(ret, 0);
54 
55 	/* Close the namespace fd */
56 	close(fd1);
57 
58 	/* Try to reopen via file handle - should succeed since init ns is always active */
59 	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
60 	if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
61 		SKIP(free(handle);
62 		     return, "open_by_handle_at with FD_NSFS_ROOT not supported");
63 	}
64 	ASSERT_GE(fd2, 0);
65 
66 	/* Verify we opened the same namespace */
67 	fd1 = open("/proc/1/ns/net", O_RDONLY);
68 	ASSERT_GE(fd1, 0);
69 	ASSERT_EQ(fstat(fd1, &st1), 0);
70 	ASSERT_EQ(fstat(fd2, &st2), 0);
71 	ASSERT_EQ(st1.st_ino, st2.st_ino);
72 
73 	close(fd1);
74 	close(fd2);
75 	free(handle);
76 }
77 
78 /*
79  * Test namespace lifecycle: create a namespace in a child process,
80  * get a file handle while it's active, then try to reopen after
81  * the process exits (namespace becomes inactive).
82  */
83 TEST(ns_inactive_after_exit)
84 {
85 	struct file_handle *handle;
86 	int mount_id;
87 	int ret;
88 	int fd;
89 	int pipefd[2];
90 	pid_t pid;
91 	int status;
92 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
93 
94 	/* Create pipe for passing file handle from child */
95 	ASSERT_EQ(pipe(pipefd), 0);
96 
97 	pid = fork();
98 	ASSERT_GE(pid, 0);
99 
100 	if (pid == 0) {
101 		/* Child process */
102 		close(pipefd[0]);
103 
104 		/* Create new network namespace */
105 		ret = unshare(CLONE_NEWNET);
106 		if (ret < 0) {
107 			close(pipefd[1]);
108 			exit(1);
109 		}
110 
111 		/* Open our new namespace */
112 		fd = open("/proc/self/ns/net", O_RDONLY);
113 		if (fd < 0) {
114 			close(pipefd[1]);
115 			exit(1);
116 		}
117 
118 		/* Get file handle for the namespace */
119 		handle = (struct file_handle *)buf;
120 		handle->handle_bytes = MAX_HANDLE_SZ;
121 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
122 		close(fd);
123 
124 		if (ret < 0) {
125 			close(pipefd[1]);
126 			exit(1);
127 		}
128 
129 		/* Send handle to parent */
130 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
131 		close(pipefd[1]);
132 
133 		/* Exit - namespace should become inactive */
134 		exit(0);
135 	}
136 
137 	/* Parent process */
138 	close(pipefd[1]);
139 
140 	/* Read file handle from child */
141 	ret = read(pipefd[0], buf, sizeof(buf));
142 	close(pipefd[0]);
143 
144 	/* Wait for child to exit */
145 	waitpid(pid, &status, 0);
146 	ASSERT_TRUE(WIFEXITED(status));
147 	ASSERT_EQ(WEXITSTATUS(status), 0);
148 
149 	ASSERT_GT(ret, 0);
150 	handle = (struct file_handle *)buf;
151 
152 	/* Try to reopen namespace - should fail with ENOENT since it's inactive */
153 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
154 	ASSERT_LT(fd, 0);
155 	/* Should fail with ENOENT (namespace inactive) or ESTALE */
156 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
157 }
158 
159 /*
160  * Test that a namespace remains active while a process is using it,
161  * even after the creating process exits.
162  */
163 TEST(ns_active_with_multiple_processes)
164 {
165 	struct file_handle *handle;
166 	int mount_id;
167 	int ret;
168 	int fd;
169 	int pipefd[2];
170 	int syncpipe[2];
171 	pid_t pid1, pid2;
172 	int status;
173 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
174 	char sync_byte;
175 
176 	/* Create pipes for communication */
177 	ASSERT_EQ(pipe(pipefd), 0);
178 	ASSERT_EQ(pipe(syncpipe), 0);
179 
180 	pid1 = fork();
181 	ASSERT_GE(pid1, 0);
182 
183 	if (pid1 == 0) {
184 		/* First child - creates namespace */
185 		close(pipefd[0]);
186 		close(syncpipe[1]);
187 
188 		/* Create new network namespace */
189 		ret = unshare(CLONE_NEWNET);
190 		if (ret < 0) {
191 			close(pipefd[1]);
192 			close(syncpipe[0]);
193 			exit(1);
194 		}
195 
196 		/* Open and get handle */
197 		fd = open("/proc/self/ns/net", O_RDONLY);
198 		if (fd < 0) {
199 			close(pipefd[1]);
200 			close(syncpipe[0]);
201 			exit(1);
202 		}
203 
204 		handle = (struct file_handle *)buf;
205 		handle->handle_bytes = MAX_HANDLE_SZ;
206 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
207 		close(fd);
208 
209 		if (ret < 0) {
210 			close(pipefd[1]);
211 			close(syncpipe[0]);
212 			exit(1);
213 		}
214 
215 		/* Send handle to parent */
216 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
217 		close(pipefd[1]);
218 
219 		/* Wait for signal before exiting */
220 		read(syncpipe[0], &sync_byte, 1);
221 		close(syncpipe[0]);
222 		exit(0);
223 	}
224 
225 	/* Parent reads handle */
226 	close(pipefd[1]);
227 	ret = read(pipefd[0], buf, sizeof(buf));
228 	close(pipefd[0]);
229 	ASSERT_GT(ret, 0);
230 
231 	handle = (struct file_handle *)buf;
232 
233 	/* Create second child that will keep namespace active */
234 	pid2 = fork();
235 	ASSERT_GE(pid2, 0);
236 
237 	if (pid2 == 0) {
238 		/* Second child - reopens the namespace */
239 		close(syncpipe[0]);
240 		close(syncpipe[1]);
241 
242 		/* Open the namespace via handle */
243 		fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
244 		if (fd < 0) {
245 			exit(1);
246 		}
247 
248 		/* Join the namespace */
249 		ret = setns(fd, CLONE_NEWNET);
250 		close(fd);
251 		if (ret < 0) {
252 			exit(1);
253 		}
254 
255 		/* Sleep to keep namespace active */
256 		sleep(1);
257 		exit(0);
258 	}
259 
260 	/* Let second child enter the namespace */
261 	usleep(100000); /* 100ms */
262 
263 	/* Signal first child to exit */
264 	close(syncpipe[0]);
265 	sync_byte = 'X';
266 	write(syncpipe[1], &sync_byte, 1);
267 	close(syncpipe[1]);
268 
269 	/* Wait for first child */
270 	waitpid(pid1, &status, 0);
271 	ASSERT_TRUE(WIFEXITED(status));
272 
273 	/* Namespace should still be active because second child is using it */
274 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
275 	ASSERT_GE(fd, 0);
276 	close(fd);
277 
278 	/* Wait for second child */
279 	waitpid(pid2, &status, 0);
280 	ASSERT_TRUE(WIFEXITED(status));
281 }
282 
283 /*
284  * Test user namespace active ref tracking via credential lifecycle
285  */
286 TEST(userns_active_ref_lifecycle)
287 {
288 	struct file_handle *handle;
289 	int mount_id;
290 	int ret;
291 	int fd;
292 	int pipefd[2];
293 	pid_t pid;
294 	int status;
295 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
296 
297 	ASSERT_EQ(pipe(pipefd), 0);
298 
299 	pid = fork();
300 	ASSERT_GE(pid, 0);
301 
302 	if (pid == 0) {
303 		/* Child process */
304 		close(pipefd[0]);
305 
306 		/* Create new user namespace */
307 		ret = unshare(CLONE_NEWUSER);
308 		if (ret < 0) {
309 			close(pipefd[1]);
310 			exit(1);
311 		}
312 
313 		/* Set up uid/gid mappings */
314 		int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
315 		int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
316 		int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
317 
318 		if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) {
319 			write(setgroups_fd, "deny", 4);
320 			close(setgroups_fd);
321 
322 			char mapping[64];
323 			snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
324 			write(uid_map_fd, mapping, strlen(mapping));
325 			close(uid_map_fd);
326 
327 			snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
328 			write(gid_map_fd, mapping, strlen(mapping));
329 			close(gid_map_fd);
330 		}
331 
332 		/* Get file handle */
333 		fd = open("/proc/self/ns/user", O_RDONLY);
334 		if (fd < 0) {
335 			close(pipefd[1]);
336 			exit(1);
337 		}
338 
339 		handle = (struct file_handle *)buf;
340 		handle->handle_bytes = MAX_HANDLE_SZ;
341 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
342 		close(fd);
343 
344 		if (ret < 0) {
345 			close(pipefd[1]);
346 			exit(1);
347 		}
348 
349 		/* Send handle to parent */
350 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
351 		close(pipefd[1]);
352 		exit(0);
353 	}
354 
355 	/* Parent */
356 	close(pipefd[1]);
357 	ret = read(pipefd[0], buf, sizeof(buf));
358 	close(pipefd[0]);
359 
360 	waitpid(pid, &status, 0);
361 	ASSERT_TRUE(WIFEXITED(status));
362 	ASSERT_EQ(WEXITSTATUS(status), 0);
363 
364 	ASSERT_GT(ret, 0);
365 	handle = (struct file_handle *)buf;
366 
367 	/* Namespace should be inactive after all tasks exit */
368 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
369 	ASSERT_LT(fd, 0);
370 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
371 }
372 
373 /*
374  * Test PID namespace active ref tracking
375  */
376 TEST(pidns_active_ref_lifecycle)
377 {
378 	struct file_handle *handle;
379 	int mount_id;
380 	int ret;
381 	int fd;
382 	int pipefd[2];
383 	pid_t pid;
384 	int status;
385 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
386 
387 	ASSERT_EQ(pipe(pipefd), 0);
388 
389 	pid = fork();
390 	ASSERT_GE(pid, 0);
391 
392 	if (pid == 0) {
393 		/* Child process */
394 		close(pipefd[0]);
395 
396 		/* Create new PID namespace */
397 		ret = unshare(CLONE_NEWPID);
398 		if (ret < 0) {
399 			close(pipefd[1]);
400 			exit(1);
401 		}
402 
403 		/* Fork to actually enter the PID namespace */
404 		pid_t child = fork();
405 		if (child < 0) {
406 			close(pipefd[1]);
407 			exit(1);
408 		}
409 
410 		if (child == 0) {
411 			/* Grandchild - in new PID namespace */
412 			fd = open("/proc/self/ns/pid", O_RDONLY);
413 			if (fd < 0) {
414 				exit(1);
415 			}
416 
417 			handle = (struct file_handle *)buf;
418 			handle->handle_bytes = MAX_HANDLE_SZ;
419 			ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
420 			close(fd);
421 
422 			if (ret < 0) {
423 				exit(1);
424 			}
425 
426 			/* Send handle to grandparent */
427 			write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
428 			close(pipefd[1]);
429 			exit(0);
430 		}
431 
432 		/* Wait for grandchild */
433 		waitpid(child, NULL, 0);
434 		exit(0);
435 	}
436 
437 	/* Parent */
438 	close(pipefd[1]);
439 	ret = read(pipefd[0], buf, sizeof(buf));
440 	close(pipefd[0]);
441 
442 	waitpid(pid, &status, 0);
443 	ASSERT_TRUE(WIFEXITED(status));
444 	ASSERT_EQ(WEXITSTATUS(status), 0);
445 
446 	ASSERT_GT(ret, 0);
447 	handle = (struct file_handle *)buf;
448 
449 	/* Namespace should be inactive after all processes exit */
450 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
451 	ASSERT_LT(fd, 0);
452 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
453 }
454 
455 /*
456  * Test that an open file descriptor keeps a namespace active.
457  * Even after the creating process exits, the namespace should remain
458  * active as long as an fd is held open.
459  */
460 TEST(ns_fd_keeps_active)
461 {
462 	struct file_handle *handle;
463 	int mount_id;
464 	int ret;
465 	int nsfd;
466 	int pipe_child_ready[2];
467 	int pipe_parent_ready[2];
468 	pid_t pid;
469 	int status;
470 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
471 	char sync_byte;
472 	char proc_path[64];
473 
474 	ASSERT_EQ(pipe(pipe_child_ready), 0);
475 	ASSERT_EQ(pipe(pipe_parent_ready), 0);
476 
477 	pid = fork();
478 	ASSERT_GE(pid, 0);
479 
480 	if (pid == 0) {
481 		/* Child process */
482 		close(pipe_child_ready[0]);
483 		close(pipe_parent_ready[1]);
484 
485 		TH_LOG("Child: creating new network namespace");
486 
487 		/* Create new network namespace */
488 		ret = unshare(CLONE_NEWNET);
489 		if (ret < 0) {
490 			TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno));
491 			close(pipe_child_ready[1]);
492 			close(pipe_parent_ready[0]);
493 			exit(1);
494 		}
495 
496 		TH_LOG("Child: network namespace created successfully");
497 
498 		/* Get file handle for the namespace */
499 		nsfd = open("/proc/self/ns/net", O_RDONLY);
500 		if (nsfd < 0) {
501 			TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno));
502 			close(pipe_child_ready[1]);
503 			close(pipe_parent_ready[0]);
504 			exit(1);
505 		}
506 
507 		TH_LOG("Child: opened namespace fd %d", nsfd);
508 
509 		handle = (struct file_handle *)buf;
510 		handle->handle_bytes = MAX_HANDLE_SZ;
511 		ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH);
512 		close(nsfd);
513 
514 		if (ret < 0) {
515 			TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno));
516 			close(pipe_child_ready[1]);
517 			close(pipe_parent_ready[0]);
518 			exit(1);
519 		}
520 
521 		TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes);
522 
523 		/* Send file handle to parent */
524 		ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes);
525 		TH_LOG("Child: sent %d bytes of file handle to parent", ret);
526 		close(pipe_child_ready[1]);
527 
528 		/* Wait for parent to open the fd */
529 		TH_LOG("Child: waiting for parent to open fd");
530 		ret = read(pipe_parent_ready[0], &sync_byte, 1);
531 		close(pipe_parent_ready[0]);
532 
533 		TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret);
534 		/* Exit - namespace should stay active because parent holds fd */
535 		exit(0);
536 	}
537 
538 	/* Parent process */
539 	close(pipe_child_ready[1]);
540 	close(pipe_parent_ready[0]);
541 
542 	TH_LOG("Parent: reading file handle from child");
543 
544 	/* Read file handle from child */
545 	ret = read(pipe_child_ready[0], buf, sizeof(buf));
546 	close(pipe_child_ready[0]);
547 	ASSERT_GT(ret, 0);
548 	handle = (struct file_handle *)buf;
549 
550 	TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes);
551 
552 	/* Open the child's namespace while it's still alive */
553 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
554 	TH_LOG("Parent: opening child's namespace at %s", proc_path);
555 	nsfd = open(proc_path, O_RDONLY);
556 	if (nsfd < 0) {
557 		TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno));
558 		close(pipe_parent_ready[1]);
559 		kill(pid, SIGKILL);
560 		waitpid(pid, NULL, 0);
561 		SKIP(return, "Failed to open child's namespace");
562 	}
563 
564 	TH_LOG("Parent: opened child's namespace, got fd %d", nsfd);
565 
566 	/* Signal child that we have the fd */
567 	sync_byte = 'G';
568 	write(pipe_parent_ready[1], &sync_byte, 1);
569 	close(pipe_parent_ready[1]);
570 	TH_LOG("Parent: signaled child that we have the fd");
571 
572 	/* Wait for child to exit */
573 	waitpid(pid, &status, 0);
574 	ASSERT_TRUE(WIFEXITED(status));
575 	ASSERT_EQ(WEXITSTATUS(status), 0);
576 
577 	TH_LOG("Child exited, parent holds fd %d to namespace", nsfd);
578 
579 	/*
580 	 * Namespace should still be ACTIVE because we hold an fd.
581 	 * We should be able to reopen it via file handle.
582 	 */
583 	TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)");
584 	int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
585 	ASSERT_GE(fd2, 0);
586 
587 	TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2);
588 
589 	/* Verify it's the same namespace */
590 	struct stat st1, st2;
591 	ASSERT_EQ(fstat(nsfd, &st1), 0);
592 	ASSERT_EQ(fstat(fd2, &st2), 0);
593 	TH_LOG("Namespace inodes: nsfd=%lu, fd2=%lu", st1.st_ino, st2.st_ino);
594 	ASSERT_EQ(st1.st_ino, st2.st_ino);
595 	close(fd2);
596 
597 	/* Now close the fd - namespace should become inactive */
598 	TH_LOG("Closing fd %d - namespace should become inactive", nsfd);
599 	close(nsfd);
600 
601 	/* Now reopening should fail - namespace is inactive */
602 	TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)");
603 	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
604 	ASSERT_LT(fd2, 0);
605 	/* Should fail with ENOENT (inactive) or ESTALE (gone) */
606 	TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno);
607 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
608 }
609 
610 /*
611  * Test hierarchical active reference propagation.
612  * When a child namespace is active, its owning user namespace should also
613  * be active automatically due to hierarchical active reference propagation.
614  * This ensures parents are always reachable when children are active.
615  */
616 TEST(ns_parent_always_reachable)
617 {
618 	struct file_handle *parent_handle, *child_handle;
619 	int ret;
620 	int child_nsfd;
621 	int pipefd[2];
622 	pid_t pid;
623 	int status;
624 	__u64 parent_id, child_id;
625 	char parent_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
626 	char child_buf[sizeof(*child_handle) + MAX_HANDLE_SZ];
627 
628 	ASSERT_EQ(pipe(pipefd), 0);
629 
630 	pid = fork();
631 	ASSERT_GE(pid, 0);
632 
633 	if (pid == 0) {
634 		/* Child process */
635 		close(pipefd[0]);
636 
637 		TH_LOG("Child: creating parent user namespace and setting up mappings");
638 
639 		/* Create parent user namespace with mappings */
640 		ret = setup_userns();
641 		if (ret < 0) {
642 			TH_LOG("Child: setup_userns() for parent failed: %s", strerror(errno));
643 			close(pipefd[1]);
644 			exit(1);
645 		}
646 
647 		TH_LOG("Child: parent user namespace created, now uid=%d gid=%d", getuid(), getgid());
648 
649 		/* Get namespace ID for parent user namespace */
650 		int parent_fd = open("/proc/self/ns/user", O_RDONLY);
651 		if (parent_fd < 0) {
652 			TH_LOG("Child: failed to open parent /proc/self/ns/user: %s", strerror(errno));
653 			close(pipefd[1]);
654 			exit(1);
655 		}
656 
657 		TH_LOG("Child: opened parent userns fd %d", parent_fd);
658 
659 		if (ioctl(parent_fd, NS_GET_ID, &parent_id) < 0) {
660 			TH_LOG("Child: NS_GET_ID for parent failed: %s", strerror(errno));
661 			close(parent_fd);
662 			close(pipefd[1]);
663 			exit(1);
664 		}
665 		close(parent_fd);
666 
667 		TH_LOG("Child: got parent namespace ID %llu", (unsigned long long)parent_id);
668 
669 		/* Create child user namespace within parent */
670 		TH_LOG("Child: creating nested child user namespace");
671 		ret = setup_userns();
672 		if (ret < 0) {
673 			TH_LOG("Child: setup_userns() for child failed: %s", strerror(errno));
674 			close(pipefd[1]);
675 			exit(1);
676 		}
677 
678 		TH_LOG("Child: nested child user namespace created, uid=%d gid=%d", getuid(), getgid());
679 
680 		/* Get namespace ID for child user namespace */
681 		int child_fd = open("/proc/self/ns/user", O_RDONLY);
682 		if (child_fd < 0) {
683 			TH_LOG("Child: failed to open child /proc/self/ns/user: %s", strerror(errno));
684 			close(pipefd[1]);
685 			exit(1);
686 		}
687 
688 		TH_LOG("Child: opened child userns fd %d", child_fd);
689 
690 		if (ioctl(child_fd, NS_GET_ID, &child_id) < 0) {
691 			TH_LOG("Child: NS_GET_ID for child failed: %s", strerror(errno));
692 			close(child_fd);
693 			close(pipefd[1]);
694 			exit(1);
695 		}
696 		close(child_fd);
697 
698 		TH_LOG("Child: got child namespace ID %llu", (unsigned long long)child_id);
699 
700 		/* Send both namespace IDs to parent */
701 		TH_LOG("Child: sending both namespace IDs to parent");
702 		write(pipefd[1], &parent_id, sizeof(parent_id));
703 		write(pipefd[1], &child_id, sizeof(child_id));
704 		close(pipefd[1]);
705 
706 		TH_LOG("Child: exiting - parent userns should become inactive");
707 		/* Exit - parent user namespace should become inactive */
708 		exit(0);
709 	}
710 
711 	/* Parent process */
712 	close(pipefd[1]);
713 
714 	TH_LOG("Parent: reading both namespace IDs from child");
715 
716 	/* Read both namespace IDs - fixed size, no parsing needed */
717 	ret = read(pipefd[0], &parent_id, sizeof(parent_id));
718 	if (ret != sizeof(parent_id)) {
719 		close(pipefd[0]);
720 		waitpid(pid, NULL, 0);
721 		SKIP(return, "Failed to read parent namespace ID from child");
722 	}
723 
724 	ret = read(pipefd[0], &child_id, sizeof(child_id));
725 	close(pipefd[0]);
726 	if (ret != sizeof(child_id)) {
727 		waitpid(pid, NULL, 0);
728 		SKIP(return, "Failed to read child namespace ID from child");
729 	}
730 
731 	TH_LOG("Parent: received parent_id=%llu, child_id=%llu",
732 	       (unsigned long long)parent_id, (unsigned long long)child_id);
733 
734 	/* Construct file handles from namespace IDs */
735 	parent_handle = (struct file_handle *)parent_buf;
736 	parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
737 	parent_handle->handle_type = FILEID_NSFS;
738 	struct nsfs_file_handle *parent_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
739 	parent_fh->ns_id = parent_id;
740 	parent_fh->ns_type = 0;
741 	parent_fh->ns_inum = 0;
742 
743 	child_handle = (struct file_handle *)child_buf;
744 	child_handle->handle_bytes = sizeof(struct nsfs_file_handle);
745 	child_handle->handle_type = FILEID_NSFS;
746 	struct nsfs_file_handle *child_fh = (struct nsfs_file_handle *)child_handle->f_handle;
747 	child_fh->ns_id = child_id;
748 	child_fh->ns_type = 0;
749 	child_fh->ns_inum = 0;
750 
751 	TH_LOG("Parent: opening child namespace BEFORE child exits");
752 
753 	/* Open child namespace while child is still alive to keep it active */
754 	child_nsfd = open_by_handle_at(FD_NSFS_ROOT, child_handle, O_RDONLY);
755 	if (child_nsfd < 0) {
756 		TH_LOG("Failed to open child namespace: %s (errno=%d)", strerror(errno), errno);
757 		waitpid(pid, NULL, 0);
758 		SKIP(return, "Failed to open child namespace");
759 	}
760 
761 	TH_LOG("Opened child namespace fd %d", child_nsfd);
762 
763 	/* Now wait for child to exit */
764 	TH_LOG("Parent: waiting for child to exit");
765 	waitpid(pid, &status, 0);
766 	ASSERT_TRUE(WIFEXITED(status));
767 	ASSERT_EQ(WEXITSTATUS(status), 0);
768 
769 	TH_LOG("Child process exited, parent holds fd to child namespace");
770 
771 	/*
772 	 * With hierarchical active reference propagation:
773 	 * Since the child namespace is active (parent process holds fd),
774 	 * the parent user namespace should ALSO be active automatically.
775 	 * This is because when we took an active reference on the child,
776 	 * it propagated up to the owning user namespace.
777 	 */
778 	TH_LOG("Attempting to reopen parent namespace (should SUCCEED - hierarchical propagation)");
779 	int parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
780 	ASSERT_GE(parent_fd, 0);
781 
782 	TH_LOG("SUCCESS: Parent namespace is active (fd=%d) due to active child", parent_fd);
783 
784 	/* Verify we can also get parent via NS_GET_USERNS */
785 	TH_LOG("Verifying NS_GET_USERNS also works");
786 	int parent_fd2 = ioctl(child_nsfd, NS_GET_USERNS);
787 	if (parent_fd2 < 0) {
788 		close(parent_fd);
789 		close(child_nsfd);
790 		TH_LOG("NS_GET_USERNS failed: %s (errno=%d)", strerror(errno), errno);
791 		SKIP(return, "NS_GET_USERNS not supported or failed");
792 	}
793 
794 	TH_LOG("NS_GET_USERNS succeeded, got parent fd %d", parent_fd2);
795 
796 	/* Verify both methods give us the same namespace */
797 	struct stat st1, st2;
798 	ASSERT_EQ(fstat(parent_fd, &st1), 0);
799 	ASSERT_EQ(fstat(parent_fd2, &st2), 0);
800 	TH_LOG("Parent namespace inodes: parent_fd=%lu, parent_fd2=%lu", st1.st_ino, st2.st_ino);
801 	ASSERT_EQ(st1.st_ino, st2.st_ino);
802 
803 	/*
804 	 * Close child fd - parent should remain active because we still
805 	 * hold direct references to it (parent_fd and parent_fd2).
806 	 */
807 	TH_LOG("Closing child fd - parent should remain active (direct refs held)");
808 	close(child_nsfd);
809 
810 	/* Parent should still be openable */
811 	TH_LOG("Verifying parent still active via file handle");
812 	int parent_fd3 = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
813 	ASSERT_GE(parent_fd3, 0);
814 	close(parent_fd3);
815 
816 	TH_LOG("Closing all fds to parent namespace");
817 	close(parent_fd);
818 	close(parent_fd2);
819 
820 	/* Both should now be inactive */
821 	TH_LOG("Attempting to reopen parent (should fail - inactive, no refs)");
822 	parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
823 	ASSERT_LT(parent_fd, 0);
824 	TH_LOG("Parent inactive as expected: %s (errno=%d)", strerror(errno), errno);
825 	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
826 }
827 
828 /*
829  * Test that bind mounts keep namespaces in the tree even when inactive
830  */
831 TEST(ns_bind_mount_keeps_in_tree)
832 {
833 	struct file_handle *handle;
834 	int mount_id;
835 	int ret;
836 	int fd;
837 	int pipefd[2];
838 	pid_t pid;
839 	int status;
840 	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
841 	char tmpfile[] = "/tmp/ns-test-XXXXXX";
842 	int tmpfd;
843 
844 	/* Create temporary file for bind mount */
845 	tmpfd = mkstemp(tmpfile);
846 	if (tmpfd < 0) {
847 		SKIP(return, "Cannot create temporary file");
848 	}
849 	close(tmpfd);
850 
851 	ASSERT_EQ(pipe(pipefd), 0);
852 
853 	pid = fork();
854 	ASSERT_GE(pid, 0);
855 
856 	if (pid == 0) {
857 		/* Child process */
858 		close(pipefd[0]);
859 
860 		/* Unshare mount namespace and make mounts private to avoid propagation */
861 		ret = unshare(CLONE_NEWNS);
862 		if (ret < 0) {
863 			close(pipefd[1]);
864 			unlink(tmpfile);
865 			exit(1);
866 		}
867 		ret = mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL);
868 		if (ret < 0) {
869 			close(pipefd[1]);
870 			unlink(tmpfile);
871 			exit(1);
872 		}
873 
874 		/* Create new network namespace */
875 		ret = unshare(CLONE_NEWNET);
876 		if (ret < 0) {
877 			close(pipefd[1]);
878 			unlink(tmpfile);
879 			exit(1);
880 		}
881 
882 		/* Bind mount the namespace */
883 		ret = mount("/proc/self/ns/net", tmpfile, NULL, MS_BIND, NULL);
884 		if (ret < 0) {
885 			close(pipefd[1]);
886 			unlink(tmpfile);
887 			exit(1);
888 		}
889 
890 		/* Get file handle */
891 		fd = open("/proc/self/ns/net", O_RDONLY);
892 		if (fd < 0) {
893 			umount(tmpfile);
894 			close(pipefd[1]);
895 			unlink(tmpfile);
896 			exit(1);
897 		}
898 
899 		handle = (struct file_handle *)buf;
900 		handle->handle_bytes = MAX_HANDLE_SZ;
901 		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
902 		close(fd);
903 
904 		if (ret < 0) {
905 			umount(tmpfile);
906 			close(pipefd[1]);
907 			unlink(tmpfile);
908 			exit(1);
909 		}
910 
911 		/* Send handle to parent */
912 		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
913 		close(pipefd[1]);
914 		exit(0);
915 	}
916 
917 	/* Parent */
918 	close(pipefd[1]);
919 	ret = read(pipefd[0], buf, sizeof(buf));
920 	close(pipefd[0]);
921 
922 	waitpid(pid, &status, 0);
923 	ASSERT_TRUE(WIFEXITED(status));
924 	ASSERT_EQ(WEXITSTATUS(status), 0);
925 
926 	ASSERT_GT(ret, 0);
927 	handle = (struct file_handle *)buf;
928 
929 	/*
930 	 * Namespace should be inactive but still in tree due to bind mount.
931 	 * Reopening should fail with ENOENT (inactive) not ESTALE (not in tree).
932 	 */
933 	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
934 	ASSERT_LT(fd, 0);
935 	/* Should be ENOENT (inactive) since bind mount keeps it in tree */
936 	if (errno != ENOENT && errno != ESTALE) {
937 		TH_LOG("Unexpected error: %d", errno);
938 	}
939 
940 	/* Cleanup */
941 	umount(tmpfile);
942 	unlink(tmpfile);
943 }
944 
945 /*
946  * Test multi-level hierarchy (3+ levels deep).
947  * Grandparent → Parent → Child
948  * When child is active, both parent AND grandparent should be active.
949  */
950 TEST(ns_multilevel_hierarchy)
951 {
952 	struct file_handle *gp_handle, *p_handle, *c_handle;
953 	int ret, pipefd[2];
954 	pid_t pid;
955 	int status;
956 	__u64 gp_id, p_id, c_id;
957 	char gp_buf[sizeof(*gp_handle) + MAX_HANDLE_SZ];
958 	char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
959 	char c_buf[sizeof(*c_handle) + MAX_HANDLE_SZ];
960 
961 	ASSERT_EQ(pipe(pipefd), 0);
962 	pid = fork();
963 	ASSERT_GE(pid, 0);
964 
965 	if (pid == 0) {
966 		close(pipefd[0]);
967 
968 		/* Create grandparent user namespace */
969 		if (setup_userns() < 0) {
970 			close(pipefd[1]);
971 			exit(1);
972 		}
973 
974 		int gp_fd = open("/proc/self/ns/user", O_RDONLY);
975 		if (gp_fd < 0) {
976 			close(pipefd[1]);
977 			exit(1);
978 		}
979 		if (ioctl(gp_fd, NS_GET_ID, &gp_id) < 0) {
980 			close(gp_fd);
981 			close(pipefd[1]);
982 			exit(1);
983 		}
984 		close(gp_fd);
985 
986 		/* Create parent user namespace */
987 		if (setup_userns() < 0) {
988 			close(pipefd[1]);
989 			exit(1);
990 		}
991 
992 		int p_fd = open("/proc/self/ns/user", O_RDONLY);
993 		if (p_fd < 0) {
994 			close(pipefd[1]);
995 			exit(1);
996 		}
997 		if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
998 			close(p_fd);
999 			close(pipefd[1]);
1000 			exit(1);
1001 		}
1002 		close(p_fd);
1003 
1004 		/* Create child user namespace */
1005 		if (setup_userns() < 0) {
1006 			close(pipefd[1]);
1007 			exit(1);
1008 		}
1009 
1010 		int c_fd = open("/proc/self/ns/user", O_RDONLY);
1011 		if (c_fd < 0) {
1012 			close(pipefd[1]);
1013 			exit(1);
1014 		}
1015 		if (ioctl(c_fd, NS_GET_ID, &c_id) < 0) {
1016 			close(c_fd);
1017 			close(pipefd[1]);
1018 			exit(1);
1019 		}
1020 		close(c_fd);
1021 
1022 		/* Send all three namespace IDs */
1023 		write(pipefd[1], &gp_id, sizeof(gp_id));
1024 		write(pipefd[1], &p_id, sizeof(p_id));
1025 		write(pipefd[1], &c_id, sizeof(c_id));
1026 		close(pipefd[1]);
1027 		exit(0);
1028 	}
1029 
1030 	close(pipefd[1]);
1031 
1032 	/* Read all three namespace IDs - fixed size, no parsing needed */
1033 	ret = read(pipefd[0], &gp_id, sizeof(gp_id));
1034 	if (ret != sizeof(gp_id)) {
1035 		close(pipefd[0]);
1036 		waitpid(pid, NULL, 0);
1037 		SKIP(return, "Failed to read grandparent namespace ID from child");
1038 	}
1039 
1040 	ret = read(pipefd[0], &p_id, sizeof(p_id));
1041 	if (ret != sizeof(p_id)) {
1042 		close(pipefd[0]);
1043 		waitpid(pid, NULL, 0);
1044 		SKIP(return, "Failed to read parent namespace ID from child");
1045 	}
1046 
1047 	ret = read(pipefd[0], &c_id, sizeof(c_id));
1048 	close(pipefd[0]);
1049 	if (ret != sizeof(c_id)) {
1050 		waitpid(pid, NULL, 0);
1051 		SKIP(return, "Failed to read child namespace ID from child");
1052 	}
1053 
1054 	/* Construct file handles from namespace IDs */
1055 	gp_handle = (struct file_handle *)gp_buf;
1056 	gp_handle->handle_bytes = sizeof(struct nsfs_file_handle);
1057 	gp_handle->handle_type = FILEID_NSFS;
1058 	struct nsfs_file_handle *gp_fh = (struct nsfs_file_handle *)gp_handle->f_handle;
1059 	gp_fh->ns_id = gp_id;
1060 	gp_fh->ns_type = 0;
1061 	gp_fh->ns_inum = 0;
1062 
1063 	p_handle = (struct file_handle *)p_buf;
1064 	p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
1065 	p_handle->handle_type = FILEID_NSFS;
1066 	struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
1067 	p_fh->ns_id = p_id;
1068 	p_fh->ns_type = 0;
1069 	p_fh->ns_inum = 0;
1070 
1071 	c_handle = (struct file_handle *)c_buf;
1072 	c_handle->handle_bytes = sizeof(struct nsfs_file_handle);
1073 	c_handle->handle_type = FILEID_NSFS;
1074 	struct nsfs_file_handle *c_fh = (struct nsfs_file_handle *)c_handle->f_handle;
1075 	c_fh->ns_id = c_id;
1076 	c_fh->ns_type = 0;
1077 	c_fh->ns_inum = 0;
1078 
1079 	/* Open child before process exits */
1080 	int c_fd = open_by_handle_at(FD_NSFS_ROOT, c_handle, O_RDONLY);
1081 	if (c_fd < 0) {
1082 		waitpid(pid, NULL, 0);
1083 		SKIP(return, "Failed to open child namespace");
1084 	}
1085 
1086 	waitpid(pid, &status, 0);
1087 	ASSERT_TRUE(WIFEXITED(status));
1088 	ASSERT_EQ(WEXITSTATUS(status), 0);
1089 
1090 	/*
1091 	 * With 3-level hierarchy and child active:
1092 	 * - Child is active (we hold fd)
1093 	 * - Parent should be active (propagated from child)
1094 	 * - Grandparent should be active (propagated from parent)
1095 	 */
1096 	TH_LOG("Testing parent active when child is active");
1097 	int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
1098 	ASSERT_GE(p_fd, 0);
1099 
1100 	TH_LOG("Testing grandparent active when child is active");
1101 	int gp_fd = open_by_handle_at(FD_NSFS_ROOT, gp_handle, O_RDONLY);
1102 	ASSERT_GE(gp_fd, 0);
1103 
1104 	close(c_fd);
1105 	close(p_fd);
1106 	close(gp_fd);
1107 }
1108 
1109 TEST_HARNESS_MAIN
1110