xref: /linux/tools/testing/selftests/filesystems/empty_mntns/empty_mntns_test.c (revision 7c8a4671dc3247a26a702e5f5996e9f453d7070d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Tests for empty mount namespace creation via UNSHARE_EMPTY_MNTNS
4  *
5  * Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
6  */
7 
8 #define _GNU_SOURCE
9 #include <fcntl.h>
10 #include <linux/mount.h>
11 #include <linux/stat.h>
12 #include <sched.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <sys/mount.h>
16 #include <sys/stat.h>
17 #include <sys/types.h>
18 #include <sys/wait.h>
19 #include <unistd.h>
20 
21 #include "../utils.h"
22 #include "../wrappers.h"
23 #include "empty_mntns.h"
24 #include "kselftest_harness.h"
25 
26 static bool unshare_empty_mntns_supported(void)
27 {
28 	pid_t pid;
29 	int status;
30 
31 	pid = fork();
32 	if (pid < 0)
33 		return false;
34 
35 	if (pid == 0) {
36 		if (enter_userns())
37 			_exit(1);
38 
39 		if (unshare(UNSHARE_EMPTY_MNTNS) && errno == EINVAL)
40 			_exit(1);
41 		_exit(0);
42 	}
43 
44 	if (waitpid(pid, &status, 0) != pid)
45 		return false;
46 
47 	if (!WIFEXITED(status))
48 		return false;
49 
50 	return WEXITSTATUS(status) == 0;
51 }
52 
53 
54 FIXTURE(empty_mntns) {};
55 
56 FIXTURE_SETUP(empty_mntns)
57 {
58 	if (!unshare_empty_mntns_supported())
59 		SKIP(return, "UNSHARE_EMPTY_MNTNS not supported");
60 }
61 
62 FIXTURE_TEARDOWN(empty_mntns) {}
63 
64 /* Verify unshare succeeds, produces exactly 1 mount, and root == cwd */
65 TEST_F(empty_mntns, basic)
66 {
67 	pid_t pid;
68 
69 	pid = fork();
70 	ASSERT_GE(pid, 0);
71 
72 	if (pid == 0) {
73 		uint64_t root_id, cwd_id;
74 
75 		if (enter_userns())
76 			_exit(1);
77 
78 		if (unshare(UNSHARE_EMPTY_MNTNS))
79 			_exit(2);
80 
81 		if (count_mounts() != 1)
82 			_exit(3);
83 
84 		root_id = get_unique_mnt_id("/");
85 		cwd_id = get_unique_mnt_id(".");
86 		if (root_id == 0 || cwd_id == 0)
87 			_exit(4);
88 
89 		if (root_id != cwd_id)
90 			_exit(5);
91 
92 		_exit(0);
93 	}
94 
95 	ASSERT_EQ(wait_for_pid(pid), 0);
96 }
97 
98 /*
99  * UNSHARE_EMPTY_MNTNS combined with CLONE_NEWUSER.
100  *
101  * The user namespace must be created first so /proc is still accessible
102  * for writing uid_map/gid_map.  The empty mount namespace is created
103  * afterwards.
104  */
105 TEST_F(empty_mntns, with_clone_newuser)
106 {
107 	pid_t pid;
108 
109 	pid = fork();
110 	ASSERT_GE(pid, 0);
111 
112 	if (pid == 0) {
113 		uid_t uid = getuid();
114 		gid_t gid = getgid();
115 		char map[100];
116 
117 		if (unshare(CLONE_NEWUSER))
118 			_exit(1);
119 
120 		snprintf(map, sizeof(map), "0 %d 1", uid);
121 		if (write_file("/proc/self/uid_map", map))
122 			_exit(2);
123 
124 		if (write_file("/proc/self/setgroups", "deny"))
125 			_exit(3);
126 
127 		snprintf(map, sizeof(map), "0 %d 1", gid);
128 		if (write_file("/proc/self/gid_map", map))
129 			_exit(4);
130 
131 		if (unshare(UNSHARE_EMPTY_MNTNS))
132 			_exit(5);
133 
134 		if (count_mounts() != 1)
135 			_exit(6);
136 
137 		_exit(0);
138 	}
139 
140 	ASSERT_EQ(wait_for_pid(pid), 0);
141 }
142 
143 /* UNSHARE_EMPTY_MNTNS combined with other namespace flags */
144 TEST_F(empty_mntns, with_other_ns_flags)
145 {
146 	pid_t pid;
147 
148 	pid = fork();
149 	ASSERT_GE(pid, 0);
150 
151 	if (pid == 0) {
152 		if (enter_userns())
153 			_exit(1);
154 
155 		if (unshare(UNSHARE_EMPTY_MNTNS | CLONE_NEWUTS | CLONE_NEWIPC))
156 			_exit(2);
157 
158 		if (count_mounts() != 1)
159 			_exit(3);
160 
161 		_exit(0);
162 	}
163 
164 	ASSERT_EQ(wait_for_pid(pid), 0);
165 }
166 
167 /* EPERM without proper capabilities */
168 TEST_F(empty_mntns, eperm_without_caps)
169 {
170 	pid_t pid;
171 
172 	pid = fork();
173 	ASSERT_GE(pid, 0);
174 
175 	if (pid == 0) {
176 		/* Skip if already root */
177 		if (getuid() == 0)
178 			_exit(0);
179 
180 		if (unshare(UNSHARE_EMPTY_MNTNS) == 0)
181 			_exit(1);
182 
183 		if (errno != EPERM)
184 			_exit(2);
185 
186 		_exit(0);
187 	}
188 
189 	ASSERT_EQ(wait_for_pid(pid), 0);
190 }
191 
192 /* Many source mounts still result in exactly 1 mount */
193 TEST_F(empty_mntns, many_source_mounts)
194 {
195 	pid_t pid;
196 
197 	pid = fork();
198 	ASSERT_GE(pid, 0);
199 
200 	if (pid == 0) {
201 		char tmpdir[] = "/tmp/empty_mntns_test.XXXXXX";
202 		int i;
203 
204 		if (enter_userns())
205 			_exit(1);
206 
207 		if (unshare(CLONE_NEWNS))
208 			_exit(2);
209 
210 		if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
211 			_exit(3);
212 
213 		if (!mkdtemp(tmpdir))
214 			_exit(4);
215 
216 		if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
217 			_exit(5);
218 
219 		for (i = 0; i < 5; i++) {
220 			char subdir[256];
221 
222 			snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i);
223 			if (mkdir(subdir, 0755) && errno != EEXIST)
224 				_exit(6);
225 			if (mount(subdir, subdir, NULL, MS_BIND, NULL))
226 				_exit(7);
227 		}
228 
229 		if (count_mounts() < 5)
230 			_exit(8);
231 
232 		if (unshare(UNSHARE_EMPTY_MNTNS))
233 			_exit(9);
234 
235 		if (count_mounts() != 1)
236 			_exit(10);
237 
238 		_exit(0);
239 	}
240 
241 	ASSERT_EQ(wait_for_pid(pid), 0);
242 }
243 
244 /* CWD on a different mount gets reset to root */
245 TEST_F(empty_mntns, cwd_reset)
246 {
247 	pid_t pid;
248 
249 	pid = fork();
250 	ASSERT_GE(pid, 0);
251 
252 	if (pid == 0) {
253 		char tmpdir[] = "/tmp/empty_mntns_cwd.XXXXXX";
254 		uint64_t root_id, cwd_id;
255 		struct statmount *sm;
256 
257 		if (enter_userns())
258 			_exit(1);
259 
260 		if (unshare(CLONE_NEWNS))
261 			_exit(2);
262 
263 		if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
264 			_exit(3);
265 
266 		if (!mkdtemp(tmpdir))
267 			_exit(4);
268 
269 		if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
270 			_exit(5);
271 
272 		if (chdir(tmpdir))
273 			_exit(6);
274 
275 		if (unshare(UNSHARE_EMPTY_MNTNS))
276 			_exit(7);
277 
278 		root_id = get_unique_mnt_id("/");
279 		cwd_id = get_unique_mnt_id(".");
280 		if (root_id == 0 || cwd_id == 0)
281 			_exit(8);
282 
283 		if (root_id != cwd_id)
284 			_exit(9);
285 
286 		sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, 0);
287 		if (!sm)
288 			_exit(10);
289 
290 		if (strcmp(sm->str + sm->mnt_point, "/") != 0)
291 			_exit(11);
292 
293 		free(sm);
294 		_exit(0);
295 	}
296 
297 	ASSERT_EQ(wait_for_pid(pid), 0);
298 }
299 
300 /* Verify statmount properties of the root mount */
301 TEST_F(empty_mntns, mount_properties)
302 {
303 	pid_t pid;
304 
305 	pid = fork();
306 	ASSERT_GE(pid, 0);
307 
308 	if (pid == 0) {
309 		struct statmount *sm;
310 		uint64_t root_id;
311 
312 		if (enter_userns())
313 			_exit(1);
314 
315 		if (unshare(UNSHARE_EMPTY_MNTNS))
316 			_exit(2);
317 
318 		root_id = get_unique_mnt_id("/");
319 		if (!root_id)
320 			_exit(3);
321 
322 		sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT |
323 				     STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE, 0);
324 		if (!sm)
325 			_exit(4);
326 
327 		if (!(sm->mask & STATMOUNT_MNT_POINT))
328 			_exit(5);
329 
330 		if (strcmp(sm->str + sm->mnt_point, "/") != 0)
331 			_exit(6);
332 
333 		if (!(sm->mask & STATMOUNT_MNT_BASIC))
334 			_exit(7);
335 
336 		if (sm->mnt_id != root_id)
337 			_exit(8);
338 
339 		free(sm);
340 		_exit(0);
341 	}
342 
343 	ASSERT_EQ(wait_for_pid(pid), 0);
344 }
345 
346 /* Consecutive UNSHARE_EMPTY_MNTNS calls produce new namespaces */
347 TEST_F(empty_mntns, repeated_unshare)
348 {
349 	pid_t pid;
350 
351 	pid = fork();
352 	ASSERT_GE(pid, 0);
353 
354 	if (pid == 0) {
355 		uint64_t first_root_id, second_root_id;
356 
357 		if (enter_userns())
358 			_exit(1);
359 
360 		if (unshare(UNSHARE_EMPTY_MNTNS))
361 			_exit(2);
362 
363 		if (count_mounts() != 1)
364 			_exit(3);
365 
366 		first_root_id = get_unique_mnt_id("/");
367 
368 		if (unshare(UNSHARE_EMPTY_MNTNS))
369 			_exit(4);
370 
371 		if (count_mounts() != 1)
372 			_exit(5);
373 
374 		second_root_id = get_unique_mnt_id("/");
375 
376 		if (first_root_id == second_root_id)
377 			_exit(6);
378 
379 		_exit(0);
380 	}
381 
382 	ASSERT_EQ(wait_for_pid(pid), 0);
383 }
384 
385 /* Root mount's parent is itself */
386 TEST_F(empty_mntns, root_is_own_parent)
387 {
388 	pid_t pid;
389 
390 	pid = fork();
391 	ASSERT_GE(pid, 0);
392 
393 	if (pid == 0) {
394 		struct statmount sm;
395 		uint64_t root_id;
396 
397 		if (enter_userns())
398 			_exit(1);
399 
400 		if (unshare(UNSHARE_EMPTY_MNTNS))
401 			_exit(2);
402 
403 		root_id = get_unique_mnt_id("/");
404 		if (!root_id)
405 			_exit(3);
406 
407 		if (statmount(root_id, 0, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0) < 0)
408 			_exit(4);
409 
410 		if (!(sm.mask & STATMOUNT_MNT_BASIC))
411 			_exit(5);
412 
413 		if (sm.mnt_parent_id != sm.mnt_id)
414 			_exit(6);
415 
416 		_exit(0);
417 	}
418 
419 	ASSERT_EQ(wait_for_pid(pid), 0);
420 }
421 
422 /* Listmount returns only the root mount */
423 TEST_F(empty_mntns, listmount_single_entry)
424 {
425 	pid_t pid;
426 
427 	pid = fork();
428 	ASSERT_GE(pid, 0);
429 
430 	if (pid == 0) {
431 		uint64_t list[16];
432 		ssize_t nr_mounts;
433 		uint64_t root_id;
434 
435 		if (enter_userns())
436 			_exit(1);
437 
438 		if (unshare(UNSHARE_EMPTY_MNTNS))
439 			_exit(2);
440 
441 		nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0);
442 		if (nr_mounts != 1)
443 			_exit(3);
444 
445 		root_id = get_unique_mnt_id("/");
446 		if (!root_id)
447 			_exit(4);
448 
449 		if (list[0] != root_id)
450 			_exit(5);
451 
452 		_exit(0);
453 	}
454 
455 	ASSERT_EQ(wait_for_pid(pid), 0);
456 }
457 
458 /*
459  * Mount tmpfs over nullfs root to build a writable filesystem from scratch.
460  * This exercises the intended usage pattern: create an empty mount namespace
461  * (which has a nullfs root), then mount a real filesystem over it.
462  *
463  * Because resolving "/" returns the process root directly (via nd_jump_root)
464  * without following overmounts, we use the new mount API (fsopen/fsmount)
465  * to obtain a mount fd, then fchdir + chroot to enter the new filesystem.
466  */
467 TEST_F(empty_mntns, overmount_tmpfs)
468 {
469 	pid_t pid;
470 
471 	pid = fork();
472 	ASSERT_GE(pid, 0);
473 
474 	if (pid == 0) {
475 		struct statmount *sm;
476 		uint64_t root_id, cwd_id;
477 		int fd, fsfd, mntfd;
478 
479 		if (enter_userns())
480 			_exit(1);
481 
482 		if (unshare(UNSHARE_EMPTY_MNTNS))
483 			_exit(2);
484 
485 		if (count_mounts() != 1)
486 			_exit(3);
487 
488 		root_id = get_unique_mnt_id("/");
489 		if (!root_id)
490 			_exit(4);
491 
492 		/* Verify root is nullfs */
493 		sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
494 		if (!sm)
495 			_exit(5);
496 
497 		if (!(sm->mask & STATMOUNT_FS_TYPE))
498 			_exit(6);
499 
500 		if (strcmp(sm->str + sm->fs_type, "nullfs") != 0)
501 			_exit(7);
502 
503 		free(sm);
504 
505 		cwd_id = get_unique_mnt_id(".");
506 		if (!cwd_id || root_id != cwd_id)
507 			_exit(8);
508 
509 		/*
510 		 * nullfs root is immutable.  open(O_CREAT) returns ENOENT
511 		 * because empty_dir_lookup() returns -ENOENT before the
512 		 * IS_IMMUTABLE permission check in may_o_create() is reached.
513 		 */
514 		fd = open("/test", O_CREAT | O_RDWR, 0644);
515 		if (fd >= 0) {
516 			close(fd);
517 			_exit(9);
518 		}
519 		if (errno != ENOENT)
520 			_exit(10);
521 
522 		/*
523 		 * Use the new mount API to create tmpfs and get a mount fd.
524 		 * We need the fd because after attaching the tmpfs on top of
525 		 * "/", path resolution of "/" still returns the process root
526 		 * (nullfs) without following the overmount.  The mount fd
527 		 * lets us fchdir + chroot into the tmpfs.
528 		 */
529 		fsfd = sys_fsopen("tmpfs", 0);
530 		if (fsfd < 0)
531 			_exit(11);
532 
533 		if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "1M", 0)) {
534 			close(fsfd);
535 			_exit(12);
536 		}
537 
538 		if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
539 			close(fsfd);
540 			_exit(13);
541 		}
542 
543 		mntfd = sys_fsmount(fsfd, 0, 0);
544 		close(fsfd);
545 		if (mntfd < 0)
546 			_exit(14);
547 
548 		if (sys_move_mount(mntfd, "", AT_FDCWD, "/",
549 				   MOVE_MOUNT_F_EMPTY_PATH)) {
550 			close(mntfd);
551 			_exit(15);
552 		}
553 
554 		if (count_mounts() != 2) {
555 			close(mntfd);
556 			_exit(16);
557 		}
558 
559 		/* Enter the tmpfs via the mount fd */
560 		if (fchdir(mntfd)) {
561 			close(mntfd);
562 			_exit(17);
563 		}
564 
565 		if (chroot(".")) {
566 			close(mntfd);
567 			_exit(18);
568 		}
569 
570 		close(mntfd);
571 
572 		/* Verify "/" now resolves to tmpfs */
573 		root_id = get_unique_mnt_id("/");
574 		if (!root_id)
575 			_exit(19);
576 
577 		sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0);
578 		if (!sm)
579 			_exit(20);
580 
581 		if (!(sm->mask & STATMOUNT_FS_TYPE))
582 			_exit(21);
583 
584 		if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0)
585 			_exit(22);
586 
587 		free(sm);
588 
589 		/* Verify tmpfs is writable */
590 		fd = open("/testfile", O_CREAT | O_RDWR, 0644);
591 		if (fd < 0)
592 			_exit(23);
593 
594 		if (write(fd, "test", 4) != 4) {
595 			close(fd);
596 			_exit(24);
597 		}
598 
599 		close(fd);
600 
601 		if (access("/testfile", F_OK))
602 			_exit(25);
603 
604 		_exit(0);
605 	}
606 
607 	ASSERT_EQ(wait_for_pid(pid), 0);
608 }
609 
610 /*
611  * Tests below do not require UNSHARE_EMPTY_MNTNS support.
612  */
613 
/*
 * Child side of "invalid_flags": unshare() is called with the flag value
 * 0x80000000 and is expected to fail with EINVAL.
 *
 * Returns 0 on success, 1 if the user namespace could not be entered,
 * 2 if unshare() unexpectedly succeeded, 3 if errno was not EINVAL.
 */
static int invalid_flags_child(void)
{
	if (enter_userns())
		return 1;

	if (!unshare(0x80000000))
		return 2;

	return errno == EINVAL ? 0 : 3;
}

/* Fork a child to run the checks and require that it exits with 0. */
TEST(invalid_flags)
{
	pid_t child = fork();

	ASSERT_GE(child, 0);

	if (!child)
		_exit(invalid_flags_child());

	ASSERT_EQ(wait_for_pid(child), 0);
}
637 
638 /* Regular CLONE_NEWNS still copies the full mount tree */
639 TEST(clone_newns_full_copy)
640 {
641 	pid_t pid;
642 
643 	pid = fork();
644 	ASSERT_GE(pid, 0);
645 
646 	if (pid == 0) {
647 		ssize_t nr_mounts_before, nr_mounts_after;
648 		char tmpdir[] = "/tmp/empty_mntns_regr.XXXXXX";
649 		int i;
650 
651 		if (enter_userns())
652 			_exit(1);
653 
654 		if (unshare(CLONE_NEWNS))
655 			_exit(2);
656 
657 		if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
658 			_exit(3);
659 
660 		if (!mkdtemp(tmpdir))
661 			_exit(4);
662 
663 		if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M"))
664 			_exit(5);
665 
666 		for (i = 0; i < 3; i++) {
667 			char subdir[256];
668 
669 			snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i);
670 			if (mkdir(subdir, 0755) && errno != EEXIST)
671 				_exit(6);
672 			if (mount(subdir, subdir, NULL, MS_BIND, NULL))
673 				_exit(7);
674 		}
675 
676 		nr_mounts_before = count_mounts();
677 		if (nr_mounts_before < 3)
678 			_exit(8);
679 
680 		if (unshare(CLONE_NEWNS))
681 			_exit(9);
682 
683 		nr_mounts_after = count_mounts();
684 		if (nr_mounts_after < nr_mounts_before)
685 			_exit(10);
686 
687 		_exit(0);
688 	}
689 
690 	ASSERT_EQ(wait_for_pid(pid), 0);
691 }
692 
693 /* Other namespace unshares are unaffected */
694 TEST(other_ns_unaffected)
695 {
696 	pid_t pid;
697 
698 	pid = fork();
699 	ASSERT_GE(pid, 0);
700 
701 	if (pid == 0) {
702 		char hostname[256];
703 
704 		if (enter_userns())
705 			_exit(1);
706 
707 		if (unshare(CLONE_NEWUTS))
708 			_exit(2);
709 
710 		if (sethostname("test-empty-mntns", 16))
711 			_exit(3);
712 
713 		if (gethostname(hostname, sizeof(hostname)))
714 			_exit(4);
715 
716 		if (strcmp(hostname, "test-empty-mntns") != 0)
717 			_exit(5);
718 
719 		_exit(0);
720 	}
721 
722 	ASSERT_EQ(wait_for_pid(pid), 0);
723 }
724 
725 TEST_HARNESS_MAIN
726