1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Tests for empty mount namespace creation via UNSHARE_EMPTY_MNTNS 4 * 5 * Copyright (c) 2024 Christian Brauner <brauner@kernel.org> 6 */ 7 8 #define _GNU_SOURCE 9 #include <fcntl.h> 10 #include <linux/mount.h> 11 #include <linux/stat.h> 12 #include <sched.h> 13 #include <stdio.h> 14 #include <string.h> 15 #include <sys/mount.h> 16 #include <sys/stat.h> 17 #include <sys/types.h> 18 #include <sys/wait.h> 19 #include <unistd.h> 20 21 #include "../utils.h" 22 #include "../wrappers.h" 23 #include "empty_mntns.h" 24 #include "kselftest_harness.h" 25 26 static bool unshare_empty_mntns_supported(void) 27 { 28 pid_t pid; 29 int status; 30 31 pid = fork(); 32 if (pid < 0) 33 return false; 34 35 if (pid == 0) { 36 if (enter_userns()) 37 _exit(1); 38 39 if (unshare(UNSHARE_EMPTY_MNTNS) && errno == EINVAL) 40 _exit(1); 41 _exit(0); 42 } 43 44 if (waitpid(pid, &status, 0) != pid) 45 return false; 46 47 if (!WIFEXITED(status)) 48 return false; 49 50 return WEXITSTATUS(status) == 0; 51 } 52 53 54 FIXTURE(empty_mntns) {}; 55 56 FIXTURE_SETUP(empty_mntns) 57 { 58 if (!unshare_empty_mntns_supported()) 59 SKIP(return, "UNSHARE_EMPTY_MNTNS not supported"); 60 } 61 62 FIXTURE_TEARDOWN(empty_mntns) {} 63 64 /* Verify unshare succeeds, produces exactly 1 mount, and root == cwd */ 65 TEST_F(empty_mntns, basic) 66 { 67 pid_t pid; 68 69 pid = fork(); 70 ASSERT_GE(pid, 0); 71 72 if (pid == 0) { 73 uint64_t root_id, cwd_id; 74 75 if (enter_userns()) 76 _exit(1); 77 78 if (unshare(UNSHARE_EMPTY_MNTNS)) 79 _exit(2); 80 81 if (count_mounts() != 1) 82 _exit(3); 83 84 root_id = get_unique_mnt_id("/"); 85 cwd_id = get_unique_mnt_id("."); 86 if (root_id == 0 || cwd_id == 0) 87 _exit(4); 88 89 if (root_id != cwd_id) 90 _exit(5); 91 92 _exit(0); 93 } 94 95 ASSERT_EQ(wait_for_pid(pid), 0); 96 } 97 98 /* 99 * UNSHARE_EMPTY_MNTNS combined with CLONE_NEWUSER. 100 * 101 * The user namespace must be created first so /proc is still accessible 102 * for writing uid_map/gid_map. The empty mount namespace is created 103 * afterwards. 104 */ 105 TEST_F(empty_mntns, with_clone_newuser) 106 { 107 pid_t pid; 108 109 pid = fork(); 110 ASSERT_GE(pid, 0); 111 112 if (pid == 0) { 113 uid_t uid = getuid(); 114 gid_t gid = getgid(); 115 char map[100]; 116 117 if (unshare(CLONE_NEWUSER)) 118 _exit(1); 119 120 snprintf(map, sizeof(map), "0 %d 1", uid); 121 if (write_file("/proc/self/uid_map", map)) 122 _exit(2); 123 124 if (write_file("/proc/self/setgroups", "deny")) 125 _exit(3); 126 127 snprintf(map, sizeof(map), "0 %d 1", gid); 128 if (write_file("/proc/self/gid_map", map)) 129 _exit(4); 130 131 if (unshare(UNSHARE_EMPTY_MNTNS)) 132 _exit(5); 133 134 if (count_mounts() != 1) 135 _exit(6); 136 137 _exit(0); 138 } 139 140 ASSERT_EQ(wait_for_pid(pid), 0); 141 } 142 143 /* UNSHARE_EMPTY_MNTNS combined with other namespace flags */ 144 TEST_F(empty_mntns, with_other_ns_flags) 145 { 146 pid_t pid; 147 148 pid = fork(); 149 ASSERT_GE(pid, 0); 150 151 if (pid == 0) { 152 if (enter_userns()) 153 _exit(1); 154 155 if (unshare(UNSHARE_EMPTY_MNTNS | CLONE_NEWUTS | CLONE_NEWIPC)) 156 _exit(2); 157 158 if (count_mounts() != 1) 159 _exit(3); 160 161 _exit(0); 162 } 163 164 ASSERT_EQ(wait_for_pid(pid), 0); 165 } 166 167 /* EPERM without proper capabilities */ 168 TEST_F(empty_mntns, eperm_without_caps) 169 { 170 pid_t pid; 171 172 pid = fork(); 173 ASSERT_GE(pid, 0); 174 175 if (pid == 0) { 176 /* Skip if already root */ 177 if (getuid() == 0) 178 _exit(0); 179 180 if (unshare(UNSHARE_EMPTY_MNTNS) == 0) 181 _exit(1); 182 183 if (errno != EPERM) 184 _exit(2); 185 186 _exit(0); 187 } 188 189 ASSERT_EQ(wait_for_pid(pid), 0); 190 } 191 192 /* Many source mounts still result in exactly 1 mount */ 193 TEST_F(empty_mntns, many_source_mounts) 194 { 195 pid_t pid; 196 197 pid = fork(); 198 ASSERT_GE(pid, 0); 199 200 if (pid == 0) { 201 char tmpdir[] = "/tmp/empty_mntns_test.XXXXXX"; 202 int i; 203 204 if (enter_userns()) 205 _exit(1); 206 207 if (unshare(CLONE_NEWNS)) 208 _exit(2); 209 210 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 211 _exit(3); 212 213 if (!mkdtemp(tmpdir)) 214 _exit(4); 215 216 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 217 _exit(5); 218 219 for (i = 0; i < 5; i++) { 220 char subdir[256]; 221 222 snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); 223 if (mkdir(subdir, 0755) && errno != EEXIST) 224 _exit(6); 225 if (mount(subdir, subdir, NULL, MS_BIND, NULL)) 226 _exit(7); 227 } 228 229 if (count_mounts() < 5) 230 _exit(8); 231 232 if (unshare(UNSHARE_EMPTY_MNTNS)) 233 _exit(9); 234 235 if (count_mounts() != 1) 236 _exit(10); 237 238 _exit(0); 239 } 240 241 ASSERT_EQ(wait_for_pid(pid), 0); 242 } 243 244 /* CWD on a different mount gets reset to root */ 245 TEST_F(empty_mntns, cwd_reset) 246 { 247 pid_t pid; 248 249 pid = fork(); 250 ASSERT_GE(pid, 0); 251 252 if (pid == 0) { 253 char tmpdir[] = "/tmp/empty_mntns_cwd.XXXXXX"; 254 uint64_t root_id, cwd_id; 255 struct statmount *sm; 256 257 if (enter_userns()) 258 _exit(1); 259 260 if (unshare(CLONE_NEWNS)) 261 _exit(2); 262 263 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 264 _exit(3); 265 266 if (!mkdtemp(tmpdir)) 267 _exit(4); 268 269 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 270 _exit(5); 271 272 if (chdir(tmpdir)) 273 _exit(6); 274 275 if (unshare(UNSHARE_EMPTY_MNTNS)) 276 _exit(7); 277 278 root_id = get_unique_mnt_id("/"); 279 cwd_id = get_unique_mnt_id("."); 280 if (root_id == 0 || cwd_id == 0) 281 _exit(8); 282 283 if (root_id != cwd_id) 284 _exit(9); 285 286 sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT, 0); 287 if (!sm) 288 _exit(10); 289 290 if (strcmp(sm->str + sm->mnt_point, "/") != 0) 291 _exit(11); 292 293 free(sm); 294 _exit(0); 295 } 296 297 ASSERT_EQ(wait_for_pid(pid), 0); 298 } 299 300 /* Verify statmount properties of the root mount */ 301 TEST_F(empty_mntns, mount_properties) 302 { 303 pid_t pid; 304 305 pid = fork(); 306 ASSERT_GE(pid, 0); 307 308 if (pid == 0) { 309 struct statmount *sm; 310 uint64_t root_id; 311 312 if (enter_userns()) 313 _exit(1); 314 315 if (unshare(UNSHARE_EMPTY_MNTNS)) 316 _exit(2); 317 318 root_id = get_unique_mnt_id("/"); 319 if (!root_id) 320 _exit(3); 321 322 sm = statmount_alloc(root_id, 0, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_ROOT | 323 STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE, 0); 324 if (!sm) 325 _exit(4); 326 327 if (!(sm->mask & STATMOUNT_MNT_POINT)) 328 _exit(5); 329 330 if (strcmp(sm->str + sm->mnt_point, "/") != 0) 331 _exit(6); 332 333 if (!(sm->mask & STATMOUNT_MNT_BASIC)) 334 _exit(7); 335 336 if (sm->mnt_id != root_id) 337 _exit(8); 338 339 free(sm); 340 _exit(0); 341 } 342 343 ASSERT_EQ(wait_for_pid(pid), 0); 344 } 345 346 /* Consecutive UNSHARE_EMPTY_MNTNS calls produce new namespaces */ 347 TEST_F(empty_mntns, repeated_unshare) 348 { 349 pid_t pid; 350 351 pid = fork(); 352 ASSERT_GE(pid, 0); 353 354 if (pid == 0) { 355 uint64_t first_root_id, second_root_id; 356 357 if (enter_userns()) 358 _exit(1); 359 360 if (unshare(UNSHARE_EMPTY_MNTNS)) 361 _exit(2); 362 363 if (count_mounts() != 1) 364 _exit(3); 365 366 first_root_id = get_unique_mnt_id("/"); 367 368 if (unshare(UNSHARE_EMPTY_MNTNS)) 369 _exit(4); 370 371 if (count_mounts() != 1) 372 _exit(5); 373 374 second_root_id = get_unique_mnt_id("/"); 375 376 if (first_root_id == second_root_id) 377 _exit(6); 378 379 _exit(0); 380 } 381 382 ASSERT_EQ(wait_for_pid(pid), 0); 383 } 384 385 /* Root mount's parent is itself */ 386 TEST_F(empty_mntns, root_is_own_parent) 387 { 388 pid_t pid; 389 390 pid = fork(); 391 ASSERT_GE(pid, 0); 392 393 if (pid == 0) { 394 struct statmount sm; 395 uint64_t root_id; 396 397 if (enter_userns()) 398 _exit(1); 399 400 if (unshare(UNSHARE_EMPTY_MNTNS)) 401 _exit(2); 402 403 root_id = get_unique_mnt_id("/"); 404 if (!root_id) 405 _exit(3); 406 407 if (statmount(root_id, 0, 0, STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0) < 0) 408 _exit(4); 409 410 if (!(sm.mask & STATMOUNT_MNT_BASIC)) 411 _exit(5); 412 413 if (sm.mnt_parent_id != sm.mnt_id) 414 _exit(6); 415 416 _exit(0); 417 } 418 419 ASSERT_EQ(wait_for_pid(pid), 0); 420 } 421 422 /* Listmount returns only the root mount */ 423 TEST_F(empty_mntns, listmount_single_entry) 424 { 425 pid_t pid; 426 427 pid = fork(); 428 ASSERT_GE(pid, 0); 429 430 if (pid == 0) { 431 uint64_t list[16]; 432 ssize_t nr_mounts; 433 uint64_t root_id; 434 435 if (enter_userns()) 436 _exit(1); 437 438 if (unshare(UNSHARE_EMPTY_MNTNS)) 439 _exit(2); 440 441 nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 16, 0); 442 if (nr_mounts != 1) 443 _exit(3); 444 445 root_id = get_unique_mnt_id("/"); 446 if (!root_id) 447 _exit(4); 448 449 if (list[0] != root_id) 450 _exit(5); 451 452 _exit(0); 453 } 454 455 ASSERT_EQ(wait_for_pid(pid), 0); 456 } 457 458 /* 459 * Mount tmpfs over nullfs root to build a writable filesystem from scratch. 460 * This exercises the intended usage pattern: create an empty mount namespace 461 * (which has a nullfs root), then mount a real filesystem over it. 462 * 463 * Because resolving "/" returns the process root directly (via nd_jump_root) 464 * without following overmounts, we use the new mount API (fsopen/fsmount) 465 * to obtain a mount fd, then fchdir + chroot to enter the new filesystem. 466 */ 467 TEST_F(empty_mntns, overmount_tmpfs) 468 { 469 pid_t pid; 470 471 pid = fork(); 472 ASSERT_GE(pid, 0); 473 474 if (pid == 0) { 475 struct statmount *sm; 476 uint64_t root_id, cwd_id; 477 int fd, fsfd, mntfd; 478 479 if (enter_userns()) 480 _exit(1); 481 482 if (unshare(UNSHARE_EMPTY_MNTNS)) 483 _exit(2); 484 485 if (count_mounts() != 1) 486 _exit(3); 487 488 root_id = get_unique_mnt_id("/"); 489 if (!root_id) 490 _exit(4); 491 492 /* Verify root is nullfs */ 493 sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); 494 if (!sm) 495 _exit(5); 496 497 if (!(sm->mask & STATMOUNT_FS_TYPE)) 498 _exit(6); 499 500 if (strcmp(sm->str + sm->fs_type, "nullfs") != 0) 501 _exit(7); 502 503 free(sm); 504 505 cwd_id = get_unique_mnt_id("."); 506 if (!cwd_id || root_id != cwd_id) 507 _exit(8); 508 509 /* 510 * nullfs root is immutable. open(O_CREAT) returns ENOENT 511 * because empty_dir_lookup() returns -ENOENT before the 512 * IS_IMMUTABLE permission check in may_o_create() is reached. 513 */ 514 fd = open("/test", O_CREAT | O_RDWR, 0644); 515 if (fd >= 0) { 516 close(fd); 517 _exit(9); 518 } 519 if (errno != ENOENT) 520 _exit(10); 521 522 /* 523 * Use the new mount API to create tmpfs and get a mount fd. 524 * We need the fd because after attaching the tmpfs on top of 525 * "/", path resolution of "/" still returns the process root 526 * (nullfs) without following the overmount. The mount fd 527 * lets us fchdir + chroot into the tmpfs. 528 */ 529 fsfd = sys_fsopen("tmpfs", 0); 530 if (fsfd < 0) 531 _exit(11); 532 533 if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "size", "1M", 0)) { 534 close(fsfd); 535 _exit(12); 536 } 537 538 if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { 539 close(fsfd); 540 _exit(13); 541 } 542 543 mntfd = sys_fsmount(fsfd, 0, 0); 544 close(fsfd); 545 if (mntfd < 0) 546 _exit(14); 547 548 if (sys_move_mount(mntfd, "", AT_FDCWD, "/", 549 MOVE_MOUNT_F_EMPTY_PATH)) { 550 close(mntfd); 551 _exit(15); 552 } 553 554 if (count_mounts() != 2) { 555 close(mntfd); 556 _exit(16); 557 } 558 559 /* Enter the tmpfs via the mount fd */ 560 if (fchdir(mntfd)) { 561 close(mntfd); 562 _exit(17); 563 } 564 565 if (chroot(".")) { 566 close(mntfd); 567 _exit(18); 568 } 569 570 close(mntfd); 571 572 /* Verify "/" now resolves to tmpfs */ 573 root_id = get_unique_mnt_id("/"); 574 if (!root_id) 575 _exit(19); 576 577 sm = statmount_alloc(root_id, 0, STATMOUNT_FS_TYPE, 0); 578 if (!sm) 579 _exit(20); 580 581 if (!(sm->mask & STATMOUNT_FS_TYPE)) 582 _exit(21); 583 584 if (strcmp(sm->str + sm->fs_type, "tmpfs") != 0) 585 _exit(22); 586 587 free(sm); 588 589 /* Verify tmpfs is writable */ 590 fd = open("/testfile", O_CREAT | O_RDWR, 0644); 591 if (fd < 0) 592 _exit(23); 593 594 if (write(fd, "test", 4) != 4) { 595 close(fd); 596 _exit(24); 597 } 598 599 close(fd); 600 601 if (access("/testfile", F_OK)) 602 _exit(25); 603 604 _exit(0); 605 } 606 607 ASSERT_EQ(wait_for_pid(pid), 0); 608 } 609 610 /* 611 * Tests below do not require UNSHARE_EMPTY_MNTNS support. 612 */ 613 614 /* Invalid unshare flags return EINVAL */ 615 TEST(invalid_flags) 616 { 617 pid_t pid; 618 619 pid = fork(); 620 ASSERT_GE(pid, 0); 621 622 if (pid == 0) { 623 if (enter_userns()) 624 _exit(1); 625 626 if (unshare(0x80000000) == 0) 627 _exit(2); 628 629 if (errno != EINVAL) 630 _exit(3); 631 632 _exit(0); 633 } 634 635 ASSERT_EQ(wait_for_pid(pid), 0); 636 } 637 638 /* Regular CLONE_NEWNS still copies the full mount tree */ 639 TEST(clone_newns_full_copy) 640 { 641 pid_t pid; 642 643 pid = fork(); 644 ASSERT_GE(pid, 0); 645 646 if (pid == 0) { 647 ssize_t nr_mounts_before, nr_mounts_after; 648 char tmpdir[] = "/tmp/empty_mntns_regr.XXXXXX"; 649 int i; 650 651 if (enter_userns()) 652 _exit(1); 653 654 if (unshare(CLONE_NEWNS)) 655 _exit(2); 656 657 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) 658 _exit(3); 659 660 if (!mkdtemp(tmpdir)) 661 _exit(4); 662 663 if (mount("tmpfs", tmpdir, "tmpfs", 0, "size=1M")) 664 _exit(5); 665 666 for (i = 0; i < 3; i++) { 667 char subdir[256]; 668 669 snprintf(subdir, sizeof(subdir), "%s/sub%d", tmpdir, i); 670 if (mkdir(subdir, 0755) && errno != EEXIST) 671 _exit(6); 672 if (mount(subdir, subdir, NULL, MS_BIND, NULL)) 673 _exit(7); 674 } 675 676 nr_mounts_before = count_mounts(); 677 if (nr_mounts_before < 3) 678 _exit(8); 679 680 if (unshare(CLONE_NEWNS)) 681 _exit(9); 682 683 nr_mounts_after = count_mounts(); 684 if (nr_mounts_after < nr_mounts_before) 685 _exit(10); 686 687 _exit(0); 688 } 689 690 ASSERT_EQ(wait_for_pid(pid), 0); 691 } 692 693 /* Other namespace unshares are unaffected */ 694 TEST(other_ns_unaffected) 695 { 696 pid_t pid; 697 698 pid = fork(); 699 ASSERT_GE(pid, 0); 700 701 if (pid == 0) { 702 char hostname[256]; 703 704 if (enter_userns()) 705 _exit(1); 706 707 if (unshare(CLONE_NEWUTS)) 708 _exit(2); 709 710 if (sethostname("test-empty-mntns", 16)) 711 _exit(3); 712 713 if (gethostname(hostname, sizeof(hostname))) 714 _exit(4); 715 716 if (strcmp(hostname, "test-empty-mntns") != 0) 717 _exit(5); 718 719 _exit(0); 720 } 721 722 ASSERT_EQ(wait_for_pid(pid), 0); 723 } 724 725 TEST_HARNESS_MAIN 726