1 // SPDX-License-Identifier: GPL-2.0-or-later 2 // Copyright (c) 2026 Christian Brauner <brauner@kernel.org> 3 4 #define _GNU_SOURCE 5 6 #include <errno.h> 7 #include <fcntl.h> 8 #include <sched.h> 9 #include <stdio.h> 10 #include <string.h> 11 #include <sys/stat.h> 12 #include <sys/mount.h> 13 #include <unistd.h> 14 #include <sys/syscall.h> 15 16 #include "../wrappers.h" 17 #include "../utils.h" 18 #include "../statmount/statmount.h" 19 #include "../../kselftest_harness.h" 20 21 #include <linux/stat.h> 22 23 #ifndef MOVE_MOUNT_BENEATH 24 #define MOVE_MOUNT_BENEATH 0x00000200 25 #endif 26 27 static uint64_t get_unique_mnt_id_fd(int fd) 28 { 29 struct statx sx; 30 int ret; 31 32 ret = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &sx); 33 if (ret) 34 return 0; 35 36 if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) 37 return 0; 38 39 return sx.stx_mnt_id; 40 } 41 42 /* 43 * Create a locked overmount stack at /mnt_dir for testing MNT_LOCKED 44 * transfer on non-rootfs mounts. 45 * 46 * Mounts tmpfs A at /mnt_dir, overmounts with tmpfs B, then enters a 47 * new user+mount namespace where both become locked. Returns the exit 48 * code to use on failure, or 0 on success. 49 */ 50 static int setup_locked_overmount(void) 51 { 52 /* Isolate so mounts don't leak. */ 53 if (unshare(CLONE_NEWNS)) 54 return 1; 55 if (mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL)) 56 return 2; 57 58 /* 59 * Create mounts while still in the initial user namespace so 60 * they become locked after the subsequent user namespace 61 * unshare. 62 */ 63 rmdir("/mnt_dir"); 64 if (mkdir("/mnt_dir", 0755)) 65 return 3; 66 67 /* Mount tmpfs A */ 68 if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL)) 69 return 4; 70 71 /* Overmount with tmpfs B */ 72 if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL)) 73 return 5; 74 75 /* 76 * Create user+mount namespace. Mounts A and B become locked 77 * because they might be covering something that is not supposed 78 * to be revealed. 79 */ 80 if (setup_userns()) 81 return 6; 82 83 /* Sanity check: B must be locked */ 84 if (!umount2("/mnt_dir", MNT_DETACH) || errno != EINVAL) 85 return 7; 86 87 return 0; 88 } 89 90 /* 91 * Create a detached tmpfs mount and return its fd, or -1 on failure. 92 */ 93 static int create_detached_tmpfs(void) 94 { 95 int fs_fd, mnt_fd; 96 97 fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC); 98 if (fs_fd < 0) 99 return -1; 100 101 if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { 102 close(fs_fd); 103 return -1; 104 } 105 106 mnt_fd = sys_fsmount(fs_fd, FSMOUNT_CLOEXEC, 0); 107 close(fs_fd); 108 return mnt_fd; 109 } 110 111 FIXTURE(move_mount) { 112 uint64_t orig_root_id; 113 }; 114 115 FIXTURE_SETUP(move_mount) 116 { 117 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 118 119 ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); 120 121 self->orig_root_id = get_unique_mnt_id("/"); 122 ASSERT_NE(self->orig_root_id, 0); 123 } 124 125 FIXTURE_TEARDOWN(move_mount) 126 { 127 } 128 129 /* 130 * Test successful MOVE_MOUNT_BENEATH on the rootfs. 131 * Mount a clone beneath /, fchdir to the clone, chroot to switch root, 132 * then detach the old root. 133 */ 134 TEST_F(move_mount, beneath_rootfs_success) 135 { 136 int fd_tree, ret; 137 uint64_t clone_id, root_id; 138 139 fd_tree = sys_open_tree(AT_FDCWD, "/", 140 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 141 ASSERT_GE(fd_tree, 0); 142 143 clone_id = get_unique_mnt_id_fd(fd_tree); 144 ASSERT_NE(clone_id, 0); 145 ASSERT_NE(clone_id, self->orig_root_id); 146 147 ASSERT_EQ(fchdir(fd_tree), 0); 148 149 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 150 MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); 151 ASSERT_EQ(ret, 0); 152 153 close(fd_tree); 154 155 /* Switch root to the clone */ 156 ASSERT_EQ(chroot("."), 0); 157 158 /* Verify "/" is now the clone */ 159 root_id = get_unique_mnt_id("/"); 160 ASSERT_NE(root_id, 0); 161 ASSERT_EQ(root_id, clone_id); 162 163 /* Detach old root */ 164 ASSERT_EQ(umount2(".", MNT_DETACH), 0); 165 } 166 167 /* 168 * Test that after MOVE_MOUNT_BENEATH on the rootfs the old root is 169 * stacked on top of the clone. Verify via statmount that the old 170 * root's parent is the clone. 171 */ 172 TEST_F(move_mount, beneath_rootfs_old_root_stacked) 173 { 174 int fd_tree, ret; 175 uint64_t clone_id; 176 struct statmount sm; 177 178 fd_tree = sys_open_tree(AT_FDCWD, "/", 179 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 180 ASSERT_GE(fd_tree, 0); 181 182 clone_id = get_unique_mnt_id_fd(fd_tree); 183 ASSERT_NE(clone_id, 0); 184 ASSERT_NE(clone_id, self->orig_root_id); 185 186 ASSERT_EQ(fchdir(fd_tree), 0); 187 188 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 189 MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); 190 ASSERT_EQ(ret, 0); 191 192 close(fd_tree); 193 194 ASSERT_EQ(chroot("."), 0); 195 196 /* Old root's parent should now be the clone */ 197 ASSERT_EQ(statmount(self->orig_root_id, 0, 0, 198 STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0); 199 ASSERT_EQ(sm.mnt_parent_id, clone_id); 200 201 ASSERT_EQ(umount2(".", MNT_DETACH), 0); 202 } 203 204 /* 205 * Test that MOVE_MOUNT_BENEATH on rootfs fails when chroot'd into a 206 * subdirectory of the same mount. The caller's fs->root.dentry doesn't 207 * match mnt->mnt_root so the kernel rejects it. 208 */ 209 TEST_F(move_mount, beneath_rootfs_in_chroot_fail) 210 { 211 int fd_tree, ret; 212 uint64_t chroot_id, clone_id; 213 214 rmdir("/chroot_dir"); 215 ASSERT_EQ(mkdir("/chroot_dir", 0755), 0); 216 217 chroot_id = get_unique_mnt_id("/chroot_dir"); 218 ASSERT_NE(chroot_id, 0); 219 ASSERT_EQ(self->orig_root_id, chroot_id); 220 221 ASSERT_EQ(chdir("/chroot_dir"), 0); 222 ASSERT_EQ(chroot("."), 0); 223 224 fd_tree = sys_open_tree(AT_FDCWD, "/", 225 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 226 ASSERT_GE(fd_tree, 0); 227 228 clone_id = get_unique_mnt_id_fd(fd_tree); 229 ASSERT_NE(clone_id, 0); 230 ASSERT_NE(clone_id, chroot_id); 231 232 ASSERT_EQ(fchdir(fd_tree), 0); 233 234 /* 235 * Should fail: fs->root.dentry (/chroot_dir) doesn't match 236 * the mount's mnt_root (/). 237 */ 238 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 239 MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); 240 ASSERT_EQ(ret, -1); 241 ASSERT_EQ(errno, EINVAL); 242 243 close(fd_tree); 244 } 245 246 /* 247 * Test that MOVE_MOUNT_BENEATH on rootfs succeeds when chroot'd into a 248 * separate tmpfs mount. The caller's root dentry matches the mount's 249 * mnt_root since it's a dedicated mount. 250 */ 251 TEST_F(move_mount, beneath_rootfs_in_chroot_success) 252 { 253 int fd_tree, ret; 254 uint64_t chroot_id, clone_id, root_id; 255 struct statmount sm; 256 257 rmdir("/chroot_dir"); 258 ASSERT_EQ(mkdir("/chroot_dir", 0755), 0); 259 ASSERT_EQ(mount("tmpfs", "/chroot_dir", "tmpfs", 0, NULL), 0); 260 261 chroot_id = get_unique_mnt_id("/chroot_dir"); 262 ASSERT_NE(chroot_id, 0); 263 264 ASSERT_EQ(chdir("/chroot_dir"), 0); 265 ASSERT_EQ(chroot("."), 0); 266 267 ASSERT_EQ(get_unique_mnt_id("/"), chroot_id); 268 269 fd_tree = sys_open_tree(AT_FDCWD, "/", 270 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); 271 ASSERT_GE(fd_tree, 0); 272 273 clone_id = get_unique_mnt_id_fd(fd_tree); 274 ASSERT_NE(clone_id, 0); 275 ASSERT_NE(clone_id, chroot_id); 276 277 ASSERT_EQ(fchdir(fd_tree), 0); 278 279 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 280 MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH); 281 ASSERT_EQ(ret, 0); 282 283 close(fd_tree); 284 285 ASSERT_EQ(chroot("."), 0); 286 287 root_id = get_unique_mnt_id("/"); 288 ASSERT_NE(root_id, 0); 289 ASSERT_EQ(root_id, clone_id); 290 291 ASSERT_EQ(statmount(chroot_id, 0, 0, 292 STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0); 293 ASSERT_EQ(sm.mnt_parent_id, clone_id); 294 295 ASSERT_EQ(umount2(".", MNT_DETACH), 0); 296 } 297 298 /* 299 * Test MNT_LOCKED transfer when mounting beneath rootfs in a user+mount 300 * namespace. After mount-beneath the new root gets MNT_LOCKED and the 301 * old root has MNT_LOCKED cleared so it can be unmounted. 302 */ 303 TEST_F(move_mount, beneath_rootfs_locked_transfer) 304 { 305 int fd_tree, ret; 306 uint64_t clone_id, root_id; 307 308 ASSERT_EQ(setup_userns(), 0); 309 310 ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); 311 312 fd_tree = sys_open_tree(AT_FDCWD, "/", 313 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | 314 AT_RECURSIVE); 315 ASSERT_GE(fd_tree, 0); 316 317 clone_id = get_unique_mnt_id_fd(fd_tree); 318 ASSERT_NE(clone_id, 0); 319 320 ASSERT_EQ(fchdir(fd_tree), 0); 321 322 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 323 MOVE_MOUNT_F_EMPTY_PATH | 324 MOVE_MOUNT_BENEATH); 325 ASSERT_EQ(ret, 0); 326 327 close(fd_tree); 328 329 ASSERT_EQ(chroot("."), 0); 330 331 root_id = get_unique_mnt_id("/"); 332 ASSERT_EQ(root_id, clone_id); 333 334 /* 335 * The old root should be unmountable (MNT_LOCKED was 336 * transferred to the clone). If MNT_LOCKED wasn't 337 * cleared, this would fail with EINVAL. 338 */ 339 ASSERT_EQ(umount2(".", MNT_DETACH), 0); 340 341 /* Verify "/" is still the clone after detaching old root */ 342 root_id = get_unique_mnt_id("/"); 343 ASSERT_EQ(root_id, clone_id); 344 } 345 346 /* 347 * Test containment invariant: after mount-beneath rootfs in a user+mount 348 * namespace, the new root must be MNT_LOCKED. The lock transfer from the 349 * old root preserves containment -- the process cannot unmount the new root 350 * to escape the namespace. 351 */ 352 TEST_F(move_mount, beneath_rootfs_locked_containment) 353 { 354 int fd_tree, ret; 355 uint64_t clone_id, root_id; 356 357 ASSERT_EQ(setup_userns(), 0); 358 359 ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0); 360 361 /* Sanity: rootfs must be locked in the new userns */ 362 ASSERT_EQ(umount2("/", MNT_DETACH), -1); 363 ASSERT_EQ(errno, EINVAL); 364 365 fd_tree = sys_open_tree(AT_FDCWD, "/", 366 OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | 367 AT_RECURSIVE); 368 ASSERT_GE(fd_tree, 0); 369 370 clone_id = get_unique_mnt_id_fd(fd_tree); 371 ASSERT_NE(clone_id, 0); 372 373 ASSERT_EQ(fchdir(fd_tree), 0); 374 375 ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/", 376 MOVE_MOUNT_F_EMPTY_PATH | 377 MOVE_MOUNT_BENEATH); 378 ASSERT_EQ(ret, 0); 379 380 close(fd_tree); 381 382 ASSERT_EQ(chroot("."), 0); 383 384 root_id = get_unique_mnt_id("/"); 385 ASSERT_EQ(root_id, clone_id); 386 387 /* Detach old root (MNT_LOCKED was cleared from it) */ 388 ASSERT_EQ(umount2(".", MNT_DETACH), 0); 389 390 /* Verify "/" is still the clone after detaching old root */ 391 root_id = get_unique_mnt_id("/"); 392 ASSERT_EQ(root_id, clone_id); 393 394 /* 395 * The new root must be locked (MNT_LOCKED was transferred 396 * from the old root). Attempting to unmount it must fail 397 * with EINVAL, preserving the containment invariant. 398 */ 399 ASSERT_EQ(umount2("/", MNT_DETACH), -1); 400 ASSERT_EQ(errno, EINVAL); 401 } 402 403 /* 404 * Test MNT_LOCKED transfer when mounting beneath a non-rootfs locked mount. 405 * Mounts created before unshare(CLONE_NEWUSER | CLONE_NEWNS) become locked 406 * in the new namespace. Mount-beneath transfers the lock from the displaced 407 * mount to the new mount, so the displaced mount can be unmounted. 408 */ 409 TEST_F(move_mount, beneath_non_rootfs_locked_transfer) 410 { 411 int mnt_fd, ret; 412 uint64_t mnt_new_id, mnt_visible_id; 413 414 ASSERT_EQ(setup_locked_overmount(), 0); 415 416 mnt_fd = create_detached_tmpfs(); 417 ASSERT_GE(mnt_fd, 0); 418 419 mnt_new_id = get_unique_mnt_id_fd(mnt_fd); 420 ASSERT_NE(mnt_new_id, 0); 421 422 /* Move mount beneath B (which is locked) */ 423 ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir", 424 MOVE_MOUNT_F_EMPTY_PATH | 425 MOVE_MOUNT_BENEATH); 426 ASSERT_EQ(ret, 0); 427 428 close(mnt_fd); 429 430 /* 431 * B should now be unmountable (MNT_LOCKED was transferred 432 * to the new mount beneath it). If MNT_LOCKED wasn't 433 * cleared from B, this would fail with EINVAL. 434 */ 435 ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0); 436 437 /* Verify the new mount is now visible */ 438 mnt_visible_id = get_unique_mnt_id("/mnt_dir"); 439 ASSERT_EQ(mnt_visible_id, mnt_new_id); 440 } 441 442 /* 443 * Test MNT_LOCKED containment when mounting beneath a non-rootfs mount 444 * that was locked during unshare(CLONE_NEWUSER | CLONE_NEWNS). 445 * Mounts created before unshare become locked in the new namespace. 446 * Mount-beneath transfers the lock, preserving containment: the new 447 * mount cannot be unmounted, but the displaced mount can. 448 */ 449 TEST_F(move_mount, beneath_non_rootfs_locked_containment) 450 { 451 int mnt_fd, ret; 452 uint64_t mnt_new_id, mnt_visible_id; 453 454 ASSERT_EQ(setup_locked_overmount(), 0); 455 456 mnt_fd = create_detached_tmpfs(); 457 ASSERT_GE(mnt_fd, 0); 458 459 mnt_new_id = get_unique_mnt_id_fd(mnt_fd); 460 ASSERT_NE(mnt_new_id, 0); 461 462 /* 463 * Move new tmpfs beneath B at /mnt_dir. 464 * Stack becomes: A -> new -> B 465 * Lock transfers from B to new. 466 */ 467 ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir", 468 MOVE_MOUNT_F_EMPTY_PATH | 469 MOVE_MOUNT_BENEATH); 470 ASSERT_EQ(ret, 0); 471 472 close(mnt_fd); 473 474 /* 475 * B lost MNT_LOCKED -- unmounting it must succeed. 476 * This reveals the new mount at /mnt_dir. 477 */ 478 ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0); 479 480 /* Verify the new mount is now visible */ 481 mnt_visible_id = get_unique_mnt_id("/mnt_dir"); 482 ASSERT_EQ(mnt_visible_id, mnt_new_id); 483 484 /* 485 * The new mount gained MNT_LOCKED -- unmounting it must 486 * fail with EINVAL, preserving the containment invariant. 487 */ 488 ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), -1); 489 ASSERT_EQ(errno, EINVAL); 490 } 491 492 TEST_HARNESS_MAIN 493