xref: /linux/tools/testing/selftests/filesystems/move_mount/move_mount_test.c (revision 7a5f1cd22d47f8ca4b760b6334378ae42c1bd24b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 // Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
3 
4 #define _GNU_SOURCE
5 
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <sched.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <sys/stat.h>
12 #include <sys/mount.h>
13 #include <unistd.h>
14 #include <sys/syscall.h>
15 
16 #include "../wrappers.h"
17 #include "../utils.h"
18 #include "../statmount/statmount.h"
19 #include "../../kselftest_harness.h"
20 
21 #include <linux/stat.h>
22 
/* Fallback for build environments whose headers do not define MOVE_MOUNT_BENEATH. */
#if !defined(MOVE_MOUNT_BENEATH)
#define MOVE_MOUNT_BENEATH 0x00000200
#endif
26 
27 static uint64_t get_unique_mnt_id_fd(int fd)
28 {
29 	struct statx sx;
30 	int ret;
31 
32 	ret = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &sx);
33 	if (ret)
34 		return 0;
35 
36 	if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE))
37 		return 0;
38 
39 	return sx.stx_mnt_id;
40 }
41 
42 /*
43  * Create a locked overmount stack at /mnt_dir for testing MNT_LOCKED
44  * transfer on non-rootfs mounts.
45  *
46  * Mounts tmpfs A at /mnt_dir, overmounts with tmpfs B, then enters a
47  * new user+mount namespace where both become locked. Returns the exit
48  * code to use on failure, or 0 on success.
49  */
50 static int setup_locked_overmount(void)
51 {
52 	/* Isolate so mounts don't leak. */
53 	if (unshare(CLONE_NEWNS))
54 		return 1;
55 	if (mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL))
56 		return 2;
57 
58 	/*
59 	 * Create mounts while still in the initial user namespace so
60 	 * they become locked after the subsequent user namespace
61 	 * unshare.
62 	 */
63 	rmdir("/mnt_dir");
64 	if (mkdir("/mnt_dir", 0755))
65 		return 3;
66 
67 	/* Mount tmpfs A */
68 	if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL))
69 		return 4;
70 
71 	/* Overmount with tmpfs B */
72 	if (mount("tmpfs", "/mnt_dir", "tmpfs", 0, NULL))
73 		return 5;
74 
75 	/*
76 	 * Create user+mount namespace. Mounts A and B become locked
77 	 * because they might be covering something that is not supposed
78 	 * to be revealed.
79 	 */
80 	if (setup_userns())
81 		return 6;
82 
83 	/* Sanity check: B must be locked */
84 	if (!umount2("/mnt_dir", MNT_DETACH) || errno != EINVAL)
85 		return 7;
86 
87 	return 0;
88 }
89 
90 /*
91  * Create a detached tmpfs mount and return its fd, or -1 on failure.
92  */
93 static int create_detached_tmpfs(void)
94 {
95 	int fs_fd, mnt_fd;
96 
97 	fs_fd = sys_fsopen("tmpfs", FSOPEN_CLOEXEC);
98 	if (fs_fd < 0)
99 		return -1;
100 
101 	if (sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) {
102 		close(fs_fd);
103 		return -1;
104 	}
105 
106 	mnt_fd = sys_fsmount(fs_fd, FSMOUNT_CLOEXEC, 0);
107 	close(fs_fd);
108 	return mnt_fd;
109 }
110 
/* Per-test state for the move_mount fixture. */
FIXTURE(move_mount)
{
	/* Unique mount ID of "/" as recorded by FIXTURE_SETUP. */
	uint64_t orig_root_id;
};
114 
115 FIXTURE_SETUP(move_mount)
116 {
117 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
118 
119 	ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0);
120 
121 	self->orig_root_id = get_unique_mnt_id("/");
122 	ASSERT_NE(self->orig_root_id, 0);
123 }
124 
/* Teardown for the move_mount fixture. */
FIXTURE_TEARDOWN(move_mount)
{
	/* Empty: this fixture performs no explicit cleanup. */
}
128 
129 /*
130  * Test successful MOVE_MOUNT_BENEATH on the rootfs.
131  * Mount a clone beneath /, fchdir to the clone, chroot to switch root,
132  * then detach the old root.
133  */
134 TEST_F(move_mount, beneath_rootfs_success)
135 {
136 	int fd_tree, ret;
137 	uint64_t clone_id, root_id;
138 
139 	fd_tree = sys_open_tree(AT_FDCWD, "/",
140 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
141 	ASSERT_GE(fd_tree, 0);
142 
143 	clone_id = get_unique_mnt_id_fd(fd_tree);
144 	ASSERT_NE(clone_id, 0);
145 	ASSERT_NE(clone_id, self->orig_root_id);
146 
147 	ASSERT_EQ(fchdir(fd_tree), 0);
148 
149 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
150 			     MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH);
151 	ASSERT_EQ(ret, 0);
152 
153 	close(fd_tree);
154 
155 	/* Switch root to the clone */
156 	ASSERT_EQ(chroot("."), 0);
157 
158 	/* Verify "/" is now the clone */
159 	root_id = get_unique_mnt_id("/");
160 	ASSERT_NE(root_id, 0);
161 	ASSERT_EQ(root_id, clone_id);
162 
163 	/* Detach old root */
164 	ASSERT_EQ(umount2(".", MNT_DETACH), 0);
165 }
166 
167 /*
168  * Test that after MOVE_MOUNT_BENEATH on the rootfs the old root is
169  * stacked on top of the clone. Verify via statmount that the old
170  * root's parent is the clone.
171  */
172 TEST_F(move_mount, beneath_rootfs_old_root_stacked)
173 {
174 	int fd_tree, ret;
175 	uint64_t clone_id;
176 	struct statmount sm;
177 
178 	fd_tree = sys_open_tree(AT_FDCWD, "/",
179 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
180 	ASSERT_GE(fd_tree, 0);
181 
182 	clone_id = get_unique_mnt_id_fd(fd_tree);
183 	ASSERT_NE(clone_id, 0);
184 	ASSERT_NE(clone_id, self->orig_root_id);
185 
186 	ASSERT_EQ(fchdir(fd_tree), 0);
187 
188 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
189 			     MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH);
190 	ASSERT_EQ(ret, 0);
191 
192 	close(fd_tree);
193 
194 	ASSERT_EQ(chroot("."), 0);
195 
196 	/* Old root's parent should now be the clone */
197 	ASSERT_EQ(statmount(self->orig_root_id, 0, 0,
198 			     STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0);
199 	ASSERT_EQ(sm.mnt_parent_id, clone_id);
200 
201 	ASSERT_EQ(umount2(".", MNT_DETACH), 0);
202 }
203 
204 /*
205  * Test that MOVE_MOUNT_BENEATH on rootfs fails when chroot'd into a
206  * subdirectory of the same mount. The caller's fs->root.dentry doesn't
207  * match mnt->mnt_root so the kernel rejects it.
208  */
209 TEST_F(move_mount, beneath_rootfs_in_chroot_fail)
210 {
211 	int fd_tree, ret;
212 	uint64_t chroot_id, clone_id;
213 
214 	rmdir("/chroot_dir");
215 	ASSERT_EQ(mkdir("/chroot_dir", 0755), 0);
216 
217 	chroot_id = get_unique_mnt_id("/chroot_dir");
218 	ASSERT_NE(chroot_id, 0);
219 	ASSERT_EQ(self->orig_root_id, chroot_id);
220 
221 	ASSERT_EQ(chdir("/chroot_dir"), 0);
222 	ASSERT_EQ(chroot("."), 0);
223 
224 	fd_tree = sys_open_tree(AT_FDCWD, "/",
225 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
226 	ASSERT_GE(fd_tree, 0);
227 
228 	clone_id = get_unique_mnt_id_fd(fd_tree);
229 	ASSERT_NE(clone_id, 0);
230 	ASSERT_NE(clone_id, chroot_id);
231 
232 	ASSERT_EQ(fchdir(fd_tree), 0);
233 
234 	/*
235 	 * Should fail: fs->root.dentry (/chroot_dir) doesn't match
236 	 * the mount's mnt_root (/).
237 	 */
238 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
239 			     MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH);
240 	ASSERT_EQ(ret, -1);
241 	ASSERT_EQ(errno, EINVAL);
242 
243 	close(fd_tree);
244 }
245 
246 /*
247  * Test that MOVE_MOUNT_BENEATH on rootfs succeeds when chroot'd into a
248  * separate tmpfs mount. The caller's root dentry matches the mount's
249  * mnt_root since it's a dedicated mount.
250  */
251 TEST_F(move_mount, beneath_rootfs_in_chroot_success)
252 {
253 	int fd_tree, ret;
254 	uint64_t chroot_id, clone_id, root_id;
255 	struct statmount sm;
256 
257 	rmdir("/chroot_dir");
258 	ASSERT_EQ(mkdir("/chroot_dir", 0755), 0);
259 	ASSERT_EQ(mount("tmpfs", "/chroot_dir", "tmpfs", 0, NULL), 0);
260 
261 	chroot_id = get_unique_mnt_id("/chroot_dir");
262 	ASSERT_NE(chroot_id, 0);
263 
264 	ASSERT_EQ(chdir("/chroot_dir"), 0);
265 	ASSERT_EQ(chroot("."), 0);
266 
267 	ASSERT_EQ(get_unique_mnt_id("/"), chroot_id);
268 
269 	fd_tree = sys_open_tree(AT_FDCWD, "/",
270 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
271 	ASSERT_GE(fd_tree, 0);
272 
273 	clone_id = get_unique_mnt_id_fd(fd_tree);
274 	ASSERT_NE(clone_id, 0);
275 	ASSERT_NE(clone_id, chroot_id);
276 
277 	ASSERT_EQ(fchdir(fd_tree), 0);
278 
279 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
280 			     MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH);
281 	ASSERT_EQ(ret, 0);
282 
283 	close(fd_tree);
284 
285 	ASSERT_EQ(chroot("."), 0);
286 
287 	root_id = get_unique_mnt_id("/");
288 	ASSERT_NE(root_id, 0);
289 	ASSERT_EQ(root_id, clone_id);
290 
291 	ASSERT_EQ(statmount(chroot_id, 0, 0,
292 			     STATMOUNT_MNT_BASIC, &sm, sizeof(sm), 0), 0);
293 	ASSERT_EQ(sm.mnt_parent_id, clone_id);
294 
295 	ASSERT_EQ(umount2(".", MNT_DETACH), 0);
296 }
297 
298 /*
299  * Test MNT_LOCKED transfer when mounting beneath rootfs in a user+mount
300  * namespace. After mount-beneath the new root gets MNT_LOCKED and the
301  * old root has MNT_LOCKED cleared so it can be unmounted.
302  */
303 TEST_F(move_mount, beneath_rootfs_locked_transfer)
304 {
305 	int fd_tree, ret;
306 	uint64_t clone_id, root_id;
307 
308 	ASSERT_EQ(setup_userns(), 0);
309 
310 	ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0);
311 
312 	fd_tree = sys_open_tree(AT_FDCWD, "/",
313 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
314 				AT_RECURSIVE);
315 	ASSERT_GE(fd_tree, 0);
316 
317 	clone_id = get_unique_mnt_id_fd(fd_tree);
318 	ASSERT_NE(clone_id, 0);
319 
320 	ASSERT_EQ(fchdir(fd_tree), 0);
321 
322 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
323 			     MOVE_MOUNT_F_EMPTY_PATH |
324 			     MOVE_MOUNT_BENEATH);
325 	ASSERT_EQ(ret, 0);
326 
327 	close(fd_tree);
328 
329 	ASSERT_EQ(chroot("."), 0);
330 
331 	root_id = get_unique_mnt_id("/");
332 	ASSERT_EQ(root_id, clone_id);
333 
334 	/*
335 	 * The old root should be unmountable (MNT_LOCKED was
336 	 * transferred to the clone). If MNT_LOCKED wasn't
337 	 * cleared, this would fail with EINVAL.
338 	 */
339 	ASSERT_EQ(umount2(".", MNT_DETACH), 0);
340 
341 	/* Verify "/" is still the clone after detaching old root */
342 	root_id = get_unique_mnt_id("/");
343 	ASSERT_EQ(root_id, clone_id);
344 }
345 
346 /*
347  * Test containment invariant: after mount-beneath rootfs in a user+mount
348  * namespace, the new root must be MNT_LOCKED. The lock transfer from the
349  * old root preserves containment -- the process cannot unmount the new root
350  * to escape the namespace.
351  */
352 TEST_F(move_mount, beneath_rootfs_locked_containment)
353 {
354 	int fd_tree, ret;
355 	uint64_t clone_id, root_id;
356 
357 	ASSERT_EQ(setup_userns(), 0);
358 
359 	ASSERT_EQ(mount("", "/", NULL, MS_REC | MS_PRIVATE, NULL), 0);
360 
361 	/* Sanity: rootfs must be locked in the new userns */
362 	ASSERT_EQ(umount2("/", MNT_DETACH), -1);
363 	ASSERT_EQ(errno, EINVAL);
364 
365 	fd_tree = sys_open_tree(AT_FDCWD, "/",
366 				OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC |
367 				AT_RECURSIVE);
368 	ASSERT_GE(fd_tree, 0);
369 
370 	clone_id = get_unique_mnt_id_fd(fd_tree);
371 	ASSERT_NE(clone_id, 0);
372 
373 	ASSERT_EQ(fchdir(fd_tree), 0);
374 
375 	ret = sys_move_mount(fd_tree, "", AT_FDCWD, "/",
376 			     MOVE_MOUNT_F_EMPTY_PATH |
377 			     MOVE_MOUNT_BENEATH);
378 	ASSERT_EQ(ret, 0);
379 
380 	close(fd_tree);
381 
382 	ASSERT_EQ(chroot("."), 0);
383 
384 	root_id = get_unique_mnt_id("/");
385 	ASSERT_EQ(root_id, clone_id);
386 
387 	/* Detach old root (MNT_LOCKED was cleared from it) */
388 	ASSERT_EQ(umount2(".", MNT_DETACH), 0);
389 
390 	/* Verify "/" is still the clone after detaching old root */
391 	root_id = get_unique_mnt_id("/");
392 	ASSERT_EQ(root_id, clone_id);
393 
394 	/*
395 	 * The new root must be locked (MNT_LOCKED was transferred
396 	 * from the old root). Attempting to unmount it must fail
397 	 * with EINVAL, preserving the containment invariant.
398 	 */
399 	ASSERT_EQ(umount2("/", MNT_DETACH), -1);
400 	ASSERT_EQ(errno, EINVAL);
401 }
402 
403 /*
404  * Test MNT_LOCKED transfer when mounting beneath a non-rootfs locked mount.
405  * Mounts created before unshare(CLONE_NEWUSER | CLONE_NEWNS) become locked
406  * in the new namespace. Mount-beneath transfers the lock from the displaced
407  * mount to the new mount, so the displaced mount can be unmounted.
408  */
409 TEST_F(move_mount, beneath_non_rootfs_locked_transfer)
410 {
411 	int mnt_fd, ret;
412 	uint64_t mnt_new_id, mnt_visible_id;
413 
414 	ASSERT_EQ(setup_locked_overmount(), 0);
415 
416 	mnt_fd = create_detached_tmpfs();
417 	ASSERT_GE(mnt_fd, 0);
418 
419 	mnt_new_id = get_unique_mnt_id_fd(mnt_fd);
420 	ASSERT_NE(mnt_new_id, 0);
421 
422 	/* Move mount beneath B (which is locked) */
423 	ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir",
424 			     MOVE_MOUNT_F_EMPTY_PATH |
425 			     MOVE_MOUNT_BENEATH);
426 	ASSERT_EQ(ret, 0);
427 
428 	close(mnt_fd);
429 
430 	/*
431 	 * B should now be unmountable (MNT_LOCKED was transferred
432 	 * to the new mount beneath it). If MNT_LOCKED wasn't
433 	 * cleared from B, this would fail with EINVAL.
434 	 */
435 	ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0);
436 
437 	/* Verify the new mount is now visible */
438 	mnt_visible_id = get_unique_mnt_id("/mnt_dir");
439 	ASSERT_EQ(mnt_visible_id, mnt_new_id);
440 }
441 
442 /*
443  * Test MNT_LOCKED containment when mounting beneath a non-rootfs mount
444  * that was locked during unshare(CLONE_NEWUSER | CLONE_NEWNS).
445  * Mounts created before unshare become locked in the new namespace.
446  * Mount-beneath transfers the lock, preserving containment: the new
447  * mount cannot be unmounted, but the displaced mount can.
448  */
449 TEST_F(move_mount, beneath_non_rootfs_locked_containment)
450 {
451 	int mnt_fd, ret;
452 	uint64_t mnt_new_id, mnt_visible_id;
453 
454 	ASSERT_EQ(setup_locked_overmount(), 0);
455 
456 	mnt_fd = create_detached_tmpfs();
457 	ASSERT_GE(mnt_fd, 0);
458 
459 	mnt_new_id = get_unique_mnt_id_fd(mnt_fd);
460 	ASSERT_NE(mnt_new_id, 0);
461 
462 	/*
463 	 * Move new tmpfs beneath B at /mnt_dir.
464 	 * Stack becomes: A -> new -> B
465 	 * Lock transfers from B to new.
466 	 */
467 	ret = sys_move_mount(mnt_fd, "", AT_FDCWD, "/mnt_dir",
468 			     MOVE_MOUNT_F_EMPTY_PATH |
469 			     MOVE_MOUNT_BENEATH);
470 	ASSERT_EQ(ret, 0);
471 
472 	close(mnt_fd);
473 
474 	/*
475 	 * B lost MNT_LOCKED -- unmounting it must succeed.
476 	 * This reveals the new mount at /mnt_dir.
477 	 */
478 	ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), 0);
479 
480 	/* Verify the new mount is now visible */
481 	mnt_visible_id = get_unique_mnt_id("/mnt_dir");
482 	ASSERT_EQ(mnt_visible_id, mnt_new_id);
483 
484 	/*
485 	 * The new mount gained MNT_LOCKED -- unmounting it must
486 	 * fail with EINVAL, preserving the containment invariant.
487 	 */
488 	ASSERT_EQ(umount2("/mnt_dir", MNT_DETACH), -1);
489 	ASSERT_EQ(errno, EINVAL);
490 }
491 
492 TEST_HARNESS_MAIN
493