xref: /linux/tools/testing/selftests/mount_setattr/mount_setattr_test.c (revision 7a012a692e7cfbca245d195a80f23634d3d74fcc)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22 
23 #include "../filesystems/overlayfs/wrappers.h"
24 #include "../kselftest_harness.h"
25 
26 #ifndef CLONE_NEWNS
27 #define CLONE_NEWNS 0x00020000
28 #endif
29 
30 #ifndef CLONE_NEWUSER
31 #define CLONE_NEWUSER 0x10000000
32 #endif
33 
34 #ifndef MS_REC
35 #define MS_REC 16384
36 #endif
37 
38 #ifndef MS_RELATIME
39 #define MS_RELATIME (1 << 21)
40 #endif
41 
42 #ifndef MS_STRICTATIME
43 #define MS_STRICTATIME (1 << 24)
44 #endif
45 
46 #ifndef MOUNT_ATTR_RDONLY
47 #define MOUNT_ATTR_RDONLY 0x00000001
48 #endif
49 
50 #ifndef MOUNT_ATTR_NOSUID
51 #define MOUNT_ATTR_NOSUID 0x00000002
52 #endif
53 
54 #ifndef MOUNT_ATTR_NOEXEC
55 #define MOUNT_ATTR_NOEXEC 0x00000008
56 #endif
57 
58 #ifndef MOUNT_ATTR_NODIRATIME
59 #define MOUNT_ATTR_NODIRATIME 0x00000080
60 #endif
61 
62 #ifndef MOUNT_ATTR__ATIME
63 #define MOUNT_ATTR__ATIME 0x00000070
64 #endif
65 
66 #ifndef MOUNT_ATTR_RELATIME
67 #define MOUNT_ATTR_RELATIME 0x00000000
68 #endif
69 
70 #ifndef MOUNT_ATTR_NOATIME
71 #define MOUNT_ATTR_NOATIME 0x00000010
72 #endif
73 
74 #ifndef MOUNT_ATTR_STRICTATIME
75 #define MOUNT_ATTR_STRICTATIME 0x00000020
76 #endif
77 
78 #ifndef AT_RECURSIVE
79 #define AT_RECURSIVE 0x8000
80 #endif
81 
82 #ifndef MS_SHARED
83 #define MS_SHARED (1 << 20)
84 #endif
85 
86 #define DEFAULT_THREADS 4
87 #define ptr_to_int(p) ((int)((intptr_t)(p)))
88 #define int_to_ptr(u) ((void *)((intptr_t)(u)))
89 
90 #ifndef __NR_mount_setattr
91 	#if defined __alpha__
92 		#define __NR_mount_setattr 552
93 	#elif defined _MIPS_SIM
94 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
95 			#define __NR_mount_setattr (442 + 4000)
96 		#endif
97 		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
98 			#define __NR_mount_setattr (442 + 6000)
99 		#endif
100 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
101 			#define __NR_mount_setattr (442 + 5000)
102 		#endif
103 	#elif defined __ia64__
104 		#define __NR_mount_setattr (442 + 1024)
105 	#else
106 		#define __NR_mount_setattr 442
107 	#endif
108 #endif
109 
110 #ifndef __NR_open_tree
111 	#if defined __alpha__
112 		#define __NR_open_tree 538
113 	#elif defined _MIPS_SIM
114 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
115 			#define __NR_open_tree 4428
116 		#endif
117 		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
118 			#define __NR_open_tree 6428
119 		#endif
120 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
121 			#define __NR_open_tree 5428
122 		#endif
123 	#elif defined __ia64__
124 		#define __NR_open_tree (428 + 1024)
125 	#else
126 		#define __NR_open_tree 428
127 	#endif
128 #endif
129 
/*
 * Fallback syscall number for move_mount() when the libc headers do not
 * provide one. The generic number is 429; as with the mount_setattr and
 * open_tree fallbacks in this file, ia64 uses the generic number plus 1024.
 */
#ifndef __NR_move_mount
	#if defined __alpha__
		#define __NR_move_mount 539
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_move_mount 4429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_move_mount 6429
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_move_mount 5429
		#endif
	#elif defined __ia64__
		/* Was (428 + 1024), which is the open_tree number. */
		#define __NR_move_mount (429 + 1024)
	#else
		#define __NR_move_mount 429
	#endif
#endif
149 
150 #ifndef MOUNT_ATTR_IDMAP
151 #define MOUNT_ATTR_IDMAP 0x00100000
152 #endif
153 
154 #ifndef MOUNT_ATTR_NOSYMFOLLOW
155 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
156 #endif
157 
/*
 * Issue the mount_setattr system call directly through syscall().
 *
 * All arguments are forwarded unchanged. The value returned by syscall()
 * is handed back to the caller narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return rc;
}
163 
164 #ifndef OPEN_TREE_CLONE
165 #define OPEN_TREE_CLONE 1
166 #endif
167 
168 #ifndef OPEN_TREE_CLOEXEC
169 #define OPEN_TREE_CLOEXEC O_CLOEXEC
170 #endif
171 
172 #ifndef AT_RECURSIVE
173 #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
174 #endif
175 
/*
 * write() wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever the final write() returned: the number of bytes
 * written, or a negative value with errno set for any other error.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything other than an EINTR failure is final. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
186 
187 static int write_file(const char *path, const void *buf, size_t count)
188 {
189 	int fd;
190 	ssize_t ret;
191 
192 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
193 	if (fd < 0)
194 		return -1;
195 
196 	ret = write_nointr(fd, buf, count);
197 	close(fd);
198 	if (ret < 0 || (size_t)ret != count)
199 		return -1;
200 
201 	return 0;
202 }
203 
204 static int create_and_enter_userns(void)
205 {
206 	uid_t uid;
207 	gid_t gid;
208 	char map[100];
209 
210 	uid = getuid();
211 	gid = getgid();
212 
213 	if (unshare(CLONE_NEWUSER))
214 		return -1;
215 
216 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
217 	    errno != ENOENT)
218 		return -1;
219 
220 	snprintf(map, sizeof(map), "0 %d 1", uid);
221 	if (write_file("/proc/self/uid_map", map, strlen(map)))
222 		return -1;
223 
224 
225 	snprintf(map, sizeof(map), "0 %d 1", gid);
226 	if (write_file("/proc/self/gid_map", map, strlen(map)))
227 		return -1;
228 
229 	if (setgid(0))
230 		return -1;
231 
232 	if (setuid(0))
233 		return -1;
234 
235 	return 0;
236 }
237 
238 static int prepare_unpriv_mountns(void)
239 {
240 	if (create_and_enter_userns())
241 		return -1;
242 
243 	if (unshare(CLONE_NEWNS))
244 		return -1;
245 
246 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
247 		return -1;
248 
249 	return 0;
250 }
251 
252 #ifndef ST_NOSYMFOLLOW
253 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
254 #endif
255 
/*
 * Read the mount flags of the filesystem containing @path via statvfs()
 * and translate them into a mount-flag bitmask.
 *
 * Returns the translated mask, or -EINVAL when statvfs() fails or reports
 * a flag this helper does not know how to translate.
 */
static int read_mnt_flags(const char *path)
{
	/* statvfs flag -> value reported to the caller */
	static const struct {
		unsigned long st_flag;
		unsigned int mnt_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		/* Previously reported as ST_MANDLOCK; use the MS_ name like the rest. */
		{ ST_MANDLOCK,		MS_MANDLOCK },
		/*
		 * NOTE(review): reported under its statvfs value, not an MS_
		 * constant. Callers outside this view may compare against
		 * ST_NOSYMFOLLOW, so it is left unchanged - confirm.
		 */
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	const size_t nflags = sizeof(flag_map) / sizeof(flag_map[0]);
	struct statvfs stat;
	unsigned long known = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &stat) != 0)
		return -EINVAL;

	for (i = 0; i < nflags; i++)
		known |= flag_map[i].st_flag;

	/* Refuse to silently drop flags that have no translation. */
	if (stat.f_flag & ~known)
		return -EINVAL;

	for (i = 0; i < nflags; i++) {
		if (stat.f_flag & flag_map[i].st_flag)
			mnt_flags |= flag_map[i].mnt_flag;
	}

	return mnt_flags;
}
295 
/*
 * Skip @nfields whitespace-separated fields in @src.
 *
 * A field ends at the first space or tab; exactly one separator character
 * is consumed after each field. Returns a pointer to where the next field
 * starts, or to the terminating NUL if the string ends first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		/* Jump to the end of the current field. */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;

		/* Step over the single separator. */
		cur++;
	}

	return cur;
}
313 
/*
 * Terminate @word in place at its first space or tab so that it holds a
 * single field. A string without separators is left as it is.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
320 
/*
 * Report whether the mount at @path is a shared mount.
 *
 * Scans /proc/self/mountinfo line by line. The fifth field of each line
 * is compared against @path; for a matching line the seventh field is
 * searched for "shared:". Returns true on the first such hit and false if
 * the file cannot be opened or no matching line carries the tag.
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	FILE *f = NULL;
	bool shared = false;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * get_field() never returns NULL, so no checks are needed.
		 * Locate both fields before cutting the line apart below.
		 */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common cleanup instead of leaking line and f. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
357 
358 static void *mount_setattr_thread(void *data)
359 {
360 	struct mount_attr attr = {
361 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
362 		.attr_clr	= 0,
363 		.propagation	= MS_SHARED,
364 	};
365 
366 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
367 		pthread_exit(int_to_ptr(-1));
368 
369 	pthread_exit(int_to_ptr(0));
370 }
371 
372 /* Attempt to de-conflict with the selftests tree. */
373 #ifndef SKIP
374 #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
375 #endif
376 
377 static bool mount_setattr_supported(void)
378 {
379 	int ret;
380 
381 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
382 	if (ret < 0 && errno == ENOSYS)
383 		return false;
384 
385 	return true;
386 }
387 
388 FIXTURE(mount_setattr) {
389 };
390 
391 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
392 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
393 
394 FIXTURE_SETUP(mount_setattr)
395 {
396 	int fd = -EBADF;
397 
398 	if (!mount_setattr_supported())
399 		SKIP(return, "mount_setattr syscall not supported");
400 
401 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
402 
403 	(void)umount2("/mnt", MNT_DETACH);
404 	(void)umount2("/tmp", MNT_DETACH);
405 
406 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
407 			"size=100000,mode=700"), 0);
408 
409 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
410 
411 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
412 			"size=100000,mode=700"), 0);
413 
414 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
415 
416 	ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
417 
418 	ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
419 
420 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
421 			"size=100000,mode=700"), 0);
422 
423 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
424 			"size=100000,mode=700"), 0);
425 
426 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
427 
428 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
429 			"size=100000,mode=700"), 0);
430 
431 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
432 
433 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
434 
435 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
436 
437 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
438 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
439 
440 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
441 
442 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
443 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
444 
445 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
446 	ASSERT_GT(fd, 0);
447 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
448 	ASSERT_EQ(close(fd), 0);
449 }
450 
451 FIXTURE_TEARDOWN(mount_setattr)
452 {
453 	if (!mount_setattr_supported())
454 		SKIP(return, "mount_setattr syscall not supported");
455 
456 	(void)umount2("/mnt/A", MNT_DETACH);
457 	(void)umount2("/tmp", MNT_DETACH);
458 }
459 
460 TEST_F(mount_setattr, invalid_attributes)
461 {
462 	struct mount_attr invalid_attr = {
463 		.attr_set = (1U << 31),
464 	};
465 
466 	if (!mount_setattr_supported())
467 		SKIP(return, "mount_setattr syscall not supported");
468 
469 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
470 				    sizeof(invalid_attr)), 0);
471 
472 	invalid_attr.attr_set	= 0;
473 	invalid_attr.attr_clr	= (1U << 31);
474 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
475 				    sizeof(invalid_attr)), 0);
476 
477 	invalid_attr.attr_clr		= 0;
478 	invalid_attr.propagation	= (1U << 31);
479 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
480 				    sizeof(invalid_attr)), 0);
481 
482 	invalid_attr.attr_set		= (1U << 31);
483 	invalid_attr.attr_clr		= (1U << 31);
484 	invalid_attr.propagation	= (1U << 31);
485 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
486 				    sizeof(invalid_attr)), 0);
487 
488 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
489 				    sizeof(invalid_attr)), 0);
490 }
491 
492 TEST_F(mount_setattr, extensibility)
493 {
494 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
495 	char *s = "dummy";
496 	struct mount_attr invalid_attr = {};
497 	struct mount_attr_large {
498 		struct mount_attr attr1;
499 		struct mount_attr attr2;
500 		struct mount_attr attr3;
501 	} large_attr = {};
502 
503 	if (!mount_setattr_supported())
504 		SKIP(return, "mount_setattr syscall not supported");
505 
506 	old_flags = read_mnt_flags("/mnt/A");
507 	ASSERT_GT(old_flags, 0);
508 
509 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
510 				    sizeof(invalid_attr)), 0);
511 	ASSERT_EQ(errno, EFAULT);
512 
513 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
514 				    sizeof(invalid_attr)), 0);
515 	ASSERT_EQ(errno, EINVAL);
516 
517 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
518 	ASSERT_EQ(errno, EINVAL);
519 
520 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
521 				    sizeof(invalid_attr) / 2), 0);
522 	ASSERT_EQ(errno, EINVAL);
523 
524 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
525 				    sizeof(invalid_attr) / 2), 0);
526 	ASSERT_EQ(errno, EINVAL);
527 
528 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
529 				    (void *)&large_attr, sizeof(large_attr)), 0);
530 
531 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
532 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
533 				    (void *)&large_attr, sizeof(large_attr)), 0);
534 
535 	large_attr.attr3.attr_set = 0;
536 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
537 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
538 				    (void *)&large_attr, sizeof(large_attr)), 0);
539 
540 	expected_flags = old_flags;
541 	expected_flags |= MS_RDONLY;
542 
543 	new_flags = read_mnt_flags("/mnt/A");
544 	ASSERT_EQ(new_flags, expected_flags);
545 
546 	new_flags = read_mnt_flags("/mnt/A/AA");
547 	ASSERT_EQ(new_flags, expected_flags);
548 
549 	new_flags = read_mnt_flags("/mnt/A/AA/B");
550 	ASSERT_EQ(new_flags, expected_flags);
551 
552 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
553 	ASSERT_EQ(new_flags, expected_flags);
554 }
555 
556 TEST_F(mount_setattr, basic)
557 {
558 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
559 	struct mount_attr attr = {
560 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
561 		.attr_clr	= MOUNT_ATTR__ATIME,
562 	};
563 
564 	if (!mount_setattr_supported())
565 		SKIP(return, "mount_setattr syscall not supported");
566 
567 	old_flags = read_mnt_flags("/mnt/A");
568 	ASSERT_GT(old_flags, 0);
569 
570 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
571 
572 	expected_flags = old_flags;
573 	expected_flags |= MS_RDONLY;
574 	expected_flags |= MS_NOEXEC;
575 	expected_flags &= ~MS_NOATIME;
576 	expected_flags |= MS_RELATIME;
577 
578 	new_flags = read_mnt_flags("/mnt/A");
579 	ASSERT_EQ(new_flags, expected_flags);
580 
581 	new_flags = read_mnt_flags("/mnt/A/AA");
582 	ASSERT_EQ(new_flags, old_flags);
583 
584 	new_flags = read_mnt_flags("/mnt/A/AA/B");
585 	ASSERT_EQ(new_flags, old_flags);
586 
587 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
588 	ASSERT_EQ(new_flags, old_flags);
589 }
590 
591 TEST_F(mount_setattr, basic_recursive)
592 {
593 	int fd;
594 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
595 	struct mount_attr attr = {
596 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
597 		.attr_clr	= MOUNT_ATTR__ATIME,
598 	};
599 
600 	if (!mount_setattr_supported())
601 		SKIP(return, "mount_setattr syscall not supported");
602 
603 	old_flags = read_mnt_flags("/mnt/A");
604 	ASSERT_GT(old_flags, 0);
605 
606 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
607 
608 	expected_flags = old_flags;
609 	expected_flags |= MS_RDONLY;
610 	expected_flags |= MS_NOEXEC;
611 	expected_flags &= ~MS_NOATIME;
612 	expected_flags |= MS_RELATIME;
613 
614 	new_flags = read_mnt_flags("/mnt/A");
615 	ASSERT_EQ(new_flags, expected_flags);
616 
617 	new_flags = read_mnt_flags("/mnt/A/AA");
618 	ASSERT_EQ(new_flags, expected_flags);
619 
620 	new_flags = read_mnt_flags("/mnt/A/AA/B");
621 	ASSERT_EQ(new_flags, expected_flags);
622 
623 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
624 	ASSERT_EQ(new_flags, expected_flags);
625 
626 	memset(&attr, 0, sizeof(attr));
627 	attr.attr_clr = MOUNT_ATTR_RDONLY;
628 	attr.propagation = MS_SHARED;
629 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
630 
631 	expected_flags &= ~MS_RDONLY;
632 	new_flags = read_mnt_flags("/mnt/A");
633 	ASSERT_EQ(new_flags, expected_flags);
634 
635 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
636 
637 	new_flags = read_mnt_flags("/mnt/A/AA");
638 	ASSERT_EQ(new_flags, expected_flags);
639 
640 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
641 
642 	new_flags = read_mnt_flags("/mnt/A/AA/B");
643 	ASSERT_EQ(new_flags, expected_flags);
644 
645 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
646 
647 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
648 	ASSERT_EQ(new_flags, expected_flags);
649 
650 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
651 
652 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
653 	ASSERT_GE(fd, 0);
654 
655 	/*
656 	 * We're holding a fd open for writing so this needs to fail somewhere
657 	 * in the middle and the mount options need to be unchanged.
658 	 */
659 	attr.attr_set = MOUNT_ATTR_RDONLY;
660 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
661 
662 	new_flags = read_mnt_flags("/mnt/A");
663 	ASSERT_EQ(new_flags, expected_flags);
664 
665 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
666 
667 	new_flags = read_mnt_flags("/mnt/A/AA");
668 	ASSERT_EQ(new_flags, expected_flags);
669 
670 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
671 
672 	new_flags = read_mnt_flags("/mnt/A/AA/B");
673 	ASSERT_EQ(new_flags, expected_flags);
674 
675 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
676 
677 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
678 	ASSERT_EQ(new_flags, expected_flags);
679 
680 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
681 
682 	EXPECT_EQ(close(fd), 0);
683 }
684 
685 TEST_F(mount_setattr, mount_has_writers)
686 {
687 	int fd, dfd;
688 	unsigned int old_flags = 0, new_flags = 0;
689 	struct mount_attr attr = {
690 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
691 		.attr_clr	= MOUNT_ATTR__ATIME,
692 		.propagation	= MS_SHARED,
693 	};
694 
695 	if (!mount_setattr_supported())
696 		SKIP(return, "mount_setattr syscall not supported");
697 
698 	old_flags = read_mnt_flags("/mnt/A");
699 	ASSERT_GT(old_flags, 0);
700 
701 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
702 	ASSERT_GE(fd, 0);
703 
704 	/*
705 	 * We're holding a fd open to a mount somwhere in the middle so this
706 	 * needs to fail somewhere in the middle. After this the mount options
707 	 * need to be unchanged.
708 	 */
709 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
710 
711 	new_flags = read_mnt_flags("/mnt/A");
712 	ASSERT_EQ(new_flags, old_flags);
713 
714 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
715 
716 	new_flags = read_mnt_flags("/mnt/A/AA");
717 	ASSERT_EQ(new_flags, old_flags);
718 
719 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
720 
721 	new_flags = read_mnt_flags("/mnt/A/AA/B");
722 	ASSERT_EQ(new_flags, old_flags);
723 
724 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
725 
726 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
727 	ASSERT_EQ(new_flags, old_flags);
728 
729 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
730 
731 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
732 	ASSERT_GE(dfd, 0);
733 	EXPECT_EQ(fsync(dfd), 0);
734 	EXPECT_EQ(close(dfd), 0);
735 
736 	EXPECT_EQ(fsync(fd), 0);
737 	EXPECT_EQ(close(fd), 0);
738 
739 	/* All writers are gone so this should succeed. */
740 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
741 }
742 
743 TEST_F(mount_setattr, mixed_mount_options)
744 {
745 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
746 	struct mount_attr attr = {
747 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
748 		.attr_set = MOUNT_ATTR_RELATIME,
749 	};
750 
751 	if (!mount_setattr_supported())
752 		SKIP(return, "mount_setattr syscall not supported");
753 
754 	old_flags1 = read_mnt_flags("/mnt/B");
755 	ASSERT_GT(old_flags1, 0);
756 
757 	old_flags2 = read_mnt_flags("/mnt/B/BB");
758 	ASSERT_GT(old_flags2, 0);
759 
760 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
761 
762 	expected_flags = old_flags2;
763 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
764 	expected_flags |= MS_RELATIME;
765 
766 	new_flags = read_mnt_flags("/mnt/B");
767 	ASSERT_EQ(new_flags, expected_flags);
768 
769 	expected_flags = old_flags2;
770 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
771 	expected_flags |= MS_RELATIME;
772 
773 	new_flags = read_mnt_flags("/mnt/B/BB");
774 	ASSERT_EQ(new_flags, expected_flags);
775 }
776 
777 TEST_F(mount_setattr, time_changes)
778 {
779 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
780 	struct mount_attr attr = {
781 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
782 	};
783 
784 	if (!mount_setattr_supported())
785 		SKIP(return, "mount_setattr syscall not supported");
786 
787 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
788 
789 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
790 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
791 
792 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
793 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
794 
795 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
796 	attr.attr_clr = MOUNT_ATTR__ATIME;
797 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
798 
799 	attr.attr_set = 0;
800 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
801 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
802 
803 	attr.attr_clr = MOUNT_ATTR_NOATIME;
804 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
805 
806 	old_flags = read_mnt_flags("/mnt/A");
807 	ASSERT_GT(old_flags, 0);
808 
809 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
810 	attr.attr_clr = MOUNT_ATTR__ATIME;
811 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
812 
813 	expected_flags = old_flags;
814 	expected_flags |= MS_NOATIME;
815 	expected_flags |= MS_NODIRATIME;
816 
817 	new_flags = read_mnt_flags("/mnt/A");
818 	ASSERT_EQ(new_flags, expected_flags);
819 
820 	new_flags = read_mnt_flags("/mnt/A/AA");
821 	ASSERT_EQ(new_flags, expected_flags);
822 
823 	new_flags = read_mnt_flags("/mnt/A/AA/B");
824 	ASSERT_EQ(new_flags, expected_flags);
825 
826 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
827 	ASSERT_EQ(new_flags, expected_flags);
828 
829 	memset(&attr, 0, sizeof(attr));
830 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
831 	attr.attr_set |= MOUNT_ATTR_RELATIME;
832 	attr.attr_clr |= MOUNT_ATTR__ATIME;
833 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
834 
835 	expected_flags &= ~MS_NOATIME;
836 	expected_flags |= MS_RELATIME;
837 
838 	new_flags = read_mnt_flags("/mnt/A");
839 	ASSERT_EQ(new_flags, expected_flags);
840 
841 	new_flags = read_mnt_flags("/mnt/A/AA");
842 	ASSERT_EQ(new_flags, expected_flags);
843 
844 	new_flags = read_mnt_flags("/mnt/A/AA/B");
845 	ASSERT_EQ(new_flags, expected_flags);
846 
847 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
848 	ASSERT_EQ(new_flags, expected_flags);
849 
850 	memset(&attr, 0, sizeof(attr));
851 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
852 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
853 	attr.attr_clr |= MOUNT_ATTR__ATIME;
854 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855 
856 	expected_flags &= ~MS_RELATIME;
857 
858 	new_flags = read_mnt_flags("/mnt/A");
859 	ASSERT_EQ(new_flags, expected_flags);
860 
861 	new_flags = read_mnt_flags("/mnt/A/AA");
862 	ASSERT_EQ(new_flags, expected_flags);
863 
864 	new_flags = read_mnt_flags("/mnt/A/AA/B");
865 	ASSERT_EQ(new_flags, expected_flags);
866 
867 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
868 	ASSERT_EQ(new_flags, expected_flags);
869 
870 	memset(&attr, 0, sizeof(attr));
871 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
872 	attr.attr_set |= MOUNT_ATTR_NOATIME;
873 	attr.attr_clr |= MOUNT_ATTR__ATIME;
874 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
875 
876 	expected_flags |= MS_NOATIME;
877 	new_flags = read_mnt_flags("/mnt/A");
878 	ASSERT_EQ(new_flags, expected_flags);
879 
880 	new_flags = read_mnt_flags("/mnt/A/AA");
881 	ASSERT_EQ(new_flags, expected_flags);
882 
883 	new_flags = read_mnt_flags("/mnt/A/AA/B");
884 	ASSERT_EQ(new_flags, expected_flags);
885 
886 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
887 	ASSERT_EQ(new_flags, expected_flags);
888 
889 	memset(&attr, 0, sizeof(attr));
890 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
891 
892 	new_flags = read_mnt_flags("/mnt/A");
893 	ASSERT_EQ(new_flags, expected_flags);
894 
895 	new_flags = read_mnt_flags("/mnt/A/AA");
896 	ASSERT_EQ(new_flags, expected_flags);
897 
898 	new_flags = read_mnt_flags("/mnt/A/AA/B");
899 	ASSERT_EQ(new_flags, expected_flags);
900 
901 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
902 	ASSERT_EQ(new_flags, expected_flags);
903 
904 	memset(&attr, 0, sizeof(attr));
905 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
906 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
907 
908 	expected_flags &= ~MS_NODIRATIME;
909 
910 	new_flags = read_mnt_flags("/mnt/A");
911 	ASSERT_EQ(new_flags, expected_flags);
912 
913 	new_flags = read_mnt_flags("/mnt/A/AA");
914 	ASSERT_EQ(new_flags, expected_flags);
915 
916 	new_flags = read_mnt_flags("/mnt/A/AA/B");
917 	ASSERT_EQ(new_flags, expected_flags);
918 
919 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
920 	ASSERT_EQ(new_flags, expected_flags);
921 }
922 
923 TEST_F(mount_setattr, multi_threaded)
924 {
925 	int i, j, nthreads, ret = 0;
926 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
927 	pthread_attr_t pattr;
928 	pthread_t threads[DEFAULT_THREADS];
929 
930 	if (!mount_setattr_supported())
931 		SKIP(return, "mount_setattr syscall not supported");
932 
933 	old_flags = read_mnt_flags("/mnt/A");
934 	ASSERT_GT(old_flags, 0);
935 
936 	/* Try to change mount options from multiple threads. */
937 	nthreads = get_nprocs_conf();
938 	if (nthreads > DEFAULT_THREADS)
939 		nthreads = DEFAULT_THREADS;
940 
941 	pthread_attr_init(&pattr);
942 	for (i = 0; i < nthreads; i++)
943 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
944 
945 	for (j = 0; j < i; j++) {
946 		void *retptr = NULL;
947 
948 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
949 
950 		ret += ptr_to_int(retptr);
951 		EXPECT_EQ(ret, 0);
952 	}
953 	pthread_attr_destroy(&pattr);
954 
955 	ASSERT_EQ(ret, 0);
956 
957 	expected_flags = old_flags;
958 	expected_flags |= MS_RDONLY;
959 	expected_flags |= MS_NOSUID;
960 	new_flags = read_mnt_flags("/mnt/A");
961 	ASSERT_EQ(new_flags, expected_flags);
962 
963 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
964 
965 	new_flags = read_mnt_flags("/mnt/A/AA");
966 	ASSERT_EQ(new_flags, expected_flags);
967 
968 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
969 
970 	new_flags = read_mnt_flags("/mnt/A/AA/B");
971 	ASSERT_EQ(new_flags, expected_flags);
972 
973 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
974 
975 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
976 	ASSERT_EQ(new_flags, expected_flags);
977 
978 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
979 }
980 
981 TEST_F(mount_setattr, wrong_user_namespace)
982 {
983 	int ret;
984 	struct mount_attr attr = {
985 		.attr_set = MOUNT_ATTR_RDONLY,
986 	};
987 
988 	if (!mount_setattr_supported())
989 		SKIP(return, "mount_setattr syscall not supported");
990 
991 	EXPECT_EQ(create_and_enter_userns(), 0);
992 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
993 	ASSERT_LT(ret, 0);
994 	ASSERT_EQ(errno, EPERM);
995 }
996 
997 TEST_F(mount_setattr, wrong_mount_namespace)
998 {
999 	int fd, ret;
1000 	struct mount_attr attr = {
1001 		.attr_set = MOUNT_ATTR_RDONLY,
1002 	};
1003 
1004 	if (!mount_setattr_supported())
1005 		SKIP(return, "mount_setattr syscall not supported");
1006 
1007 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
1008 	ASSERT_GE(fd, 0);
1009 
1010 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1011 
1012 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
1013 	ASSERT_LT(ret, 0);
1014 	ASSERT_EQ(errno, EINVAL);
1015 }
1016 
1017 FIXTURE(mount_setattr_idmapped) {
1018 };
1019 
1020 FIXTURE_SETUP(mount_setattr_idmapped)
1021 {
1022 	int img_fd = -EBADF;
1023 
1024 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1025 
1026 	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
1027 
1028 	(void)umount2("/mnt", MNT_DETACH);
1029 	(void)umount2("/tmp", MNT_DETACH);
1030 
1031 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
1032 			"size=100000,mode=700"), 0);
1033 
1034 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
1035 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
1036 	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
1037 
1038 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
1039 			"size=100000,mode=700"), 0);
1040 
1041 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
1042 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
1043 	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
1044 
1045 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
1046 			"size=100000,mode=700"), 0);
1047 
1048 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
1049 			"size=2m,mode=700"), 0);
1050 
1051 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
1052 
1053 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
1054 			"size=100000,mode=700"), 0);
1055 
1056 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1057 
1058 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1059 
1060 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
1061 
1062 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
1063 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
1064 
1065 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
1066 
1067 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
1068 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
1069 
1070 	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
1071 	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
1072 	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
1073 	ASSERT_GE(img_fd, 0);
1074 	ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
1075 	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
1076 	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
1077 	ASSERT_EQ(close(img_fd), 0);
1078 }
1079 
/*
 * Lazily detach the two top-level trees created by the fixture setup.
 * Failures are ignored on purpose.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const mounts[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(mounts) / sizeof(mounts[0]); i++)
		(void)umount2(mounts[i], MNT_DETACH);
}
1085 
1086 /**
1087  * Validate that negative fd values are rejected.
1088  */
1089 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1090 {
1091 	struct mount_attr attr = {
1092 		.attr_set	= MOUNT_ATTR_IDMAP,
1093 		.userns_fd	= -EBADF,
1094 	};
1095 
1096 	if (!mount_setattr_supported())
1097 		SKIP(return, "mount_setattr syscall not supported");
1098 
1099 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1100 		TH_LOG("failure: created idmapped mount with negative fd");
1101 	}
1102 }
1103 
1104 /**
1105  * Validate that excessively large fd values are rejected.
1106  */
1107 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1108 {
1109 	struct mount_attr attr = {
1110 		.attr_set	= MOUNT_ATTR_IDMAP,
1111 		.userns_fd	= INT64_MAX,
1112 	};
1113 
1114 	if (!mount_setattr_supported())
1115 		SKIP(return, "mount_setattr syscall not supported");
1116 
1117 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1118 		TH_LOG("failure: created idmapped mount with too large fd value");
1119 	}
1120 }
1121 
1122 /**
1123  * Validate that closed fd values are rejected.
1124  */
1125 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1126 {
1127 	int fd;
1128 	struct mount_attr attr = {
1129 		.attr_set = MOUNT_ATTR_IDMAP,
1130 	};
1131 
1132 	if (!mount_setattr_supported())
1133 		SKIP(return, "mount_setattr syscall not supported");
1134 
1135 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1136 	ASSERT_GE(fd, 0);
1137 	ASSERT_GE(close(fd), 0);
1138 
1139 	attr.userns_fd = fd;
1140 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1141 		TH_LOG("failure: created idmapped mount with closed fd");
1142 	}
1143 }
1144 
1145 /**
1146  * Validate that the initial user namespace is rejected.
1147  */
1148 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1149 {
1150 	int open_tree_fd = -EBADF;
1151 	struct mount_attr attr = {
1152 		.attr_set = MOUNT_ATTR_IDMAP,
1153 	};
1154 
1155 	if (!mount_setattr_supported())
1156 		SKIP(return, "mount_setattr syscall not supported");
1157 
1158 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1159 				     AT_NO_AUTOMOUNT |
1160 				     AT_SYMLINK_NOFOLLOW |
1161 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1162 	ASSERT_GE(open_tree_fd, 0);
1163 
1164 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1165 	ASSERT_GE(attr.userns_fd, 0);
1166 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1167 	ASSERT_EQ(errno, EPERM);
1168 	ASSERT_EQ(close(attr.userns_fd), 0);
1169 	ASSERT_EQ(close(open_tree_fd), 0);
1170 }
1171 
/*
 * Write the single mapping "<nsid> <hostid> <range>" to both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map.
 *
 * Returns 0 if both writes succeeded, -1 as soon as one of them fails
 * (gid_map is not attempted when uid_map failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], target[256];
	size_t mapping_len;

	/* The same mapping line is used for uids and gids; format it once. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	mapping_len = strlen(mapping);

	snprintf(target, sizeof(target), "/proc/%d/uid_map", pid);
	if (write_file(target, mapping, mapping_len) != 0)
		return -1;

	snprintf(target, sizeof(target), "/proc/%d/gid_map", pid);
	if (write_file(target, mapping, mapping_len) != 0)
		return -1;

	return 0;
}
1190 
1191 #define __STACK_SIZE (8 * 1024 * 1024)
1192 static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
1193 {
1194 	void *stack;
1195 
1196 	stack = malloc(__STACK_SIZE);
1197 	if (!stack)
1198 		return -ENOMEM;
1199 
1200 #ifdef __ia64__
1201 	return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
1202 #else
1203 	return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
1204 #endif
1205 }
1206 
/*
 * Child entry point used by get_userns_fd(): stop ourselves with SIGSTOP.
 * @data is unused. Returns whatever kill() returns.
 */
static int get_userns_fd_cb(void *data)
{
	pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1211 
/*
 * Wait for @pid to change state, restarting the wait when it is interrupted
 * by a signal (EINTR).
 *
 * Returns the child's exit status if it exited normally, -1 if waitpid()
 * failed for any other reason or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1230 
1231 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1232 {
1233 	int ret;
1234 	pid_t pid;
1235 	char path[256];
1236 
1237 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1238 	if (pid < 0)
1239 		return -errno;
1240 
1241 	ret = map_ids(pid, nsid, hostid, range);
1242 	if (ret < 0)
1243 		return ret;
1244 
1245 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1246 	ret = open(path, O_RDONLY | O_CLOEXEC);
1247 	kill(pid, SIGKILL);
1248 	wait_for_pid(pid);
1249 	return ret;
1250 }
1251 
1252 /**
1253  * Validate that an attached mount in our mount namespace cannot be idmapped.
1254  * (The kernel enforces that the mount's mount namespace and the caller's mount
1255  *  namespace match.)
1256  */
1257 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1258 {
1259 	int open_tree_fd = -EBADF;
1260 	struct mount_attr attr = {
1261 		.attr_set = MOUNT_ATTR_IDMAP,
1262 	};
1263 
1264 	if (!mount_setattr_supported())
1265 		SKIP(return, "mount_setattr syscall not supported");
1266 
1267 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1268 				     AT_EMPTY_PATH |
1269 				     AT_NO_AUTOMOUNT |
1270 				     AT_SYMLINK_NOFOLLOW |
1271 				     OPEN_TREE_CLOEXEC);
1272 	ASSERT_GE(open_tree_fd, 0);
1273 
1274 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1275 	ASSERT_GE(attr.userns_fd, 0);
1276 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1277 	ASSERT_EQ(close(attr.userns_fd), 0);
1278 	ASSERT_EQ(close(open_tree_fd), 0);
1279 }
1280 
1281 /**
1282  * Validate that idmapping a mount is rejected if the mount's mount namespace
1283  * and our mount namespace don't match.
1284  * (The kernel enforces that the mount's mount namespace and the caller's mount
1285  *  namespace match.)
1286  */
1287 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1288 {
1289 	int open_tree_fd = -EBADF;
1290 	struct mount_attr attr = {
1291 		.attr_set = MOUNT_ATTR_IDMAP,
1292 	};
1293 
1294 	if (!mount_setattr_supported())
1295 		SKIP(return, "mount_setattr syscall not supported");
1296 
1297 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1298 				     AT_EMPTY_PATH |
1299 				     AT_NO_AUTOMOUNT |
1300 				     AT_SYMLINK_NOFOLLOW |
1301 				     OPEN_TREE_CLOEXEC);
1302 	ASSERT_GE(open_tree_fd, 0);
1303 
1304 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1305 
1306 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1307 	ASSERT_GE(attr.userns_fd, 0);
1308 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1309 				    sizeof(attr)), 0);
1310 	ASSERT_EQ(close(attr.userns_fd), 0);
1311 	ASSERT_EQ(close(open_tree_fd), 0);
1312 }
1313 
1314 /**
1315  * Validate that an attached mount in our mount namespace can be idmapped.
1316  */
1317 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1318 {
1319 	int open_tree_fd = -EBADF;
1320 	struct mount_attr attr = {
1321 		.attr_set = MOUNT_ATTR_IDMAP,
1322 	};
1323 
1324 	if (!mount_setattr_supported())
1325 		SKIP(return, "mount_setattr syscall not supported");
1326 
1327 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1328 				     AT_EMPTY_PATH |
1329 				     AT_NO_AUTOMOUNT |
1330 				     AT_SYMLINK_NOFOLLOW |
1331 				     OPEN_TREE_CLOEXEC |
1332 				     OPEN_TREE_CLONE);
1333 	ASSERT_GE(open_tree_fd, 0);
1334 
1335 	/* Changing mount properties on a detached mount. */
1336 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1337 	ASSERT_GE(attr.userns_fd, 0);
1338 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1339 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1340 	ASSERT_EQ(close(attr.userns_fd), 0);
1341 	ASSERT_EQ(close(open_tree_fd), 0);
1342 }
1343 
1344 /**
1345  * Validate that a detached mount not in our mount namespace can be idmapped.
1346  */
1347 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1348 {
1349 	int open_tree_fd = -EBADF;
1350 	struct mount_attr attr = {
1351 		.attr_set = MOUNT_ATTR_IDMAP,
1352 	};
1353 
1354 	if (!mount_setattr_supported())
1355 		SKIP(return, "mount_setattr syscall not supported");
1356 
1357 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1358 				     AT_EMPTY_PATH |
1359 				     AT_NO_AUTOMOUNT |
1360 				     AT_SYMLINK_NOFOLLOW |
1361 				     OPEN_TREE_CLOEXEC |
1362 				     OPEN_TREE_CLONE);
1363 	ASSERT_GE(open_tree_fd, 0);
1364 
1365 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1366 
1367 	/* Changing mount properties on a detached mount. */
1368 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1369 	ASSERT_GE(attr.userns_fd, 0);
1370 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1371 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1372 	ASSERT_EQ(close(attr.userns_fd), 0);
1373 	ASSERT_EQ(close(open_tree_fd), 0);
1374 }
1375 
1376 /**
1377  * Validate that currently changing the idmapping of an idmapped mount fails.
1378  */
1379 TEST_F(mount_setattr_idmapped, change_idmapping)
1380 {
1381 	int open_tree_fd = -EBADF;
1382 	struct mount_attr attr = {
1383 		.attr_set = MOUNT_ATTR_IDMAP,
1384 	};
1385 
1386 	if (!mount_setattr_supported())
1387 		SKIP(return, "mount_setattr syscall not supported");
1388 
1389 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1390 				     AT_EMPTY_PATH |
1391 				     AT_NO_AUTOMOUNT |
1392 				     AT_SYMLINK_NOFOLLOW |
1393 				     OPEN_TREE_CLOEXEC |
1394 				     OPEN_TREE_CLONE);
1395 	ASSERT_GE(open_tree_fd, 0);
1396 
1397 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1398 	ASSERT_GE(attr.userns_fd, 0);
1399 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1400 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1401 	ASSERT_EQ(close(attr.userns_fd), 0);
1402 
1403 	/* Change idmapping on a detached mount that is already idmapped. */
1404 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1405 	ASSERT_GE(attr.userns_fd, 0);
1406 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1407 	ASSERT_EQ(close(attr.userns_fd), 0);
1408 	ASSERT_EQ(close(open_tree_fd), 0);
1409 }
1410 
/*
 * Check the ownership of @path, resolved relative to @dfd with fstatat()
 * using @flags.
 *
 * Returns true only if the lookup succeeded and both the owner uid and the
 * owner gid match @expected_uid / @expected_gid; returns false when the
 * lookup fails or either id differs.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1423 
1424 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1425 {
1426 	int open_tree_fd = -EBADF;
1427 	struct mount_attr attr = {
1428 		.attr_set = MOUNT_ATTR_IDMAP,
1429 	};
1430 
1431 	if (!mount_setattr_supported())
1432 		SKIP(return, "mount_setattr syscall not supported");
1433 
1434 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1435 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1436 
1437 	ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
1438 			"size=100000,mode=700"), 0);
1439 
1440 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1441 
1442 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1443 
1444 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1445 				     AT_RECURSIVE |
1446 				     AT_EMPTY_PATH |
1447 				     AT_NO_AUTOMOUNT |
1448 				     AT_SYMLINK_NOFOLLOW |
1449 				     OPEN_TREE_CLOEXEC |
1450 				     OPEN_TREE_CLONE);
1451 	ASSERT_GE(open_tree_fd, 0);
1452 
1453 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1454 	ASSERT_GE(attr.userns_fd, 0);
1455 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1456 	ASSERT_EQ(close(attr.userns_fd), 0);
1457 	ASSERT_EQ(close(open_tree_fd), 0);
1458 
1459 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1460 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1461 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1462 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1463 
1464 	(void)umount2("/mnt/A", MNT_DETACH);
1465 }
1466 
1467 TEST_F(mount_setattr, mount_attr_nosymfollow)
1468 {
1469 	int fd;
1470 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1471 	struct mount_attr attr = {
1472 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1473 	};
1474 
1475 	if (!mount_setattr_supported())
1476 		SKIP(return, "mount_setattr syscall not supported");
1477 
1478 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1479 	ASSERT_GT(fd, 0);
1480 	ASSERT_EQ(close(fd), 0);
1481 
1482 	old_flags = read_mnt_flags("/mnt/A");
1483 	ASSERT_GT(old_flags, 0);
1484 
1485 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1486 
1487 	expected_flags = old_flags;
1488 	expected_flags |= ST_NOSYMFOLLOW;
1489 
1490 	new_flags = read_mnt_flags("/mnt/A");
1491 	ASSERT_EQ(new_flags, expected_flags);
1492 
1493 	new_flags = read_mnt_flags("/mnt/A/AA");
1494 	ASSERT_EQ(new_flags, expected_flags);
1495 
1496 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1497 	ASSERT_EQ(new_flags, expected_flags);
1498 
1499 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1500 	ASSERT_EQ(new_flags, expected_flags);
1501 
1502 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1503 	ASSERT_LT(fd, 0);
1504 	ASSERT_EQ(errno, ELOOP);
1505 
1506 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1507 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1508 
1509 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1510 
1511 	expected_flags &= ~ST_NOSYMFOLLOW;
1512 	new_flags = read_mnt_flags("/mnt/A");
1513 	ASSERT_EQ(new_flags, expected_flags);
1514 
1515 	new_flags = read_mnt_flags("/mnt/A/AA");
1516 	ASSERT_EQ(new_flags, expected_flags);
1517 
1518 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1519 	ASSERT_EQ(new_flags, expected_flags);
1520 
1521 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1522 	ASSERT_EQ(new_flags, expected_flags);
1523 
1524 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1525 	ASSERT_GT(fd, 0);
1526 	ASSERT_EQ(close(fd), 0);
1527 }
1528 
1529 TEST_F(mount_setattr, open_tree_detached)
1530 {
1531 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1532 	struct statx stx;
1533 
1534 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1535 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1536 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1537 				     OPEN_TREE_CLONE);
1538 	ASSERT_GE(fd_tree_base, 0);
1539 	/*
1540 	 * /mnt                   testing tmpfs
1541 	 * |-/mnt/A               testing tmpfs
1542 	 * | `-/mnt/A/AA          testing tmpfs
1543 	 * |   `-/mnt/A/AA/B      testing tmpfs
1544 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1545 	 * `-/mnt/B               testing ramfs
1546 	 */
1547 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1548 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1549 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1550 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1551 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1552 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1553 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1554 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1555 
1556 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1557 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1558 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1559 				       OPEN_TREE_CLONE);
1560 	ASSERT_GE(fd_tree_subdir, 0);
1561 	/*
1562 	 * /AA          testing tmpfs
1563 	 * `-/AA/B      testing tmpfs
1564 	 *   `-/AA/B/BB testing tmpfs
1565 	 */
1566 	ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
1567 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1568 	ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
1569 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1570 
1571 	ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1572 	/*
1573 	 * /tmp/target1          testing tmpfs
1574 	 * `-/tmp/target1/B      testing tmpfs
1575 	 *   `-/tmp/target1/B/BB testing tmpfs
1576 	 */
1577 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1578 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1579 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
1580 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1581 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
1582 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1583 
1584 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
1585 	/*
1586 	 * /tmp/target2                   testing tmpfs
1587 	 * |-/tmp/target2/A               testing tmpfs
1588 	 * | `-/tmp/target2/A/AA          testing tmpfs
1589 	 * |   `-/tmp/target2/A/AA/B      testing tmpfs
1590 	 * |     `-/tmp/target2/A/AA/B/BB testing tmpfs
1591 	 * `-/tmp/target2/B               testing ramfs
1592 	 */
1593 	ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
1594 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1595 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
1596 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1597 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
1598 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1599 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
1600 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1601 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
1602 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1603 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
1604 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1605 
1606 	EXPECT_EQ(close(fd_tree_base), 0);
1607 	EXPECT_EQ(close(fd_tree_subdir), 0);
1608 }
1609 
1610 TEST_F(mount_setattr, open_tree_detached_fail)
1611 {
1612 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1613 	struct statx stx;
1614 
1615 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1616 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1617 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1618 				     OPEN_TREE_CLONE);
1619 	ASSERT_GE(fd_tree_base, 0);
1620 	/*
1621 	 * /mnt                   testing tmpfs
1622 	 * |-/mnt/A               testing tmpfs
1623 	 * | `-/mnt/A/AA          testing tmpfs
1624 	 * |   `-/mnt/A/AA/B      testing tmpfs
1625 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1626 	 * `-/mnt/B               testing ramfs
1627 	 */
1628 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1629 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1630 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1631 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1632 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1633 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1634 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1635 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1636 
1637 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1638 
1639 	/*
1640 	 * The origin mount namespace of the anonymous mount namespace
1641 	 * of @fd_tree_base doesn't match the caller's mount namespace
1642 	 * anymore so creation of another detached mounts must fail.
1643 	 */
1644 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1645 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1646 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1647 				       OPEN_TREE_CLONE);
1648 	ASSERT_LT(fd_tree_subdir, 0);
1649 	ASSERT_EQ(errno, EINVAL);
1650 }
1651 
1652 TEST_F(mount_setattr, open_tree_detached_fail2)
1653 {
1654 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1655 	struct statx stx;
1656 
1657 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1658 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1659 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1660 				     OPEN_TREE_CLONE);
1661 	ASSERT_GE(fd_tree_base, 0);
1662 	/*
1663 	 * /mnt                   testing tmpfs
1664 	 * |-/mnt/A               testing tmpfs
1665 	 * | `-/mnt/A/AA          testing tmpfs
1666 	 * |   `-/mnt/A/AA/B      testing tmpfs
1667 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1668 	 * `-/mnt/B               testing ramfs
1669 	 */
1670 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1671 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1672 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1673 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1674 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1675 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1676 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1677 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1678 
1679 	EXPECT_EQ(create_and_enter_userns(), 0);
1680 
1681 	/*
1682 	 * The caller entered a new user namespace. They will have
1683 	 * CAP_SYS_ADMIN in this user namespace. However, they're still
1684 	 * located in a mount namespace that is owned by an ancestor
1685 	 * user namespace in which they hold no privilege. Creating a
1686 	 * detached mount must thus fail.
1687 	 */
1688 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1689 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1690 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1691 				       OPEN_TREE_CLONE);
1692 	ASSERT_LT(fd_tree_subdir, 0);
1693 	ASSERT_EQ(errno, EPERM);
1694 }
1695 
1696 TEST_F(mount_setattr, open_tree_detached_fail3)
1697 {
1698 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1699 	struct statx stx;
1700 
1701 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1702 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1703 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1704 				     OPEN_TREE_CLONE);
1705 	ASSERT_GE(fd_tree_base, 0);
1706 	/*
1707         * /mnt                   testing tmpfs
1708         * |-/mnt/A               testing tmpfs
1709         * | `-/mnt/A/AA          testing tmpfs
1710         * |   `-/mnt/A/AA/B      testing tmpfs
1711         * |     `-/mnt/A/AA/B/BB testing tmpfs
1712         * `-/mnt/B               testing ramfs
1713         */
1714 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1715 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1716 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1717 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1718 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1719 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1720 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1721 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1722 
1723 	EXPECT_EQ(prepare_unpriv_mountns(), 0);
1724 
1725 	/*
1726         * The caller entered a new mount namespace. They will have
1727         * CAP_SYS_ADMIN in the owning user namespace of their mount
1728         * namespace.
1729         *
1730         * However, the origin mount namespace of the anonymous mount
1731         * namespace of @fd_tree_base doesn't match the caller's mount
1732         * namespace anymore so creation of another detached mounts must
1733         * fail.
1734         */
1735 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1736 			               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1737 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1738 				       OPEN_TREE_CLONE);
1739 	ASSERT_LT(fd_tree_subdir, 0);
1740 	ASSERT_EQ(errno, EINVAL);
1741 }
1742 
1743 TEST_F(mount_setattr, open_tree_subfolder)
1744 {
1745 	int fd_context, fd_tmpfs, fd_tree;
1746 
1747 	fd_context = sys_fsopen("tmpfs", 0);
1748 	ASSERT_GE(fd_context, 0);
1749 
1750 	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
1751 
1752 	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
1753 	ASSERT_GE(fd_tmpfs, 0);
1754 
1755 	EXPECT_EQ(close(fd_context), 0);
1756 
1757 	ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
1758 
1759 	fd_tree = sys_open_tree(fd_tmpfs, "subdir",
1760 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1761 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1762 				OPEN_TREE_CLONE);
1763 	ASSERT_GE(fd_tree, 0);
1764 
1765 	EXPECT_EQ(close(fd_tmpfs), 0);
1766 
1767 	ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
1768 
1769 	ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
1770 
1771 	EXPECT_EQ(close(fd_tree), 0);
1772 
1773 	ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
1774 
1775 	EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
1776 }
1777 
1778 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
1779 {
1780 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1781 	struct statx stx;
1782 
1783 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1784 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1785 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1786 	ASSERT_GE(fd_tree_base, 0);
1787 	/*
1788 	 * /mnt testing tmpfs
1789 	 */
1790 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1791 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1792 
1793 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1794 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1795 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1796 				       OPEN_TREE_CLONE);
1797 	ASSERT_GE(fd_tree_subdir, 0);
1798 	/*
1799 	 * /mnt testing tmpfs
1800 	 */
1801 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1802 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1803 
1804 	/*
1805 	 * /mnt   testing tmpfs
1806 	 * `-/mnt testing tmpfs
1807 	 */
1808 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1809 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
1810 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1811 
1812 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1813 
1814 	EXPECT_EQ(close(fd_tree_base), 0);
1815 	EXPECT_EQ(close(fd_tree_subdir), 0);
1816 }
1817 
1818 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
1819 {
1820 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1821 	struct statx stx;
1822 	__u64 mnt_id = 0;
1823 
1824 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1825 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1826 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1827 	ASSERT_GE(fd_tree_base, 0);
1828 	/*
1829 	 * /mnt testing tmpfs
1830 	 */
1831 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1832 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1833 
1834 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1835 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1836 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1837 				       OPEN_TREE_CLONE);
1838 	ASSERT_GE(fd_tree_subdir, 0);
1839 	/*
1840 	 * /mnt testing tmpfs
1841 	 */
1842 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1843 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1844 
1845 	/*
1846 	 * /mnt   testing tmpfs
1847 	 * `-/mnt testing tmpfs
1848 	 */
1849 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1850 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
1851 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1852 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1853 	mnt_id = stx.stx_mnt_id;
1854 
1855 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1856 
1857 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1858 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
1859 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1860 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1861 	ASSERT_EQ(stx.stx_mnt_id, mnt_id);
1862 
1863 	EXPECT_EQ(close(fd_tree_base), 0);
1864 	EXPECT_EQ(close(fd_tree_subdir), 0);
1865 }
1866 
1867 TEST_F(mount_setattr, move_mount_detached_fail)
1868 {
1869 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1870 	struct statx stx;
1871 
1872 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1873 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1874 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1875 	ASSERT_GE(fd_tree_base, 0);
1876 
1877 	/* Attach the mount to the caller's mount namespace. */
1878 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1879 
1880 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1881 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1882 
1883 	fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
1884 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1885 				       OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1886 	ASSERT_GE(fd_tree_subdir, 0);
1887 	ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
1888 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1889 
1890 	/* Not allowed to move an attached mount to a detached mount. */
1891 	ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1892 	ASSERT_EQ(errno, EINVAL);
1893 
1894 	EXPECT_EQ(close(fd_tree_base), 0);
1895 	EXPECT_EQ(close(fd_tree_subdir), 0);
1896 }
1897 
1898 TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
1899 {
1900 	int fd_tree = -EBADF;
1901 	struct statx stx;
1902 
1903 	fd_tree = sys_open_tree(-EBADF, "/mnt",
1904 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1905 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1906 				OPEN_TREE_CLONE);
1907 	ASSERT_GE(fd_tree, 0);
1908 
1909 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
1910 	/* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
1911 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1912 
1913 	/* Attach the mount to the caller's mount namespace. */
1914 	ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1915 
1916 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1917 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1918 
1919 	ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
1920 
1921 	/*
1922 	 * This tests whether dissolve_on_fput() handles a NULL mount
1923 	 * namespace correctly, i.e., that it doesn't splat.
1924 	 */
1925 	EXPECT_EQ(close(fd_tree), 0);
1926 }
1927 
1928 TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
1929 {
1930 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
1931 
1932 	/*
1933 	 * |-/mnt/A               testing tmpfs
1934 	 *   `-/mnt/A/AA          testing tmpfs
1935 	 *     `-/mnt/A/AA/B      testing tmpfs
1936 	 *       `-/mnt/A/AA/B/BB testing tmpfs
1937 	 */
1938 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
1939 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1940 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1941 				 OPEN_TREE_CLONE);
1942 	ASSERT_GE(fd_tree1, 0);
1943 
1944 	/*
1945 	 * `-/mnt/B testing ramfs
1946 	 */
1947 	fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
1948 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1949 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1950 				 OPEN_TREE_CLONE);
1951 	ASSERT_GE(fd_tree2, 0);
1952 
1953 	/*
1954 	 * Move the source detached mount tree to the target detached
1955 	 * mount tree. This will move all the mounts in the source mount
1956 	 * tree from the source anonymous mount namespace to the target
1957 	 * anonymous mount namespace.
1958 	 *
1959 	 * The source detached mount tree and the target detached mount
1960 	 * tree now both refer to the same anonymous mount namespace.
1961 	 *
1962 	 * |-""                 testing ramfs
1963 	 *   `-""               testing tmpfs
1964 	 *     `-""/AA          testing tmpfs
1965 	 *       `-""/AA/B      testing tmpfs
1966 	 *         `-""/AA/B/BB testing tmpfs
1967 	 */
1968 	ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1969 
1970 	/*
1971 	 * The source detached mount tree @fd_tree1 is now an attached
1972 	 * mount, i.e., it has a parent. Specifically, it now has the
1973 	 * root mount of the mount tree of @fd_tree2 as its parent.
1974 	 *
1975 	 * That means we are no longer allowed to attach it as we only
1976 	 * allow attaching the root of an anonymous mount tree, not
1977 	 * random bits and pieces. Verify that the kernel enforces this.
1978 	 */
1979 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1980 
1981 	/*
1982 	 * Closing the source detached mount tree must not unmount and
1983 	 * free the shared anonymous mount namespace. The kernel will
1984 	 * quickly yell at us because the anonymous mount namespace
1985 	 * won't be empty when it's freed.
1986 	 */
1987 	EXPECT_EQ(close(fd_tree1), 0);
1988 
1989 	/*
1990 	 * Attach the mount tree to a non-anonymous mount namespace.
1991 	 * This can only succeed if closing fd_tree1 had proper
1992 	 * semantics and didn't cause the anonymous mount namespace to
1993 	 * be freed. If it did this will trigger a UAF which will be
1994 	 * visible on any KASAN enabled kernel.
1995 	 *
1996 	 * |-/tmp/target1                 testing ramfs
1997 	 *   `-/tmp/target1               testing tmpfs
1998 	 *     `-/tmp/target1/AA          testing tmpfs
1999 	 *       `-/tmp/target1/AA/B      testing tmpfs
2000 	 *         `-/tmp/target1/AA/B/BB testing tmpfs
2001 	 */
2002 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2003 	EXPECT_EQ(close(fd_tree2), 0);
2004 }
2005 
2006 TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
2007 {
2008 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2009 
2010 	/*
2011 	 * Copy the following mount tree:
2012 	 *
2013 	 * |-/mnt/A               testing tmpfs
2014 	 *   `-/mnt/A/AA          testing tmpfs
2015 	 *     `-/mnt/A/AA/B      testing tmpfs
2016 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2017 	 */
2018 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2019 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2020 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2021 				 OPEN_TREE_CLONE);
2022 	ASSERT_GE(fd_tree1, 0);
2023 
2024 	/*
2025 	 * Create an O_PATH file descriptors with a separate struct file
2026 	 * that refers to the same detached mount tree as @fd_tree1
2027 	 */
2028 	fd_tree2 = sys_open_tree(fd_tree1, "",
2029 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2030 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2031 	ASSERT_GE(fd_tree2, 0);
2032 
2033 	/*
2034 	 * Copy the following mount tree:
2035 	 *
2036 	 * |-/tmp/target1               testing tmpfs
2037 	 *   `-/tmp/target1/AA          testing tmpfs
2038 	 *     `-/tmp/target1/AA/B      testing tmpfs
2039 	 *       `-/tmp/target1/AA/B/BB testing tmpfs
2040 	 */
2041 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2042 
2043 	/*
2044 	 * This must fail as this would mean adding the same mount tree
2045 	 * into the same mount tree.
2046 	 */
2047 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2048 }
2049 
2050 TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
2051 {
2052 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2053 
2054 	/*
2055 	 * Copy the following mount tree:
2056 	 *
2057 	 * |-/mnt/A               testing tmpfs
2058 	 *   `-/mnt/A/AA          testing tmpfs
2059 	 *     `-/mnt/A/AA/B      testing tmpfs
2060 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2061 	 */
2062 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2063 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2064 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2065 				 OPEN_TREE_CLONE);
2066 	ASSERT_GE(fd_tree1, 0);
2067 
2068 	/*
2069 	 * Create an O_PATH file descriptors with a separate struct file that
2070 	 * refers to a subtree of the same detached mount tree as @fd_tree1
2071 	 */
2072 	fd_tree2 = sys_open_tree(fd_tree1, "AA",
2073 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2074 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2075 	ASSERT_GE(fd_tree2, 0);
2076 
2077 	/*
2078 	 * This must fail as it is only possible to attach the root of a
2079 	 * detached mount tree.
2080 	 */
2081 	ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2082 
2083 	ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2084 }
2085 
2086 TEST_F(mount_setattr, detached_tree_propagation)
2087 {
2088 	int fd_tree = -EBADF;
2089 	struct statx stx1, stx2, stx3, stx4;
2090 
2091 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
2092 	ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
2093 
2094 	/*
2095 	 * Copy the following mount tree:
2096 	 *
2097          * /mnt                   testing tmpfs
2098          * |-/mnt/A               testing tmpfs
2099          * | `-/mnt/A/AA          testing tmpfs
2100          * |   `-/mnt/A/AA/B      testing tmpfs
2101          * |     `-/mnt/A/AA/B/BB testing tmpfs
2102          * `-/mnt/B               testing ramfs
2103 	 */
2104 	fd_tree = sys_open_tree(-EBADF, "/mnt",
2105 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2106 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2107 				 OPEN_TREE_CLONE);
2108 	ASSERT_GE(fd_tree, 0);
2109 
2110 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
2111 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
2112 
2113 	/*
2114 	 * Copying the mount namespace like done above doesn't alter the
2115 	 * mounts in any way so the filesystem mounted on /mnt must be
2116 	 * identical even though the mounts will differ. Use the device
2117 	 * information to verify that. Note that tmpfs will have a 0
2118 	 * major number so comparing the major number is misleading.
2119 	 */
2120 	ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
2121 
2122 	/* Mount a tmpfs filesystem over /mnt/A. */
2123 	ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
2124 
2125 
2126 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
2127 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
2128 
2129 	/*
2130 	 * A new filesystem has been mounted on top of /mnt/A which
2131 	 * means that the device information will be different for any
2132 	 * statx() that was taken from /mnt/A before the mount compared
2133 	 * to one after the mount.
2134 	 *
2135 	 * Since we already now that the device information between the
2136 	 * stx1 and stx2 samples are identical we also now that stx2 and
2137 	 * stx3 device information will necessarily differ.
2138 	 */
2139 	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
2140 
2141 	/*
2142 	 * If mount propagation worked correctly then the tmpfs mount
2143 	 * that was created after the mount namespace was unshared will
2144 	 * have propagated onto /mnt/A in the detached mount tree.
2145 	 *
2146 	 * Verify that the device information for stx3 and stx4 are
2147 	 * identical. It is already established that stx3 is different
2148 	 * from both stx1 and stx2 sampled before the tmpfs mount was
2149 	 * done so if stx3 and stx4 are identical the proof is done.
2150 	 */
2151 	ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
2152 
2153 	EXPECT_EQ(close(fd_tree), 0);
2154 }
2155 
/*
 * Harness macro from ../kselftest_harness.h. Presumably expands to the
 * program's main(), which runs the TEST_F() cases defined in this file
 * -- confirm against the harness header.
 */
TEST_HARNESS_MAIN
2157