xref: /linux/tools/testing/selftests/mount_setattr/mount_setattr_test.c (revision e0c0ab04f6785abaa71b9b8dc252cb1a2072c225)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22 
23 #include "../filesystems/wrappers.h"
24 #include "../kselftest_harness.h"
25 
26 #ifndef CLONE_NEWNS
27 #define CLONE_NEWNS 0x00020000
28 #endif
29 
30 #ifndef CLONE_NEWUSER
31 #define CLONE_NEWUSER 0x10000000
32 #endif
33 
34 #ifndef MS_REC
35 #define MS_REC 16384
36 #endif
37 
38 #ifndef MS_RELATIME
39 #define MS_RELATIME (1 << 21)
40 #endif
41 
42 #ifndef MS_STRICTATIME
43 #define MS_STRICTATIME (1 << 24)
44 #endif
45 
46 #ifndef MOUNT_ATTR_RDONLY
47 #define MOUNT_ATTR_RDONLY 0x00000001
48 #endif
49 
50 #ifndef MOUNT_ATTR_NOSUID
51 #define MOUNT_ATTR_NOSUID 0x00000002
52 #endif
53 
54 #ifndef MOUNT_ATTR_NOEXEC
55 #define MOUNT_ATTR_NOEXEC 0x00000008
56 #endif
57 
58 #ifndef MOUNT_ATTR_NODIRATIME
59 #define MOUNT_ATTR_NODIRATIME 0x00000080
60 #endif
61 
62 #ifndef MOUNT_ATTR__ATIME
63 #define MOUNT_ATTR__ATIME 0x00000070
64 #endif
65 
66 #ifndef MOUNT_ATTR_RELATIME
67 #define MOUNT_ATTR_RELATIME 0x00000000
68 #endif
69 
70 #ifndef MOUNT_ATTR_NOATIME
71 #define MOUNT_ATTR_NOATIME 0x00000010
72 #endif
73 
74 #ifndef MOUNT_ATTR_STRICTATIME
75 #define MOUNT_ATTR_STRICTATIME 0x00000020
76 #endif
77 
78 #ifndef AT_RECURSIVE
79 #define AT_RECURSIVE 0x8000
80 #endif
81 
82 #ifndef MS_SHARED
83 #define MS_SHARED (1 << 20)
84 #endif
85 
86 #define DEFAULT_THREADS 4
87 #define ptr_to_int(p) ((int)((intptr_t)(p)))
88 #define int_to_ptr(u) ((void *)((intptr_t)(u)))
89 
90 #ifndef __NR_mount_setattr
91 	#if defined __alpha__
92 		#define __NR_mount_setattr 552
93 	#elif defined _MIPS_SIM
94 		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
95 			#define __NR_mount_setattr (442 + 4000)
96 		#endif
97 		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
98 			#define __NR_mount_setattr (442 + 6000)
99 		#endif
100 		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
101 			#define __NR_mount_setattr (442 + 5000)
102 		#endif
103 	#elif defined __ia64__
104 		#define __NR_mount_setattr (442 + 1024)
105 	#else
106 		#define __NR_mount_setattr 442
107 	#endif
108 #endif
109 
110 #ifndef MOUNT_ATTR_IDMAP
111 #define MOUNT_ATTR_IDMAP 0x00100000
112 #endif
113 
114 #ifndef MOUNT_ATTR_NOSYMFOLLOW
115 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
116 #endif
117 
/*
 * Raw mount_setattr(2) invocation via syscall(2).
 *
 * All arguments are forwarded untouched; the return value is whatever
 * syscall() reports (narrowed to int), with errno set by syscall() on failure.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
123 
/*
 * write(2) wrapper that transparently restarts the call when it is
 * interrupted (fails with EINTR).
 *
 * Returns exactly what the last write() returned: the number of bytes written
 * (possibly fewer than @count) or a negative value with errno set.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything but an EINTR failure is final. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
134 
135 static int write_file(const char *path, const void *buf, size_t count)
136 {
137 	int fd;
138 	ssize_t ret;
139 
140 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
141 	if (fd < 0)
142 		return -1;
143 
144 	ret = write_nointr(fd, buf, count);
145 	close(fd);
146 	if (ret < 0 || (size_t)ret != count)
147 		return -1;
148 
149 	return 0;
150 }
151 
152 static int create_and_enter_userns(void)
153 {
154 	uid_t uid;
155 	gid_t gid;
156 	char map[100];
157 
158 	uid = getuid();
159 	gid = getgid();
160 
161 	if (unshare(CLONE_NEWUSER))
162 		return -1;
163 
164 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
165 	    errno != ENOENT)
166 		return -1;
167 
168 	snprintf(map, sizeof(map), "0 %d 1", uid);
169 	if (write_file("/proc/self/uid_map", map, strlen(map)))
170 		return -1;
171 
172 
173 	snprintf(map, sizeof(map), "0 %d 1", gid);
174 	if (write_file("/proc/self/gid_map", map, strlen(map)))
175 		return -1;
176 
177 	if (setgid(0))
178 		return -1;
179 
180 	if (setuid(0))
181 		return -1;
182 
183 	return 0;
184 }
185 
186 static int prepare_unpriv_mountns(void)
187 {
188 	if (create_and_enter_userns())
189 		return -1;
190 
191 	if (unshare(CLONE_NEWNS))
192 		return -1;
193 
194 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
195 		return -1;
196 
197 	return 0;
198 }
199 
200 #ifndef ST_NOSYMFOLLOW
201 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
202 #endif
203 
/*
 * Query the mount flags of the filesystem containing @path via statvfs(3)
 * and translate them into a bitmask.
 *
 * Returns the translated mask, or -EINVAL when statvfs() fails or reports a
 * flag bit that is not listed in the table below.
 */
static int read_mnt_flags(const char *path)
{
	/*
	 * statvfs flag -> bit placed in the result.
	 * NOTE(review): the last two rows emit the ST_* value itself rather
	 * than an MS_* constant; kept as is because callers may compare
	 * against exactly these values - confirm before changing.
	 */
	static const struct {
		unsigned long st_flag;
		unsigned int out_flag;
	} flag_map[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		{ ST_MANDLOCK,		ST_MANDLOCK },
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	struct statvfs sfs;
	unsigned long known = 0;
	unsigned int result = 0;
	size_t idx;

	if (statvfs(path, &sfs) != 0)
		return -EINVAL;

	for (idx = 0; idx < sizeof(flag_map) / sizeof(flag_map[0]); idx++) {
		known |= flag_map[idx].st_flag;
		if (sfs.f_flag & flag_map[idx].st_flag)
			result |= flag_map[idx].out_flag;
	}

	/* Refuse to guess about flags this table does not know. */
	if (sfs.f_flag & ~known)
		return -EINVAL;

	return result;
}
243 
/*
 * Advance past @nfields blank-separated fields of @src.
 *
 * A field ends at the first space or tab; exactly one separator character is
 * consumed per field, so consecutive separators count as empty fields.
 * Returns a pointer to the start of the following field, or to the
 * terminating NUL if the string ends first. Never returns NULL.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		/* Jump to the separator (or NUL) that ends this field. */
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			return cursor;

		cursor++;
	}

	return cursor;
}
261 
/*
 * Terminate @word in place at its first space or tab. A string without any
 * such character is left unchanged (its NUL is rewritten with a NUL).
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
268 
/*
 * Report whether the mount whose mount point equals @path is a shared mount
 * according to /proc/self/mountinfo.
 *
 * For every line, the fifth blank-separated field is compared with @path;
 * on a match the seventh field is searched for the substring "shared:".
 * Only that single field is inspected.
 *
 * Returns true for the first matching line that carries "shared:", false if
 * no such line exists or the file cannot be opened. The line buffer and the
 * stream are released on every path.
 */
static bool is_shared_mount(const char *path)
{
	bool shared = false;
	size_t len = 0;
	char *line = NULL;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/* Locate both fields before any of them gets NUL-terminated. */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		null_endofword(target);
		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
305 
306 static void *mount_setattr_thread(void *data)
307 {
308 	struct mount_attr attr = {
309 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
310 		.attr_clr	= 0,
311 		.propagation	= MS_SHARED,
312 	};
313 
314 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
315 		pthread_exit(int_to_ptr(-1));
316 
317 	pthread_exit(int_to_ptr(0));
318 }
319 
320 /* Attempt to de-conflict with the selftests tree. */
321 #ifndef SKIP
322 #define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
323 #endif
324 
325 static bool mount_setattr_supported(void)
326 {
327 	int ret;
328 
329 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
330 	if (ret < 0 && errno == ENOSYS)
331 		return false;
332 
333 	return true;
334 }
335 
336 FIXTURE(mount_setattr) {
337 };
338 
339 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
340 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
341 
342 FIXTURE_SETUP(mount_setattr)
343 {
344 	int fd = -EBADF;
345 
346 	if (!mount_setattr_supported())
347 		SKIP(return, "mount_setattr syscall not supported");
348 
349 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
350 
351 	(void)umount2("/mnt", MNT_DETACH);
352 	(void)umount2("/tmp", MNT_DETACH);
353 
354 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
355 			"size=100000,mode=700"), 0);
356 
357 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
358 
359 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
360 			"size=100000,mode=700"), 0);
361 
362 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
363 
364 	ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
365 
366 	ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
367 
368 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
369 			"size=100000,mode=700"), 0);
370 
371 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
372 			"size=100000,mode=700"), 0);
373 
374 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
375 
376 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
377 			"size=100000,mode=700"), 0);
378 
379 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
380 
381 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
382 
383 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
384 
385 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
386 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
387 
388 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
389 
390 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
391 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
392 
393 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
394 	ASSERT_GT(fd, 0);
395 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
396 	ASSERT_EQ(close(fd), 0);
397 }
398 
399 FIXTURE_TEARDOWN(mount_setattr)
400 {
401 	if (!mount_setattr_supported())
402 		SKIP(return, "mount_setattr syscall not supported");
403 
404 	(void)umount2("/mnt/A", MNT_DETACH);
405 	(void)umount2("/tmp", MNT_DETACH);
406 }
407 
408 TEST_F(mount_setattr, invalid_attributes)
409 {
410 	struct mount_attr invalid_attr = {
411 		.attr_set = (1U << 31),
412 	};
413 
414 	if (!mount_setattr_supported())
415 		SKIP(return, "mount_setattr syscall not supported");
416 
417 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
418 				    sizeof(invalid_attr)), 0);
419 
420 	invalid_attr.attr_set	= 0;
421 	invalid_attr.attr_clr	= (1U << 31);
422 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
423 				    sizeof(invalid_attr)), 0);
424 
425 	invalid_attr.attr_clr		= 0;
426 	invalid_attr.propagation	= (1U << 31);
427 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
428 				    sizeof(invalid_attr)), 0);
429 
430 	invalid_attr.attr_set		= (1U << 31);
431 	invalid_attr.attr_clr		= (1U << 31);
432 	invalid_attr.propagation	= (1U << 31);
433 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
434 				    sizeof(invalid_attr)), 0);
435 
436 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
437 				    sizeof(invalid_attr)), 0);
438 }
439 
440 TEST_F(mount_setattr, extensibility)
441 {
442 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
443 	char *s = "dummy";
444 	struct mount_attr invalid_attr = {};
445 	struct mount_attr_large {
446 		struct mount_attr attr1;
447 		struct mount_attr attr2;
448 		struct mount_attr attr3;
449 	} large_attr = {};
450 
451 	if (!mount_setattr_supported())
452 		SKIP(return, "mount_setattr syscall not supported");
453 
454 	old_flags = read_mnt_flags("/mnt/A");
455 	ASSERT_GT(old_flags, 0);
456 
457 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
458 				    sizeof(invalid_attr)), 0);
459 	ASSERT_EQ(errno, EFAULT);
460 
461 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
462 				    sizeof(invalid_attr)), 0);
463 	ASSERT_EQ(errno, EINVAL);
464 
465 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
466 	ASSERT_EQ(errno, EINVAL);
467 
468 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
469 				    sizeof(invalid_attr) / 2), 0);
470 	ASSERT_EQ(errno, EINVAL);
471 
472 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
473 				    sizeof(invalid_attr) / 2), 0);
474 	ASSERT_EQ(errno, EINVAL);
475 
476 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
477 				    (void *)&large_attr, sizeof(large_attr)), 0);
478 
479 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
480 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
481 				    (void *)&large_attr, sizeof(large_attr)), 0);
482 
483 	large_attr.attr3.attr_set = 0;
484 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
485 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
486 				    (void *)&large_attr, sizeof(large_attr)), 0);
487 
488 	expected_flags = old_flags;
489 	expected_flags |= MS_RDONLY;
490 
491 	new_flags = read_mnt_flags("/mnt/A");
492 	ASSERT_EQ(new_flags, expected_flags);
493 
494 	new_flags = read_mnt_flags("/mnt/A/AA");
495 	ASSERT_EQ(new_flags, expected_flags);
496 
497 	new_flags = read_mnt_flags("/mnt/A/AA/B");
498 	ASSERT_EQ(new_flags, expected_flags);
499 
500 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
501 	ASSERT_EQ(new_flags, expected_flags);
502 }
503 
504 TEST_F(mount_setattr, basic)
505 {
506 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
507 	struct mount_attr attr = {
508 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
509 		.attr_clr	= MOUNT_ATTR__ATIME,
510 	};
511 
512 	if (!mount_setattr_supported())
513 		SKIP(return, "mount_setattr syscall not supported");
514 
515 	old_flags = read_mnt_flags("/mnt/A");
516 	ASSERT_GT(old_flags, 0);
517 
518 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
519 
520 	expected_flags = old_flags;
521 	expected_flags |= MS_RDONLY;
522 	expected_flags |= MS_NOEXEC;
523 	expected_flags &= ~MS_NOATIME;
524 	expected_flags |= MS_RELATIME;
525 
526 	new_flags = read_mnt_flags("/mnt/A");
527 	ASSERT_EQ(new_flags, expected_flags);
528 
529 	new_flags = read_mnt_flags("/mnt/A/AA");
530 	ASSERT_EQ(new_flags, old_flags);
531 
532 	new_flags = read_mnt_flags("/mnt/A/AA/B");
533 	ASSERT_EQ(new_flags, old_flags);
534 
535 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
536 	ASSERT_EQ(new_flags, old_flags);
537 }
538 
539 TEST_F(mount_setattr, basic_recursive)
540 {
541 	int fd;
542 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
543 	struct mount_attr attr = {
544 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
545 		.attr_clr	= MOUNT_ATTR__ATIME,
546 	};
547 
548 	if (!mount_setattr_supported())
549 		SKIP(return, "mount_setattr syscall not supported");
550 
551 	old_flags = read_mnt_flags("/mnt/A");
552 	ASSERT_GT(old_flags, 0);
553 
554 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
555 
556 	expected_flags = old_flags;
557 	expected_flags |= MS_RDONLY;
558 	expected_flags |= MS_NOEXEC;
559 	expected_flags &= ~MS_NOATIME;
560 	expected_flags |= MS_RELATIME;
561 
562 	new_flags = read_mnt_flags("/mnt/A");
563 	ASSERT_EQ(new_flags, expected_flags);
564 
565 	new_flags = read_mnt_flags("/mnt/A/AA");
566 	ASSERT_EQ(new_flags, expected_flags);
567 
568 	new_flags = read_mnt_flags("/mnt/A/AA/B");
569 	ASSERT_EQ(new_flags, expected_flags);
570 
571 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
572 	ASSERT_EQ(new_flags, expected_flags);
573 
574 	memset(&attr, 0, sizeof(attr));
575 	attr.attr_clr = MOUNT_ATTR_RDONLY;
576 	attr.propagation = MS_SHARED;
577 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
578 
579 	expected_flags &= ~MS_RDONLY;
580 	new_flags = read_mnt_flags("/mnt/A");
581 	ASSERT_EQ(new_flags, expected_flags);
582 
583 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
584 
585 	new_flags = read_mnt_flags("/mnt/A/AA");
586 	ASSERT_EQ(new_flags, expected_flags);
587 
588 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
589 
590 	new_flags = read_mnt_flags("/mnt/A/AA/B");
591 	ASSERT_EQ(new_flags, expected_flags);
592 
593 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
594 
595 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
596 	ASSERT_EQ(new_flags, expected_flags);
597 
598 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
599 
600 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
601 	ASSERT_GE(fd, 0);
602 
603 	/*
604 	 * We're holding a fd open for writing so this needs to fail somewhere
605 	 * in the middle and the mount options need to be unchanged.
606 	 */
607 	attr.attr_set = MOUNT_ATTR_RDONLY;
608 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
609 
610 	new_flags = read_mnt_flags("/mnt/A");
611 	ASSERT_EQ(new_flags, expected_flags);
612 
613 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
614 
615 	new_flags = read_mnt_flags("/mnt/A/AA");
616 	ASSERT_EQ(new_flags, expected_flags);
617 
618 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
619 
620 	new_flags = read_mnt_flags("/mnt/A/AA/B");
621 	ASSERT_EQ(new_flags, expected_flags);
622 
623 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
624 
625 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
626 	ASSERT_EQ(new_flags, expected_flags);
627 
628 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
629 
630 	EXPECT_EQ(close(fd), 0);
631 }
632 
633 TEST_F(mount_setattr, mount_has_writers)
634 {
635 	int fd, dfd;
636 	unsigned int old_flags = 0, new_flags = 0;
637 	struct mount_attr attr = {
638 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
639 		.attr_clr	= MOUNT_ATTR__ATIME,
640 		.propagation	= MS_SHARED,
641 	};
642 
643 	if (!mount_setattr_supported())
644 		SKIP(return, "mount_setattr syscall not supported");
645 
646 	old_flags = read_mnt_flags("/mnt/A");
647 	ASSERT_GT(old_flags, 0);
648 
649 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
650 	ASSERT_GE(fd, 0);
651 
652 	/*
653 	 * We're holding a fd open to a mount somwhere in the middle so this
654 	 * needs to fail somewhere in the middle. After this the mount options
655 	 * need to be unchanged.
656 	 */
657 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
658 
659 	new_flags = read_mnt_flags("/mnt/A");
660 	ASSERT_EQ(new_flags, old_flags);
661 
662 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
663 
664 	new_flags = read_mnt_flags("/mnt/A/AA");
665 	ASSERT_EQ(new_flags, old_flags);
666 
667 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
668 
669 	new_flags = read_mnt_flags("/mnt/A/AA/B");
670 	ASSERT_EQ(new_flags, old_flags);
671 
672 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
673 
674 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
675 	ASSERT_EQ(new_flags, old_flags);
676 
677 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
678 
679 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
680 	ASSERT_GE(dfd, 0);
681 	EXPECT_EQ(fsync(dfd), 0);
682 	EXPECT_EQ(close(dfd), 0);
683 
684 	EXPECT_EQ(fsync(fd), 0);
685 	EXPECT_EQ(close(fd), 0);
686 
687 	/* All writers are gone so this should succeed. */
688 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
689 }
690 
691 TEST_F(mount_setattr, mixed_mount_options)
692 {
693 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
694 	struct mount_attr attr = {
695 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
696 		.attr_set = MOUNT_ATTR_RELATIME,
697 	};
698 
699 	if (!mount_setattr_supported())
700 		SKIP(return, "mount_setattr syscall not supported");
701 
702 	old_flags1 = read_mnt_flags("/mnt/B");
703 	ASSERT_GT(old_flags1, 0);
704 
705 	old_flags2 = read_mnt_flags("/mnt/B/BB");
706 	ASSERT_GT(old_flags2, 0);
707 
708 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
709 
710 	expected_flags = old_flags2;
711 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
712 	expected_flags |= MS_RELATIME;
713 
714 	new_flags = read_mnt_flags("/mnt/B");
715 	ASSERT_EQ(new_flags, expected_flags);
716 
717 	expected_flags = old_flags2;
718 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
719 	expected_flags |= MS_RELATIME;
720 
721 	new_flags = read_mnt_flags("/mnt/B/BB");
722 	ASSERT_EQ(new_flags, expected_flags);
723 }
724 
725 TEST_F(mount_setattr, time_changes)
726 {
727 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
728 	struct mount_attr attr = {
729 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
730 	};
731 
732 	if (!mount_setattr_supported())
733 		SKIP(return, "mount_setattr syscall not supported");
734 
735 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
736 
737 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
738 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
739 
740 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
741 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
742 
743 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
744 	attr.attr_clr = MOUNT_ATTR__ATIME;
745 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
746 
747 	attr.attr_set = 0;
748 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
749 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
750 
751 	attr.attr_clr = MOUNT_ATTR_NOATIME;
752 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
753 
754 	old_flags = read_mnt_flags("/mnt/A");
755 	ASSERT_GT(old_flags, 0);
756 
757 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
758 	attr.attr_clr = MOUNT_ATTR__ATIME;
759 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
760 
761 	expected_flags = old_flags;
762 	expected_flags |= MS_NOATIME;
763 	expected_flags |= MS_NODIRATIME;
764 
765 	new_flags = read_mnt_flags("/mnt/A");
766 	ASSERT_EQ(new_flags, expected_flags);
767 
768 	new_flags = read_mnt_flags("/mnt/A/AA");
769 	ASSERT_EQ(new_flags, expected_flags);
770 
771 	new_flags = read_mnt_flags("/mnt/A/AA/B");
772 	ASSERT_EQ(new_flags, expected_flags);
773 
774 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
775 	ASSERT_EQ(new_flags, expected_flags);
776 
777 	memset(&attr, 0, sizeof(attr));
778 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
779 	attr.attr_set |= MOUNT_ATTR_RELATIME;
780 	attr.attr_clr |= MOUNT_ATTR__ATIME;
781 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
782 
783 	expected_flags &= ~MS_NOATIME;
784 	expected_flags |= MS_RELATIME;
785 
786 	new_flags = read_mnt_flags("/mnt/A");
787 	ASSERT_EQ(new_flags, expected_flags);
788 
789 	new_flags = read_mnt_flags("/mnt/A/AA");
790 	ASSERT_EQ(new_flags, expected_flags);
791 
792 	new_flags = read_mnt_flags("/mnt/A/AA/B");
793 	ASSERT_EQ(new_flags, expected_flags);
794 
795 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
796 	ASSERT_EQ(new_flags, expected_flags);
797 
798 	memset(&attr, 0, sizeof(attr));
799 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
800 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
801 	attr.attr_clr |= MOUNT_ATTR__ATIME;
802 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
803 
804 	expected_flags &= ~MS_RELATIME;
805 
806 	new_flags = read_mnt_flags("/mnt/A");
807 	ASSERT_EQ(new_flags, expected_flags);
808 
809 	new_flags = read_mnt_flags("/mnt/A/AA");
810 	ASSERT_EQ(new_flags, expected_flags);
811 
812 	new_flags = read_mnt_flags("/mnt/A/AA/B");
813 	ASSERT_EQ(new_flags, expected_flags);
814 
815 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
816 	ASSERT_EQ(new_flags, expected_flags);
817 
818 	memset(&attr, 0, sizeof(attr));
819 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
820 	attr.attr_set |= MOUNT_ATTR_NOATIME;
821 	attr.attr_clr |= MOUNT_ATTR__ATIME;
822 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
823 
824 	expected_flags |= MS_NOATIME;
825 	new_flags = read_mnt_flags("/mnt/A");
826 	ASSERT_EQ(new_flags, expected_flags);
827 
828 	new_flags = read_mnt_flags("/mnt/A/AA");
829 	ASSERT_EQ(new_flags, expected_flags);
830 
831 	new_flags = read_mnt_flags("/mnt/A/AA/B");
832 	ASSERT_EQ(new_flags, expected_flags);
833 
834 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
835 	ASSERT_EQ(new_flags, expected_flags);
836 
837 	memset(&attr, 0, sizeof(attr));
838 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
839 
840 	new_flags = read_mnt_flags("/mnt/A");
841 	ASSERT_EQ(new_flags, expected_flags);
842 
843 	new_flags = read_mnt_flags("/mnt/A/AA");
844 	ASSERT_EQ(new_flags, expected_flags);
845 
846 	new_flags = read_mnt_flags("/mnt/A/AA/B");
847 	ASSERT_EQ(new_flags, expected_flags);
848 
849 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
850 	ASSERT_EQ(new_flags, expected_flags);
851 
852 	memset(&attr, 0, sizeof(attr));
853 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
854 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
855 
856 	expected_flags &= ~MS_NODIRATIME;
857 
858 	new_flags = read_mnt_flags("/mnt/A");
859 	ASSERT_EQ(new_flags, expected_flags);
860 
861 	new_flags = read_mnt_flags("/mnt/A/AA");
862 	ASSERT_EQ(new_flags, expected_flags);
863 
864 	new_flags = read_mnt_flags("/mnt/A/AA/B");
865 	ASSERT_EQ(new_flags, expected_flags);
866 
867 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
868 	ASSERT_EQ(new_flags, expected_flags);
869 }
870 
871 TEST_F(mount_setattr, multi_threaded)
872 {
873 	int i, j, nthreads, ret = 0;
874 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
875 	pthread_attr_t pattr;
876 	pthread_t threads[DEFAULT_THREADS];
877 
878 	if (!mount_setattr_supported())
879 		SKIP(return, "mount_setattr syscall not supported");
880 
881 	old_flags = read_mnt_flags("/mnt/A");
882 	ASSERT_GT(old_flags, 0);
883 
884 	/* Try to change mount options from multiple threads. */
885 	nthreads = get_nprocs_conf();
886 	if (nthreads > DEFAULT_THREADS)
887 		nthreads = DEFAULT_THREADS;
888 
889 	pthread_attr_init(&pattr);
890 	for (i = 0; i < nthreads; i++)
891 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
892 
893 	for (j = 0; j < i; j++) {
894 		void *retptr = NULL;
895 
896 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
897 
898 		ret += ptr_to_int(retptr);
899 		EXPECT_EQ(ret, 0);
900 	}
901 	pthread_attr_destroy(&pattr);
902 
903 	ASSERT_EQ(ret, 0);
904 
905 	expected_flags = old_flags;
906 	expected_flags |= MS_RDONLY;
907 	expected_flags |= MS_NOSUID;
908 	new_flags = read_mnt_flags("/mnt/A");
909 	ASSERT_EQ(new_flags, expected_flags);
910 
911 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
912 
913 	new_flags = read_mnt_flags("/mnt/A/AA");
914 	ASSERT_EQ(new_flags, expected_flags);
915 
916 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
917 
918 	new_flags = read_mnt_flags("/mnt/A/AA/B");
919 	ASSERT_EQ(new_flags, expected_flags);
920 
921 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
922 
923 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
924 	ASSERT_EQ(new_flags, expected_flags);
925 
926 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
927 }
928 
929 TEST_F(mount_setattr, wrong_user_namespace)
930 {
931 	int ret;
932 	struct mount_attr attr = {
933 		.attr_set = MOUNT_ATTR_RDONLY,
934 	};
935 
936 	if (!mount_setattr_supported())
937 		SKIP(return, "mount_setattr syscall not supported");
938 
939 	EXPECT_EQ(create_and_enter_userns(), 0);
940 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
941 	ASSERT_LT(ret, 0);
942 	ASSERT_EQ(errno, EPERM);
943 }
944 
945 TEST_F(mount_setattr, wrong_mount_namespace)
946 {
947 	int fd, ret;
948 	struct mount_attr attr = {
949 		.attr_set = MOUNT_ATTR_RDONLY,
950 	};
951 
952 	if (!mount_setattr_supported())
953 		SKIP(return, "mount_setattr syscall not supported");
954 
955 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
956 	ASSERT_GE(fd, 0);
957 
958 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
959 
960 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
961 	ASSERT_LT(ret, 0);
962 	ASSERT_EQ(errno, EINVAL);
963 }
964 
965 FIXTURE(mount_setattr_idmapped) {
966 };
967 
968 FIXTURE_SETUP(mount_setattr_idmapped)
969 {
970 	int img_fd = -EBADF;
971 
972 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
973 
974 	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
975 
976 	(void)umount2("/mnt", MNT_DETACH);
977 	(void)umount2("/tmp", MNT_DETACH);
978 
979 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
980 			"size=100000,mode=700"), 0);
981 
982 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
983 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
984 	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
985 
986 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
987 			"size=100000,mode=700"), 0);
988 
989 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
990 	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
991 	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
992 
993 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
994 			"size=100000,mode=700"), 0);
995 
996 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
997 			"size=2m,mode=700"), 0);
998 
999 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
1000 
1001 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
1002 			"size=100000,mode=700"), 0);
1003 
1004 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1005 
1006 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1007 
1008 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
1009 
1010 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
1011 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
1012 
1013 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
1014 
1015 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
1016 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
1017 
1018 	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
1019 	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
1020 	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
1021 	ASSERT_GE(img_fd, 0);
1022 	ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0);
1023 	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
1024 	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
1025 	ASSERT_EQ(close(img_fd), 0);
1026 }
1027 
/* Lazily detach /mnt/A and /tmp; errors are deliberately ignored. */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const detach[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(detach) / sizeof(detach[0]); i++)
		(void)umount2(detach[i], MNT_DETACH);
}
1033 
1034 /**
1035  * Validate that negative fd values are rejected.
1036  */
1037 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1038 {
1039 	struct mount_attr attr = {
1040 		.attr_set	= MOUNT_ATTR_IDMAP,
1041 		.userns_fd	= -EBADF,
1042 	};
1043 
1044 	if (!mount_setattr_supported())
1045 		SKIP(return, "mount_setattr syscall not supported");
1046 
1047 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1048 		TH_LOG("failure: created idmapped mount with negative fd");
1049 	}
1050 }
1051 
1052 /**
1053  * Validate that excessively large fd values are rejected.
1054  */
1055 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1056 {
1057 	struct mount_attr attr = {
1058 		.attr_set	= MOUNT_ATTR_IDMAP,
1059 		.userns_fd	= INT64_MAX,
1060 	};
1061 
1062 	if (!mount_setattr_supported())
1063 		SKIP(return, "mount_setattr syscall not supported");
1064 
1065 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1066 		TH_LOG("failure: created idmapped mount with too large fd value");
1067 	}
1068 }
1069 
1070 /**
1071  * Validate that closed fd values are rejected.
1072  */
1073 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1074 {
1075 	int fd;
1076 	struct mount_attr attr = {
1077 		.attr_set = MOUNT_ATTR_IDMAP,
1078 	};
1079 
1080 	if (!mount_setattr_supported())
1081 		SKIP(return, "mount_setattr syscall not supported");
1082 
1083 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1084 	ASSERT_GE(fd, 0);
1085 	ASSERT_GE(close(fd), 0);
1086 
1087 	attr.userns_fd = fd;
1088 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1089 		TH_LOG("failure: created idmapped mount with closed fd");
1090 	}
1091 }
1092 
1093 /**
1094  * Validate that the initial user namespace is rejected.
1095  */
1096 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1097 {
1098 	int open_tree_fd = -EBADF;
1099 	struct mount_attr attr = {
1100 		.attr_set = MOUNT_ATTR_IDMAP,
1101 	};
1102 
1103 	if (!mount_setattr_supported())
1104 		SKIP(return, "mount_setattr syscall not supported");
1105 
1106 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1107 				     AT_NO_AUTOMOUNT |
1108 				     AT_SYMLINK_NOFOLLOW |
1109 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1110 	ASSERT_GE(open_tree_fd, 0);
1111 
1112 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1113 	ASSERT_GE(attr.userns_fd, 0);
1114 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1115 	ASSERT_EQ(errno, EPERM);
1116 	ASSERT_EQ(close(attr.userns_fd), 0);
1117 	ASSERT_EQ(close(open_tree_fd), 0);
1118 }
1119 
/*
 * Write the same "<nsid> <hostid> <range>" mapping line to both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map.
 *
 * Returns 0 when both writes succeed, -1 as soon as write_file() reports a
 * failure (the gid map is not attempted if the uid map write failed).
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	char mapping[100], target[256];
	size_t len;

	/* The mapping text is identical for uids and gids; format it once. */
	snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
	len = strlen(mapping);

	snprintf(target, sizeof(target), "/proc/%d/uid_map", pid);
	if (write_file(target, mapping, len))
		return -1;

	snprintf(target, sizeof(target), "/proc/%d/gid_map", pid);
	if (write_file(target, mapping, len))
		return -1;

	return 0;
}
1138 
#define __STACK_SIZE (8 * 1024 * 1024)
/*
 * Run @fn(@arg) in a child created with clone() using @flags | SIGCHLD, on a
 * freshly allocated stack of __STACK_SIZE bytes.
 *
 * Returns the child's pid on success, -ENOMEM if the stack could not be
 * allocated, or the negative value returned by clone() (with errno set by
 * clone()) on failure.
 *
 * The stack allocation is released in the caller whenever that is safe: when
 * clone() failed (no child exists), or when CLONE_VM was not requested (the
 * child then runs on its own copy of the address space). With CLONE_VM the
 * child shares the allocation, so it is intentionally left in place.
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	char *stack;
	pid_t pid;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack pointer handed to clone() is the top of the allocation. */
	pid = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	if (pid < 0 || !(flags & CLONE_VM)) {
		/* Keep clone()'s errno intact for the caller. */
		int saved_errno = errno;

		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1154 
/*
 * Child entry point used by get_userns_fd(): the child sends SIGSTOP to
 * itself and returns the result of kill(). @data is unused.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	(void)data;

	return kill(self, SIGSTOP);
}
1159 
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 *
 * Returns the child's exit status if it exited normally, or -1 if waitpid()
 * failed for a reason other than EINTR or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1178 
1179 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1180 {
1181 	int ret;
1182 	pid_t pid;
1183 	char path[256];
1184 
1185 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1186 	if (pid < 0)
1187 		return -errno;
1188 
1189 	ret = map_ids(pid, nsid, hostid, range);
1190 	if (ret < 0)
1191 		return ret;
1192 
1193 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1194 	ret = open(path, O_RDONLY | O_CLOEXEC);
1195 	kill(pid, SIGKILL);
1196 	wait_for_pid(pid);
1197 	return ret;
1198 }
1199 
1200 /**
1201  * Validate that an attached mount in our mount namespace cannot be idmapped.
1202  * (The kernel enforces that the mount's mount namespace and the caller's mount
1203  *  namespace match.)
1204  */
1205 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1206 {
1207 	int open_tree_fd = -EBADF;
1208 	struct mount_attr attr = {
1209 		.attr_set = MOUNT_ATTR_IDMAP,
1210 	};
1211 
1212 	if (!mount_setattr_supported())
1213 		SKIP(return, "mount_setattr syscall not supported");
1214 
1215 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1216 				     AT_EMPTY_PATH |
1217 				     AT_NO_AUTOMOUNT |
1218 				     AT_SYMLINK_NOFOLLOW |
1219 				     OPEN_TREE_CLOEXEC);
1220 	ASSERT_GE(open_tree_fd, 0);
1221 
1222 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1223 	ASSERT_GE(attr.userns_fd, 0);
1224 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1225 	ASSERT_EQ(close(attr.userns_fd), 0);
1226 	ASSERT_EQ(close(open_tree_fd), 0);
1227 }
1228 
1229 /**
1230  * Validate that idmapping a mount is rejected if the mount's mount namespace
1231  * and our mount namespace don't match.
1232  * (The kernel enforces that the mount's mount namespace and the caller's mount
1233  *  namespace match.)
1234  */
1235 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1236 {
1237 	int open_tree_fd = -EBADF;
1238 	struct mount_attr attr = {
1239 		.attr_set = MOUNT_ATTR_IDMAP,
1240 	};
1241 
1242 	if (!mount_setattr_supported())
1243 		SKIP(return, "mount_setattr syscall not supported");
1244 
1245 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1246 				     AT_EMPTY_PATH |
1247 				     AT_NO_AUTOMOUNT |
1248 				     AT_SYMLINK_NOFOLLOW |
1249 				     OPEN_TREE_CLOEXEC);
1250 	ASSERT_GE(open_tree_fd, 0);
1251 
1252 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1253 
1254 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1255 	ASSERT_GE(attr.userns_fd, 0);
1256 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1257 				    sizeof(attr)), 0);
1258 	ASSERT_EQ(close(attr.userns_fd), 0);
1259 	ASSERT_EQ(close(open_tree_fd), 0);
1260 }
1261 
1262 /**
1263  * Validate that an attached mount in our mount namespace can be idmapped.
1264  */
1265 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1266 {
1267 	int open_tree_fd = -EBADF;
1268 	struct mount_attr attr = {
1269 		.attr_set = MOUNT_ATTR_IDMAP,
1270 	};
1271 
1272 	if (!mount_setattr_supported())
1273 		SKIP(return, "mount_setattr syscall not supported");
1274 
1275 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1276 				     AT_EMPTY_PATH |
1277 				     AT_NO_AUTOMOUNT |
1278 				     AT_SYMLINK_NOFOLLOW |
1279 				     OPEN_TREE_CLOEXEC |
1280 				     OPEN_TREE_CLONE);
1281 	ASSERT_GE(open_tree_fd, 0);
1282 
1283 	/* Changing mount properties on a detached mount. */
1284 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1285 	ASSERT_GE(attr.userns_fd, 0);
1286 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1287 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1288 	ASSERT_EQ(close(attr.userns_fd), 0);
1289 	ASSERT_EQ(close(open_tree_fd), 0);
1290 }
1291 
1292 /**
1293  * Validate that a detached mount not in our mount namespace can be idmapped.
1294  */
1295 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1296 {
1297 	int open_tree_fd = -EBADF;
1298 	struct mount_attr attr = {
1299 		.attr_set = MOUNT_ATTR_IDMAP,
1300 	};
1301 
1302 	if (!mount_setattr_supported())
1303 		SKIP(return, "mount_setattr syscall not supported");
1304 
1305 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1306 				     AT_EMPTY_PATH |
1307 				     AT_NO_AUTOMOUNT |
1308 				     AT_SYMLINK_NOFOLLOW |
1309 				     OPEN_TREE_CLOEXEC |
1310 				     OPEN_TREE_CLONE);
1311 	ASSERT_GE(open_tree_fd, 0);
1312 
1313 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1314 
1315 	/* Changing mount properties on a detached mount. */
1316 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1317 	ASSERT_GE(attr.userns_fd, 0);
1318 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1319 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1320 	ASSERT_EQ(close(attr.userns_fd), 0);
1321 	ASSERT_EQ(close(open_tree_fd), 0);
1322 }
1323 
1324 /**
1325  * Validate that currently changing the idmapping of an idmapped mount fails.
1326  */
1327 TEST_F(mount_setattr_idmapped, change_idmapping)
1328 {
1329 	int open_tree_fd = -EBADF;
1330 	struct mount_attr attr = {
1331 		.attr_set = MOUNT_ATTR_IDMAP,
1332 	};
1333 
1334 	if (!mount_setattr_supported())
1335 		SKIP(return, "mount_setattr syscall not supported");
1336 
1337 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1338 				     AT_EMPTY_PATH |
1339 				     AT_NO_AUTOMOUNT |
1340 				     AT_SYMLINK_NOFOLLOW |
1341 				     OPEN_TREE_CLOEXEC |
1342 				     OPEN_TREE_CLONE);
1343 	ASSERT_GE(open_tree_fd, 0);
1344 
1345 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1346 	ASSERT_GE(attr.userns_fd, 0);
1347 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1348 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1349 	ASSERT_EQ(close(attr.userns_fd), 0);
1350 
1351 	/* Change idmapping on a detached mount that is already idmapped. */
1352 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1353 	ASSERT_GE(attr.userns_fd, 0);
1354 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1355 	ASSERT_EQ(close(attr.userns_fd), 0);
1356 	ASSERT_EQ(close(open_tree_fd), 0);
1357 }
1358 
/*
 * Check the ownership of @path (resolved relative to @dfd, with fstatat()
 * @flags).
 *
 * Returns true only if fstatat() succeeds and the file's uid and gid equal
 * @expected_uid and @expected_gid; returns false otherwise, including when
 * the file cannot be stat'ed.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1371 
1372 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1373 {
1374 	int open_tree_fd = -EBADF;
1375 	struct mount_attr attr = {
1376 		.attr_set = MOUNT_ATTR_IDMAP,
1377 	};
1378 
1379 	if (!mount_setattr_supported())
1380 		SKIP(return, "mount_setattr syscall not supported");
1381 
1382 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1383 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1384 
1385 	ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
1386 			"size=100000,mode=700"), 0);
1387 
1388 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1389 
1390 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1391 
1392 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1393 				     AT_RECURSIVE |
1394 				     AT_EMPTY_PATH |
1395 				     AT_NO_AUTOMOUNT |
1396 				     AT_SYMLINK_NOFOLLOW |
1397 				     OPEN_TREE_CLOEXEC |
1398 				     OPEN_TREE_CLONE);
1399 	ASSERT_GE(open_tree_fd, 0);
1400 
1401 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1402 	ASSERT_GE(attr.userns_fd, 0);
1403 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1404 	ASSERT_EQ(close(attr.userns_fd), 0);
1405 	ASSERT_EQ(close(open_tree_fd), 0);
1406 
1407 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1408 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1409 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1410 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1411 
1412 	(void)umount2("/mnt/A", MNT_DETACH);
1413 }
1414 
1415 TEST_F(mount_setattr, mount_attr_nosymfollow)
1416 {
1417 	int fd;
1418 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1419 	struct mount_attr attr = {
1420 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1421 	};
1422 
1423 	if (!mount_setattr_supported())
1424 		SKIP(return, "mount_setattr syscall not supported");
1425 
1426 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1427 	ASSERT_GT(fd, 0);
1428 	ASSERT_EQ(close(fd), 0);
1429 
1430 	old_flags = read_mnt_flags("/mnt/A");
1431 	ASSERT_GT(old_flags, 0);
1432 
1433 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1434 
1435 	expected_flags = old_flags;
1436 	expected_flags |= ST_NOSYMFOLLOW;
1437 
1438 	new_flags = read_mnt_flags("/mnt/A");
1439 	ASSERT_EQ(new_flags, expected_flags);
1440 
1441 	new_flags = read_mnt_flags("/mnt/A/AA");
1442 	ASSERT_EQ(new_flags, expected_flags);
1443 
1444 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1445 	ASSERT_EQ(new_flags, expected_flags);
1446 
1447 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1448 	ASSERT_EQ(new_flags, expected_flags);
1449 
1450 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1451 	ASSERT_LT(fd, 0);
1452 	ASSERT_EQ(errno, ELOOP);
1453 
1454 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1455 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1456 
1457 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1458 
1459 	expected_flags &= ~ST_NOSYMFOLLOW;
1460 	new_flags = read_mnt_flags("/mnt/A");
1461 	ASSERT_EQ(new_flags, expected_flags);
1462 
1463 	new_flags = read_mnt_flags("/mnt/A/AA");
1464 	ASSERT_EQ(new_flags, expected_flags);
1465 
1466 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1467 	ASSERT_EQ(new_flags, expected_flags);
1468 
1469 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1470 	ASSERT_EQ(new_flags, expected_flags);
1471 
1472 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1473 	ASSERT_GT(fd, 0);
1474 	ASSERT_EQ(close(fd), 0);
1475 }
1476 
1477 TEST_F(mount_setattr, open_tree_detached)
1478 {
1479 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1480 	struct statx stx;
1481 
1482 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1483 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1484 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1485 				     OPEN_TREE_CLONE);
1486 	ASSERT_GE(fd_tree_base, 0);
1487 	/*
1488 	 * /mnt                   testing tmpfs
1489 	 * |-/mnt/A               testing tmpfs
1490 	 * | `-/mnt/A/AA          testing tmpfs
1491 	 * |   `-/mnt/A/AA/B      testing tmpfs
1492 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1493 	 * `-/mnt/B               testing ramfs
1494 	 */
1495 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1496 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1497 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1498 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1499 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1500 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1501 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1502 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1503 
1504 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1505 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1506 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1507 				       OPEN_TREE_CLONE);
1508 	ASSERT_GE(fd_tree_subdir, 0);
1509 	/*
1510 	 * /AA          testing tmpfs
1511 	 * `-/AA/B      testing tmpfs
1512 	 *   `-/AA/B/BB testing tmpfs
1513 	 */
1514 	ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
1515 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1516 	ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
1517 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1518 
1519 	ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1520 	/*
1521 	 * /tmp/target1          testing tmpfs
1522 	 * `-/tmp/target1/B      testing tmpfs
1523 	 *   `-/tmp/target1/B/BB testing tmpfs
1524 	 */
1525 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1526 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1527 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
1528 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1529 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
1530 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1531 
1532 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
1533 	/*
1534 	 * /tmp/target2                   testing tmpfs
1535 	 * |-/tmp/target2/A               testing tmpfs
1536 	 * | `-/tmp/target2/A/AA          testing tmpfs
1537 	 * |   `-/tmp/target2/A/AA/B      testing tmpfs
1538 	 * |     `-/tmp/target2/A/AA/B/BB testing tmpfs
1539 	 * `-/tmp/target2/B               testing ramfs
1540 	 */
1541 	ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
1542 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1543 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
1544 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1545 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
1546 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1547 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
1548 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1549 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
1550 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1551 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
1552 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1553 
1554 	EXPECT_EQ(close(fd_tree_base), 0);
1555 	EXPECT_EQ(close(fd_tree_subdir), 0);
1556 }
1557 
1558 TEST_F(mount_setattr, open_tree_detached_fail)
1559 {
1560 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1561 	struct statx stx;
1562 
1563 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1564 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1565 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1566 				     OPEN_TREE_CLONE);
1567 	ASSERT_GE(fd_tree_base, 0);
1568 	/*
1569 	 * /mnt                   testing tmpfs
1570 	 * |-/mnt/A               testing tmpfs
1571 	 * | `-/mnt/A/AA          testing tmpfs
1572 	 * |   `-/mnt/A/AA/B      testing tmpfs
1573 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1574 	 * `-/mnt/B               testing ramfs
1575 	 */
1576 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1577 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1578 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1579 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1580 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1581 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1582 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1583 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1584 
1585 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1586 
1587 	/*
1588 	 * The origin mount namespace of the anonymous mount namespace
1589 	 * of @fd_tree_base doesn't match the caller's mount namespace
1590 	 * anymore so creation of another detached mounts must fail.
1591 	 */
1592 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1593 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1594 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1595 				       OPEN_TREE_CLONE);
1596 	ASSERT_LT(fd_tree_subdir, 0);
1597 	ASSERT_EQ(errno, EINVAL);
1598 }
1599 
1600 TEST_F(mount_setattr, open_tree_detached_fail2)
1601 {
1602 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1603 	struct statx stx;
1604 
1605 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1606 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1607 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1608 				     OPEN_TREE_CLONE);
1609 	ASSERT_GE(fd_tree_base, 0);
1610 	/*
1611 	 * /mnt                   testing tmpfs
1612 	 * |-/mnt/A               testing tmpfs
1613 	 * | `-/mnt/A/AA          testing tmpfs
1614 	 * |   `-/mnt/A/AA/B      testing tmpfs
1615 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1616 	 * `-/mnt/B               testing ramfs
1617 	 */
1618 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1619 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1620 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1621 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1622 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1623 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1624 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1625 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1626 
1627 	EXPECT_EQ(create_and_enter_userns(), 0);
1628 
1629 	/*
1630 	 * The caller entered a new user namespace. They will have
1631 	 * CAP_SYS_ADMIN in this user namespace. However, they're still
1632 	 * located in a mount namespace that is owned by an ancestor
1633 	 * user namespace in which they hold no privilege. Creating a
1634 	 * detached mount must thus fail.
1635 	 */
1636 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1637 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1638 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1639 				       OPEN_TREE_CLONE);
1640 	ASSERT_LT(fd_tree_subdir, 0);
1641 	ASSERT_EQ(errno, EPERM);
1642 }
1643 
1644 TEST_F(mount_setattr, open_tree_detached_fail3)
1645 {
1646 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1647 	struct statx stx;
1648 
1649 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1650 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1651 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1652 				     OPEN_TREE_CLONE);
1653 	ASSERT_GE(fd_tree_base, 0);
1654 	/*
1655         * /mnt                   testing tmpfs
1656         * |-/mnt/A               testing tmpfs
1657         * | `-/mnt/A/AA          testing tmpfs
1658         * |   `-/mnt/A/AA/B      testing tmpfs
1659         * |     `-/mnt/A/AA/B/BB testing tmpfs
1660         * `-/mnt/B               testing ramfs
1661         */
1662 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1663 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1664 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1665 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1666 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1667 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1668 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1669 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1670 
1671 	EXPECT_EQ(prepare_unpriv_mountns(), 0);
1672 
1673 	/*
1674         * The caller entered a new mount namespace. They will have
1675         * CAP_SYS_ADMIN in the owning user namespace of their mount
1676         * namespace.
1677         *
1678         * However, the origin mount namespace of the anonymous mount
1679         * namespace of @fd_tree_base doesn't match the caller's mount
1680         * namespace anymore so creation of another detached mounts must
1681         * fail.
1682         */
1683 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1684 			               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1685 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1686 				       OPEN_TREE_CLONE);
1687 	ASSERT_LT(fd_tree_subdir, 0);
1688 	ASSERT_EQ(errno, EINVAL);
1689 }
1690 
1691 TEST_F(mount_setattr, open_tree_subfolder)
1692 {
1693 	int fd_context, fd_tmpfs, fd_tree;
1694 
1695 	fd_context = sys_fsopen("tmpfs", 0);
1696 	ASSERT_GE(fd_context, 0);
1697 
1698 	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
1699 
1700 	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
1701 	ASSERT_GE(fd_tmpfs, 0);
1702 
1703 	EXPECT_EQ(close(fd_context), 0);
1704 
1705 	ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
1706 
1707 	fd_tree = sys_open_tree(fd_tmpfs, "subdir",
1708 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1709 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1710 				OPEN_TREE_CLONE);
1711 	ASSERT_GE(fd_tree, 0);
1712 
1713 	EXPECT_EQ(close(fd_tmpfs), 0);
1714 
1715 	ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
1716 
1717 	ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
1718 
1719 	EXPECT_EQ(close(fd_tree), 0);
1720 
1721 	ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
1722 
1723 	EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
1724 }
1725 
1726 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
1727 {
1728 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1729 	struct statx stx;
1730 
1731 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1732 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1733 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1734 	ASSERT_GE(fd_tree_base, 0);
1735 	/*
1736 	 * /mnt testing tmpfs
1737 	 */
1738 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1739 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1740 
1741 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1742 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1743 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1744 				       OPEN_TREE_CLONE);
1745 	ASSERT_GE(fd_tree_subdir, 0);
1746 	/*
1747 	 * /mnt testing tmpfs
1748 	 */
1749 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1750 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1751 
1752 	/*
1753 	 * /mnt   testing tmpfs
1754 	 * `-/mnt testing tmpfs
1755 	 */
1756 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1757 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
1758 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1759 
1760 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1761 
1762 	EXPECT_EQ(close(fd_tree_base), 0);
1763 	EXPECT_EQ(close(fd_tree_subdir), 0);
1764 }
1765 
1766 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
1767 {
1768 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1769 	struct statx stx;
1770 	__u64 mnt_id = 0;
1771 
1772 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1773 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1774 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1775 	ASSERT_GE(fd_tree_base, 0);
1776 	/*
1777 	 * /mnt testing tmpfs
1778 	 */
1779 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1780 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1781 
1782 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1783 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1784 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1785 				       OPEN_TREE_CLONE);
1786 	ASSERT_GE(fd_tree_subdir, 0);
1787 	/*
1788 	 * /mnt testing tmpfs
1789 	 */
1790 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1791 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1792 
1793 	/*
1794 	 * /mnt   testing tmpfs
1795 	 * `-/mnt testing tmpfs
1796 	 */
1797 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1798 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
1799 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1800 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1801 	mnt_id = stx.stx_mnt_id;
1802 
1803 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1804 
1805 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1806 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
1807 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1808 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1809 	ASSERT_EQ(stx.stx_mnt_id, mnt_id);
1810 
1811 	EXPECT_EQ(close(fd_tree_base), 0);
1812 	EXPECT_EQ(close(fd_tree_subdir), 0);
1813 }
1814 
1815 TEST_F(mount_setattr, move_mount_detached_fail)
1816 {
1817 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1818 	struct statx stx;
1819 
1820 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1821 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1822 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1823 	ASSERT_GE(fd_tree_base, 0);
1824 
1825 	/* Attach the mount to the caller's mount namespace. */
1826 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1827 
1828 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1829 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1830 
1831 	fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
1832 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1833 				       OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1834 	ASSERT_GE(fd_tree_subdir, 0);
1835 	ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
1836 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1837 
1838 	/* Not allowed to move an attached mount to a detached mount. */
1839 	ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1840 	ASSERT_EQ(errno, EINVAL);
1841 
1842 	EXPECT_EQ(close(fd_tree_base), 0);
1843 	EXPECT_EQ(close(fd_tree_subdir), 0);
1844 }
1845 
1846 TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
1847 {
1848 	int fd_tree = -EBADF;
1849 	struct statx stx;
1850 
1851 	fd_tree = sys_open_tree(-EBADF, "/mnt",
1852 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1853 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1854 				OPEN_TREE_CLONE);
1855 	ASSERT_GE(fd_tree, 0);
1856 
1857 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
1858 	/* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
1859 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1860 
1861 	/* Attach the mount to the caller's mount namespace. */
1862 	ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1863 
1864 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1865 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1866 
1867 	ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
1868 
1869 	/*
1870 	 * This tests whether dissolve_on_fput() handles a NULL mount
1871 	 * namespace correctly, i.e., that it doesn't splat.
1872 	 */
1873 	EXPECT_EQ(close(fd_tree), 0);
1874 }
1875 
1876 TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
1877 {
1878 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
1879 
1880 	/*
1881 	 * |-/mnt/A               testing tmpfs
1882 	 *   `-/mnt/A/AA          testing tmpfs
1883 	 *     `-/mnt/A/AA/B      testing tmpfs
1884 	 *       `-/mnt/A/AA/B/BB testing tmpfs
1885 	 */
1886 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
1887 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1888 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1889 				 OPEN_TREE_CLONE);
1890 	ASSERT_GE(fd_tree1, 0);
1891 
1892 	/*
1893 	 * `-/mnt/B testing ramfs
1894 	 */
1895 	fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
1896 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1897 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1898 				 OPEN_TREE_CLONE);
1899 	ASSERT_GE(fd_tree2, 0);
1900 
1901 	/*
1902 	 * Move the source detached mount tree to the target detached
1903 	 * mount tree. This will move all the mounts in the source mount
1904 	 * tree from the source anonymous mount namespace to the target
1905 	 * anonymous mount namespace.
1906 	 *
1907 	 * The source detached mount tree and the target detached mount
1908 	 * tree now both refer to the same anonymous mount namespace.
1909 	 *
1910 	 * |-""                 testing ramfs
1911 	 *   `-""               testing tmpfs
1912 	 *     `-""/AA          testing tmpfs
1913 	 *       `-""/AA/B      testing tmpfs
1914 	 *         `-""/AA/B/BB testing tmpfs
1915 	 */
1916 	ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1917 
1918 	/*
1919 	 * The source detached mount tree @fd_tree1 is now an attached
1920 	 * mount, i.e., it has a parent. Specifically, it now has the
1921 	 * root mount of the mount tree of @fd_tree2 as its parent.
1922 	 *
1923 	 * That means we are no longer allowed to attach it as we only
1924 	 * allow attaching the root of an anonymous mount tree, not
1925 	 * random bits and pieces. Verify that the kernel enforces this.
1926 	 */
1927 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1928 
1929 	/*
1930 	 * Closing the source detached mount tree must not unmount and
1931 	 * free the shared anonymous mount namespace. The kernel will
1932 	 * quickly yell at us because the anonymous mount namespace
1933 	 * won't be empty when it's freed.
1934 	 */
1935 	EXPECT_EQ(close(fd_tree1), 0);
1936 
1937 	/*
1938 	 * Attach the mount tree to a non-anonymous mount namespace.
1939 	 * This can only succeed if closing fd_tree1 had proper
1940 	 * semantics and didn't cause the anonymous mount namespace to
1941 	 * be freed. If it did this will trigger a UAF which will be
1942 	 * visible on any KASAN enabled kernel.
1943 	 *
1944 	 * |-/tmp/target1                 testing ramfs
1945 	 *   `-/tmp/target1               testing tmpfs
1946 	 *     `-/tmp/target1/AA          testing tmpfs
1947 	 *       `-/tmp/target1/AA/B      testing tmpfs
1948 	 *         `-/tmp/target1/AA/B/BB testing tmpfs
1949 	 */
1950 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1951 	EXPECT_EQ(close(fd_tree2), 0);
1952 }
1953 
1954 TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
1955 {
1956 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
1957 
1958 	/*
1959 	 * Copy the following mount tree:
1960 	 *
1961 	 * |-/mnt/A               testing tmpfs
1962 	 *   `-/mnt/A/AA          testing tmpfs
1963 	 *     `-/mnt/A/AA/B      testing tmpfs
1964 	 *       `-/mnt/A/AA/B/BB testing tmpfs
1965 	 */
1966 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
1967 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1968 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1969 				 OPEN_TREE_CLONE);
1970 	ASSERT_GE(fd_tree1, 0);
1971 
1972 	/*
1973 	 * Create an O_PATH file descriptors with a separate struct file
1974 	 * that refers to the same detached mount tree as @fd_tree1
1975 	 */
1976 	fd_tree2 = sys_open_tree(fd_tree1, "",
1977 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1978 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
1979 	ASSERT_GE(fd_tree2, 0);
1980 
1981 	/*
1982 	 * Copy the following mount tree:
1983 	 *
1984 	 * |-/tmp/target1               testing tmpfs
1985 	 *   `-/tmp/target1/AA          testing tmpfs
1986 	 *     `-/tmp/target1/AA/B      testing tmpfs
1987 	 *       `-/tmp/target1/AA/B/BB testing tmpfs
1988 	 */
1989 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1990 
1991 	/*
1992 	 * This must fail as this would mean adding the same mount tree
1993 	 * into the same mount tree.
1994 	 */
1995 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1996 }
1997 
1998 TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
1999 {
2000 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2001 
2002 	/*
2003 	 * Copy the following mount tree:
2004 	 *
2005 	 * |-/mnt/A               testing tmpfs
2006 	 *   `-/mnt/A/AA          testing tmpfs
2007 	 *     `-/mnt/A/AA/B      testing tmpfs
2008 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2009 	 */
2010 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2011 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2012 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2013 				 OPEN_TREE_CLONE);
2014 	ASSERT_GE(fd_tree1, 0);
2015 
2016 	/*
2017 	 * Create an O_PATH file descriptors with a separate struct file that
2018 	 * refers to a subtree of the same detached mount tree as @fd_tree1
2019 	 */
2020 	fd_tree2 = sys_open_tree(fd_tree1, "AA",
2021 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2022 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2023 	ASSERT_GE(fd_tree2, 0);
2024 
2025 	/*
2026 	 * This must fail as it is only possible to attach the root of a
2027 	 * detached mount tree.
2028 	 */
2029 	ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2030 
2031 	ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2032 }
2033 
2034 TEST_F(mount_setattr, detached_tree_propagation)
2035 {
2036 	int fd_tree = -EBADF;
2037 	struct statx stx1, stx2, stx3, stx4;
2038 
2039 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
2040 	ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
2041 
2042 	/*
2043 	 * Copy the following mount tree:
2044 	 *
2045          * /mnt                   testing tmpfs
2046          * |-/mnt/A               testing tmpfs
2047          * | `-/mnt/A/AA          testing tmpfs
2048          * |   `-/mnt/A/AA/B      testing tmpfs
2049          * |     `-/mnt/A/AA/B/BB testing tmpfs
2050          * `-/mnt/B               testing ramfs
2051 	 */
2052 	fd_tree = sys_open_tree(-EBADF, "/mnt",
2053 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2054 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2055 				 OPEN_TREE_CLONE);
2056 	ASSERT_GE(fd_tree, 0);
2057 
2058 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
2059 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
2060 
2061 	/*
2062 	 * Copying the mount namespace like done above doesn't alter the
2063 	 * mounts in any way so the filesystem mounted on /mnt must be
2064 	 * identical even though the mounts will differ. Use the device
2065 	 * information to verify that. Note that tmpfs will have a 0
2066 	 * major number so comparing the major number is misleading.
2067 	 */
2068 	ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
2069 
2070 	/* Mount a tmpfs filesystem over /mnt/A. */
2071 	ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
2072 
2073 
2074 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
2075 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
2076 
2077 	/*
2078 	 * A new filesystem has been mounted on top of /mnt/A which
2079 	 * means that the device information will be different for any
2080 	 * statx() that was taken from /mnt/A before the mount compared
2081 	 * to one after the mount.
2082 	 */
2083 	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
2084 	ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
2085 
2086 	EXPECT_EQ(close(fd_tree), 0);
2087 }
2088 
2089 TEST_HARNESS_MAIN
2090