xref: /linux/tools/testing/selftests/mount_setattr/mount_setattr_test.c (revision 055f213075fbfa8e950bed8f2c50d01ac71bbf37)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <stdio.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <string.h>
8 #include <sys/stat.h>
9 #include <sys/types.h>
10 #include <sys/mount.h>
11 #include <sys/wait.h>
12 #include <sys/vfs.h>
13 #include <sys/statvfs.h>
14 #include <sys/sysinfo.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <fcntl.h>
18 #include <grp.h>
19 #include <stdbool.h>
20 #include <stdarg.h>
21 #include <linux/mount.h>
22 
23 #include "../filesystems/wrappers.h"
24 #include "../kselftest_harness.h"
25 
/*
 * Fallback definitions for constants the included headers may not provide.
 * Every value below is only defined when no earlier header supplied it.
 */

/* Namespace flags passed to unshare() in this file. */
#if !defined(CLONE_NEWNS)
#define CLONE_NEWNS 0x00020000
#endif

#if !defined(CLONE_NEWUSER)
#define CLONE_NEWUSER 0x10000000
#endif

/* MS_* flags used with mount() and as expected values of read_mnt_flags(). */
#if !defined(MS_REC)
#define MS_REC 16384
#endif

#if !defined(MS_RELATIME)
#define MS_RELATIME (1 << 21)
#endif

#if !defined(MS_STRICTATIME)
#define MS_STRICTATIME (1 << 24)
#endif

#if !defined(MS_SHARED)
#define MS_SHARED (1 << 20)
#endif

/* MOUNT_ATTR_* bits placed in struct mount_attr attr_set / attr_clr. */
#if !defined(MOUNT_ATTR_RDONLY)
#define MOUNT_ATTR_RDONLY 0x00000001
#endif

#if !defined(MOUNT_ATTR_NOSUID)
#define MOUNT_ATTR_NOSUID 0x00000002
#endif

#if !defined(MOUNT_ATTR_NOEXEC)
#define MOUNT_ATTR_NOEXEC 0x00000008
#endif

#if !defined(MOUNT_ATTR_NODIRATIME)
#define MOUNT_ATTR_NODIRATIME 0x00000080
#endif

/* Mask covering the access-time values defined right below it. */
#if !defined(MOUNT_ATTR__ATIME)
#define MOUNT_ATTR__ATIME 0x00000070
#endif

#if !defined(MOUNT_ATTR_RELATIME)
#define MOUNT_ATTR_RELATIME 0x00000000
#endif

#if !defined(MOUNT_ATTR_NOATIME)
#define MOUNT_ATTR_NOATIME 0x00000010
#endif

#if !defined(MOUNT_ATTR_STRICTATIME)
#define MOUNT_ATTR_STRICTATIME 0x00000020
#endif

/* Flag passed to sys_mount_setattr() to apply a change to a whole mount tree. */
#if !defined(AT_RECURSIVE)
#define AT_RECURSIVE 0x8000
#endif

/* Upper bound on the number of threads started by the multi_threaded test. */
#define DEFAULT_THREADS 4

/* Carry an int status through the void * value a thread exits with. */
#define ptr_to_int(p) ((int)((intptr_t)(p)))
#define int_to_ptr(u) ((void *)((intptr_t)(u)))
89 
/*
 * Syscall number used by sys_mount_setattr() when the system headers do not
 * define __NR_mount_setattr. Alpha, MIPS (per ABI) and ia64 get their own
 * values; every other architecture uses 442.
 */
#if !defined(__NR_mount_setattr)
	#if defined(__alpha__)
		#define __NR_mount_setattr 552
	#elif defined(_MIPS_SIM)
		/* On MIPS the number is offset by an ABI-specific base. */
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_mount_setattr (442 + 4000)
		#elif _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_mount_setattr (442 + 6000)
		#elif _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_mount_setattr (442 + 5000)
		#endif
	#elif defined(__ia64__)
		#define __NR_mount_setattr (442 + 1024)
	#else
		#define __NR_mount_setattr 442
	#endif
#endif
109 
/*
 * Syscall number used by sys_open_tree_attr() when the system headers do not
 * define __NR_open_tree_attr. Alpha, MIPS (per ABI) and ia64 get their own
 * values; every other architecture uses 467.
 */
#if !defined(__NR_open_tree_attr)
	#if defined(__alpha__)
		#define __NR_open_tree_attr 577
	#elif defined(_MIPS_SIM)
		/* On MIPS the number is offset by an ABI-specific base. */
		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
			#define __NR_open_tree_attr (467 + 4000)
		#elif _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
			#define __NR_open_tree_attr (467 + 6000)
		#elif _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
			#define __NR_open_tree_attr (467 + 5000)
		#endif
	#elif defined(__ia64__)
		#define __NR_open_tree_attr (467 + 1024)
	#else
		#define __NR_open_tree_attr 467
	#endif
#endif
129 
/* Fallbacks for the two newer MOUNT_ATTR_* bits used further down. */
#if !defined(MOUNT_ATTR_IDMAP)
#define MOUNT_ATTR_IDMAP 0x00100000
#endif

#if !defined(MOUNT_ATTR_NOSYMFOLLOW)
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
137 
/*
 * Raw wrapper around the mount_setattr system call.
 *
 * All five arguments are forwarded unchanged to syscall() under
 * __NR_mount_setattr; the syscall() result is returned narrowed to int.
 */
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
				    struct mount_attr *attr, size_t size)
{
	long rc;

	rc = syscall(__NR_mount_setattr, dfd, path, flags, attr, size);

	return (int)rc;
}
143 
sys_open_tree_attr(int dfd,const char * path,unsigned int flags,struct mount_attr * attr,size_t size)144 static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags,
145 				     struct mount_attr *attr, size_t size)
146 {
147 	return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size);
148 }
149 
/*
 * write() that is retried for as long as it fails with EINTR.
 *
 * Returns the result of the last write() call: the number of bytes written
 * (which may be less than @count), or a negative value with errno set to
 * something other than EINTR.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t written = write(fd, buf, count);

		/* Anything except an interrupted call ends the loop. */
		if (written >= 0 || errno != EINTR)
			return written;
	}
}
160 
write_file(const char * path,const void * buf,size_t count)161 static int write_file(const char *path, const void *buf, size_t count)
162 {
163 	int fd;
164 	ssize_t ret;
165 
166 	fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
167 	if (fd < 0)
168 		return -1;
169 
170 	ret = write_nointr(fd, buf, count);
171 	close(fd);
172 	if (ret < 0 || (size_t)ret != count)
173 		return -1;
174 
175 	return 0;
176 }
177 
create_and_enter_userns(void)178 static int create_and_enter_userns(void)
179 {
180 	uid_t uid;
181 	gid_t gid;
182 	char map[100];
183 
184 	uid = getuid();
185 	gid = getgid();
186 
187 	if (unshare(CLONE_NEWUSER))
188 		return -1;
189 
190 	if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
191 	    errno != ENOENT)
192 		return -1;
193 
194 	snprintf(map, sizeof(map), "0 %d 1", uid);
195 	if (write_file("/proc/self/uid_map", map, strlen(map)))
196 		return -1;
197 
198 
199 	snprintf(map, sizeof(map), "0 %d 1", gid);
200 	if (write_file("/proc/self/gid_map", map, strlen(map)))
201 		return -1;
202 
203 	if (setgid(0))
204 		return -1;
205 
206 	if (setuid(0))
207 		return -1;
208 
209 	return 0;
210 }
211 
prepare_unpriv_mountns(void)212 static int prepare_unpriv_mountns(void)
213 {
214 	if (create_and_enter_userns())
215 		return -1;
216 
217 	if (unshare(CLONE_NEWNS))
218 		return -1;
219 
220 	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
221 		return -1;
222 
223 	return 0;
224 }
225 
#if !defined(ST_NOSYMFOLLOW)
#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
#endif

/*
 * Report the mount flags of the filesystem containing @path.
 *
 * The f_flag field returned by statvfs() is translated bit by bit using the
 * table below. Returns the translated flags, or -EINVAL when statvfs() fails
 * or f_flag contains a bit that is not listed in the table.
 */
static int read_mnt_flags(const char *path)
{
	/* statvfs ST_* bit -> value OR-ed into the result when that bit is set. */
	static const struct {
		unsigned long st_bit;
		unsigned int out_bit;
	} translations[] = {
		{ ST_RDONLY,		MS_RDONLY },
		{ ST_NOSUID,		MS_NOSUID },
		{ ST_NODEV,		MS_NODEV },
		{ ST_NOEXEC,		MS_NOEXEC },
		{ ST_NOATIME,		MS_NOATIME },
		{ ST_NODIRATIME,	MS_NODIRATIME },
		{ ST_RELATIME,		MS_RELATIME },
		{ ST_SYNCHRONOUS,	MS_SYNCHRONOUS },
		/* These two are reported using the ST_* value itself. */
		{ ST_MANDLOCK,		ST_MANDLOCK },
		{ ST_NOSYMFOLLOW,	ST_NOSYMFOLLOW },
	};
	const size_t ntranslations = sizeof(translations) / sizeof(translations[0]);
	struct statvfs vfs;
	unsigned long recognised = 0;
	unsigned int mnt_flags = 0;
	size_t i;

	if (statvfs(path, &vfs) != 0)
		return -EINVAL;

	for (i = 0; i < ntranslations; i++)
		recognised |= translations[i].st_bit;

	/* Refuse to translate a flag word containing bits we do not know. */
	if (vfs.f_flag & ~recognised)
		return -EINVAL;

	for (i = 0; i < ntranslations; i++) {
		if (vfs.f_flag & translations[i].st_bit)
			mnt_flags |= translations[i].out_bit;
	}

	return mnt_flags;
}
269 
/*
 * Skip @nfields space- or tab-terminated fields starting at @src.
 *
 * Each field ends at the next ' ' or '\t'; exactly one delimiter character
 * is consumed after it. Returns a pointer to the character following the
 * last skipped delimiter, or to the terminating NUL if the string ends
 * first. Never returns NULL; with @nfields <= 0 the result is @src itself.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int remaining;

	for (remaining = nfields; remaining > 0; remaining--) {
		/* Jump to the delimiter ending the current field. */
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;

		cursor++;
	}

	return cursor;
}
287 
/*
 * Cut @word off at its first space or tab by storing a NUL there. When the
 * string contains neither, the NUL is written over the existing terminator.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
294 
/*
 * Check whether the mount whose mount point is exactly @path is shared.
 *
 * Scans /proc/self/mountinfo line by line. For the first line whose fifth
 * field equals @path and whose seventh field contains "shared:", true is
 * returned. Returns false when no such line exists or the file cannot be
 * opened.
 */
static bool is_shared_mount(const char *path)
{
	size_t len = 0;
	char *line = NULL;
	bool shared = false;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "re");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *opts, *target;

		/*
		 * Locate both fields before terminating either one:
		 * get_field() stops at a NUL, so @opts could no longer be
		 * found once @target has been cut off.
		 */
		target = get_field(line, 4);
		opts = get_field(target, 2);

		null_endofword(target);

		if (strcmp(target, path) != 0)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared:")) {
			/* Leave through the common exit so nothing is leaked. */
			shared = true;
			break;
		}
	}

	free(line);
	fclose(f);

	return shared;
}
331 
mount_setattr_thread(void * data)332 static void *mount_setattr_thread(void *data)
333 {
334 	struct mount_attr attr = {
335 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
336 		.attr_clr	= 0,
337 		.propagation	= MS_SHARED,
338 	};
339 
340 	if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
341 		pthread_exit(int_to_ptr(-1));
342 
343 	pthread_exit(int_to_ptr(0));
344 }
345 
/*
 * Attempt to de-conflict with the selftests tree: when the harness did not
 * provide SKIP(), map it onto XFAIL() with the same arguments.
 */
#if !defined(SKIP)
#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
#endif
350 
mount_setattr_supported(void)351 static bool mount_setattr_supported(void)
352 {
353 	int ret;
354 
355 	ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
356 	if (ret < 0 && errno == ENOSYS)
357 		return false;
358 
359 	return true;
360 }
361 
FIXTURE(mount_setattr)362 FIXTURE(mount_setattr) {
363 };
364 
365 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
366 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
367 
FIXTURE_SETUP(mount_setattr)368 FIXTURE_SETUP(mount_setattr)
369 {
370 	int fd = -EBADF;
371 
372 	if (!mount_setattr_supported())
373 		SKIP(return, "mount_setattr syscall not supported");
374 
375 	ASSERT_EQ(prepare_unpriv_mountns(), 0);
376 
377 	(void)umount2("/mnt", MNT_DETACH);
378 	(void)umount2("/tmp", MNT_DETACH);
379 
380 	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
381 			"size=100000,mode=700"), 0);
382 
383 	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
384 
385 	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
386 			"size=100000,mode=700"), 0);
387 
388 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
389 
390 	ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
391 
392 	ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
393 
394 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
395 			"size=100000,mode=700"), 0);
396 
397 	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
398 			"size=100000,mode=700"), 0);
399 
400 	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
401 
402 	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
403 			"size=100000,mode=700"), 0);
404 
405 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
406 
407 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
408 
409 	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
410 
411 	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
412 			MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
413 
414 	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
415 
416 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
417 			MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
418 
419 	fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
420 	ASSERT_GT(fd, 0);
421 	ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
422 	ASSERT_EQ(close(fd), 0);
423 }
424 
FIXTURE_TEARDOWN(mount_setattr)425 FIXTURE_TEARDOWN(mount_setattr)
426 {
427 	if (!mount_setattr_supported())
428 		SKIP(return, "mount_setattr syscall not supported");
429 
430 	(void)umount2("/mnt/A", MNT_DETACH);
431 	(void)umount2("/tmp", MNT_DETACH);
432 }
433 
TEST_F(mount_setattr,invalid_attributes)434 TEST_F(mount_setattr, invalid_attributes)
435 {
436 	struct mount_attr invalid_attr = {
437 		.attr_set = (1U << 31),
438 	};
439 
440 	if (!mount_setattr_supported())
441 		SKIP(return, "mount_setattr syscall not supported");
442 
443 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
444 				    sizeof(invalid_attr)), 0);
445 
446 	invalid_attr.attr_set	= 0;
447 	invalid_attr.attr_clr	= (1U << 31);
448 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
449 				    sizeof(invalid_attr)), 0);
450 
451 	invalid_attr.attr_clr		= 0;
452 	invalid_attr.propagation	= (1U << 31);
453 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
454 				    sizeof(invalid_attr)), 0);
455 
456 	invalid_attr.attr_set		= (1U << 31);
457 	invalid_attr.attr_clr		= (1U << 31);
458 	invalid_attr.propagation	= (1U << 31);
459 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
460 				    sizeof(invalid_attr)), 0);
461 
462 	ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
463 				    sizeof(invalid_attr)), 0);
464 }
465 
TEST_F(mount_setattr,extensibility)466 TEST_F(mount_setattr, extensibility)
467 {
468 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
469 	char *s = "dummy";
470 	struct mount_attr invalid_attr = {};
471 	struct mount_attr_large {
472 		struct mount_attr attr1;
473 		struct mount_attr attr2;
474 		struct mount_attr attr3;
475 	} large_attr = {};
476 
477 	if (!mount_setattr_supported())
478 		SKIP(return, "mount_setattr syscall not supported");
479 
480 	old_flags = read_mnt_flags("/mnt/A");
481 	ASSERT_GT(old_flags, 0);
482 
483 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
484 				    sizeof(invalid_attr)), 0);
485 	ASSERT_EQ(errno, EFAULT);
486 
487 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
488 				    sizeof(invalid_attr)), 0);
489 	ASSERT_EQ(errno, EINVAL);
490 
491 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
492 	ASSERT_EQ(errno, EINVAL);
493 
494 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
495 				    sizeof(invalid_attr) / 2), 0);
496 	ASSERT_EQ(errno, EINVAL);
497 
498 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
499 				    sizeof(invalid_attr) / 2), 0);
500 	ASSERT_EQ(errno, EINVAL);
501 
502 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
503 				    (void *)&large_attr, sizeof(large_attr)), 0);
504 
505 	large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
506 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
507 				    (void *)&large_attr, sizeof(large_attr)), 0);
508 
509 	large_attr.attr3.attr_set = 0;
510 	large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
511 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
512 				    (void *)&large_attr, sizeof(large_attr)), 0);
513 
514 	expected_flags = old_flags;
515 	expected_flags |= MS_RDONLY;
516 
517 	new_flags = read_mnt_flags("/mnt/A");
518 	ASSERT_EQ(new_flags, expected_flags);
519 
520 	new_flags = read_mnt_flags("/mnt/A/AA");
521 	ASSERT_EQ(new_flags, expected_flags);
522 
523 	new_flags = read_mnt_flags("/mnt/A/AA/B");
524 	ASSERT_EQ(new_flags, expected_flags);
525 
526 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
527 	ASSERT_EQ(new_flags, expected_flags);
528 }
529 
TEST_F(mount_setattr,basic)530 TEST_F(mount_setattr, basic)
531 {
532 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
533 	struct mount_attr attr = {
534 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
535 		.attr_clr	= MOUNT_ATTR__ATIME,
536 	};
537 
538 	if (!mount_setattr_supported())
539 		SKIP(return, "mount_setattr syscall not supported");
540 
541 	old_flags = read_mnt_flags("/mnt/A");
542 	ASSERT_GT(old_flags, 0);
543 
544 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
545 
546 	expected_flags = old_flags;
547 	expected_flags |= MS_RDONLY;
548 	expected_flags |= MS_NOEXEC;
549 	expected_flags &= ~MS_NOATIME;
550 	expected_flags |= MS_RELATIME;
551 
552 	new_flags = read_mnt_flags("/mnt/A");
553 	ASSERT_EQ(new_flags, expected_flags);
554 
555 	new_flags = read_mnt_flags("/mnt/A/AA");
556 	ASSERT_EQ(new_flags, old_flags);
557 
558 	new_flags = read_mnt_flags("/mnt/A/AA/B");
559 	ASSERT_EQ(new_flags, old_flags);
560 
561 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
562 	ASSERT_EQ(new_flags, old_flags);
563 }
564 
TEST_F(mount_setattr,basic_recursive)565 TEST_F(mount_setattr, basic_recursive)
566 {
567 	int fd;
568 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
569 	struct mount_attr attr = {
570 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
571 		.attr_clr	= MOUNT_ATTR__ATIME,
572 	};
573 
574 	if (!mount_setattr_supported())
575 		SKIP(return, "mount_setattr syscall not supported");
576 
577 	old_flags = read_mnt_flags("/mnt/A");
578 	ASSERT_GT(old_flags, 0);
579 
580 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
581 
582 	expected_flags = old_flags;
583 	expected_flags |= MS_RDONLY;
584 	expected_flags |= MS_NOEXEC;
585 	expected_flags &= ~MS_NOATIME;
586 	expected_flags |= MS_RELATIME;
587 
588 	new_flags = read_mnt_flags("/mnt/A");
589 	ASSERT_EQ(new_flags, expected_flags);
590 
591 	new_flags = read_mnt_flags("/mnt/A/AA");
592 	ASSERT_EQ(new_flags, expected_flags);
593 
594 	new_flags = read_mnt_flags("/mnt/A/AA/B");
595 	ASSERT_EQ(new_flags, expected_flags);
596 
597 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
598 	ASSERT_EQ(new_flags, expected_flags);
599 
600 	memset(&attr, 0, sizeof(attr));
601 	attr.attr_clr = MOUNT_ATTR_RDONLY;
602 	attr.propagation = MS_SHARED;
603 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
604 
605 	expected_flags &= ~MS_RDONLY;
606 	new_flags = read_mnt_flags("/mnt/A");
607 	ASSERT_EQ(new_flags, expected_flags);
608 
609 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
610 
611 	new_flags = read_mnt_flags("/mnt/A/AA");
612 	ASSERT_EQ(new_flags, expected_flags);
613 
614 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
615 
616 	new_flags = read_mnt_flags("/mnt/A/AA/B");
617 	ASSERT_EQ(new_flags, expected_flags);
618 
619 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
620 
621 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
622 	ASSERT_EQ(new_flags, expected_flags);
623 
624 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
625 
626 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
627 	ASSERT_GE(fd, 0);
628 
629 	/*
630 	 * We're holding a fd open for writing so this needs to fail somewhere
631 	 * in the middle and the mount options need to be unchanged.
632 	 */
633 	attr.attr_set = MOUNT_ATTR_RDONLY;
634 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
635 
636 	new_flags = read_mnt_flags("/mnt/A");
637 	ASSERT_EQ(new_flags, expected_flags);
638 
639 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
640 
641 	new_flags = read_mnt_flags("/mnt/A/AA");
642 	ASSERT_EQ(new_flags, expected_flags);
643 
644 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
645 
646 	new_flags = read_mnt_flags("/mnt/A/AA/B");
647 	ASSERT_EQ(new_flags, expected_flags);
648 
649 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
650 
651 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
652 	ASSERT_EQ(new_flags, expected_flags);
653 
654 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
655 
656 	EXPECT_EQ(close(fd), 0);
657 }
658 
TEST_F(mount_setattr,mount_has_writers)659 TEST_F(mount_setattr, mount_has_writers)
660 {
661 	int fd, dfd;
662 	unsigned int old_flags = 0, new_flags = 0;
663 	struct mount_attr attr = {
664 		.attr_set	= MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
665 		.attr_clr	= MOUNT_ATTR__ATIME,
666 		.propagation	= MS_SHARED,
667 	};
668 
669 	if (!mount_setattr_supported())
670 		SKIP(return, "mount_setattr syscall not supported");
671 
672 	old_flags = read_mnt_flags("/mnt/A");
673 	ASSERT_GT(old_flags, 0);
674 
675 	fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
676 	ASSERT_GE(fd, 0);
677 
678 	/*
679 	 * We're holding a fd open to a mount somwhere in the middle so this
680 	 * needs to fail somewhere in the middle. After this the mount options
681 	 * need to be unchanged.
682 	 */
683 	ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
684 
685 	new_flags = read_mnt_flags("/mnt/A");
686 	ASSERT_EQ(new_flags, old_flags);
687 
688 	ASSERT_EQ(is_shared_mount("/mnt/A"), false);
689 
690 	new_flags = read_mnt_flags("/mnt/A/AA");
691 	ASSERT_EQ(new_flags, old_flags);
692 
693 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
694 
695 	new_flags = read_mnt_flags("/mnt/A/AA/B");
696 	ASSERT_EQ(new_flags, old_flags);
697 
698 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
699 
700 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
701 	ASSERT_EQ(new_flags, old_flags);
702 
703 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
704 
705 	dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
706 	ASSERT_GE(dfd, 0);
707 	EXPECT_EQ(fsync(dfd), 0);
708 	EXPECT_EQ(close(dfd), 0);
709 
710 	EXPECT_EQ(fsync(fd), 0);
711 	EXPECT_EQ(close(fd), 0);
712 
713 	/* All writers are gone so this should succeed. */
714 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
715 }
716 
TEST_F(mount_setattr,mixed_mount_options)717 TEST_F(mount_setattr, mixed_mount_options)
718 {
719 	unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
720 	struct mount_attr attr = {
721 		.attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
722 		.attr_set = MOUNT_ATTR_RELATIME,
723 	};
724 
725 	if (!mount_setattr_supported())
726 		SKIP(return, "mount_setattr syscall not supported");
727 
728 	old_flags1 = read_mnt_flags("/mnt/B");
729 	ASSERT_GT(old_flags1, 0);
730 
731 	old_flags2 = read_mnt_flags("/mnt/B/BB");
732 	ASSERT_GT(old_flags2, 0);
733 
734 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
735 
736 	expected_flags = old_flags2;
737 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
738 	expected_flags |= MS_RELATIME;
739 
740 	new_flags = read_mnt_flags("/mnt/B");
741 	ASSERT_EQ(new_flags, expected_flags);
742 
743 	expected_flags = old_flags2;
744 	expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
745 	expected_flags |= MS_RELATIME;
746 
747 	new_flags = read_mnt_flags("/mnt/B/BB");
748 	ASSERT_EQ(new_flags, expected_flags);
749 }
750 
TEST_F(mount_setattr,time_changes)751 TEST_F(mount_setattr, time_changes)
752 {
753 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
754 	struct mount_attr attr = {
755 		.attr_set	= MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
756 	};
757 
758 	if (!mount_setattr_supported())
759 		SKIP(return, "mount_setattr syscall not supported");
760 
761 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
762 
763 	attr.attr_set = MOUNT_ATTR_STRICTATIME;
764 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
765 
766 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
767 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
768 
769 	attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
770 	attr.attr_clr = MOUNT_ATTR__ATIME;
771 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
772 
773 	attr.attr_set = 0;
774 	attr.attr_clr = MOUNT_ATTR_STRICTATIME;
775 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
776 
777 	attr.attr_clr = MOUNT_ATTR_NOATIME;
778 	ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
779 
780 	old_flags = read_mnt_flags("/mnt/A");
781 	ASSERT_GT(old_flags, 0);
782 
783 	attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
784 	attr.attr_clr = MOUNT_ATTR__ATIME;
785 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
786 
787 	expected_flags = old_flags;
788 	expected_flags |= MS_NOATIME;
789 	expected_flags |= MS_NODIRATIME;
790 
791 	new_flags = read_mnt_flags("/mnt/A");
792 	ASSERT_EQ(new_flags, expected_flags);
793 
794 	new_flags = read_mnt_flags("/mnt/A/AA");
795 	ASSERT_EQ(new_flags, expected_flags);
796 
797 	new_flags = read_mnt_flags("/mnt/A/AA/B");
798 	ASSERT_EQ(new_flags, expected_flags);
799 
800 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
801 	ASSERT_EQ(new_flags, expected_flags);
802 
803 	memset(&attr, 0, sizeof(attr));
804 	attr.attr_set &= ~MOUNT_ATTR_NOATIME;
805 	attr.attr_set |= MOUNT_ATTR_RELATIME;
806 	attr.attr_clr |= MOUNT_ATTR__ATIME;
807 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
808 
809 	expected_flags &= ~MS_NOATIME;
810 	expected_flags |= MS_RELATIME;
811 
812 	new_flags = read_mnt_flags("/mnt/A");
813 	ASSERT_EQ(new_flags, expected_flags);
814 
815 	new_flags = read_mnt_flags("/mnt/A/AA");
816 	ASSERT_EQ(new_flags, expected_flags);
817 
818 	new_flags = read_mnt_flags("/mnt/A/AA/B");
819 	ASSERT_EQ(new_flags, expected_flags);
820 
821 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
822 	ASSERT_EQ(new_flags, expected_flags);
823 
824 	memset(&attr, 0, sizeof(attr));
825 	attr.attr_set &= ~MOUNT_ATTR_RELATIME;
826 	attr.attr_set |= MOUNT_ATTR_STRICTATIME;
827 	attr.attr_clr |= MOUNT_ATTR__ATIME;
828 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
829 
830 	expected_flags &= ~MS_RELATIME;
831 
832 	new_flags = read_mnt_flags("/mnt/A");
833 	ASSERT_EQ(new_flags, expected_flags);
834 
835 	new_flags = read_mnt_flags("/mnt/A/AA");
836 	ASSERT_EQ(new_flags, expected_flags);
837 
838 	new_flags = read_mnt_flags("/mnt/A/AA/B");
839 	ASSERT_EQ(new_flags, expected_flags);
840 
841 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
842 	ASSERT_EQ(new_flags, expected_flags);
843 
844 	memset(&attr, 0, sizeof(attr));
845 	attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
846 	attr.attr_set |= MOUNT_ATTR_NOATIME;
847 	attr.attr_clr |= MOUNT_ATTR__ATIME;
848 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
849 
850 	expected_flags |= MS_NOATIME;
851 	new_flags = read_mnt_flags("/mnt/A");
852 	ASSERT_EQ(new_flags, expected_flags);
853 
854 	new_flags = read_mnt_flags("/mnt/A/AA");
855 	ASSERT_EQ(new_flags, expected_flags);
856 
857 	new_flags = read_mnt_flags("/mnt/A/AA/B");
858 	ASSERT_EQ(new_flags, expected_flags);
859 
860 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
861 	ASSERT_EQ(new_flags, expected_flags);
862 
863 	memset(&attr, 0, sizeof(attr));
864 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
865 
866 	new_flags = read_mnt_flags("/mnt/A");
867 	ASSERT_EQ(new_flags, expected_flags);
868 
869 	new_flags = read_mnt_flags("/mnt/A/AA");
870 	ASSERT_EQ(new_flags, expected_flags);
871 
872 	new_flags = read_mnt_flags("/mnt/A/AA/B");
873 	ASSERT_EQ(new_flags, expected_flags);
874 
875 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
876 	ASSERT_EQ(new_flags, expected_flags);
877 
878 	memset(&attr, 0, sizeof(attr));
879 	attr.attr_clr = MOUNT_ATTR_NODIRATIME;
880 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
881 
882 	expected_flags &= ~MS_NODIRATIME;
883 
884 	new_flags = read_mnt_flags("/mnt/A");
885 	ASSERT_EQ(new_flags, expected_flags);
886 
887 	new_flags = read_mnt_flags("/mnt/A/AA");
888 	ASSERT_EQ(new_flags, expected_flags);
889 
890 	new_flags = read_mnt_flags("/mnt/A/AA/B");
891 	ASSERT_EQ(new_flags, expected_flags);
892 
893 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
894 	ASSERT_EQ(new_flags, expected_flags);
895 }
896 
TEST_F(mount_setattr,multi_threaded)897 TEST_F(mount_setattr, multi_threaded)
898 {
899 	int i, j, nthreads, ret = 0;
900 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
901 	pthread_attr_t pattr;
902 	pthread_t threads[DEFAULT_THREADS];
903 
904 	if (!mount_setattr_supported())
905 		SKIP(return, "mount_setattr syscall not supported");
906 
907 	old_flags = read_mnt_flags("/mnt/A");
908 	ASSERT_GT(old_flags, 0);
909 
910 	/* Try to change mount options from multiple threads. */
911 	nthreads = get_nprocs_conf();
912 	if (nthreads > DEFAULT_THREADS)
913 		nthreads = DEFAULT_THREADS;
914 
915 	pthread_attr_init(&pattr);
916 	for (i = 0; i < nthreads; i++)
917 		ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
918 
919 	for (j = 0; j < i; j++) {
920 		void *retptr = NULL;
921 
922 		EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
923 
924 		ret += ptr_to_int(retptr);
925 		EXPECT_EQ(ret, 0);
926 	}
927 	pthread_attr_destroy(&pattr);
928 
929 	ASSERT_EQ(ret, 0);
930 
931 	expected_flags = old_flags;
932 	expected_flags |= MS_RDONLY;
933 	expected_flags |= MS_NOSUID;
934 	new_flags = read_mnt_flags("/mnt/A");
935 	ASSERT_EQ(new_flags, expected_flags);
936 
937 	ASSERT_EQ(is_shared_mount("/mnt/A"), true);
938 
939 	new_flags = read_mnt_flags("/mnt/A/AA");
940 	ASSERT_EQ(new_flags, expected_flags);
941 
942 	ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
943 
944 	new_flags = read_mnt_flags("/mnt/A/AA/B");
945 	ASSERT_EQ(new_flags, expected_flags);
946 
947 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
948 
949 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
950 	ASSERT_EQ(new_flags, expected_flags);
951 
952 	ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
953 }
954 
TEST_F(mount_setattr,wrong_user_namespace)955 TEST_F(mount_setattr, wrong_user_namespace)
956 {
957 	int ret;
958 	struct mount_attr attr = {
959 		.attr_set = MOUNT_ATTR_RDONLY,
960 	};
961 
962 	if (!mount_setattr_supported())
963 		SKIP(return, "mount_setattr syscall not supported");
964 
965 	EXPECT_EQ(create_and_enter_userns(), 0);
966 	ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
967 	ASSERT_LT(ret, 0);
968 	ASSERT_EQ(errno, EPERM);
969 }
970 
TEST_F(mount_setattr,wrong_mount_namespace)971 TEST_F(mount_setattr, wrong_mount_namespace)
972 {
973 	int fd, ret;
974 	struct mount_attr attr = {
975 		.attr_set = MOUNT_ATTR_RDONLY,
976 	};
977 
978 	if (!mount_setattr_supported())
979 		SKIP(return, "mount_setattr syscall not supported");
980 
981 	fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
982 	ASSERT_GE(fd, 0);
983 
984 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
985 
986 	ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
987 	ASSERT_LT(ret, 0);
988 	ASSERT_EQ(errno, EINVAL);
989 }
990 
FIXTURE(mount_setattr_idmapped)991 FIXTURE(mount_setattr_idmapped) {
992 };
993 
/*
 * Build the mount layout shared by all mount_setattr_idmapped tests inside a
 * new mount namespace:
 *
 *   /tmp, /tmp/B, /tmp/B/BB   stacked tmpfs mounts (devpts is later mounted
 *                             on top of /tmp/B/BB)
 *   /mnt, /mnt/A              tmpfs mounts
 *   /mnt/A/AA                 recursive bind mount of /tmp
 *   /mnt/B                    ramfs
 *   /mnt/D                    ext4 on a loop device backed by /mnt/C/ext4.img
 *
 * The files /tmp/B/b and /tmp/B/BB/b are created (and chowned to 0:0) before
 * another filesystem is mounted on top of their parent directory.
 */
FIXTURE_SETUP(mount_setattr_idmapped)
{
	const unsigned long tmpfs_flags = MS_NOATIME | MS_NODEV;
	const char *small_tmpfs = "size=100000,mode=700";
	int img_fd = -EBADF;

	ASSERT_EQ(unshare(CLONE_NEWNS), 0);

	/* Switch the whole tree to private propagation. */
	ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL), 0);

	/* Best effort: the results of these detaches are deliberately ignored. */
	(void)umount2("/mnt", MNT_DETACH);
	(void)umount2("/tmp", MNT_DETACH);

	/* /tmp hierarchy. */
	ASSERT_EQ(mount("testing", "/tmp", "tmpfs", tmpfs_flags, small_tmpfs), 0);

	ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", tmpfs_flags, small_tmpfs), 0);

	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
	ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
	ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", tmpfs_flags, small_tmpfs), 0);

	/* /mnt hierarchy. */
	ASSERT_EQ(mount("testing", "/mnt", "tmpfs", tmpfs_flags, "size=2m,mode=700"), 0);

	ASSERT_EQ(mkdir("/mnt/A", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", tmpfs_flags, small_tmpfs), 0);

	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);

	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B", 0777), 0);

	ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
			tmpfs_flags | MS_NOSUID, NULL), 0);

	ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);

	ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
			MS_RELATIME | MS_NOEXEC | MS_RDONLY, NULL), 0);

	/* Create a 2 GB image file, format it as ext4 and loop-mount it on /mnt/D. */
	ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
	ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
	img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
	ASSERT_GE(img_fd, 0);
	ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0);
	ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
	ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
	ASSERT_EQ(close(img_fd), 0);
}
1053 
/*
 * Lazily detach the mounts at /mnt/A and /tmp. Both calls are best effort:
 * their return values are ignored.
 */
FIXTURE_TEARDOWN(mount_setattr_idmapped)
{
	static const char *const targets[] = { "/mnt/A", "/tmp" };
	size_t i;

	for (i = 0; i < sizeof(targets) / sizeof(targets[0]); i++)
		(void)umount2(targets[i], MNT_DETACH);
}
1059 
1060 /**
1061  * Validate that negative fd values are rejected.
1062  */
TEST_F(mount_setattr_idmapped,invalid_fd_negative)1063 TEST_F(mount_setattr_idmapped, invalid_fd_negative)
1064 {
1065 	struct mount_attr attr = {
1066 		.attr_set	= MOUNT_ATTR_IDMAP,
1067 		.userns_fd	= -EBADF,
1068 	};
1069 
1070 	if (!mount_setattr_supported())
1071 		SKIP(return, "mount_setattr syscall not supported");
1072 
1073 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1074 		TH_LOG("failure: created idmapped mount with negative fd");
1075 	}
1076 }
1077 
1078 /**
1079  * Validate that excessively large fd values are rejected.
1080  */
TEST_F(mount_setattr_idmapped,invalid_fd_large)1081 TEST_F(mount_setattr_idmapped, invalid_fd_large)
1082 {
1083 	struct mount_attr attr = {
1084 		.attr_set	= MOUNT_ATTR_IDMAP,
1085 		.userns_fd	= INT64_MAX,
1086 	};
1087 
1088 	if (!mount_setattr_supported())
1089 		SKIP(return, "mount_setattr syscall not supported");
1090 
1091 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1092 		TH_LOG("failure: created idmapped mount with too large fd value");
1093 	}
1094 }
1095 
1096 /**
1097  * Validate that closed fd values are rejected.
1098  */
TEST_F(mount_setattr_idmapped,invalid_fd_closed)1099 TEST_F(mount_setattr_idmapped, invalid_fd_closed)
1100 {
1101 	int fd;
1102 	struct mount_attr attr = {
1103 		.attr_set = MOUNT_ATTR_IDMAP,
1104 	};
1105 
1106 	if (!mount_setattr_supported())
1107 		SKIP(return, "mount_setattr syscall not supported");
1108 
1109 	fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
1110 	ASSERT_GE(fd, 0);
1111 	ASSERT_GE(close(fd), 0);
1112 
1113 	attr.userns_fd = fd;
1114 	ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
1115 		TH_LOG("failure: created idmapped mount with closed fd");
1116 	}
1117 }
1118 
1119 /**
1120  * Validate that the initial user namespace is rejected.
1121  */
TEST_F(mount_setattr_idmapped,invalid_fd_initial_userns)1122 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
1123 {
1124 	int open_tree_fd = -EBADF;
1125 	struct mount_attr attr = {
1126 		.attr_set = MOUNT_ATTR_IDMAP,
1127 	};
1128 
1129 	if (!mount_setattr_supported())
1130 		SKIP(return, "mount_setattr syscall not supported");
1131 
1132 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1133 				     AT_NO_AUTOMOUNT |
1134 				     AT_SYMLINK_NOFOLLOW |
1135 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1136 	ASSERT_GE(open_tree_fd, 0);
1137 
1138 	attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
1139 	ASSERT_GE(attr.userns_fd, 0);
1140 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1141 	ASSERT_EQ(errno, EPERM);
1142 	ASSERT_EQ(close(attr.userns_fd), 0);
1143 	ASSERT_EQ(close(open_tree_fd), 0);
1144 }
1145 
/*
 * Write the same "<nsid> <hostid> <range>" mapping to both
 * /proc/<pid>/uid_map and /proc/<pid>/gid_map (uid_map first).
 *
 * Returns 0 on success and -1 as soon as write_file() reports a failure.
 */
static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
		   unsigned long range)
{
	static const char *const map_files[] = { "uid_map", "gid_map" };
	char mapping[100], proc_path[256];
	size_t i;

	for (i = 0; i < sizeof(map_files) / sizeof(map_files[0]); i++) {
		snprintf(proc_path, sizeof(proc_path), "/proc/%d/%s", pid, map_files[i]);
		snprintf(mapping, sizeof(mapping), "%lu %lu %lu", nsid, hostid, range);
		if (write_file(proc_path, mapping, strlen(mapping)))
			return -1;
	}

	return 0;
}
1164 
#define __STACK_SIZE (8 * 1024 * 1024)

#ifndef CLONE_VM
#define CLONE_VM 0x00000100
#endif

/*
 * Start a child with clone() (or __clone2() on ia64) that runs fn(arg) on a
 * freshly allocated stack of __STACK_SIZE bytes. SIGCHLD is always added to
 * @flags.
 *
 * Returns the result of clone()/__clone2(), or -ENOMEM when the stack cannot
 * be allocated.
 *
 * The parent's stack allocation is released again before returning whenever
 * that is safe: if the clone failed (no child exists), or if CLONE_VM was not
 * requested (the child then runs on its own copy of the address space).
 * With CLONE_VM the child shares the memory, so the stack is left allocated.
 * errno is preserved across the free().
 */
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
	void *stack;
	pid_t pid;

	stack = malloc(__STACK_SIZE);
	if (!stack)
		return -ENOMEM;

#ifdef __ia64__
	pid = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#else
	/* The stack pointer passed to clone() is the top of the allocation. */
	pid = clone(fn, (char *)stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
#endif

	if (pid < 0 || !(flags & CLONE_VM)) {
		int saved_errno = errno;

		free(stack);
		errno = saved_errno;
	}

	return pid;
}
1180 
/*
 * Clone callback: send SIGSTOP to the calling process and return the result
 * of kill(). @data is unused.
 */
static int get_userns_fd_cb(void *data)
{
	const pid_t self = getpid();

	return kill(self, SIGSTOP);
}
1185 
/*
 * Wait for @pid to terminate, restarting waitpid() whenever it is interrupted
 * (EINTR).
 *
 * Returns the child's exit status if it exited normally, and -1 if waitpid()
 * failed for another reason or the child did not exit normally.
 */
static int wait_for_pid(pid_t pid)
{
	int wstatus;

	while (waitpid(pid, &wstatus, 0) == -1) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(wstatus) ? WEXITSTATUS(wstatus) : -1;
}
1204 
get_userns_fd(unsigned long nsid,unsigned long hostid,unsigned long range)1205 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
1206 {
1207 	int ret;
1208 	pid_t pid;
1209 	char path[256];
1210 
1211 	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
1212 	if (pid < 0)
1213 		return -errno;
1214 
1215 	ret = map_ids(pid, nsid, hostid, range);
1216 	if (ret < 0)
1217 		return ret;
1218 
1219 	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
1220 	ret = open(path, O_RDONLY | O_CLOEXEC);
1221 	kill(pid, SIGKILL);
1222 	wait_for_pid(pid);
1223 	return ret;
1224 }
1225 
1226 /**
1227  * Validate that an attached mount in our mount namespace cannot be idmapped.
1228  * (The kernel enforces that the mount's mount namespace and the caller's mount
1229  *  namespace match.)
1230  */
TEST_F(mount_setattr_idmapped,attached_mount_inside_current_mount_namespace)1231 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
1232 {
1233 	int open_tree_fd = -EBADF;
1234 	struct mount_attr attr = {
1235 		.attr_set = MOUNT_ATTR_IDMAP,
1236 	};
1237 
1238 	if (!mount_setattr_supported())
1239 		SKIP(return, "mount_setattr syscall not supported");
1240 
1241 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1242 				     AT_EMPTY_PATH |
1243 				     AT_NO_AUTOMOUNT |
1244 				     AT_SYMLINK_NOFOLLOW |
1245 				     OPEN_TREE_CLOEXEC);
1246 	ASSERT_GE(open_tree_fd, 0);
1247 
1248 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1249 	ASSERT_GE(attr.userns_fd, 0);
1250 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1251 	/*
1252 	 * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
1253 	 * to bypass this mount_setattr() restriction.
1254 	 */
1255 	ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1256 
1257 	ASSERT_EQ(close(attr.userns_fd), 0);
1258 	ASSERT_EQ(close(open_tree_fd), 0);
1259 }
1260 
1261 /**
1262  * Validate that idmapping a mount is rejected if the mount's mount namespace
1263  * and our mount namespace don't match.
1264  * (The kernel enforces that the mount's mount namespace and the caller's mount
1265  *  namespace match.)
1266  */
TEST_F(mount_setattr_idmapped,attached_mount_outside_current_mount_namespace)1267 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
1268 {
1269 	int open_tree_fd = -EBADF;
1270 	struct mount_attr attr = {
1271 		.attr_set = MOUNT_ATTR_IDMAP,
1272 	};
1273 
1274 	if (!mount_setattr_supported())
1275 		SKIP(return, "mount_setattr syscall not supported");
1276 
1277 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1278 				     AT_EMPTY_PATH |
1279 				     AT_NO_AUTOMOUNT |
1280 				     AT_SYMLINK_NOFOLLOW |
1281 				     OPEN_TREE_CLOEXEC);
1282 	ASSERT_GE(open_tree_fd, 0);
1283 
1284 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1285 
1286 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1287 	ASSERT_GE(attr.userns_fd, 0);
1288 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
1289 				    sizeof(attr)), 0);
1290 	/*
1291 	 * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
1292 	 * to bypass this mount_setattr() restriction.
1293 	 */
1294 	ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1295 
1296 	ASSERT_EQ(close(attr.userns_fd), 0);
1297 	ASSERT_EQ(close(open_tree_fd), 0);
1298 }
1299 
1300 /**
1301  * Validate that an attached mount in our mount namespace can be idmapped.
1302  */
TEST_F(mount_setattr_idmapped,detached_mount_inside_current_mount_namespace)1303 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
1304 {
1305 	int open_tree_fd = -EBADF;
1306 	struct mount_attr attr = {
1307 		.attr_set = MOUNT_ATTR_IDMAP,
1308 	};
1309 
1310 	if (!mount_setattr_supported())
1311 		SKIP(return, "mount_setattr syscall not supported");
1312 
1313 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1314 				     AT_EMPTY_PATH |
1315 				     AT_NO_AUTOMOUNT |
1316 				     AT_SYMLINK_NOFOLLOW |
1317 				     OPEN_TREE_CLOEXEC |
1318 				     OPEN_TREE_CLONE);
1319 	ASSERT_GE(open_tree_fd, 0);
1320 
1321 	/* Changing mount properties on a detached mount. */
1322 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1323 	ASSERT_GE(attr.userns_fd, 0);
1324 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1325 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1326 	ASSERT_EQ(close(attr.userns_fd), 0);
1327 	ASSERT_EQ(close(open_tree_fd), 0);
1328 }
1329 
1330 /**
1331  * Validate that a detached mount not in our mount namespace can be idmapped.
1332  */
TEST_F(mount_setattr_idmapped,detached_mount_outside_current_mount_namespace)1333 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
1334 {
1335 	int open_tree_fd = -EBADF;
1336 	struct mount_attr attr = {
1337 		.attr_set = MOUNT_ATTR_IDMAP,
1338 	};
1339 
1340 	if (!mount_setattr_supported())
1341 		SKIP(return, "mount_setattr syscall not supported");
1342 
1343 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1344 				     AT_EMPTY_PATH |
1345 				     AT_NO_AUTOMOUNT |
1346 				     AT_SYMLINK_NOFOLLOW |
1347 				     OPEN_TREE_CLOEXEC |
1348 				     OPEN_TREE_CLONE);
1349 	ASSERT_GE(open_tree_fd, 0);
1350 
1351 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1352 
1353 	/* Changing mount properties on a detached mount. */
1354 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1355 	ASSERT_GE(attr.userns_fd, 0);
1356 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1357 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1358 	ASSERT_EQ(close(attr.userns_fd), 0);
1359 	ASSERT_EQ(close(open_tree_fd), 0);
1360 }
1361 
/*
 * Check the ownership of @path (resolved relative to @dfd with the given
 * fstatat() @flags).
 *
 * Returns true only if fstatat() succeeds and the file is owned by exactly
 * @expected_uid and @expected_gid; a failed lookup yields false.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
			     uid_t expected_uid, gid_t expected_gid)
{
	struct stat sb;

	if (fstatat(dfd, path, &sb, flags) < 0)
		return false;

	if (sb.st_uid != expected_uid)
		return false;

	return sb.st_gid == expected_gid;
}
1374 
1375 /**
1376  * Validate that currently changing the idmapping of an idmapped mount fails.
1377  */
TEST_F(mount_setattr_idmapped,change_idmapping)1378 TEST_F(mount_setattr_idmapped, change_idmapping)
1379 {
1380 	int open_tree_fd = -EBADF;
1381 	struct mount_attr attr = {
1382 		.attr_set = MOUNT_ATTR_IDMAP,
1383 	};
1384 
1385 	ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0));
1386 
1387 	if (!mount_setattr_supported())
1388 		SKIP(return, "mount_setattr syscall not supported");
1389 
1390 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
1391 				     AT_EMPTY_PATH |
1392 				     AT_NO_AUTOMOUNT |
1393 				     AT_SYMLINK_NOFOLLOW |
1394 				     OPEN_TREE_CLOEXEC |
1395 				     OPEN_TREE_CLONE);
1396 	ASSERT_GE(open_tree_fd, 0);
1397 
1398 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1399 	ASSERT_GE(attr.userns_fd, 0);
1400 	ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
1401 				    AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1402 	ASSERT_EQ(close(attr.userns_fd), 0);
1403 
1404 	EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0));
1405 	EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
1406 
1407 	/* Change idmapping on a detached mount that is already idmapped. */
1408 	attr.userns_fd	= get_userns_fd(0, 20000, 10000);
1409 	ASSERT_GE(attr.userns_fd, 0);
1410 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1411 	/*
1412 	 * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
1413 	 * to bypass this mount_setattr() restriction.
1414 	 */
1415 	EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1416 	EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000));
1417 	EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
1418 
1419 	ASSERT_EQ(close(attr.userns_fd), 0);
1420 	ASSERT_EQ(close(open_tree_fd), 0);
1421 }
1422 
TEST_F(mount_setattr_idmapped,idmap_mount_tree_invalid)1423 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
1424 {
1425 	int open_tree_fd = -EBADF;
1426 	struct mount_attr attr = {
1427 		.attr_set = MOUNT_ATTR_IDMAP,
1428 	};
1429 
1430 	if (!mount_setattr_supported())
1431 		SKIP(return, "mount_setattr syscall not supported");
1432 
1433 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1434 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1435 
1436 	ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV,
1437 			"size=100000,mode=700"), 0);
1438 
1439 	ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
1440 
1441 	ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
1442 
1443 	open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
1444 				     AT_RECURSIVE |
1445 				     AT_EMPTY_PATH |
1446 				     AT_NO_AUTOMOUNT |
1447 				     AT_SYMLINK_NOFOLLOW |
1448 				     OPEN_TREE_CLOEXEC |
1449 				     OPEN_TREE_CLONE);
1450 	ASSERT_GE(open_tree_fd, 0);
1451 
1452 	attr.userns_fd	= get_userns_fd(0, 10000, 10000);
1453 	ASSERT_GE(attr.userns_fd, 0);
1454 	ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
1455 	ASSERT_EQ(close(attr.userns_fd), 0);
1456 	ASSERT_EQ(close(open_tree_fd), 0);
1457 
1458 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
1459 	ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
1460 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
1461 	ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
1462 
1463 	(void)umount2("/mnt/A", MNT_DETACH);
1464 }
1465 
TEST_F(mount_setattr,mount_attr_nosymfollow)1466 TEST_F(mount_setattr, mount_attr_nosymfollow)
1467 {
1468 	int fd;
1469 	unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
1470 	struct mount_attr attr = {
1471 		.attr_set	= MOUNT_ATTR_NOSYMFOLLOW,
1472 	};
1473 
1474 	if (!mount_setattr_supported())
1475 		SKIP(return, "mount_setattr syscall not supported");
1476 
1477 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1478 	ASSERT_GT(fd, 0);
1479 	ASSERT_EQ(close(fd), 0);
1480 
1481 	old_flags = read_mnt_flags("/mnt/A");
1482 	ASSERT_GT(old_flags, 0);
1483 
1484 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1485 
1486 	expected_flags = old_flags;
1487 	expected_flags |= ST_NOSYMFOLLOW;
1488 
1489 	new_flags = read_mnt_flags("/mnt/A");
1490 	ASSERT_EQ(new_flags, expected_flags);
1491 
1492 	new_flags = read_mnt_flags("/mnt/A/AA");
1493 	ASSERT_EQ(new_flags, expected_flags);
1494 
1495 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1496 	ASSERT_EQ(new_flags, expected_flags);
1497 
1498 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1499 	ASSERT_EQ(new_flags, expected_flags);
1500 
1501 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1502 	ASSERT_LT(fd, 0);
1503 	ASSERT_EQ(errno, ELOOP);
1504 
1505 	attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
1506 	attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
1507 
1508 	ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
1509 
1510 	expected_flags &= ~ST_NOSYMFOLLOW;
1511 	new_flags = read_mnt_flags("/mnt/A");
1512 	ASSERT_EQ(new_flags, expected_flags);
1513 
1514 	new_flags = read_mnt_flags("/mnt/A/AA");
1515 	ASSERT_EQ(new_flags, expected_flags);
1516 
1517 	new_flags = read_mnt_flags("/mnt/A/AA/B");
1518 	ASSERT_EQ(new_flags, expected_flags);
1519 
1520 	new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
1521 	ASSERT_EQ(new_flags, expected_flags);
1522 
1523 	fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
1524 	ASSERT_GT(fd, 0);
1525 	ASSERT_EQ(close(fd), 0);
1526 }
1527 
TEST_F(mount_setattr,open_tree_detached)1528 TEST_F(mount_setattr, open_tree_detached)
1529 {
1530 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1531 	struct statx stx;
1532 
1533 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1534 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1535 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1536 				     OPEN_TREE_CLONE);
1537 	ASSERT_GE(fd_tree_base, 0);
1538 	/*
1539 	 * /mnt                   testing tmpfs
1540 	 * |-/mnt/A               testing tmpfs
1541 	 * | `-/mnt/A/AA          testing tmpfs
1542 	 * |   `-/mnt/A/AA/B      testing tmpfs
1543 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1544 	 * `-/mnt/B               testing ramfs
1545 	 */
1546 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1547 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1548 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1549 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1550 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1551 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1552 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1553 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1554 
1555 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1556 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1557 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1558 				       OPEN_TREE_CLONE);
1559 	ASSERT_GE(fd_tree_subdir, 0);
1560 	/*
1561 	 * /AA          testing tmpfs
1562 	 * `-/AA/B      testing tmpfs
1563 	 *   `-/AA/B/BB testing tmpfs
1564 	 */
1565 	ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
1566 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1567 	ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
1568 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1569 
1570 	ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1571 	/*
1572 	 * /tmp/target1          testing tmpfs
1573 	 * `-/tmp/target1/B      testing tmpfs
1574 	 *   `-/tmp/target1/B/BB testing tmpfs
1575 	 */
1576 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1577 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1578 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
1579 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1580 	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
1581 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1582 
1583 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
1584 	/*
1585 	 * /tmp/target2                   testing tmpfs
1586 	 * |-/tmp/target2/A               testing tmpfs
1587 	 * | `-/tmp/target2/A/AA          testing tmpfs
1588 	 * |   `-/tmp/target2/A/AA/B      testing tmpfs
1589 	 * |     `-/tmp/target2/A/AA/B/BB testing tmpfs
1590 	 * `-/tmp/target2/B               testing ramfs
1591 	 */
1592 	ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
1593 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1594 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
1595 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1596 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
1597 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1598 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
1599 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1600 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
1601 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1602 	ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
1603 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1604 
1605 	EXPECT_EQ(close(fd_tree_base), 0);
1606 	EXPECT_EQ(close(fd_tree_subdir), 0);
1607 }
1608 
TEST_F(mount_setattr,open_tree_detached_fail)1609 TEST_F(mount_setattr, open_tree_detached_fail)
1610 {
1611 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1612 	struct statx stx;
1613 
1614 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1615 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1616 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1617 				     OPEN_TREE_CLONE);
1618 	ASSERT_GE(fd_tree_base, 0);
1619 	/*
1620 	 * /mnt                   testing tmpfs
1621 	 * |-/mnt/A               testing tmpfs
1622 	 * | `-/mnt/A/AA          testing tmpfs
1623 	 * |   `-/mnt/A/AA/B      testing tmpfs
1624 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1625 	 * `-/mnt/B               testing ramfs
1626 	 */
1627 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1628 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1629 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1630 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1631 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1632 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1633 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1634 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1635 
1636 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
1637 
1638 	/*
1639 	 * The origin mount namespace of the anonymous mount namespace
1640 	 * of @fd_tree_base doesn't match the caller's mount namespace
1641 	 * anymore so creation of another detached mounts must fail.
1642 	 */
1643 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1644 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1645 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1646 				       OPEN_TREE_CLONE);
1647 	ASSERT_LT(fd_tree_subdir, 0);
1648 	ASSERT_EQ(errno, EINVAL);
1649 }
1650 
TEST_F(mount_setattr,open_tree_detached_fail2)1651 TEST_F(mount_setattr, open_tree_detached_fail2)
1652 {
1653 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1654 	struct statx stx;
1655 
1656 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1657 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1658 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1659 				     OPEN_TREE_CLONE);
1660 	ASSERT_GE(fd_tree_base, 0);
1661 	/*
1662 	 * /mnt                   testing tmpfs
1663 	 * |-/mnt/A               testing tmpfs
1664 	 * | `-/mnt/A/AA          testing tmpfs
1665 	 * |   `-/mnt/A/AA/B      testing tmpfs
1666 	 * |     `-/mnt/A/AA/B/BB testing tmpfs
1667 	 * `-/mnt/B               testing ramfs
1668 	 */
1669 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1670 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1671 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1672 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1673 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1674 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1675 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1676 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1677 
1678 	EXPECT_EQ(create_and_enter_userns(), 0);
1679 
1680 	/*
1681 	 * The caller entered a new user namespace. They will have
1682 	 * CAP_SYS_ADMIN in this user namespace. However, they're still
1683 	 * located in a mount namespace that is owned by an ancestor
1684 	 * user namespace in which they hold no privilege. Creating a
1685 	 * detached mount must thus fail.
1686 	 */
1687 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1688 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1689 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1690 				       OPEN_TREE_CLONE);
1691 	ASSERT_LT(fd_tree_subdir, 0);
1692 	ASSERT_EQ(errno, EPERM);
1693 }
1694 
TEST_F(mount_setattr,open_tree_detached_fail3)1695 TEST_F(mount_setattr, open_tree_detached_fail3)
1696 {
1697 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1698 	struct statx stx;
1699 
1700 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1701 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1702 				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1703 				     OPEN_TREE_CLONE);
1704 	ASSERT_GE(fd_tree_base, 0);
1705 	/*
1706         * /mnt                   testing tmpfs
1707         * |-/mnt/A               testing tmpfs
1708         * | `-/mnt/A/AA          testing tmpfs
1709         * |   `-/mnt/A/AA/B      testing tmpfs
1710         * |     `-/mnt/A/AA/B/BB testing tmpfs
1711         * `-/mnt/B               testing ramfs
1712         */
1713 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1714 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1715 	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
1716 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1717 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
1718 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1719 	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
1720 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1721 
1722 	EXPECT_EQ(prepare_unpriv_mountns(), 0);
1723 
1724 	/*
1725         * The caller entered a new mount namespace. They will have
1726         * CAP_SYS_ADMIN in the owning user namespace of their mount
1727         * namespace.
1728         *
1729         * However, the origin mount namespace of the anonymous mount
1730         * namespace of @fd_tree_base doesn't match the caller's mount
1731         * namespace anymore so creation of another detached mounts must
1732         * fail.
1733         */
1734 	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
1735 			               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1736 				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1737 				       OPEN_TREE_CLONE);
1738 	ASSERT_LT(fd_tree_subdir, 0);
1739 	ASSERT_EQ(errno, EINVAL);
1740 }
1741 
TEST_F(mount_setattr,open_tree_subfolder)1742 TEST_F(mount_setattr, open_tree_subfolder)
1743 {
1744 	int fd_context, fd_tmpfs, fd_tree;
1745 
1746 	fd_context = sys_fsopen("tmpfs", 0);
1747 	ASSERT_GE(fd_context, 0);
1748 
1749 	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
1750 
1751 	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
1752 	ASSERT_GE(fd_tmpfs, 0);
1753 
1754 	EXPECT_EQ(close(fd_context), 0);
1755 
1756 	ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
1757 
1758 	fd_tree = sys_open_tree(fd_tmpfs, "subdir",
1759 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1760 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1761 				OPEN_TREE_CLONE);
1762 	ASSERT_GE(fd_tree, 0);
1763 
1764 	EXPECT_EQ(close(fd_tmpfs), 0);
1765 
1766 	ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
1767 
1768 	ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
1769 
1770 	EXPECT_EQ(close(fd_tree), 0);
1771 
1772 	ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
1773 
1774 	EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
1775 }
1776 
TEST_F(mount_setattr,mount_detached_mount_on_detached_mount_then_close)1777 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
1778 {
1779 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1780 	struct statx stx;
1781 
1782 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1783 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1784 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1785 	ASSERT_GE(fd_tree_base, 0);
1786 	/*
1787 	 * /mnt testing tmpfs
1788 	 */
1789 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1790 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1791 
1792 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1793 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1794 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1795 				       OPEN_TREE_CLONE);
1796 	ASSERT_GE(fd_tree_subdir, 0);
1797 	/*
1798 	 * /mnt testing tmpfs
1799 	 */
1800 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1801 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1802 
1803 	/*
1804 	 * /mnt   testing tmpfs
1805 	 * `-/mnt testing tmpfs
1806 	 */
1807 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1808 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
1809 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1810 
1811 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1812 
1813 	EXPECT_EQ(close(fd_tree_base), 0);
1814 	EXPECT_EQ(close(fd_tree_subdir), 0);
1815 }
1816 
TEST_F(mount_setattr,mount_detached_mount_on_detached_mount_and_attach)1817 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
1818 {
1819 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1820 	struct statx stx;
1821 	__u64 mnt_id = 0;
1822 
1823 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1824 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1825 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1826 	ASSERT_GE(fd_tree_base, 0);
1827 	/*
1828 	 * /mnt testing tmpfs
1829 	 */
1830 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1831 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1832 
1833 	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
1834 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1835 				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1836 				       OPEN_TREE_CLONE);
1837 	ASSERT_GE(fd_tree_subdir, 0);
1838 	/*
1839 	 * /mnt testing tmpfs
1840 	 */
1841 	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
1842 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1843 
1844 	/*
1845 	 * /mnt   testing tmpfs
1846 	 * `-/mnt testing tmpfs
1847 	 */
1848 	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1849 	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
1850 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1851 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1852 	mnt_id = stx.stx_mnt_id;
1853 
1854 	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1855 
1856 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1857 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
1858 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1859 	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
1860 	ASSERT_EQ(stx.stx_mnt_id, mnt_id);
1861 
1862 	EXPECT_EQ(close(fd_tree_base), 0);
1863 	EXPECT_EQ(close(fd_tree_subdir), 0);
1864 }
1865 
TEST_F(mount_setattr,move_mount_detached_fail)1866 TEST_F(mount_setattr, move_mount_detached_fail)
1867 {
1868 	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
1869 	struct statx stx;
1870 
1871 	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
1872 				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1873 				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1874 	ASSERT_GE(fd_tree_base, 0);
1875 
1876 	/* Attach the mount to the caller's mount namespace. */
1877 	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1878 
1879 	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
1880 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1881 
1882 	fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
1883 				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1884 				       OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
1885 	ASSERT_GE(fd_tree_subdir, 0);
1886 	ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
1887 	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1888 
1889 	/* Not allowed to move an attached mount to a detached mount. */
1890 	ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1891 	ASSERT_EQ(errno, EINVAL);
1892 
1893 	EXPECT_EQ(close(fd_tree_base), 0);
1894 	EXPECT_EQ(close(fd_tree_subdir), 0);
1895 }
1896 
TEST_F(mount_setattr,attach_detached_mount_then_umount_then_close)1897 TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
1898 {
1899 	int fd_tree = -EBADF;
1900 	struct statx stx;
1901 
1902 	fd_tree = sys_open_tree(-EBADF, "/mnt",
1903 				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1904 				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1905 				OPEN_TREE_CLONE);
1906 	ASSERT_GE(fd_tree, 0);
1907 
1908 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
1909 	/* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
1910 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1911 
1912 	/* Attach the mount to the caller's mount namespace. */
1913 	ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1914 
1915 	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
1916 	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
1917 
1918 	ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
1919 
1920 	/*
1921 	 * This tests whether dissolve_on_fput() handles a NULL mount
1922 	 * namespace correctly, i.e., that it doesn't splat.
1923 	 */
1924 	EXPECT_EQ(close(fd_tree), 0);
1925 }
1926 
TEST_F(mount_setattr,mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)1927 TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
1928 {
1929 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
1930 
1931 	/*
1932 	 * |-/mnt/A               testing tmpfs
1933 	 *   `-/mnt/A/AA          testing tmpfs
1934 	 *     `-/mnt/A/AA/B      testing tmpfs
1935 	 *       `-/mnt/A/AA/B/BB testing tmpfs
1936 	 */
1937 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
1938 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1939 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
1940 				 OPEN_TREE_CLONE);
1941 	ASSERT_GE(fd_tree1, 0);
1942 
1943 	/*
1944 	 * `-/mnt/B testing ramfs
1945 	 */
1946 	fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
1947 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
1948 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
1949 				 OPEN_TREE_CLONE);
1950 	ASSERT_GE(fd_tree2, 0);
1951 
1952 	/*
1953 	 * Move the source detached mount tree to the target detached
1954 	 * mount tree. This will move all the mounts in the source mount
1955 	 * tree from the source anonymous mount namespace to the target
1956 	 * anonymous mount namespace.
1957 	 *
1958 	 * The source detached mount tree and the target detached mount
1959 	 * tree now both refer to the same anonymous mount namespace.
1960 	 *
1961 	 * |-""                 testing ramfs
1962 	 *   `-""               testing tmpfs
1963 	 *     `-""/AA          testing tmpfs
1964 	 *       `-""/AA/B      testing tmpfs
1965 	 *         `-""/AA/B/BB testing tmpfs
1966 	 */
1967 	ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
1968 
1969 	/*
1970 	 * The source detached mount tree @fd_tree1 is now an attached
1971 	 * mount, i.e., it has a parent. Specifically, it now has the
1972 	 * root mount of the mount tree of @fd_tree2 as its parent.
1973 	 *
1974 	 * That means we are no longer allowed to attach it as we only
1975 	 * allow attaching the root of an anonymous mount tree, not
1976 	 * random bits and pieces. Verify that the kernel enforces this.
1977 	 */
1978 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
1979 
1980 	/*
1981 	 * Closing the source detached mount tree must not unmount and
1982 	 * free the shared anonymous mount namespace. The kernel will
1983 	 * quickly yell at us because the anonymous mount namespace
1984 	 * won't be empty when it's freed.
1985 	 */
1986 	EXPECT_EQ(close(fd_tree1), 0);
1987 
1988 	/*
1989 	 * Attach the mount tree to a non-anonymous mount namespace.
1990 	 * This can only succeed if closing fd_tree1 had proper
1991 	 * semantics and didn't cause the anonymous mount namespace to
1992 	 * be freed. If it did this will trigger a UAF which will be
1993 	 * visible on any KASAN enabled kernel.
1994 	 *
1995 	 * |-/tmp/target1                 testing ramfs
1996 	 *   `-/tmp/target1               testing tmpfs
1997 	 *     `-/tmp/target1/AA          testing tmpfs
1998 	 *       `-/tmp/target1/AA/B      testing tmpfs
1999 	 *         `-/tmp/target1/AA/B/BB testing tmpfs
2000 	 */
2001 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2002 	EXPECT_EQ(close(fd_tree2), 0);
2003 }
2004 
TEST_F(mount_setattr,two_detached_mounts_referring_to_same_anonymous_mount_namespace)2005 TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
2006 {
2007 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2008 
2009 	/*
2010 	 * Copy the following mount tree:
2011 	 *
2012 	 * |-/mnt/A               testing tmpfs
2013 	 *   `-/mnt/A/AA          testing tmpfs
2014 	 *     `-/mnt/A/AA/B      testing tmpfs
2015 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2016 	 */
2017 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2018 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2019 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2020 				 OPEN_TREE_CLONE);
2021 	ASSERT_GE(fd_tree1, 0);
2022 
2023 	/*
2024 	 * Create an O_PATH file descriptors with a separate struct file
2025 	 * that refers to the same detached mount tree as @fd_tree1
2026 	 */
2027 	fd_tree2 = sys_open_tree(fd_tree1, "",
2028 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2029 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2030 	ASSERT_GE(fd_tree2, 0);
2031 
2032 	/*
2033 	 * Copy the following mount tree:
2034 	 *
2035 	 * |-/tmp/target1               testing tmpfs
2036 	 *   `-/tmp/target1/AA          testing tmpfs
2037 	 *     `-/tmp/target1/AA/B      testing tmpfs
2038 	 *       `-/tmp/target1/AA/B/BB testing tmpfs
2039 	 */
2040 	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2041 
2042 	/*
2043 	 * This must fail as this would mean adding the same mount tree
2044 	 * into the same mount tree.
2045 	 */
2046 	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2047 }
2048 
TEST_F(mount_setattr,two_detached_subtrees_of_same_anonymous_mount_namespace)2049 TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
2050 {
2051 	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
2052 
2053 	/*
2054 	 * Copy the following mount tree:
2055 	 *
2056 	 * |-/mnt/A               testing tmpfs
2057 	 *   `-/mnt/A/AA          testing tmpfs
2058 	 *     `-/mnt/A/AA/B      testing tmpfs
2059 	 *       `-/mnt/A/AA/B/BB testing tmpfs
2060 	 */
2061 	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
2062 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2063 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2064 				 OPEN_TREE_CLONE);
2065 	ASSERT_GE(fd_tree1, 0);
2066 
2067 	/*
2068 	 * Create an O_PATH file descriptors with a separate struct file that
2069 	 * refers to a subtree of the same detached mount tree as @fd_tree1
2070 	 */
2071 	fd_tree2 = sys_open_tree(fd_tree1, "AA",
2072 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2073 				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
2074 	ASSERT_GE(fd_tree2, 0);
2075 
2076 	/*
2077 	 * This must fail as it is only possible to attach the root of a
2078 	 * detached mount tree.
2079 	 */
2080 	ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2081 
2082 	ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
2083 }
2084 
TEST_F(mount_setattr,detached_tree_propagation)2085 TEST_F(mount_setattr, detached_tree_propagation)
2086 {
2087 	int fd_tree = -EBADF;
2088 	struct statx stx1, stx2, stx3, stx4;
2089 
2090 	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
2091 	ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
2092 
2093 	/*
2094 	 * Copy the following mount tree:
2095 	 *
2096          * /mnt                   testing tmpfs
2097          * |-/mnt/A               testing tmpfs
2098          * | `-/mnt/A/AA          testing tmpfs
2099          * |   `-/mnt/A/AA/B      testing tmpfs
2100          * |     `-/mnt/A/AA/B/BB testing tmpfs
2101          * `-/mnt/B               testing ramfs
2102 	 */
2103 	fd_tree = sys_open_tree(-EBADF, "/mnt",
2104 				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
2105 				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
2106 				 OPEN_TREE_CLONE);
2107 	ASSERT_GE(fd_tree, 0);
2108 
2109 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
2110 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
2111 
2112 	/*
2113 	 * Copying the mount namespace like done above doesn't alter the
2114 	 * mounts in any way so the filesystem mounted on /mnt must be
2115 	 * identical even though the mounts will differ. Use the device
2116 	 * information to verify that. Note that tmpfs will have a 0
2117 	 * major number so comparing the major number is misleading.
2118 	 */
2119 	ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
2120 
2121 	/* Mount a tmpfs filesystem over /mnt/A. */
2122 	ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
2123 
2124 
2125 	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
2126 	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
2127 
2128 	/*
2129 	 * A new filesystem has been mounted on top of /mnt/A which
2130 	 * means that the device information will be different for any
2131 	 * statx() that was taken from /mnt/A before the mount compared
2132 	 * to one after the mount.
2133 	 */
2134 	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
2135 	ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor);
2136 
2137 	EXPECT_EQ(close(fd_tree), 0);
2138 }
2139 
/*
 * NOTE(review): TEST_HARNESS_MAIN is not defined in this file; it
 * presumably comes from ../kselftest_harness.h and supplies the program
 * entry point that runs the TEST_F() cases above - confirm against the
 * harness header.
 */
TEST_HARNESS_MAIN
2141