1 // SPDX-License-Identifier: GPL-2.0 2 #define _GNU_SOURCE 3 #include <sched.h> 4 #include <stdio.h> 5 #include <errno.h> 6 #include <pthread.h> 7 #include <string.h> 8 #include <sys/stat.h> 9 #include <sys/types.h> 10 #include <sys/mount.h> 11 #include <sys/wait.h> 12 #include <sys/vfs.h> 13 #include <sys/statvfs.h> 14 #include <sys/sysinfo.h> 15 #include <stdlib.h> 16 #include <unistd.h> 17 #include <fcntl.h> 18 #include <grp.h> 19 #include <stdbool.h> 20 #include <stdarg.h> 21 #include <linux/mount.h> 22 23 #include "../filesystems/wrappers.h" 24 #include "../kselftest_harness.h" 25 26 #ifndef CLONE_NEWNS 27 #define CLONE_NEWNS 0x00020000 28 #endif 29 30 #ifndef CLONE_NEWUSER 31 #define CLONE_NEWUSER 0x10000000 32 #endif 33 34 #ifndef MS_REC 35 #define MS_REC 16384 36 #endif 37 38 #ifndef MS_RELATIME 39 #define MS_RELATIME (1 << 21) 40 #endif 41 42 #ifndef MS_STRICTATIME 43 #define MS_STRICTATIME (1 << 24) 44 #endif 45 46 #ifndef MOUNT_ATTR_RDONLY 47 #define MOUNT_ATTR_RDONLY 0x00000001 48 #endif 49 50 #ifndef MOUNT_ATTR_NOSUID 51 #define MOUNT_ATTR_NOSUID 0x00000002 52 #endif 53 54 #ifndef MOUNT_ATTR_NOEXEC 55 #define MOUNT_ATTR_NOEXEC 0x00000008 56 #endif 57 58 #ifndef MOUNT_ATTR_NODIRATIME 59 #define MOUNT_ATTR_NODIRATIME 0x00000080 60 #endif 61 62 #ifndef MOUNT_ATTR__ATIME 63 #define MOUNT_ATTR__ATIME 0x00000070 64 #endif 65 66 #ifndef MOUNT_ATTR_RELATIME 67 #define MOUNT_ATTR_RELATIME 0x00000000 68 #endif 69 70 #ifndef MOUNT_ATTR_NOATIME 71 #define MOUNT_ATTR_NOATIME 0x00000010 72 #endif 73 74 #ifndef MOUNT_ATTR_STRICTATIME 75 #define MOUNT_ATTR_STRICTATIME 0x00000020 76 #endif 77 78 #ifndef AT_RECURSIVE 79 #define AT_RECURSIVE 0x8000 80 #endif 81 82 #ifndef MS_SHARED 83 #define MS_SHARED (1 << 20) 84 #endif 85 86 #define DEFAULT_THREADS 4 87 #define ptr_to_int(p) ((int)((intptr_t)(p))) 88 #define int_to_ptr(u) ((void *)((intptr_t)(u))) 89 90 #ifndef __NR_mount_setattr 91 #if defined __alpha__ 92 #define __NR_mount_setattr 552 93 #elif defined _MIPS_SIM 94 #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ 95 #define __NR_mount_setattr (442 + 4000) 96 #endif 97 #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ 98 #define __NR_mount_setattr (442 + 6000) 99 #endif 100 #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ 101 #define __NR_mount_setattr (442 + 5000) 102 #endif 103 #elif defined __ia64__ 104 #define __NR_mount_setattr (442 + 1024) 105 #else 106 #define __NR_mount_setattr 442 107 #endif 108 #endif 109 110 #ifndef MOUNT_ATTR_IDMAP 111 #define MOUNT_ATTR_IDMAP 0x00100000 112 #endif 113 114 #ifndef MOUNT_ATTR_NOSYMFOLLOW 115 #define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 116 #endif 117 118 static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, 119 struct mount_attr *attr, size_t size) 120 { 121 return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); 122 } 123 124 static ssize_t write_nointr(int fd, const void *buf, size_t count) 125 { 126 ssize_t ret; 127 128 do { 129 ret = write(fd, buf, count); 130 } while (ret < 0 && errno == EINTR); 131 132 return ret; 133 } 134 135 static int write_file(const char *path, const void *buf, size_t count) 136 { 137 int fd; 138 ssize_t ret; 139 140 fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); 141 if (fd < 0) 142 return -1; 143 144 ret = write_nointr(fd, buf, count); 145 close(fd); 146 if (ret < 0 || (size_t)ret != count) 147 return -1; 148 149 return 0; 150 } 151 152 static int create_and_enter_userns(void) 153 { 154 uid_t uid; 155 gid_t gid; 156 char map[100]; 157 158 uid = getuid(); 159 gid = getgid(); 160 161 if (unshare(CLONE_NEWUSER)) 162 return -1; 163 164 if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && 165 errno != ENOENT) 166 return -1; 167 168 snprintf(map, sizeof(map), "0 %d 1", uid); 169 if (write_file("/proc/self/uid_map", map, strlen(map))) 170 return -1; 171 172 173 snprintf(map, sizeof(map), "0 %d 1", gid); 174 if (write_file("/proc/self/gid_map", map, strlen(map))) 175 return -1; 176 177 if (setgid(0)) 178 return -1; 179 180 if (setuid(0)) 181 return -1; 182 183 return 0; 184 } 185 186 static int prepare_unpriv_mountns(void) 187 { 188 if (create_and_enter_userns()) 189 return -1; 190 191 if (unshare(CLONE_NEWNS)) 192 return -1; 193 194 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) 195 return -1; 196 197 return 0; 198 } 199 200 #ifndef ST_NOSYMFOLLOW 201 #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */ 202 #endif 203 204 static int read_mnt_flags(const char *path) 205 { 206 int ret; 207 struct statvfs stat; 208 unsigned int mnt_flags; 209 210 ret = statvfs(path, &stat); 211 if (ret != 0) 212 return -EINVAL; 213 214 if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | 215 ST_NOATIME | ST_NODIRATIME | ST_RELATIME | 216 ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW)) 217 return -EINVAL; 218 219 mnt_flags = 0; 220 if (stat.f_flag & ST_RDONLY) 221 mnt_flags |= MS_RDONLY; 222 if (stat.f_flag & ST_NOSUID) 223 mnt_flags |= MS_NOSUID; 224 if (stat.f_flag & ST_NODEV) 225 mnt_flags |= MS_NODEV; 226 if (stat.f_flag & ST_NOEXEC) 227 mnt_flags |= MS_NOEXEC; 228 if (stat.f_flag & ST_NOATIME) 229 mnt_flags |= MS_NOATIME; 230 if (stat.f_flag & ST_NODIRATIME) 231 mnt_flags |= MS_NODIRATIME; 232 if (stat.f_flag & ST_RELATIME) 233 mnt_flags |= MS_RELATIME; 234 if (stat.f_flag & ST_SYNCHRONOUS) 235 mnt_flags |= MS_SYNCHRONOUS; 236 if (stat.f_flag & ST_MANDLOCK) 237 mnt_flags |= ST_MANDLOCK; 238 if (stat.f_flag & ST_NOSYMFOLLOW) 239 mnt_flags |= ST_NOSYMFOLLOW; 240 241 return mnt_flags; 242 } 243 244 static char *get_field(char *src, int nfields) 245 { 246 int i; 247 char *p = src; 248 249 for (i = 0; i < nfields; i++) { 250 while (*p && *p != ' ' && *p != '\t') 251 p++; 252 253 if (!*p) 254 break; 255 256 p++; 257 } 258 259 return p; 260 } 261 262 static void null_endofword(char *word) 263 { 264 while (*word && *word != ' ' && *word != '\t') 265 word++; 266 *word = '\0'; 267 } 268 269 static bool is_shared_mount(const char *path) 270 { 271 size_t len = 0; 272 char *line = NULL; 273 FILE *f = NULL; 274 275 f = fopen("/proc/self/mountinfo", "re"); 276 if (!f) 277 return false; 278 279 while (getline(&line, &len, f) != -1) { 280 char *opts, *target; 281 282 target = get_field(line, 4); 283 if (!target) 284 continue; 285 286 opts = get_field(target, 2); 287 if (!opts) 288 continue; 289 290 null_endofword(target); 291 292 if (strcmp(target, path) != 0) 293 continue; 294 295 null_endofword(opts); 296 if (strstr(opts, "shared:")) 297 return true; 298 } 299 300 free(line); 301 fclose(f); 302 303 return false; 304 } 305 306 static void *mount_setattr_thread(void *data) 307 { 308 struct mount_attr attr = { 309 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID, 310 .attr_clr = 0, 311 .propagation = MS_SHARED, 312 }; 313 314 if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr))) 315 pthread_exit(int_to_ptr(-1)); 316 317 pthread_exit(int_to_ptr(0)); 318 } 319 320 /* Attempt to de-conflict with the selftests tree. */ 321 #ifndef SKIP 322 #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) 323 #endif 324 325 static bool mount_setattr_supported(void) 326 { 327 int ret; 328 329 ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0); 330 if (ret < 0 && errno == ENOSYS) 331 return false; 332 333 return true; 334 } 335 336 FIXTURE(mount_setattr) { 337 }; 338 339 #define NOSYMFOLLOW_TARGET "/mnt/A/AA/data" 340 #define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink" 341 342 FIXTURE_SETUP(mount_setattr) 343 { 344 int fd = -EBADF; 345 346 if (!mount_setattr_supported()) 347 SKIP(return, "mount_setattr syscall not supported"); 348 349 ASSERT_EQ(prepare_unpriv_mountns(), 0); 350 351 (void)umount2("/mnt", MNT_DETACH); 352 (void)umount2("/tmp", MNT_DETACH); 353 354 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, 355 "size=100000,mode=700"), 0); 356 357 ASSERT_EQ(mkdir("/tmp/B", 0777), 0); 358 359 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, 360 "size=100000,mode=700"), 0); 361 362 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); 363 364 ASSERT_EQ(mkdir("/tmp/target1", 0777), 0); 365 366 ASSERT_EQ(mkdir("/tmp/target2", 0777), 0); 367 368 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, 369 "size=100000,mode=700"), 0); 370 371 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, 372 "size=100000,mode=700"), 0); 373 374 ASSERT_EQ(mkdir("/mnt/A", 0777), 0); 375 376 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, 377 "size=100000,mode=700"), 0); 378 379 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); 380 381 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); 382 383 ASSERT_EQ(mkdir("/mnt/B", 0777), 0); 384 385 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", 386 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); 387 388 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); 389 390 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", 391 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); 392 393 fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC); 394 ASSERT_GT(fd, 0); 395 ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0); 396 ASSERT_EQ(close(fd), 0); 397 } 398 399 FIXTURE_TEARDOWN(mount_setattr) 400 { 401 if (!mount_setattr_supported()) 402 SKIP(return, "mount_setattr syscall not supported"); 403 404 (void)umount2("/mnt/A", MNT_DETACH); 405 (void)umount2("/tmp", MNT_DETACH); 406 } 407 408 TEST_F(mount_setattr, invalid_attributes) 409 { 410 struct mount_attr invalid_attr = { 411 .attr_set = (1U << 31), 412 }; 413 414 if (!mount_setattr_supported()) 415 SKIP(return, "mount_setattr syscall not supported"); 416 417 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 418 sizeof(invalid_attr)), 0); 419 420 invalid_attr.attr_set = 0; 421 invalid_attr.attr_clr = (1U << 31); 422 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 423 sizeof(invalid_attr)), 0); 424 425 invalid_attr.attr_clr = 0; 426 invalid_attr.propagation = (1U << 31); 427 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 428 sizeof(invalid_attr)), 0); 429 430 invalid_attr.attr_set = (1U << 31); 431 invalid_attr.attr_clr = (1U << 31); 432 invalid_attr.propagation = (1U << 31); 433 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 434 sizeof(invalid_attr)), 0); 435 436 ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr, 437 sizeof(invalid_attr)), 0); 438 } 439 440 TEST_F(mount_setattr, extensibility) 441 { 442 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 443 char *s = "dummy"; 444 struct mount_attr invalid_attr = {}; 445 struct mount_attr_large { 446 struct mount_attr attr1; 447 struct mount_attr attr2; 448 struct mount_attr attr3; 449 } large_attr = {}; 450 451 if (!mount_setattr_supported()) 452 SKIP(return, "mount_setattr syscall not supported"); 453 454 old_flags = read_mnt_flags("/mnt/A"); 455 ASSERT_GT(old_flags, 0); 456 457 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL, 458 sizeof(invalid_attr)), 0); 459 ASSERT_EQ(errno, EFAULT); 460 461 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s, 462 sizeof(invalid_attr)), 0); 463 ASSERT_EQ(errno, EINVAL); 464 465 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0); 466 ASSERT_EQ(errno, EINVAL); 467 468 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 469 sizeof(invalid_attr) / 2), 0); 470 ASSERT_EQ(errno, EINVAL); 471 472 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 473 sizeof(invalid_attr) / 2), 0); 474 ASSERT_EQ(errno, EINVAL); 475 476 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 477 (void *)&large_attr, sizeof(large_attr)), 0); 478 479 large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY; 480 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 481 (void *)&large_attr, sizeof(large_attr)), 0); 482 483 large_attr.attr3.attr_set = 0; 484 large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY; 485 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, 486 (void *)&large_attr, sizeof(large_attr)), 0); 487 488 expected_flags = old_flags; 489 expected_flags |= MS_RDONLY; 490 491 new_flags = read_mnt_flags("/mnt/A"); 492 ASSERT_EQ(new_flags, expected_flags); 493 494 new_flags = read_mnt_flags("/mnt/A/AA"); 495 ASSERT_EQ(new_flags, expected_flags); 496 497 new_flags = read_mnt_flags("/mnt/A/AA/B"); 498 ASSERT_EQ(new_flags, expected_flags); 499 500 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 501 ASSERT_EQ(new_flags, expected_flags); 502 } 503 504 TEST_F(mount_setattr, basic) 505 { 506 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 507 struct mount_attr attr = { 508 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 509 .attr_clr = MOUNT_ATTR__ATIME, 510 }; 511 512 if (!mount_setattr_supported()) 513 SKIP(return, "mount_setattr syscall not supported"); 514 515 old_flags = read_mnt_flags("/mnt/A"); 516 ASSERT_GT(old_flags, 0); 517 518 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0); 519 520 expected_flags = old_flags; 521 expected_flags |= MS_RDONLY; 522 expected_flags |= MS_NOEXEC; 523 expected_flags &= ~MS_NOATIME; 524 expected_flags |= MS_RELATIME; 525 526 new_flags = read_mnt_flags("/mnt/A"); 527 ASSERT_EQ(new_flags, expected_flags); 528 529 new_flags = read_mnt_flags("/mnt/A/AA"); 530 ASSERT_EQ(new_flags, old_flags); 531 532 new_flags = read_mnt_flags("/mnt/A/AA/B"); 533 ASSERT_EQ(new_flags, old_flags); 534 535 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 536 ASSERT_EQ(new_flags, old_flags); 537 } 538 539 TEST_F(mount_setattr, basic_recursive) 540 { 541 int fd; 542 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 543 struct mount_attr attr = { 544 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 545 .attr_clr = MOUNT_ATTR__ATIME, 546 }; 547 548 if (!mount_setattr_supported()) 549 SKIP(return, "mount_setattr syscall not supported"); 550 551 old_flags = read_mnt_flags("/mnt/A"); 552 ASSERT_GT(old_flags, 0); 553 554 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 555 556 expected_flags = old_flags; 557 expected_flags |= MS_RDONLY; 558 expected_flags |= MS_NOEXEC; 559 expected_flags &= ~MS_NOATIME; 560 expected_flags |= MS_RELATIME; 561 562 new_flags = read_mnt_flags("/mnt/A"); 563 ASSERT_EQ(new_flags, expected_flags); 564 565 new_flags = read_mnt_flags("/mnt/A/AA"); 566 ASSERT_EQ(new_flags, expected_flags); 567 568 new_flags = read_mnt_flags("/mnt/A/AA/B"); 569 ASSERT_EQ(new_flags, expected_flags); 570 571 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 572 ASSERT_EQ(new_flags, expected_flags); 573 574 memset(&attr, 0, sizeof(attr)); 575 attr.attr_clr = MOUNT_ATTR_RDONLY; 576 attr.propagation = MS_SHARED; 577 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 578 579 expected_flags &= ~MS_RDONLY; 580 new_flags = read_mnt_flags("/mnt/A"); 581 ASSERT_EQ(new_flags, expected_flags); 582 583 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 584 585 new_flags = read_mnt_flags("/mnt/A/AA"); 586 ASSERT_EQ(new_flags, expected_flags); 587 588 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 589 590 new_flags = read_mnt_flags("/mnt/A/AA/B"); 591 ASSERT_EQ(new_flags, expected_flags); 592 593 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 594 595 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 596 ASSERT_EQ(new_flags, expected_flags); 597 598 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 599 600 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); 601 ASSERT_GE(fd, 0); 602 603 /* 604 * We're holding a fd open for writing so this needs to fail somewhere 605 * in the middle and the mount options need to be unchanged. 606 */ 607 attr.attr_set = MOUNT_ATTR_RDONLY; 608 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 609 610 new_flags = read_mnt_flags("/mnt/A"); 611 ASSERT_EQ(new_flags, expected_flags); 612 613 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 614 615 new_flags = read_mnt_flags("/mnt/A/AA"); 616 ASSERT_EQ(new_flags, expected_flags); 617 618 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 619 620 new_flags = read_mnt_flags("/mnt/A/AA/B"); 621 ASSERT_EQ(new_flags, expected_flags); 622 623 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 624 625 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 626 ASSERT_EQ(new_flags, expected_flags); 627 628 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 629 630 EXPECT_EQ(close(fd), 0); 631 } 632 633 TEST_F(mount_setattr, mount_has_writers) 634 { 635 int fd, dfd; 636 unsigned int old_flags = 0, new_flags = 0; 637 struct mount_attr attr = { 638 .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, 639 .attr_clr = MOUNT_ATTR__ATIME, 640 .propagation = MS_SHARED, 641 }; 642 643 if (!mount_setattr_supported()) 644 SKIP(return, "mount_setattr syscall not supported"); 645 646 old_flags = read_mnt_flags("/mnt/A"); 647 ASSERT_GT(old_flags, 0); 648 649 fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); 650 ASSERT_GE(fd, 0); 651 652 /* 653 * We're holding a fd open to a mount somwhere in the middle so this 654 * needs to fail somewhere in the middle. After this the mount options 655 * need to be unchanged. 656 */ 657 ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 658 659 new_flags = read_mnt_flags("/mnt/A"); 660 ASSERT_EQ(new_flags, old_flags); 661 662 ASSERT_EQ(is_shared_mount("/mnt/A"), false); 663 664 new_flags = read_mnt_flags("/mnt/A/AA"); 665 ASSERT_EQ(new_flags, old_flags); 666 667 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false); 668 669 new_flags = read_mnt_flags("/mnt/A/AA/B"); 670 ASSERT_EQ(new_flags, old_flags); 671 672 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false); 673 674 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 675 ASSERT_EQ(new_flags, old_flags); 676 677 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false); 678 679 dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC); 680 ASSERT_GE(dfd, 0); 681 EXPECT_EQ(fsync(dfd), 0); 682 EXPECT_EQ(close(dfd), 0); 683 684 EXPECT_EQ(fsync(fd), 0); 685 EXPECT_EQ(close(fd), 0); 686 687 /* All writers are gone so this should succeed. */ 688 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 689 } 690 691 TEST_F(mount_setattr, mixed_mount_options) 692 { 693 unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0; 694 struct mount_attr attr = { 695 .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME, 696 .attr_set = MOUNT_ATTR_RELATIME, 697 }; 698 699 if (!mount_setattr_supported()) 700 SKIP(return, "mount_setattr syscall not supported"); 701 702 old_flags1 = read_mnt_flags("/mnt/B"); 703 ASSERT_GT(old_flags1, 0); 704 705 old_flags2 = read_mnt_flags("/mnt/B/BB"); 706 ASSERT_GT(old_flags2, 0); 707 708 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0); 709 710 expected_flags = old_flags2; 711 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); 712 expected_flags |= MS_RELATIME; 713 714 new_flags = read_mnt_flags("/mnt/B"); 715 ASSERT_EQ(new_flags, expected_flags); 716 717 expected_flags = old_flags2; 718 expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); 719 expected_flags |= MS_RELATIME; 720 721 new_flags = read_mnt_flags("/mnt/B/BB"); 722 ASSERT_EQ(new_flags, expected_flags); 723 } 724 725 TEST_F(mount_setattr, time_changes) 726 { 727 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 728 struct mount_attr attr = { 729 .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME, 730 }; 731 732 if (!mount_setattr_supported()) 733 SKIP(return, "mount_setattr syscall not supported"); 734 735 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 736 737 attr.attr_set = MOUNT_ATTR_STRICTATIME; 738 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 739 740 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; 741 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 742 743 attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; 744 attr.attr_clr = MOUNT_ATTR__ATIME; 745 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 746 747 attr.attr_set = 0; 748 attr.attr_clr = MOUNT_ATTR_STRICTATIME; 749 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 750 751 attr.attr_clr = MOUNT_ATTR_NOATIME; 752 ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 753 754 old_flags = read_mnt_flags("/mnt/A"); 755 ASSERT_GT(old_flags, 0); 756 757 attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME; 758 attr.attr_clr = MOUNT_ATTR__ATIME; 759 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 760 761 expected_flags = old_flags; 762 expected_flags |= MS_NOATIME; 763 expected_flags |= MS_NODIRATIME; 764 765 new_flags = read_mnt_flags("/mnt/A"); 766 ASSERT_EQ(new_flags, expected_flags); 767 768 new_flags = read_mnt_flags("/mnt/A/AA"); 769 ASSERT_EQ(new_flags, expected_flags); 770 771 new_flags = read_mnt_flags("/mnt/A/AA/B"); 772 ASSERT_EQ(new_flags, expected_flags); 773 774 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 775 ASSERT_EQ(new_flags, expected_flags); 776 777 memset(&attr, 0, sizeof(attr)); 778 attr.attr_set &= ~MOUNT_ATTR_NOATIME; 779 attr.attr_set |= MOUNT_ATTR_RELATIME; 780 attr.attr_clr |= MOUNT_ATTR__ATIME; 781 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 782 783 expected_flags &= ~MS_NOATIME; 784 expected_flags |= MS_RELATIME; 785 786 new_flags = read_mnt_flags("/mnt/A"); 787 ASSERT_EQ(new_flags, expected_flags); 788 789 new_flags = read_mnt_flags("/mnt/A/AA"); 790 ASSERT_EQ(new_flags, expected_flags); 791 792 new_flags = read_mnt_flags("/mnt/A/AA/B"); 793 ASSERT_EQ(new_flags, expected_flags); 794 795 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 796 ASSERT_EQ(new_flags, expected_flags); 797 798 memset(&attr, 0, sizeof(attr)); 799 attr.attr_set &= ~MOUNT_ATTR_RELATIME; 800 attr.attr_set |= MOUNT_ATTR_STRICTATIME; 801 attr.attr_clr |= MOUNT_ATTR__ATIME; 802 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 803 804 expected_flags &= ~MS_RELATIME; 805 806 new_flags = read_mnt_flags("/mnt/A"); 807 ASSERT_EQ(new_flags, expected_flags); 808 809 new_flags = read_mnt_flags("/mnt/A/AA"); 810 ASSERT_EQ(new_flags, expected_flags); 811 812 new_flags = read_mnt_flags("/mnt/A/AA/B"); 813 ASSERT_EQ(new_flags, expected_flags); 814 815 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 816 ASSERT_EQ(new_flags, expected_flags); 817 818 memset(&attr, 0, sizeof(attr)); 819 attr.attr_set &= ~MOUNT_ATTR_STRICTATIME; 820 attr.attr_set |= MOUNT_ATTR_NOATIME; 821 attr.attr_clr |= MOUNT_ATTR__ATIME; 822 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 823 824 expected_flags |= MS_NOATIME; 825 new_flags = read_mnt_flags("/mnt/A"); 826 ASSERT_EQ(new_flags, expected_flags); 827 828 new_flags = read_mnt_flags("/mnt/A/AA"); 829 ASSERT_EQ(new_flags, expected_flags); 830 831 new_flags = read_mnt_flags("/mnt/A/AA/B"); 832 ASSERT_EQ(new_flags, expected_flags); 833 834 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 835 ASSERT_EQ(new_flags, expected_flags); 836 837 memset(&attr, 0, sizeof(attr)); 838 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 839 840 new_flags = read_mnt_flags("/mnt/A"); 841 ASSERT_EQ(new_flags, expected_flags); 842 843 new_flags = read_mnt_flags("/mnt/A/AA"); 844 ASSERT_EQ(new_flags, expected_flags); 845 846 new_flags = read_mnt_flags("/mnt/A/AA/B"); 847 ASSERT_EQ(new_flags, expected_flags); 848 849 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 850 ASSERT_EQ(new_flags, expected_flags); 851 852 memset(&attr, 0, sizeof(attr)); 853 attr.attr_clr = MOUNT_ATTR_NODIRATIME; 854 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 855 856 expected_flags &= ~MS_NODIRATIME; 857 858 new_flags = read_mnt_flags("/mnt/A"); 859 ASSERT_EQ(new_flags, expected_flags); 860 861 new_flags = read_mnt_flags("/mnt/A/AA"); 862 ASSERT_EQ(new_flags, expected_flags); 863 864 new_flags = read_mnt_flags("/mnt/A/AA/B"); 865 ASSERT_EQ(new_flags, expected_flags); 866 867 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 868 ASSERT_EQ(new_flags, expected_flags); 869 } 870 871 TEST_F(mount_setattr, multi_threaded) 872 { 873 int i, j, nthreads, ret = 0; 874 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 875 pthread_attr_t pattr; 876 pthread_t threads[DEFAULT_THREADS]; 877 878 if (!mount_setattr_supported()) 879 SKIP(return, "mount_setattr syscall not supported"); 880 881 old_flags = read_mnt_flags("/mnt/A"); 882 ASSERT_GT(old_flags, 0); 883 884 /* Try to change mount options from multiple threads. */ 885 nthreads = get_nprocs_conf(); 886 if (nthreads > DEFAULT_THREADS) 887 nthreads = DEFAULT_THREADS; 888 889 pthread_attr_init(&pattr); 890 for (i = 0; i < nthreads; i++) 891 ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0); 892 893 for (j = 0; j < i; j++) { 894 void *retptr = NULL; 895 896 EXPECT_EQ(pthread_join(threads[j], &retptr), 0); 897 898 ret += ptr_to_int(retptr); 899 EXPECT_EQ(ret, 0); 900 } 901 pthread_attr_destroy(&pattr); 902 903 ASSERT_EQ(ret, 0); 904 905 expected_flags = old_flags; 906 expected_flags |= MS_RDONLY; 907 expected_flags |= MS_NOSUID; 908 new_flags = read_mnt_flags("/mnt/A"); 909 ASSERT_EQ(new_flags, expected_flags); 910 911 ASSERT_EQ(is_shared_mount("/mnt/A"), true); 912 913 new_flags = read_mnt_flags("/mnt/A/AA"); 914 ASSERT_EQ(new_flags, expected_flags); 915 916 ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); 917 918 new_flags = read_mnt_flags("/mnt/A/AA/B"); 919 ASSERT_EQ(new_flags, expected_flags); 920 921 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); 922 923 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 924 ASSERT_EQ(new_flags, expected_flags); 925 926 ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); 927 } 928 929 TEST_F(mount_setattr, wrong_user_namespace) 930 { 931 int ret; 932 struct mount_attr attr = { 933 .attr_set = MOUNT_ATTR_RDONLY, 934 }; 935 936 if (!mount_setattr_supported()) 937 SKIP(return, "mount_setattr syscall not supported"); 938 939 EXPECT_EQ(create_and_enter_userns(), 0); 940 ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)); 941 ASSERT_LT(ret, 0); 942 ASSERT_EQ(errno, EPERM); 943 } 944 945 TEST_F(mount_setattr, wrong_mount_namespace) 946 { 947 int fd, ret; 948 struct mount_attr attr = { 949 .attr_set = MOUNT_ATTR_RDONLY, 950 }; 951 952 if (!mount_setattr_supported()) 953 SKIP(return, "mount_setattr syscall not supported"); 954 955 fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC); 956 ASSERT_GE(fd, 0); 957 958 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 959 960 ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr)); 961 ASSERT_LT(ret, 0); 962 ASSERT_EQ(errno, EINVAL); 963 } 964 965 FIXTURE(mount_setattr_idmapped) { 966 }; 967 968 FIXTURE_SETUP(mount_setattr_idmapped) 969 { 970 int img_fd = -EBADF; 971 972 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 973 974 ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0); 975 976 (void)umount2("/mnt", MNT_DETACH); 977 (void)umount2("/tmp", MNT_DETACH); 978 979 ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, 980 "size=100000,mode=700"), 0); 981 982 ASSERT_EQ(mkdir("/tmp/B", 0777), 0); 983 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0); 984 ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0); 985 986 ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, 987 "size=100000,mode=700"), 0); 988 989 ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); 990 ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0); 991 ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0); 992 993 ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, 994 "size=100000,mode=700"), 0); 995 996 ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, 997 "size=2m,mode=700"), 0); 998 999 ASSERT_EQ(mkdir("/mnt/A", 0777), 0); 1000 1001 ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, 1002 "size=100000,mode=700"), 0); 1003 1004 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); 1005 1006 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); 1007 1008 ASSERT_EQ(mkdir("/mnt/B", 0777), 0); 1009 1010 ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", 1011 MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); 1012 1013 ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); 1014 1015 ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", 1016 MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); 1017 1018 ASSERT_EQ(mkdir("/mnt/C", 0777), 0); 1019 ASSERT_EQ(mkdir("/mnt/D", 0777), 0); 1020 img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); 1021 ASSERT_GE(img_fd, 0); 1022 ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0); 1023 ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); 1024 ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); 1025 ASSERT_EQ(close(img_fd), 0); 1026 } 1027 1028 FIXTURE_TEARDOWN(mount_setattr_idmapped) 1029 { 1030 (void)umount2("/mnt/A", MNT_DETACH); 1031 (void)umount2("/tmp", MNT_DETACH); 1032 } 1033 1034 /** 1035 * Validate that negative fd values are rejected. 1036 */ 1037 TEST_F(mount_setattr_idmapped, invalid_fd_negative) 1038 { 1039 struct mount_attr attr = { 1040 .attr_set = MOUNT_ATTR_IDMAP, 1041 .userns_fd = -EBADF, 1042 }; 1043 1044 if (!mount_setattr_supported()) 1045 SKIP(return, "mount_setattr syscall not supported"); 1046 1047 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1048 TH_LOG("failure: created idmapped mount with negative fd"); 1049 } 1050 } 1051 1052 /** 1053 * Validate that excessively large fd values are rejected. 1054 */ 1055 TEST_F(mount_setattr_idmapped, invalid_fd_large) 1056 { 1057 struct mount_attr attr = { 1058 .attr_set = MOUNT_ATTR_IDMAP, 1059 .userns_fd = INT64_MAX, 1060 }; 1061 1062 if (!mount_setattr_supported()) 1063 SKIP(return, "mount_setattr syscall not supported"); 1064 1065 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1066 TH_LOG("failure: created idmapped mount with too large fd value"); 1067 } 1068 } 1069 1070 /** 1071 * Validate that closed fd values are rejected. 1072 */ 1073 TEST_F(mount_setattr_idmapped, invalid_fd_closed) 1074 { 1075 int fd; 1076 struct mount_attr attr = { 1077 .attr_set = MOUNT_ATTR_IDMAP, 1078 }; 1079 1080 if (!mount_setattr_supported()) 1081 SKIP(return, "mount_setattr syscall not supported"); 1082 1083 fd = open("/dev/null", O_RDONLY | O_CLOEXEC); 1084 ASSERT_GE(fd, 0); 1085 ASSERT_GE(close(fd), 0); 1086 1087 attr.userns_fd = fd; 1088 ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { 1089 TH_LOG("failure: created idmapped mount with closed fd"); 1090 } 1091 } 1092 1093 /** 1094 * Validate that the initial user namespace is rejected. 1095 */ 1096 TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns) 1097 { 1098 int open_tree_fd = -EBADF; 1099 struct mount_attr attr = { 1100 .attr_set = MOUNT_ATTR_IDMAP, 1101 }; 1102 1103 if (!mount_setattr_supported()) 1104 SKIP(return, "mount_setattr syscall not supported"); 1105 1106 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1107 AT_NO_AUTOMOUNT | 1108 AT_SYMLINK_NOFOLLOW | 1109 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1110 ASSERT_GE(open_tree_fd, 0); 1111 1112 attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC); 1113 ASSERT_GE(attr.userns_fd, 0); 1114 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1115 ASSERT_EQ(errno, EPERM); 1116 ASSERT_EQ(close(attr.userns_fd), 0); 1117 ASSERT_EQ(close(open_tree_fd), 0); 1118 } 1119 1120 static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid, 1121 unsigned long range) 1122 { 1123 char map[100], procfile[256]; 1124 1125 snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid); 1126 snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); 1127 if (write_file(procfile, map, strlen(map))) 1128 return -1; 1129 1130 1131 snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid); 1132 snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); 1133 if (write_file(procfile, map, strlen(map))) 1134 return -1; 1135 1136 return 0; 1137 } 1138 1139 #define __STACK_SIZE (8 * 1024 * 1024) 1140 static pid_t do_clone(int (*fn)(void *), void *arg, int flags) 1141 { 1142 void *stack; 1143 1144 stack = malloc(__STACK_SIZE); 1145 if (!stack) 1146 return -ENOMEM; 1147 1148 #ifdef __ia64__ 1149 return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL); 1150 #else 1151 return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL); 1152 #endif 1153 } 1154 1155 static int get_userns_fd_cb(void *data) 1156 { 1157 return kill(getpid(), SIGSTOP); 1158 } 1159 1160 static int wait_for_pid(pid_t pid) 1161 { 1162 int status, ret; 1163 1164 again: 1165 ret = waitpid(pid, &status, 0); 1166 if (ret == -1) { 1167 if (errno == EINTR) 1168 goto again; 1169 1170 return -1; 1171 } 1172 1173 if (!WIFEXITED(status)) 1174 return -1; 1175 1176 return WEXITSTATUS(status); 1177 } 1178 1179 static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range) 1180 { 1181 int ret; 1182 pid_t pid; 1183 char path[256]; 1184 1185 pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER); 1186 if (pid < 0) 1187 return -errno; 1188 1189 ret = map_ids(pid, nsid, hostid, range); 1190 if (ret < 0) 1191 return ret; 1192 1193 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); 1194 ret = open(path, O_RDONLY | O_CLOEXEC); 1195 kill(pid, SIGKILL); 1196 wait_for_pid(pid); 1197 return ret; 1198 } 1199 1200 /** 1201 * Validate that an attached mount in our mount namespace cannot be idmapped. 1202 * (The kernel enforces that the mount's mount namespace and the caller's mount 1203 * namespace match.) 1204 */ 1205 TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) 1206 { 1207 int open_tree_fd = -EBADF; 1208 struct mount_attr attr = { 1209 .attr_set = MOUNT_ATTR_IDMAP, 1210 }; 1211 1212 if (!mount_setattr_supported()) 1213 SKIP(return, "mount_setattr syscall not supported"); 1214 1215 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1216 AT_EMPTY_PATH | 1217 AT_NO_AUTOMOUNT | 1218 AT_SYMLINK_NOFOLLOW | 1219 OPEN_TREE_CLOEXEC); 1220 ASSERT_GE(open_tree_fd, 0); 1221 1222 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1223 ASSERT_GE(attr.userns_fd, 0); 1224 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1225 ASSERT_EQ(close(attr.userns_fd), 0); 1226 ASSERT_EQ(close(open_tree_fd), 0); 1227 } 1228 1229 /** 1230 * Validate that idmapping a mount is rejected if the mount's mount namespace 1231 * and our mount namespace don't match. 1232 * (The kernel enforces that the mount's mount namespace and the caller's mount 1233 * namespace match.) 1234 */ 1235 TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) 1236 { 1237 int open_tree_fd = -EBADF; 1238 struct mount_attr attr = { 1239 .attr_set = MOUNT_ATTR_IDMAP, 1240 }; 1241 1242 if (!mount_setattr_supported()) 1243 SKIP(return, "mount_setattr syscall not supported"); 1244 1245 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1246 AT_EMPTY_PATH | 1247 AT_NO_AUTOMOUNT | 1248 AT_SYMLINK_NOFOLLOW | 1249 OPEN_TREE_CLOEXEC); 1250 ASSERT_GE(open_tree_fd, 0); 1251 1252 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1253 1254 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1255 ASSERT_GE(attr.userns_fd, 0); 1256 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, 1257 sizeof(attr)), 0); 1258 ASSERT_EQ(close(attr.userns_fd), 0); 1259 ASSERT_EQ(close(open_tree_fd), 0); 1260 } 1261 1262 /** 1263 * Validate that an attached mount in our mount namespace can be idmapped. 1264 */ 1265 TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace) 1266 { 1267 int open_tree_fd = -EBADF; 1268 struct mount_attr attr = { 1269 .attr_set = MOUNT_ATTR_IDMAP, 1270 }; 1271 1272 if (!mount_setattr_supported()) 1273 SKIP(return, "mount_setattr syscall not supported"); 1274 1275 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1276 AT_EMPTY_PATH | 1277 AT_NO_AUTOMOUNT | 1278 AT_SYMLINK_NOFOLLOW | 1279 OPEN_TREE_CLOEXEC | 1280 OPEN_TREE_CLONE); 1281 ASSERT_GE(open_tree_fd, 0); 1282 1283 /* Changing mount properties on a detached mount. */ 1284 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1285 ASSERT_GE(attr.userns_fd, 0); 1286 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1287 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1288 ASSERT_EQ(close(attr.userns_fd), 0); 1289 ASSERT_EQ(close(open_tree_fd), 0); 1290 } 1291 1292 /** 1293 * Validate that a detached mount not in our mount namespace can be idmapped. 1294 */ 1295 TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) 1296 { 1297 int open_tree_fd = -EBADF; 1298 struct mount_attr attr = { 1299 .attr_set = MOUNT_ATTR_IDMAP, 1300 }; 1301 1302 if (!mount_setattr_supported()) 1303 SKIP(return, "mount_setattr syscall not supported"); 1304 1305 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1306 AT_EMPTY_PATH | 1307 AT_NO_AUTOMOUNT | 1308 AT_SYMLINK_NOFOLLOW | 1309 OPEN_TREE_CLOEXEC | 1310 OPEN_TREE_CLONE); 1311 ASSERT_GE(open_tree_fd, 0); 1312 1313 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1314 1315 /* Changing mount properties on a detached mount. */ 1316 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1317 ASSERT_GE(attr.userns_fd, 0); 1318 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1319 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1320 ASSERT_EQ(close(attr.userns_fd), 0); 1321 ASSERT_EQ(close(open_tree_fd), 0); 1322 } 1323 1324 /** 1325 * Validate that currently changing the idmapping of an idmapped mount fails. 1326 */ 1327 TEST_F(mount_setattr_idmapped, change_idmapping) 1328 { 1329 int open_tree_fd = -EBADF; 1330 struct mount_attr attr = { 1331 .attr_set = MOUNT_ATTR_IDMAP, 1332 }; 1333 1334 if (!mount_setattr_supported()) 1335 SKIP(return, "mount_setattr syscall not supported"); 1336 1337 open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", 1338 AT_EMPTY_PATH | 1339 AT_NO_AUTOMOUNT | 1340 AT_SYMLINK_NOFOLLOW | 1341 OPEN_TREE_CLOEXEC | 1342 OPEN_TREE_CLONE); 1343 ASSERT_GE(open_tree_fd, 0); 1344 1345 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1346 ASSERT_GE(attr.userns_fd, 0); 1347 ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", 1348 AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1349 ASSERT_EQ(close(attr.userns_fd), 0); 1350 1351 /* Change idmapping on a detached mount that is already idmapped. */ 1352 attr.userns_fd = get_userns_fd(0, 20000, 10000); 1353 ASSERT_GE(attr.userns_fd, 0); 1354 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1355 ASSERT_EQ(close(attr.userns_fd), 0); 1356 ASSERT_EQ(close(open_tree_fd), 0); 1357 } 1358 1359 static bool expected_uid_gid(int dfd, const char *path, int flags, 1360 uid_t expected_uid, gid_t expected_gid) 1361 { 1362 int ret; 1363 struct stat st; 1364 1365 ret = fstatat(dfd, path, &st, flags); 1366 if (ret < 0) 1367 return false; 1368 1369 return st.st_uid == expected_uid && st.st_gid == expected_gid; 1370 } 1371 1372 TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) 1373 { 1374 int open_tree_fd = -EBADF; 1375 struct mount_attr attr = { 1376 .attr_set = MOUNT_ATTR_IDMAP, 1377 }; 1378 1379 if (!mount_setattr_supported()) 1380 SKIP(return, "mount_setattr syscall not supported"); 1381 1382 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); 1383 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); 1384 1385 ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV, 1386 "size=100000,mode=700"), 0); 1387 1388 ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); 1389 1390 ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); 1391 1392 open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", 1393 AT_RECURSIVE | 1394 AT_EMPTY_PATH | 1395 AT_NO_AUTOMOUNT | 1396 AT_SYMLINK_NOFOLLOW | 1397 OPEN_TREE_CLOEXEC | 1398 OPEN_TREE_CLONE); 1399 ASSERT_GE(open_tree_fd, 0); 1400 1401 attr.userns_fd = get_userns_fd(0, 10000, 10000); 1402 ASSERT_GE(attr.userns_fd, 0); 1403 ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); 1404 ASSERT_EQ(close(attr.userns_fd), 0); 1405 ASSERT_EQ(close(open_tree_fd), 0); 1406 1407 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); 1408 ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); 1409 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); 1410 ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); 1411 1412 (void)umount2("/mnt/A", MNT_DETACH); 1413 } 1414 1415 TEST_F(mount_setattr, mount_attr_nosymfollow) 1416 { 1417 int fd; 1418 unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; 1419 struct mount_attr attr = { 1420 .attr_set = MOUNT_ATTR_NOSYMFOLLOW, 1421 }; 1422 1423 if (!mount_setattr_supported()) 1424 SKIP(return, "mount_setattr syscall not supported"); 1425 1426 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1427 ASSERT_GT(fd, 0); 1428 ASSERT_EQ(close(fd), 0); 1429 1430 old_flags = read_mnt_flags("/mnt/A"); 1431 ASSERT_GT(old_flags, 0); 1432 1433 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 1434 1435 expected_flags = old_flags; 1436 expected_flags |= ST_NOSYMFOLLOW; 1437 1438 new_flags = read_mnt_flags("/mnt/A"); 1439 ASSERT_EQ(new_flags, expected_flags); 1440 1441 new_flags = read_mnt_flags("/mnt/A/AA"); 1442 ASSERT_EQ(new_flags, expected_flags); 1443 1444 new_flags = read_mnt_flags("/mnt/A/AA/B"); 1445 ASSERT_EQ(new_flags, expected_flags); 1446 1447 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 1448 ASSERT_EQ(new_flags, expected_flags); 1449 1450 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1451 ASSERT_LT(fd, 0); 1452 ASSERT_EQ(errno, ELOOP); 1453 1454 attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW; 1455 attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW; 1456 1457 ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); 1458 1459 expected_flags &= ~ST_NOSYMFOLLOW; 1460 new_flags = read_mnt_flags("/mnt/A"); 1461 ASSERT_EQ(new_flags, expected_flags); 1462 1463 new_flags = read_mnt_flags("/mnt/A/AA"); 1464 ASSERT_EQ(new_flags, expected_flags); 1465 1466 new_flags = read_mnt_flags("/mnt/A/AA/B"); 1467 ASSERT_EQ(new_flags, expected_flags); 1468 1469 new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); 1470 ASSERT_EQ(new_flags, expected_flags); 1471 1472 fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC); 1473 ASSERT_GT(fd, 0); 1474 ASSERT_EQ(close(fd), 0); 1475 } 1476 1477 TEST_F(mount_setattr, open_tree_detached) 1478 { 1479 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1480 struct statx stx; 1481 1482 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1483 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1484 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1485 OPEN_TREE_CLONE); 1486 ASSERT_GE(fd_tree_base, 0); 1487 /* 1488 * /mnt testing tmpfs 1489 * |-/mnt/A testing tmpfs 1490 * | `-/mnt/A/AA testing tmpfs 1491 * | `-/mnt/A/AA/B testing tmpfs 1492 * | `-/mnt/A/AA/B/BB testing tmpfs 1493 * `-/mnt/B testing ramfs 1494 */ 1495 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1496 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1497 ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0); 1498 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1499 ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0); 1500 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1501 ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0); 1502 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1503 1504 fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA", 1505 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1506 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1507 OPEN_TREE_CLONE); 1508 ASSERT_GE(fd_tree_subdir, 0); 1509 /* 1510 * /AA testing tmpfs 1511 * `-/AA/B testing tmpfs 1512 * `-/AA/B/BB testing tmpfs 1513 */ 1514 ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0); 1515 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1516 ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0); 1517 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1518 1519 ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1520 /* 1521 * /tmp/target1 testing tmpfs 1522 * `-/tmp/target1/B testing tmpfs 1523 * `-/tmp/target1/B/BB testing tmpfs 1524 */ 1525 ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0); 1526 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1527 ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0); 1528 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1529 ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0); 1530 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1531 1532 ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0); 1533 /* 1534 * /tmp/target2 testing tmpfs 1535 * |-/tmp/target2/A testing tmpfs 1536 * | `-/tmp/target2/A/AA testing tmpfs 1537 * | `-/tmp/target2/A/AA/B testing tmpfs 1538 * | `-/tmp/target2/A/AA/B/BB testing tmpfs 1539 * `-/tmp/target2/B testing ramfs 1540 */ 1541 ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0); 1542 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1543 ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0); 1544 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1545 ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0); 1546 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1547 ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0); 1548 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1549 ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0); 1550 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1551 ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0); 1552 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1553 1554 EXPECT_EQ(close(fd_tree_base), 0); 1555 EXPECT_EQ(close(fd_tree_subdir), 0); 1556 } 1557 1558 TEST_F(mount_setattr, open_tree_detached_fail) 1559 { 1560 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1561 struct statx stx; 1562 1563 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1564 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1565 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1566 OPEN_TREE_CLONE); 1567 ASSERT_GE(fd_tree_base, 0); 1568 /* 1569 * /mnt testing tmpfs 1570 * |-/mnt/A testing tmpfs 1571 * | `-/mnt/A/AA testing tmpfs 1572 * | `-/mnt/A/AA/B testing tmpfs 1573 * | `-/mnt/A/AA/B/BB testing tmpfs 1574 * `-/mnt/B testing ramfs 1575 */ 1576 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1577 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1578 ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0); 1579 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1580 ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0); 1581 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1582 ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0); 1583 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1584 1585 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 1586 1587 /* 1588 * The origin mount namespace of the anonymous mount namespace 1589 * of @fd_tree_base doesn't match the caller's mount namespace 1590 * anymore so creation of another detached mounts must fail. 1591 */ 1592 fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA", 1593 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1594 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1595 OPEN_TREE_CLONE); 1596 ASSERT_LT(fd_tree_subdir, 0); 1597 ASSERT_EQ(errno, EINVAL); 1598 } 1599 1600 TEST_F(mount_setattr, open_tree_detached_fail2) 1601 { 1602 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1603 struct statx stx; 1604 1605 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1606 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1607 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1608 OPEN_TREE_CLONE); 1609 ASSERT_GE(fd_tree_base, 0); 1610 /* 1611 * /mnt testing tmpfs 1612 * |-/mnt/A testing tmpfs 1613 * | `-/mnt/A/AA testing tmpfs 1614 * | `-/mnt/A/AA/B testing tmpfs 1615 * | `-/mnt/A/AA/B/BB testing tmpfs 1616 * `-/mnt/B testing ramfs 1617 */ 1618 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1619 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1620 ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0); 1621 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1622 ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0); 1623 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1624 ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0); 1625 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1626 1627 EXPECT_EQ(create_and_enter_userns(), 0); 1628 1629 /* 1630 * The caller entered a new user namespace. They will have 1631 * CAP_SYS_ADMIN in this user namespace. However, they're still 1632 * located in a mount namespace that is owned by an ancestor 1633 * user namespace in which they hold no privilege. Creating a 1634 * detached mount must thus fail. 1635 */ 1636 fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA", 1637 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1638 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1639 OPEN_TREE_CLONE); 1640 ASSERT_LT(fd_tree_subdir, 0); 1641 ASSERT_EQ(errno, EPERM); 1642 } 1643 1644 TEST_F(mount_setattr, open_tree_detached_fail3) 1645 { 1646 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1647 struct statx stx; 1648 1649 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1650 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1651 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1652 OPEN_TREE_CLONE); 1653 ASSERT_GE(fd_tree_base, 0); 1654 /* 1655 * /mnt testing tmpfs 1656 * |-/mnt/A testing tmpfs 1657 * | `-/mnt/A/AA testing tmpfs 1658 * | `-/mnt/A/AA/B testing tmpfs 1659 * | `-/mnt/A/AA/B/BB testing tmpfs 1660 * `-/mnt/B testing ramfs 1661 */ 1662 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1663 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1664 ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0); 1665 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1666 ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0); 1667 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1668 ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0); 1669 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1670 1671 EXPECT_EQ(prepare_unpriv_mountns(), 0); 1672 1673 /* 1674 * The caller entered a new mount namespace. They will have 1675 * CAP_SYS_ADMIN in the owning user namespace of their mount 1676 * namespace. 1677 * 1678 * However, the origin mount namespace of the anonymous mount 1679 * namespace of @fd_tree_base doesn't match the caller's mount 1680 * namespace anymore so creation of another detached mounts must 1681 * fail. 1682 */ 1683 fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA", 1684 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1685 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1686 OPEN_TREE_CLONE); 1687 ASSERT_LT(fd_tree_subdir, 0); 1688 ASSERT_EQ(errno, EINVAL); 1689 } 1690 1691 TEST_F(mount_setattr, open_tree_subfolder) 1692 { 1693 int fd_context, fd_tmpfs, fd_tree; 1694 1695 fd_context = sys_fsopen("tmpfs", 0); 1696 ASSERT_GE(fd_context, 0); 1697 1698 ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); 1699 1700 fd_tmpfs = sys_fsmount(fd_context, 0, 0); 1701 ASSERT_GE(fd_tmpfs, 0); 1702 1703 EXPECT_EQ(close(fd_context), 0); 1704 1705 ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0); 1706 1707 fd_tree = sys_open_tree(fd_tmpfs, "subdir", 1708 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1709 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1710 OPEN_TREE_CLONE); 1711 ASSERT_GE(fd_tree, 0); 1712 1713 EXPECT_EQ(close(fd_tmpfs), 0); 1714 1715 ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0); 1716 1717 ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0); 1718 1719 EXPECT_EQ(close(fd_tree), 0); 1720 1721 ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0); 1722 1723 EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0); 1724 } 1725 1726 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close) 1727 { 1728 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1729 struct statx stx; 1730 1731 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1732 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1733 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1734 ASSERT_GE(fd_tree_base, 0); 1735 /* 1736 * /mnt testing tmpfs 1737 */ 1738 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1739 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1740 1741 fd_tree_subdir = sys_open_tree(fd_tree_base, "", 1742 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1743 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC | 1744 OPEN_TREE_CLONE); 1745 ASSERT_GE(fd_tree_subdir, 0); 1746 /* 1747 * /mnt testing tmpfs 1748 */ 1749 ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0); 1750 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1751 1752 /* 1753 * /mnt testing tmpfs 1754 * `-/mnt testing tmpfs 1755 */ 1756 ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1757 ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0); 1758 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1759 1760 ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1761 1762 EXPECT_EQ(close(fd_tree_base), 0); 1763 EXPECT_EQ(close(fd_tree_subdir), 0); 1764 } 1765 1766 TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach) 1767 { 1768 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1769 struct statx stx; 1770 __u64 mnt_id = 0; 1771 1772 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1773 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1774 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1775 ASSERT_GE(fd_tree_base, 0); 1776 /* 1777 * /mnt testing tmpfs 1778 */ 1779 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1780 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1781 1782 fd_tree_subdir = sys_open_tree(fd_tree_base, "", 1783 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1784 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC | 1785 OPEN_TREE_CLONE); 1786 ASSERT_GE(fd_tree_subdir, 0); 1787 /* 1788 * /mnt testing tmpfs 1789 */ 1790 ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0); 1791 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1792 1793 /* 1794 * /mnt testing tmpfs 1795 * `-/mnt testing tmpfs 1796 */ 1797 ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1798 ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0); 1799 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1800 ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE); 1801 mnt_id = stx.stx_mnt_id; 1802 1803 ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1804 1805 ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1806 ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0); 1807 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1808 ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE); 1809 ASSERT_EQ(stx.stx_mnt_id, mnt_id); 1810 1811 EXPECT_EQ(close(fd_tree_base), 0); 1812 EXPECT_EQ(close(fd_tree_subdir), 0); 1813 } 1814 1815 TEST_F(mount_setattr, move_mount_detached_fail) 1816 { 1817 int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF; 1818 struct statx stx; 1819 1820 fd_tree_base = sys_open_tree(-EBADF, "/mnt", 1821 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1822 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1823 ASSERT_GE(fd_tree_base, 0); 1824 1825 /* Attach the mount to the caller's mount namespace. */ 1826 ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1827 1828 ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0); 1829 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1830 1831 fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B", 1832 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1833 OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 1834 ASSERT_GE(fd_tree_subdir, 0); 1835 ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0); 1836 ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1837 1838 /* Not allowed to move an attached mount to a detached mount. */ 1839 ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1840 ASSERT_EQ(errno, EINVAL); 1841 1842 EXPECT_EQ(close(fd_tree_base), 0); 1843 EXPECT_EQ(close(fd_tree_subdir), 0); 1844 } 1845 1846 TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close) 1847 { 1848 int fd_tree = -EBADF; 1849 struct statx stx; 1850 1851 fd_tree = sys_open_tree(-EBADF, "/mnt", 1852 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1853 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1854 OPEN_TREE_CLONE); 1855 ASSERT_GE(fd_tree, 0); 1856 1857 ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0); 1858 /* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */ 1859 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1860 1861 /* Attach the mount to the caller's mount namespace. */ 1862 ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1863 1864 ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0); 1865 ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT); 1866 1867 ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0); 1868 1869 /* 1870 * This tests whether dissolve_on_fput() handles a NULL mount 1871 * namespace correctly, i.e., that it doesn't splat. 1872 */ 1873 EXPECT_EQ(close(fd_tree), 0); 1874 } 1875 1876 TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached) 1877 { 1878 int fd_tree1 = -EBADF, fd_tree2 = -EBADF; 1879 1880 /* 1881 * |-/mnt/A testing tmpfs 1882 * `-/mnt/A/AA testing tmpfs 1883 * `-/mnt/A/AA/B testing tmpfs 1884 * `-/mnt/A/AA/B/BB testing tmpfs 1885 */ 1886 fd_tree1 = sys_open_tree(-EBADF, "/mnt/A", 1887 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1888 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1889 OPEN_TREE_CLONE); 1890 ASSERT_GE(fd_tree1, 0); 1891 1892 /* 1893 * `-/mnt/B testing ramfs 1894 */ 1895 fd_tree2 = sys_open_tree(-EBADF, "/mnt/B", 1896 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1897 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC | 1898 OPEN_TREE_CLONE); 1899 ASSERT_GE(fd_tree2, 0); 1900 1901 /* 1902 * Move the source detached mount tree to the target detached 1903 * mount tree. This will move all the mounts in the source mount 1904 * tree from the source anonymous mount namespace to the target 1905 * anonymous mount namespace. 1906 * 1907 * The source detached mount tree and the target detached mount 1908 * tree now both refer to the same anonymous mount namespace. 1909 * 1910 * |-"" testing ramfs 1911 * `-"" testing tmpfs 1912 * `-""/AA testing tmpfs 1913 * `-""/AA/B testing tmpfs 1914 * `-""/AA/B/BB testing tmpfs 1915 */ 1916 ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); 1917 1918 /* 1919 * The source detached mount tree @fd_tree1 is now an attached 1920 * mount, i.e., it has a parent. Specifically, it now has the 1921 * root mount of the mount tree of @fd_tree2 as its parent. 1922 * 1923 * That means we are no longer allowed to attach it as we only 1924 * allow attaching the root of an anonymous mount tree, not 1925 * random bits and pieces. Verify that the kernel enforces this. 1926 */ 1927 ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1928 1929 /* 1930 * Closing the source detached mount tree must not unmount and 1931 * free the shared anonymous mount namespace. The kernel will 1932 * quickly yell at us because the anonymous mount namespace 1933 * won't be empty when it's freed. 1934 */ 1935 EXPECT_EQ(close(fd_tree1), 0); 1936 1937 /* 1938 * Attach the mount tree to a non-anonymous mount namespace. 1939 * This can only succeed if closing fd_tree1 had proper 1940 * semantics and didn't cause the anonymous mount namespace to 1941 * be freed. If it did this will trigger a UAF which will be 1942 * visible on any KASAN enabled kernel. 1943 * 1944 * |-/tmp/target1 testing ramfs 1945 * `-/tmp/target1 testing tmpfs 1946 * `-/tmp/target1/AA testing tmpfs 1947 * `-/tmp/target1/AA/B testing tmpfs 1948 * `-/tmp/target1/AA/B/BB testing tmpfs 1949 */ 1950 ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1951 EXPECT_EQ(close(fd_tree2), 0); 1952 } 1953 1954 TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace) 1955 { 1956 int fd_tree1 = -EBADF, fd_tree2 = -EBADF; 1957 1958 /* 1959 * Copy the following mount tree: 1960 * 1961 * |-/mnt/A testing tmpfs 1962 * `-/mnt/A/AA testing tmpfs 1963 * `-/mnt/A/AA/B testing tmpfs 1964 * `-/mnt/A/AA/B/BB testing tmpfs 1965 */ 1966 fd_tree1 = sys_open_tree(-EBADF, "/mnt/A", 1967 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1968 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 1969 OPEN_TREE_CLONE); 1970 ASSERT_GE(fd_tree1, 0); 1971 1972 /* 1973 * Create an O_PATH file descriptors with a separate struct file 1974 * that refers to the same detached mount tree as @fd_tree1 1975 */ 1976 fd_tree2 = sys_open_tree(fd_tree1, "", 1977 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 1978 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC); 1979 ASSERT_GE(fd_tree2, 0); 1980 1981 /* 1982 * Copy the following mount tree: 1983 * 1984 * |-/tmp/target1 testing tmpfs 1985 * `-/tmp/target1/AA testing tmpfs 1986 * `-/tmp/target1/AA/B testing tmpfs 1987 * `-/tmp/target1/AA/B/BB testing tmpfs 1988 */ 1989 ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1990 1991 /* 1992 * This must fail as this would mean adding the same mount tree 1993 * into the same mount tree. 1994 */ 1995 ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 1996 } 1997 1998 TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace) 1999 { 2000 int fd_tree1 = -EBADF, fd_tree2 = -EBADF; 2001 2002 /* 2003 * Copy the following mount tree: 2004 * 2005 * |-/mnt/A testing tmpfs 2006 * `-/mnt/A/AA testing tmpfs 2007 * `-/mnt/A/AA/B testing tmpfs 2008 * `-/mnt/A/AA/B/BB testing tmpfs 2009 */ 2010 fd_tree1 = sys_open_tree(-EBADF, "/mnt/A", 2011 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 2012 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 2013 OPEN_TREE_CLONE); 2014 ASSERT_GE(fd_tree1, 0); 2015 2016 /* 2017 * Create an O_PATH file descriptors with a separate struct file that 2018 * refers to a subtree of the same detached mount tree as @fd_tree1 2019 */ 2020 fd_tree2 = sys_open_tree(fd_tree1, "AA", 2021 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 2022 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC); 2023 ASSERT_GE(fd_tree2, 0); 2024 2025 /* 2026 * This must fail as it is only possible to attach the root of a 2027 * detached mount tree. 2028 */ 2029 ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 2030 2031 ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0); 2032 } 2033 2034 TEST_F(mount_setattr, detached_tree_propagation) 2035 { 2036 int fd_tree = -EBADF; 2037 struct statx stx1, stx2, stx3, stx4; 2038 2039 ASSERT_EQ(unshare(CLONE_NEWNS), 0); 2040 ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0); 2041 2042 /* 2043 * Copy the following mount tree: 2044 * 2045 * /mnt testing tmpfs 2046 * |-/mnt/A testing tmpfs 2047 * | `-/mnt/A/AA testing tmpfs 2048 * | `-/mnt/A/AA/B testing tmpfs 2049 * | `-/mnt/A/AA/B/BB testing tmpfs 2050 * `-/mnt/B testing ramfs 2051 */ 2052 fd_tree = sys_open_tree(-EBADF, "/mnt", 2053 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | 2054 AT_RECURSIVE | OPEN_TREE_CLOEXEC | 2055 OPEN_TREE_CLONE); 2056 ASSERT_GE(fd_tree, 0); 2057 2058 ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0); 2059 ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0); 2060 2061 /* 2062 * Copying the mount namespace like done above doesn't alter the 2063 * mounts in any way so the filesystem mounted on /mnt must be 2064 * identical even though the mounts will differ. Use the device 2065 * information to verify that. Note that tmpfs will have a 0 2066 * major number so comparing the major number is misleading. 2067 */ 2068 ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor); 2069 2070 /* Mount a tmpfs filesystem over /mnt/A. */ 2071 ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0); 2072 2073 2074 ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0); 2075 ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0); 2076 2077 /* 2078 * A new filesystem has been mounted on top of /mnt/A which 2079 * means that the device information will be different for any 2080 * statx() that was taken from /mnt/A before the mount compared 2081 * to one after the mount. 2082 */ 2083 ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor); 2084 ASSERT_EQ(stx1.stx_dev_minor, stx4.stx_dev_minor); 2085 2086 EXPECT_EQ(close(fd_tree), 0); 2087 } 2088 2089 TEST_HARNESS_MAIN 2090