1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 #ifndef _GNU_SOURCE 23 #define _GNU_SOURCE 24 #endif 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <stdbool.h> 29 #include <stddef.h> 30 #include <string.h> 31 #include <linux/types.h> 32 #include <sys/wait.h> 33 #include <sys/stat.h> 34 #include <sys/mount.h> 35 #include <fcntl.h> 36 #include <errno.h> 37 #include <sched.h> 38 #include <syscall.h> 39 #include <sys/socket.h> 40 41 #include <sys/list.h> 42 43 #ifndef UINT_MAX 44 #define UINT_MAX 4294967295U 45 #endif 46 47 #ifndef __NR_Linux 48 #if defined __alpha__ 49 #define __NR_Linux 110 50 #elif defined _MIPS_SIM 51 #if _MIPS_SIM == _MIPS_SIM_ABI32 52 #define __NR_Linux 4000 53 #endif 54 #if _MIPS_SIM == _MIPS_SIM_NABI32 55 #define __NR_Linux 6000 56 #endif 57 #if _MIPS_SIM == _MIPS_SIM_ABI64 58 #define __NR_Linux 5000 59 #endif 60 #elif defined __ia64__ 61 #define __NR_Linux 1024 62 #else 63 #define __NR_Linux 0 64 #endif 65 #endif 66 67 #ifndef __NR_mount_setattr 68 #define __NR_mount_setattr (442 + __NR_Linux) 69 #endif 70 71 #ifndef __NR_open_tree 72 #define __NR_open_tree (428 + __NR_Linux) 73 #endif 74 75 #ifndef __NR_move_mount 76 #define __NR_move_mount (429 + __NR_Linux) 77 #endif 78 79 #ifndef MNT_DETACH 80 #define MNT_DETACH 2 81 #endif 82 83 #ifndef MOVE_MOUNT_F_EMPTY_PATH 84 #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 85 #endif 86 87 #ifndef MOUNT_ATTR_IDMAP 88 #define MOUNT_ATTR_IDMAP 0x00100000 89 #endif 90 91 #ifndef OPEN_TREE_CLONE 92 #define OPEN_TREE_CLONE 1 93 #endif 94 95 #ifndef OPEN_TREE_CLOEXEC 96 #define OPEN_TREE_CLOEXEC O_CLOEXEC 97 #endif 98 99 #ifndef AT_RECURSIVE 100 #define AT_RECURSIVE 0x8000 101 #endif 102 103 typedef struct { 104 __u64 attr_set; 105 __u64 attr_clr; 106 __u64 propagation; 107 __u64 userns_fd; 108 } mount_attr_t; 109 110 static inline int 111 sys_mount_setattr(int dfd, const char *path, unsigned int flags, 112 mount_attr_t *attr, size_t size) 113 { 114 return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size)); 115 } 116 117 static inline int 118 sys_open_tree(int dfd, const char *filename, unsigned int flags) 119 { 120 return (syscall(__NR_open_tree, dfd, filename, flags)); 121 } 122 123 static inline int sys_move_mount(int from_dfd, const char *from_pathname, 124 int to_dfd, const char *to_pathname, unsigned int flags) 125 { 126 return (syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, 127 to_pathname, flags)); 128 } 129 130 typedef enum idmap_type_t { 131 TYPE_UID, 132 TYPE_GID, 133 TYPE_BOTH 134 } idmap_type_t; 135 136 struct idmap_entry { 137 __u32 first; 138 __u32 lower_first; 139 __u32 count; 140 idmap_type_t type; 141 list_node_t node; 142 }; 143 144 static void 145 log_msg(const char *msg, ...) 146 { 147 va_list ap; 148 149 va_start(ap, msg); 150 vfprintf(stderr, msg, ap); 151 fputc('\n', stderr); 152 va_end(ap); 153 } 154 155 #define log_errno(msg, args...) \ 156 do { \ 157 log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\ 158 __FUNCTION__, ##args); \ 159 } while (0) 160 161 /* 162 * Parse the idmapping in the following format 163 * and add to the list: 164 * 165 * u:nsid_first:hostid_first:count 166 * g:nsid_first:hostid_first:count 167 * b:nsid_first:hostid_first:count 168 * 169 * The delimiter can be : or space character. 170 * 171 * Return: 172 * 0 if success 173 * ENOMEM if out of memory 174 * EINVAL if wrong arg or input 175 */ 176 static int 177 parse_idmap_entry(list_t *head, char *input) 178 { 179 char *token, *savedptr = NULL; 180 struct idmap_entry *entry; 181 unsigned long ul; 182 char *delimiter = (char *)": "; 183 char c; 184 185 if (!input || !head) 186 return (EINVAL); 187 entry = malloc(sizeof (*entry)); 188 if (!entry) 189 return (ENOMEM); 190 191 token = strtok_r(input, delimiter, &savedptr); 192 if (token) 193 c = token[0]; 194 if (!token || (c != 'b' && c != 'u' && c != 'g')) 195 goto errout; 196 entry->type = (c == 'b') ? TYPE_BOTH : 197 ((c == 'u') ? TYPE_UID : TYPE_GID); 198 199 token = strtok_r(NULL, delimiter, &savedptr); 200 if (!token) 201 goto errout; 202 ul = strtoul(token, NULL, 10); 203 if (ul > UINT_MAX || errno != 0) 204 goto errout; 205 entry->first = (__u32)ul; 206 207 token = strtok_r(NULL, delimiter, &savedptr); 208 if (!token) 209 goto errout; 210 ul = strtoul(token, NULL, 10); 211 if (ul > UINT_MAX || errno != 0) 212 goto errout; 213 entry->lower_first = (__u32)ul; 214 215 token = strtok_r(NULL, delimiter, &savedptr); 216 if (!token) 217 goto errout; 218 ul = strtoul(token, NULL, 10); 219 if (ul > UINT_MAX || errno != 0) 220 goto errout; 221 entry->count = (__u32)ul; 222 223 list_insert_tail(head, entry); 224 225 return (0); 226 227 errout: 228 free(entry); 229 return (EINVAL); 230 } 231 232 /* 233 * Release all the entries in the list 234 */ 235 static void 236 free_idmap(list_t *head) 237 { 238 struct idmap_entry *entry; 239 240 while ((entry = list_remove_head(head)) != NULL) 241 free(entry); 242 /* list_destroy() to be done by the caller */ 243 } 244 245 /* 246 * Write all bytes in the buffer to fd 247 */ 248 static ssize_t 249 write_buf(int fd, const char *buf, size_t buf_size) 250 { 251 ssize_t written, total_written = 0; 252 size_t remaining = buf_size; 253 char *position = (char *)buf; 254 255 for (;;) { 256 written = write(fd, position, remaining); 257 if (written < 0 && errno == EINTR) 258 continue; 259 if (written < 0) { 260 log_errno("write"); 261 return (written); 262 } 263 total_written += written; 264 if (total_written == buf_size) 265 break; 266 remaining -= written; 267 position += written; 268 } 269 270 return (total_written); 271 } 272 273 /* 274 * Read data from file into buffer 275 */ 276 static ssize_t 277 read_buf(int fd, char *buf, size_t buf_size) 278 { 279 int ret; 280 for (;;) { 281 ret = read(fd, buf, buf_size); 282 if (ret < 0 && errno == EINTR) 283 continue; 284 break; 285 } 286 if (ret < 0) 287 log_errno("read"); 288 return (ret); 289 } 290 291 /* 292 * Write idmap of the given type in the buffer to the 293 * process' uid_map or gid_map proc file. 294 * 295 * Return: 296 * 0 if success 297 * errno if there's any error 298 */ 299 static int 300 write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type) 301 { 302 char path[PATH_MAX]; 303 int fd = -EBADF; 304 int ret; 305 306 (void) snprintf(path, sizeof (path), "/proc/%d/%cid_map", 307 pid, type == TYPE_UID ? 'u' : 'g'); 308 fd = open(path, O_WRONLY | O_CLOEXEC); 309 if (fd < 0) { 310 ret = errno; 311 log_errno("open(%s)", path); 312 goto out; 313 } 314 ret = write_buf(fd, buf, buf_size); 315 if (ret < 0) 316 ret = errno; 317 else 318 ret = 0; 319 out: 320 if (fd >= 0) 321 close(fd); 322 return (ret); 323 } 324 325 /* 326 * Write idmap info in the list to the process 327 * user namespace, i.e. its /proc/<pid>/uid_map 328 * and /proc/<pid>/gid_map file. 329 * 330 * Return: 331 * 0 if success 332 * errno if it fails 333 */ 334 static int 335 write_pid_idmaps(pid_t pid, list_t *head) 336 { 337 char *buf_uids, *buf_gids; 338 char *curr_bufu, *curr_bufg; 339 /* max 4k to be allowed for each map */ 340 int size_buf_uids = 4096, size_buf_gids = 4096; 341 struct idmap_entry *entry; 342 int uid_filled, gid_filled; 343 int ret = 0; 344 int has_uids = 0, has_gids = 0; 345 size_t buf_size; 346 347 buf_uids = malloc(size_buf_uids); 348 if (!buf_uids) 349 return (ENOMEM); 350 buf_gids = malloc(size_buf_gids); 351 if (!buf_gids) { 352 free(buf_uids); 353 return (ENOMEM); 354 } 355 curr_bufu = buf_uids; 356 curr_bufg = buf_gids; 357 358 for (entry = list_head(head); entry; entry = list_next(head, entry)) { 359 if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) { 360 uid_filled = snprintf(curr_bufu, size_buf_uids, 361 "%u %u %u\n", entry->first, entry->lower_first, 362 entry->count); 363 if (uid_filled <= 0 || uid_filled >= size_buf_uids) { 364 ret = E2BIG; 365 goto out; 366 } 367 curr_bufu += uid_filled; 368 size_buf_uids -= uid_filled; 369 has_uids = 1; 370 } 371 if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) { 372 gid_filled = snprintf(curr_bufg, size_buf_gids, 373 "%u %u %u\n", entry->first, entry->lower_first, 374 entry->count); 375 if (gid_filled <= 0 || gid_filled >= size_buf_gids) { 376 ret = E2BIG; 377 goto out; 378 } 379 curr_bufg += gid_filled; 380 size_buf_gids -= gid_filled; 381 has_gids = 1; 382 } 383 } 384 if (has_uids) { 385 buf_size = curr_bufu - buf_uids; 386 ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID); 387 if (ret) 388 goto out; 389 } 390 if (has_gids) { 391 buf_size = curr_bufg - buf_gids; 392 ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID); 393 } 394 395 out: 396 free(buf_uids); 397 free(buf_gids); 398 return (ret); 399 } 400 401 /* 402 * Wait for the child process to exit 403 * and reap it. 404 * 405 * Return: 406 * process exit code if available 407 */ 408 static int 409 wait_for_pid(pid_t pid) 410 { 411 int status; 412 int ret; 413 414 for (;;) { 415 ret = waitpid(pid, &status, 0); 416 if (ret < 0) { 417 if (errno == EINTR) 418 continue; 419 return (EXIT_FAILURE); 420 } 421 break; 422 } 423 if (!WIFEXITED(status)) 424 return (EXIT_FAILURE); 425 return (WEXITSTATUS(status)); 426 } 427 428 /* 429 * Get the file descriptor of the process user namespace 430 * given its pid. 431 * 432 * Return: 433 * fd if success 434 * -1 if it fails 435 */ 436 static int 437 userns_fd_from_pid(pid_t pid) 438 { 439 int fd; 440 char path[PATH_MAX]; 441 442 (void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid); 443 fd = open(path, O_RDONLY | O_CLOEXEC); 444 if (fd < 0) 445 log_errno("open(%s)", path); 446 return (fd); 447 } 448 449 /* 450 * Get the user namespace file descriptor given a list 451 * of idmap info. 452 * 453 * Return: 454 * fd if success 455 * -errno if it fails 456 */ 457 static int 458 userns_fd_from_idmap(list_t *head) 459 { 460 pid_t pid; 461 int ret, fd; 462 int fds[2]; 463 char c; 464 int saved_errno = 0; 465 466 /* socketpair for bidirectional communication */ 467 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds); 468 if (ret) { 469 log_errno("socketpair"); 470 return (-errno); 471 } 472 473 pid = fork(); 474 if (pid < 0) { 475 log_errno("fork"); 476 fd = -errno; 477 goto out; 478 } 479 480 if (pid == 0) { 481 /* child process */ 482 ret = unshare(CLONE_NEWUSER); 483 if (ret == 0) { 484 /* notify the parent of success */ 485 ret = write_buf(fds[1], "1", 1); 486 if (ret < 0) 487 saved_errno = errno; 488 else { 489 /* 490 * Until the parent has written to idmap, 491 * we cannot exit, otherwise the defunct 492 * process is owned by the real root, writing 493 * to its idmap ends up with EPERM in the 494 * context of a user ns 495 */ 496 ret = read_buf(fds[1], &c, 1); 497 if (ret < 0) 498 saved_errno = errno; 499 } 500 } else { 501 saved_errno = errno; 502 log_errno("unshare"); 503 ret = write_buf(fds[1], "0", 1); 504 if (ret < 0) 505 saved_errno = errno; 506 } 507 exit(saved_errno); 508 } 509 510 /* parent process */ 511 ret = read_buf(fds[0], &c, 1); 512 if (ret == 1 && c == '1') { 513 ret = write_pid_idmaps(pid, head); 514 if (!ret) { 515 fd = userns_fd_from_pid(pid); 516 if (fd < 0) 517 fd = -errno; 518 } else { 519 fd = -ret; 520 } 521 /* Let child know it can exit */ 522 (void) write_buf(fds[0], "1", 1); 523 } else { 524 fd = -EBADF; 525 } 526 (void) wait_for_pid(pid); 527 out: 528 close(fds[0]); 529 close(fds[1]); 530 return (fd); 531 } 532 533 /* 534 * Check if the operating system supports idmapped mount on the 535 * given path or not. 536 * 537 * Return: 538 * true if supported 539 * false if not supported 540 */ 541 static bool 542 is_idmap_supported(char *path) 543 { 544 list_t head; 545 int ret; 546 int tree_fd = -EBADF, path_fd = -EBADF; 547 mount_attr_t attr = { 548 .attr_set = MOUNT_ATTR_IDMAP, 549 .userns_fd = -EBADF, 550 }; 551 552 /* strtok_r() won't be happy with a const string */ 553 /* To check if idmapped mount can be done in a user ns, map 0 to 0 */ 554 char *input = strdup("b:0:0:1"); 555 556 if (!input) { 557 errno = ENOMEM; 558 log_errno("strdup"); 559 return (false); 560 } 561 562 list_create(&head, sizeof (struct idmap_entry), 563 offsetof(struct idmap_entry, node)); 564 ret = parse_idmap_entry(&head, input); 565 if (ret) { 566 errno = ret; 567 log_errno("parse_idmap_entry(%s)", input); 568 goto out1; 569 } 570 ret = userns_fd_from_idmap(&head); 571 if (ret < 0) 572 goto out1; 573 attr.userns_fd = ret; 574 ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC); 575 if (ret < 0) { 576 log_errno("openat(%s)", path); 577 goto out; 578 } 579 path_fd = ret; 580 ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT | 581 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); 582 if (ret < 0) { 583 log_errno("sys_open_tree"); 584 goto out; 585 } 586 tree_fd = ret; 587 ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr, 588 sizeof (attr)); 589 if (ret < 0) { 590 log_errno("sys_mount_setattr"); 591 } 592 out: 593 close(attr.userns_fd); 594 out1: 595 free_idmap(&head); 596 list_destroy(&head); 597 if (tree_fd >= 0) 598 close(tree_fd); 599 if (path_fd >= 0) 600 close(path_fd); 601 free(input); 602 return (ret == 0); 603 } 604 605 /* 606 * Check if the given path is a mount point or not. 607 * 608 * Return: 609 * true if it is 610 * false otherwise 611 */ 612 static bool 613 is_mountpoint(char *path) 614 { 615 char *parent; 616 struct stat st_me, st_parent; 617 bool ret; 618 619 parent = malloc(strlen(path)+4); 620 if (!parent) { 621 errno = ENOMEM; 622 log_errno("malloc"); 623 return (false); 624 } 625 strcat(strcpy(parent, path), "/.."); 626 if (lstat(path, &st_me) != 0 || 627 lstat(parent, &st_parent) != 0) 628 ret = false; 629 else 630 if (st_me.st_dev != st_parent.st_dev || 631 st_me.st_ino == st_parent.st_ino) 632 ret = true; 633 else 634 ret = false; 635 free(parent); 636 return (ret); 637 } 638 639 /* 640 * Remount the source on the new target folder with the given 641 * list of idmap info. If target is NULL, the source will be 642 * unmounted and then remounted if it is a mountpoint, otherwise 643 * no unmount is done, the source is simply idmap remounted. 644 * 645 * Return: 646 * 0 if success 647 * -errno otherwise 648 */ 649 static int 650 do_idmap_mount(list_t *idmap, char *source, char *target, int flags) 651 { 652 int ret; 653 int tree_fd = -EBADF, source_fd = -EBADF; 654 mount_attr_t attr = { 655 .attr_set = MOUNT_ATTR_IDMAP, 656 .userns_fd = -EBADF, 657 }; 658 659 ret = userns_fd_from_idmap(idmap); 660 if (ret < 0) 661 goto out1; 662 attr.userns_fd = ret; 663 ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC); 664 if (ret < 0) { 665 ret = -errno; 666 log_errno("openat(%s)", source); 667 goto out; 668 } 669 source_fd = ret; 670 ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT | 671 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags); 672 if (ret < 0) { 673 ret = -errno; 674 log_errno("sys_open_tree"); 675 goto out; 676 } 677 tree_fd = ret; 678 ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr, 679 sizeof (attr)); 680 if (ret < 0) { 681 ret = -errno; 682 log_errno("sys_mount_setattr"); 683 goto out; 684 } 685 if (target == NULL && is_mountpoint(source)) { 686 ret = umount2(source, MNT_DETACH); 687 if (ret < 0) { 688 ret = -errno; 689 log_errno("umount2(%s)", source); 690 goto out; 691 } 692 } 693 ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ? 694 source : target, MOVE_MOUNT_F_EMPTY_PATH); 695 if (ret < 0) { 696 ret = -errno; 697 log_errno("sys_move_mount(%s)", target == NULL ? 698 source : target); 699 } 700 out: 701 close(attr.userns_fd); 702 out1: 703 if (tree_fd >= 0) 704 close(tree_fd); 705 if (source_fd >= 0) 706 close(source_fd); 707 return (ret); 708 } 709 710 static void 711 print_usage(char *argv[]) 712 { 713 fprintf(stderr, "Usage: %s [-r] [-c] [-m <idmap1>] [-m <idmap2>]" \ 714 " ... [<source>] [<target>]\n", argv[0]); 715 fprintf(stderr, "\n"); 716 fprintf(stderr, " -r Recursively do idmapped mount.\n"); 717 fprintf(stderr, "\n"); 718 fprintf(stderr, " -c Checks if idmapped mount is supported " \ 719 "on the <source> by the operating system or not.\n"); 720 fprintf(stderr, "\n"); 721 fprintf(stderr, " -m <idmap> to specify the idmap info, " \ 722 "in the following format:\n"); 723 fprintf(stderr, " <id_type>:<nsid_first>:<hostid_first>:<count>\n"); 724 fprintf(stderr, "\n"); 725 fprintf(stderr, " <id_type> can be either of 'b', 'u', and 'g'.\n"); 726 fprintf(stderr, "\n"); 727 fprintf(stderr, "The <source> folder will be mounted at <target> " \ 728 "with the provided idmap information.\nIf no <target> is " \ 729 "specified, and <source> is a mount point, " \ 730 "then <source> will be unmounted and then remounted.\n"); 731 } 732 733 int 734 main(int argc, char *argv[]) 735 { 736 int opt; 737 list_t idmap_head; 738 int check_supported = 0; 739 int ret = EXIT_SUCCESS; 740 char *source = NULL, *target = NULL; 741 int flags = 0; 742 743 list_create(&idmap_head, sizeof (struct idmap_entry), 744 offsetof(struct idmap_entry, node)); 745 746 while ((opt = getopt(argc, argv, "rcm:")) != -1) { 747 switch (opt) { 748 case 'r': 749 flags |= AT_RECURSIVE; 750 break; 751 case 'c': 752 check_supported = 1; 753 break; 754 case 'm': 755 ret = parse_idmap_entry(&idmap_head, optarg); 756 if (ret) { 757 errno = ret; 758 log_errno("parse_idmap_entry(%s)", optarg); 759 ret = EXIT_FAILURE; 760 goto out; 761 } 762 break; 763 default: 764 print_usage(argv); 765 exit(EXIT_FAILURE); 766 } 767 } 768 769 if (check_supported == 0 && list_is_empty(&idmap_head)) { 770 print_usage(argv); 771 ret = EXIT_FAILURE; 772 goto out; 773 } 774 775 if (optind >= argc) { 776 fprintf(stderr, "Expected to have <source>, <target>.\n"); 777 print_usage(argv); 778 ret = EXIT_FAILURE; 779 goto out; 780 } 781 782 source = argv[optind]; 783 if (optind < (argc - 1)) { 784 target = argv[optind + 1]; 785 } 786 787 if (check_supported) { 788 free_idmap(&idmap_head); 789 list_destroy(&idmap_head); 790 if (is_idmap_supported(source)) { 791 printf("idmapped mount is supported on [%s].\n", 792 source); 793 return (EXIT_SUCCESS); 794 } else { 795 printf("idmapped mount is NOT supported.\n"); 796 return (EXIT_FAILURE); 797 } 798 } 799 800 ret = do_idmap_mount(&idmap_head, source, target, flags); 801 if (ret) 802 ret = EXIT_FAILURE; 803 out: 804 free_idmap(&idmap_head); 805 list_destroy(&idmap_head); 806 807 exit(ret); 808 } 809