/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef UINT_MAX #define UINT_MAX 4294967295U #endif #ifndef __NR_Linux #if defined __alpha__ #define __NR_Linux 110 #elif defined _MIPS_SIM #if _MIPS_SIM == _MIPS_SIM_ABI32 #define __NR_Linux 4000 #endif #if _MIPS_SIM == _MIPS_SIM_NABI32 #define __NR_Linux 6000 #endif #if _MIPS_SIM == _MIPS_SIM_ABI64 #define __NR_Linux 5000 #endif #elif defined __ia64__ #define __NR_Linux 1024 #else #define __NR_Linux 0 #endif #endif #ifndef __NR_mount_setattr #define __NR_mount_setattr (442 + __NR_Linux) #endif #ifndef __NR_open_tree #define __NR_open_tree (428 + __NR_Linux) #endif #ifndef __NR_move_mount #define __NR_move_mount (429 + __NR_Linux) #endif #ifndef MNT_DETACH #define MNT_DETACH 2 #endif #ifndef MOVE_MOUNT_F_EMPTY_PATH #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 #endif #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif #ifndef OPEN_TREE_CLONE #define OPEN_TREE_CLONE 1 #endif #ifndef OPEN_TREE_CLOEXEC #define OPEN_TREE_CLOEXEC O_CLOEXEC #endif #ifndef AT_RECURSIVE #define AT_RECURSIVE 0x8000 #endif typedef struct { __u64 attr_set; __u64 attr_clr; __u64 propagation; __u64 userns_fd; } mount_attr_t; static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, mount_attr_t *attr, size_t size) { return (syscall(__NR_mount_setattr, dfd, path, flags, attr, size)); } static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) { return (syscall(__NR_open_tree, dfd, filename, flags)); } static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) { return (syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags)); } typedef enum idmap_type_t { TYPE_UID, TYPE_GID, TYPE_BOTH } idmap_type_t; struct idmap_entry { __u32 first; __u32 lower_first; __u32 count; idmap_type_t type; list_node_t node; }; static void log_msg(const char *msg, ...) { va_list ap; va_start(ap, msg); vfprintf(stderr, msg, ap); fputc('\n', stderr); va_end(ap); } #define log_errno(msg, args...) \ do { \ log_msg("%s:%d:%s: [%m] " msg, __FILE__, __LINE__,\ __FUNCTION__, ##args); \ } while (0) /* * Parse the idmapping in the following format * and add to the list: * * u:nsid_first:hostid_first:count * g:nsid_first:hostid_first:count * b:nsid_first:hostid_first:count * * The delimiter can be : or space character. * * Return: * 0 if success * ENOMEM if out of memory * EINVAL if wrong arg or input */ static int parse_idmap_entry(list_t *head, char *input) { char *token, *savedptr = NULL; struct idmap_entry *entry; unsigned long ul; char *delimiter = (char *)": "; char c; if (!input || !head) return (EINVAL); entry = malloc(sizeof (*entry)); if (!entry) return (ENOMEM); token = strtok_r(input, delimiter, &savedptr); if (token) c = token[0]; if (!token || (c != 'b' && c != 'u' && c != 'g')) goto errout; entry->type = (c == 'b') ? TYPE_BOTH : ((c == 'u') ? TYPE_UID : TYPE_GID); token = strtok_r(NULL, delimiter, &savedptr); if (!token) goto errout; ul = strtoul(token, NULL, 10); if (ul > UINT_MAX || errno != 0) goto errout; entry->first = (__u32)ul; token = strtok_r(NULL, delimiter, &savedptr); if (!token) goto errout; ul = strtoul(token, NULL, 10); if (ul > UINT_MAX || errno != 0) goto errout; entry->lower_first = (__u32)ul; token = strtok_r(NULL, delimiter, &savedptr); if (!token) goto errout; ul = strtoul(token, NULL, 10); if (ul > UINT_MAX || errno != 0) goto errout; entry->count = (__u32)ul; list_insert_tail(head, entry); return (0); errout: free(entry); return (EINVAL); } /* * Release all the entries in the list */ static void free_idmap(list_t *head) { struct idmap_entry *entry; while ((entry = list_remove_head(head)) != NULL) free(entry); /* list_destroy() to be done by the caller */ } /* * Write all bytes in the buffer to fd */ static ssize_t write_buf(int fd, const char *buf, size_t buf_size) { ssize_t written, total_written = 0; size_t remaining = buf_size; char *position = (char *)buf; for (;;) { written = write(fd, position, remaining); if (written < 0 && errno == EINTR) continue; if (written < 0) { log_errno("write"); return (written); } total_written += written; if (total_written == buf_size) break; remaining -= written; position += written; } return (total_written); } /* * Read data from file into buffer */ static ssize_t read_buf(int fd, char *buf, size_t buf_size) { int ret; for (;;) { ret = read(fd, buf, buf_size); if (ret < 0 && errno == EINTR) continue; break; } if (ret < 0) log_errno("read"); return (ret); } /* * Write idmap of the given type in the buffer to the * process' uid_map or gid_map proc file. * * Return: * 0 if success * errno if there's any error */ static int write_idmap(pid_t pid, char *buf, size_t buf_size, idmap_type_t type) { char path[PATH_MAX]; int fd = -EBADF; int ret; (void) snprintf(path, sizeof (path), "/proc/%d/%cid_map", pid, type == TYPE_UID ? 'u' : 'g'); fd = open(path, O_WRONLY | O_CLOEXEC); if (fd < 0) { ret = errno; log_errno("open(%s)", path); goto out; } ret = write_buf(fd, buf, buf_size); if (ret < 0) ret = errno; else ret = 0; out: if (fd >= 0) close(fd); return (ret); } /* * Write idmap info in the list to the process * user namespace, i.e. its /proc//uid_map * and /proc//gid_map file. * * Return: * 0 if success * errno if it fails */ static int write_pid_idmaps(pid_t pid, list_t *head) { char *buf_uids, *buf_gids; char *curr_bufu, *curr_bufg; /* max 4k to be allowed for each map */ int size_buf_uids = 4096, size_buf_gids = 4096; struct idmap_entry *entry; int uid_filled, gid_filled; int ret = 0; int has_uids = 0, has_gids = 0; size_t buf_size; buf_uids = malloc(size_buf_uids); if (!buf_uids) return (ENOMEM); buf_gids = malloc(size_buf_gids); if (!buf_gids) { free(buf_uids); return (ENOMEM); } curr_bufu = buf_uids; curr_bufg = buf_gids; for (entry = list_head(head); entry; entry = list_next(head, entry)) { if (entry->type == TYPE_UID || entry->type == TYPE_BOTH) { uid_filled = snprintf(curr_bufu, size_buf_uids, "%u %u %u\n", entry->first, entry->lower_first, entry->count); if (uid_filled <= 0 || uid_filled >= size_buf_uids) { ret = E2BIG; goto out; } curr_bufu += uid_filled; size_buf_uids -= uid_filled; has_uids = 1; } if (entry->type == TYPE_GID || entry->type == TYPE_BOTH) { gid_filled = snprintf(curr_bufg, size_buf_gids, "%u %u %u\n", entry->first, entry->lower_first, entry->count); if (gid_filled <= 0 || gid_filled >= size_buf_gids) { ret = E2BIG; goto out; } curr_bufg += gid_filled; size_buf_gids -= gid_filled; has_gids = 1; } } if (has_uids) { buf_size = curr_bufu - buf_uids; ret = write_idmap(pid, buf_uids, buf_size, TYPE_UID); if (ret) goto out; } if (has_gids) { buf_size = curr_bufg - buf_gids; ret = write_idmap(pid, buf_gids, buf_size, TYPE_GID); } out: free(buf_uids); free(buf_gids); return (ret); } /* * Wait for the child process to exit * and reap it. * * Return: * process exit code if available */ static int wait_for_pid(pid_t pid) { int status; int ret; for (;;) { ret = waitpid(pid, &status, 0); if (ret < 0) { if (errno == EINTR) continue; return (EXIT_FAILURE); } break; } if (!WIFEXITED(status)) return (EXIT_FAILURE); return (WEXITSTATUS(status)); } /* * Get the file descriptor of the process user namespace * given its pid. * * Return: * fd if success * -1 if it fails */ static int userns_fd_from_pid(pid_t pid) { int fd; char path[PATH_MAX]; (void) snprintf(path, sizeof (path), "/proc/%d/ns/user", pid); fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) log_errno("open(%s)", path); return (fd); } /* * Get the user namespace file descriptor given a list * of idmap info. * * Return: * fd if success * -errno if it fails */ static int userns_fd_from_idmap(list_t *head) { pid_t pid; int ret, fd; int fds[2]; char c; int saved_errno = 0; /* socketpair for bidirectional communication */ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fds); if (ret) { log_errno("socketpair"); return (-errno); } pid = fork(); if (pid < 0) { log_errno("fork"); fd = -errno; goto out; } if (pid == 0) { /* child process */ ret = unshare(CLONE_NEWUSER); if (ret == 0) { /* notify the parent of success */ ret = write_buf(fds[1], "1", 1); if (ret < 0) saved_errno = errno; else { /* * Until the parent has written to idmap, * we cannot exit, otherwise the defunct * process is owned by the real root, writing * to its idmap ends up with EPERM in the * context of a user ns */ ret = read_buf(fds[1], &c, 1); if (ret < 0) saved_errno = errno; } } else { saved_errno = errno; log_errno("unshare"); ret = write_buf(fds[1], "0", 1); if (ret < 0) saved_errno = errno; } exit(saved_errno); } /* parent process */ ret = read_buf(fds[0], &c, 1); if (ret == 1 && c == '1') { ret = write_pid_idmaps(pid, head); if (!ret) { fd = userns_fd_from_pid(pid); if (fd < 0) fd = -errno; } else { fd = -ret; } /* Let child know it can exit */ (void) write_buf(fds[0], "1", 1); } else { fd = -EBADF; } (void) wait_for_pid(pid); out: close(fds[0]); close(fds[1]); return (fd); } /* * Check if the operating system supports idmapped mount on the * given path or not. * * Return: * true if supported * false if not supported */ static bool is_idmap_supported(char *path) { list_t head; int ret; int tree_fd = -EBADF, path_fd = -EBADF; mount_attr_t attr = { .attr_set = MOUNT_ATTR_IDMAP, .userns_fd = -EBADF, }; /* strtok_r() won't be happy with a const string */ /* To check if idmapped mount can be done in a user ns, map 0 to 0 */ char *input = strdup("b:0:0:1"); if (!input) { errno = ENOMEM; log_errno("strdup"); return (false); } list_create(&head, sizeof (struct idmap_entry), offsetof(struct idmap_entry, node)); ret = parse_idmap_entry(&head, input); if (ret) { errno = ret; log_errno("parse_idmap_entry(%s)", input); goto out1; } ret = userns_fd_from_idmap(&head); if (ret < 0) goto out1; attr.userns_fd = ret; ret = openat(-EBADF, path, O_DIRECTORY | O_CLOEXEC); if (ret < 0) { log_errno("openat(%s)", path); goto out; } path_fd = ret; ret = sys_open_tree(path_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); if (ret < 0) { log_errno("sys_open_tree"); goto out; } tree_fd = ret; ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH, &attr, sizeof (attr)); if (ret < 0) { log_errno("sys_mount_setattr"); } out: close(attr.userns_fd); out1: free_idmap(&head); list_destroy(&head); if (tree_fd >= 0) close(tree_fd); if (path_fd >= 0) close(path_fd); free(input); return (ret == 0); } /* * Check if the given path is a mount point or not. * * Return: * true if it is * false otherwise */ static bool is_mountpoint(char *path) { char *parent; struct stat st_me, st_parent; bool ret; parent = malloc(strlen(path)+4); if (!parent) { errno = ENOMEM; log_errno("malloc"); return (false); } strcat(strcpy(parent, path), "/.."); if (lstat(path, &st_me) != 0 || lstat(parent, &st_parent) != 0) ret = false; else if (st_me.st_dev != st_parent.st_dev || st_me.st_ino == st_parent.st_ino) ret = true; else ret = false; free(parent); return (ret); } /* * Remount the source on the new target folder with the given * list of idmap info. If target is NULL, the source will be * unmounted and then remounted if it is a mountpoint, otherwise * no unmount is done, the source is simply idmap remounted. * * Return: * 0 if success * -errno otherwise */ static int do_idmap_mount(list_t *idmap, char *source, char *target, int flags) { int ret; int tree_fd = -EBADF, source_fd = -EBADF; mount_attr_t attr = { .attr_set = MOUNT_ATTR_IDMAP, .userns_fd = -EBADF, }; ret = userns_fd_from_idmap(idmap); if (ret < 0) goto out1; attr.userns_fd = ret; ret = openat(-EBADF, source, O_DIRECTORY | O_CLOEXEC); if (ret < 0) { ret = -errno; log_errno("openat(%s)", source); goto out; } source_fd = ret; ret = sys_open_tree(source_fd, "", AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE | flags); if (ret < 0) { ret = -errno; log_errno("sys_open_tree"); goto out; } tree_fd = ret; ret = sys_mount_setattr(tree_fd, "", AT_EMPTY_PATH | flags, &attr, sizeof (attr)); if (ret < 0) { ret = -errno; log_errno("sys_mount_setattr"); goto out; } if (target == NULL && is_mountpoint(source)) { ret = umount2(source, MNT_DETACH); if (ret < 0) { ret = -errno; log_errno("umount2(%s)", source); goto out; } } ret = sys_move_mount(tree_fd, "", -EBADF, target == NULL ? source : target, MOVE_MOUNT_F_EMPTY_PATH); if (ret < 0) { ret = -errno; log_errno("sys_move_mount(%s)", target == NULL ? source : target); } out: close(attr.userns_fd); out1: if (tree_fd >= 0) close(tree_fd); if (source_fd >= 0) close(source_fd); return (ret); } static void print_usage(char *argv[]) { fprintf(stderr, "Usage: %s [-r] [-c] [-m ] [-m ]" \ " ... [] []\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, " -r Recursively do idmapped mount.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -c Checks if idmapped mount is supported " \ "on the by the operating system or not.\n"); fprintf(stderr, "\n"); fprintf(stderr, " -m to specify the idmap info, " \ "in the following format:\n"); fprintf(stderr, " :::\n"); fprintf(stderr, "\n"); fprintf(stderr, " can be either of 'b', 'u', and 'g'.\n"); fprintf(stderr, "\n"); fprintf(stderr, "The folder will be mounted at " \ "with the provided idmap information.\nIf no is " \ "specified, and is a mount point, " \ "then will be unmounted and then remounted.\n"); } int main(int argc, char *argv[]) { int opt; list_t idmap_head; int check_supported = 0; int ret = EXIT_SUCCESS; char *source = NULL, *target = NULL; int flags = 0; list_create(&idmap_head, sizeof (struct idmap_entry), offsetof(struct idmap_entry, node)); while ((opt = getopt(argc, argv, "rcm:")) != -1) { switch (opt) { case 'r': flags |= AT_RECURSIVE; break; case 'c': check_supported = 1; break; case 'm': ret = parse_idmap_entry(&idmap_head, optarg); if (ret) { errno = ret; log_errno("parse_idmap_entry(%s)", optarg); ret = EXIT_FAILURE; goto out; } break; default: print_usage(argv); exit(EXIT_FAILURE); } } if (check_supported == 0 && list_is_empty(&idmap_head)) { print_usage(argv); ret = EXIT_FAILURE; goto out; } if (optind >= argc) { fprintf(stderr, "Expected to have , .\n"); print_usage(argv); ret = EXIT_FAILURE; goto out; } source = argv[optind]; if (optind < (argc - 1)) { target = argv[optind + 1]; } if (check_supported) { free_idmap(&idmap_head); list_destroy(&idmap_head); if (is_idmap_supported(source)) { printf("idmapped mount is supported on [%s].\n", source); return (EXIT_SUCCESS); } else { printf("idmapped mount is NOT supported.\n"); return (EXIT_FAILURE); } } ret = do_idmap_mount(&idmap_head, source, target, flags); if (ret) ret = EXIT_FAILURE; out: free_idmap(&idmap_head); list_destroy(&idmap_head); exit(ret); }