1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 #include <linux/ipc_namespace.h> 17 #include <linux/time_namespace.h> 18 #include <linux/utsname.h> 19 #include <linux/exportfs.h> 20 #include <linux/nstree.h> 21 #include <net/net_namespace.h> 22 23 #include "mount.h" 24 #include "internal.h" 25 26 static struct vfsmount *nsfs_mnt; 27 28 static struct path nsfs_root_path = {}; 29 30 void nsfs_get_root(struct path *path) 31 { 32 *path = nsfs_root_path; 33 path_get(path); 34 } 35 36 static long ns_ioctl(struct file *filp, unsigned int ioctl, 37 unsigned long arg); 38 static const struct file_operations ns_file_operations = { 39 .unlocked_ioctl = ns_ioctl, 40 .compat_ioctl = compat_ptr_ioctl, 41 }; 42 43 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 44 { 45 struct inode *inode = d_inode(dentry); 46 struct ns_common *ns = inode->i_private; 47 const struct proc_ns_operations *ns_ops = ns->ops; 48 49 return dynamic_dname(buffer, buflen, "%s:[%lu]", 50 ns_ops->name, inode->i_ino); 51 } 52 53 const struct dentry_operations ns_dentry_operations = { 54 .d_dname = ns_dname, 55 .d_prune = stashed_dentry_prune, 56 }; 57 58 static void nsfs_evict(struct inode *inode) 59 { 60 struct ns_common *ns = inode->i_private; 61 clear_inode(inode); 62 ns->ops->put(ns); 63 } 64 65 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 66 void *private_data) 67 { 68 struct ns_common *ns; 69 70 ns = ns_get_cb(private_data); 71 if (!ns) 72 return -ENOENT; 73 74 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 75 } 76 77 struct ns_get_path_task_args { 78 const struct proc_ns_operations *ns_ops; 79 struct task_struct *task; 80 }; 81 82 static struct ns_common *ns_get_path_task(void *private_data) 83 { 84 struct ns_get_path_task_args *args = private_data; 85 86 return args->ns_ops->get(args->task); 87 } 88 89 int ns_get_path(struct path *path, struct task_struct *task, 90 const struct proc_ns_operations *ns_ops) 91 { 92 struct ns_get_path_task_args args = { 93 .ns_ops = ns_ops, 94 .task = task, 95 }; 96 97 return ns_get_path_cb(path, ns_get_path_task, &args); 98 } 99 100 /** 101 * open_namespace - open a namespace 102 * @ns: the namespace to open 103 * 104 * This will consume a reference to @ns indendent of success or failure. 105 * 106 * Return: A file descriptor on success or a negative error code on failure. 107 */ 108 int open_namespace(struct ns_common *ns) 109 { 110 struct path path __free(path_put) = {}; 111 struct file *f; 112 int err; 113 114 /* call first to consume reference */ 115 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 116 if (err < 0) 117 return err; 118 119 CLASS(get_unused_fd, fd)(O_CLOEXEC); 120 if (fd < 0) 121 return fd; 122 123 f = dentry_open(&path, O_RDONLY, current_cred()); 124 if (IS_ERR(f)) 125 return PTR_ERR(f); 126 127 fd_install(fd, f); 128 return take_fd(fd); 129 } 130 131 int open_related_ns(struct ns_common *ns, 132 struct ns_common *(*get_ns)(struct ns_common *ns)) 133 { 134 struct ns_common *relative; 135 136 relative = get_ns(ns); 137 if (IS_ERR(relative)) 138 return PTR_ERR(relative); 139 140 return open_namespace(relative); 141 } 142 EXPORT_SYMBOL_GPL(open_related_ns); 143 144 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 145 struct mnt_ns_info __user *uinfo, size_t usize, 146 struct mnt_ns_info *kinfo) 147 { 148 /* 149 * If userspace and the kernel have the same struct size it can just 150 * be copied. If userspace provides an older struct, only the bits that 151 * userspace knows about will be copied. If userspace provides a new 152 * struct, only the bits that the kernel knows aobut will be copied and 153 * the size value will be set to the size the kernel knows about. 154 */ 155 kinfo->size = min(usize, sizeof(*kinfo)); 156 kinfo->mnt_ns_id = mnt_ns->ns.ns_id; 157 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 158 /* Subtract the root mount of the mount namespace. */ 159 if (kinfo->nr_mounts) 160 kinfo->nr_mounts--; 161 162 if (copy_to_user(uinfo, kinfo, kinfo->size)) 163 return -EFAULT; 164 165 return 0; 166 } 167 168 static bool nsfs_ioctl_valid(unsigned int cmd) 169 { 170 switch (cmd) { 171 case NS_GET_USERNS: 172 case NS_GET_PARENT: 173 case NS_GET_NSTYPE: 174 case NS_GET_OWNER_UID: 175 case NS_GET_MNTNS_ID: 176 case NS_GET_PID_FROM_PIDNS: 177 case NS_GET_TGID_FROM_PIDNS: 178 case NS_GET_PID_IN_PIDNS: 179 case NS_GET_TGID_IN_PIDNS: 180 case NS_GET_ID: 181 return true; 182 } 183 184 /* Extensible ioctls require some extra handling. */ 185 switch (_IOC_NR(cmd)) { 186 case _IOC_NR(NS_MNT_GET_INFO): 187 return extensible_ioctl_valid(cmd, NS_MNT_GET_INFO, MNT_NS_INFO_SIZE_VER0); 188 case _IOC_NR(NS_MNT_GET_NEXT): 189 return extensible_ioctl_valid(cmd, NS_MNT_GET_NEXT, MNT_NS_INFO_SIZE_VER0); 190 case _IOC_NR(NS_MNT_GET_PREV): 191 return extensible_ioctl_valid(cmd, NS_MNT_GET_PREV, MNT_NS_INFO_SIZE_VER0); 192 } 193 194 return false; 195 } 196 197 static long ns_ioctl(struct file *filp, unsigned int ioctl, 198 unsigned long arg) 199 { 200 struct user_namespace *user_ns; 201 struct pid_namespace *pid_ns; 202 struct task_struct *tsk; 203 struct ns_common *ns; 204 struct mnt_namespace *mnt_ns; 205 bool previous = false; 206 uid_t __user *argp; 207 uid_t uid; 208 int ret; 209 210 if (!nsfs_ioctl_valid(ioctl)) 211 return -ENOIOCTLCMD; 212 213 ns = get_proc_ns(file_inode(filp)); 214 switch (ioctl) { 215 case NS_GET_USERNS: 216 return open_related_ns(ns, ns_get_owner); 217 case NS_GET_PARENT: 218 if (!ns->ops->get_parent) 219 return -EINVAL; 220 return open_related_ns(ns, ns->ops->get_parent); 221 case NS_GET_NSTYPE: 222 return ns->ns_type; 223 case NS_GET_OWNER_UID: 224 if (ns->ns_type != CLONE_NEWUSER) 225 return -EINVAL; 226 user_ns = container_of(ns, struct user_namespace, ns); 227 argp = (uid_t __user *) arg; 228 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 229 return put_user(uid, argp); 230 case NS_GET_PID_FROM_PIDNS: 231 fallthrough; 232 case NS_GET_TGID_FROM_PIDNS: 233 fallthrough; 234 case NS_GET_PID_IN_PIDNS: 235 fallthrough; 236 case NS_GET_TGID_IN_PIDNS: { 237 if (ns->ns_type != CLONE_NEWPID) 238 return -EINVAL; 239 240 ret = -ESRCH; 241 pid_ns = container_of(ns, struct pid_namespace, ns); 242 243 guard(rcu)(); 244 245 if (ioctl == NS_GET_PID_IN_PIDNS || 246 ioctl == NS_GET_TGID_IN_PIDNS) 247 tsk = find_task_by_vpid(arg); 248 else 249 tsk = find_task_by_pid_ns(arg, pid_ns); 250 if (!tsk) 251 break; 252 253 switch (ioctl) { 254 case NS_GET_PID_FROM_PIDNS: 255 ret = task_pid_vnr(tsk); 256 break; 257 case NS_GET_TGID_FROM_PIDNS: 258 ret = task_tgid_vnr(tsk); 259 break; 260 case NS_GET_PID_IN_PIDNS: 261 ret = task_pid_nr_ns(tsk, pid_ns); 262 break; 263 case NS_GET_TGID_IN_PIDNS: 264 ret = task_tgid_nr_ns(tsk, pid_ns); 265 break; 266 default: 267 ret = 0; 268 break; 269 } 270 271 if (!ret) 272 ret = -ESRCH; 273 return ret; 274 } 275 case NS_GET_MNTNS_ID: 276 if (ns->ns_type != CLONE_NEWNS) 277 return -EINVAL; 278 fallthrough; 279 case NS_GET_ID: { 280 __u64 __user *idp; 281 __u64 id; 282 283 idp = (__u64 __user *)arg; 284 id = ns->ns_id; 285 return put_user(id, idp); 286 } 287 } 288 289 /* extensible ioctls */ 290 switch (_IOC_NR(ioctl)) { 291 case _IOC_NR(NS_MNT_GET_INFO): { 292 struct mnt_ns_info kinfo = {}; 293 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 294 size_t usize = _IOC_SIZE(ioctl); 295 296 if (ns->ns_type != CLONE_NEWNS) 297 return -EINVAL; 298 299 if (!uinfo) 300 return -EINVAL; 301 302 if (usize < MNT_NS_INFO_SIZE_VER0) 303 return -EINVAL; 304 305 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 306 } 307 case _IOC_NR(NS_MNT_GET_PREV): 308 previous = true; 309 fallthrough; 310 case _IOC_NR(NS_MNT_GET_NEXT): { 311 struct mnt_ns_info kinfo = {}; 312 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 313 struct path path __free(path_put) = {}; 314 struct file *f __free(fput) = NULL; 315 size_t usize = _IOC_SIZE(ioctl); 316 317 if (ns->ns_type != CLONE_NEWNS) 318 return -EINVAL; 319 320 if (usize < MNT_NS_INFO_SIZE_VER0) 321 return -EINVAL; 322 323 mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous); 324 if (IS_ERR(mnt_ns)) 325 return PTR_ERR(mnt_ns); 326 327 ns = to_ns_common(mnt_ns); 328 /* Transfer ownership of @mnt_ns reference to @path. */ 329 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 330 if (ret) 331 return ret; 332 333 CLASS(get_unused_fd, fd)(O_CLOEXEC); 334 if (fd < 0) 335 return fd; 336 337 f = dentry_open(&path, O_RDONLY, current_cred()); 338 if (IS_ERR(f)) 339 return PTR_ERR(f); 340 341 if (uinfo) { 342 /* 343 * If @uinfo is passed return all information about the 344 * mount namespace as well. 345 */ 346 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 347 if (ret) 348 return ret; 349 } 350 351 /* Transfer reference of @f to caller's fdtable. */ 352 fd_install(fd, no_free_ptr(f)); 353 /* File descriptor is live so hand it off to the caller. */ 354 return take_fd(fd); 355 } 356 default: 357 ret = -ENOTTY; 358 } 359 360 return ret; 361 } 362 363 int ns_get_name(char *buf, size_t size, struct task_struct *task, 364 const struct proc_ns_operations *ns_ops) 365 { 366 struct ns_common *ns; 367 int res = -ENOENT; 368 const char *name; 369 ns = ns_ops->get(task); 370 if (ns) { 371 name = ns_ops->real_ns_name ? : ns_ops->name; 372 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 373 ns_ops->put(ns); 374 } 375 return res; 376 } 377 378 bool proc_ns_file(const struct file *file) 379 { 380 return file->f_op == &ns_file_operations; 381 } 382 383 /** 384 * ns_match() - Returns true if current namespace matches dev/ino provided. 385 * @ns: current namespace 386 * @dev: dev_t from nsfs that will be matched against current nsfs 387 * @ino: ino_t from nsfs that will be matched against current nsfs 388 * 389 * Return: true if dev and ino matches the current nsfs. 390 */ 391 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 392 { 393 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 394 } 395 396 397 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 398 { 399 struct inode *inode = d_inode(dentry); 400 const struct ns_common *ns = inode->i_private; 401 const struct proc_ns_operations *ns_ops = ns->ops; 402 403 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 404 return 0; 405 } 406 407 static const struct super_operations nsfs_ops = { 408 .statfs = simple_statfs, 409 .evict_inode = nsfs_evict, 410 .show_path = nsfs_show_path, 411 }; 412 413 static int nsfs_init_inode(struct inode *inode, void *data) 414 { 415 struct ns_common *ns = data; 416 417 inode->i_private = data; 418 inode->i_mode |= S_IRUGO; 419 inode->i_fop = &ns_file_operations; 420 inode->i_ino = ns->inum; 421 return 0; 422 } 423 424 static void nsfs_put_data(void *data) 425 { 426 struct ns_common *ns = data; 427 ns->ops->put(ns); 428 } 429 430 static const struct stashed_operations nsfs_stashed_ops = { 431 .init_inode = nsfs_init_inode, 432 .put_data = nsfs_put_data, 433 }; 434 435 #define NSFS_FID_SIZE_U32_VER0 (NSFS_FILE_HANDLE_SIZE_VER0 / sizeof(u32)) 436 #define NSFS_FID_SIZE_U32_LATEST (NSFS_FILE_HANDLE_SIZE_LATEST / sizeof(u32)) 437 438 static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, 439 struct inode *parent) 440 { 441 struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh; 442 struct ns_common *ns = inode->i_private; 443 int len = *max_len; 444 445 if (parent) 446 return FILEID_INVALID; 447 448 if (len < NSFS_FID_SIZE_U32_VER0) { 449 *max_len = NSFS_FID_SIZE_U32_LATEST; 450 return FILEID_INVALID; 451 } else if (len > NSFS_FID_SIZE_U32_LATEST) { 452 *max_len = NSFS_FID_SIZE_U32_LATEST; 453 } 454 455 fid->ns_id = ns->ns_id; 456 fid->ns_type = ns->ns_type; 457 fid->ns_inum = inode->i_ino; 458 return FILEID_NSFS; 459 } 460 461 static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, 462 int fh_len, int fh_type) 463 { 464 struct path path __free(path_put) = {}; 465 struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh; 466 struct user_namespace *owning_ns = NULL; 467 struct ns_common *ns; 468 int ret; 469 470 if (fh_len < NSFS_FID_SIZE_U32_VER0) 471 return NULL; 472 473 /* Check that any trailing bytes are zero. */ 474 if ((fh_len > NSFS_FID_SIZE_U32_LATEST) && 475 memchr_inv((void *)fid + NSFS_FID_SIZE_U32_LATEST, 0, 476 fh_len - NSFS_FID_SIZE_U32_LATEST)) 477 return NULL; 478 479 switch (fh_type) { 480 case FILEID_NSFS: 481 break; 482 default: 483 return NULL; 484 } 485 486 scoped_guard(rcu) { 487 ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type); 488 if (!ns) 489 return NULL; 490 491 VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id); 492 VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type); 493 VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum); 494 495 if (!__ns_ref_get(ns)) 496 return NULL; 497 } 498 499 switch (ns->ns_type) { 500 #ifdef CONFIG_CGROUPS 501 case CLONE_NEWCGROUP: 502 if (!current_in_namespace(to_cg_ns(ns))) 503 owning_ns = to_cg_ns(ns)->user_ns; 504 break; 505 #endif 506 #ifdef CONFIG_IPC_NS 507 case CLONE_NEWIPC: 508 if (!current_in_namespace(to_ipc_ns(ns))) 509 owning_ns = to_ipc_ns(ns)->user_ns; 510 break; 511 #endif 512 case CLONE_NEWNS: 513 if (!current_in_namespace(to_mnt_ns(ns))) 514 owning_ns = to_mnt_ns(ns)->user_ns; 515 break; 516 #ifdef CONFIG_NET_NS 517 case CLONE_NEWNET: 518 if (!current_in_namespace(to_net_ns(ns))) 519 owning_ns = to_net_ns(ns)->user_ns; 520 break; 521 #endif 522 #ifdef CONFIG_PID_NS 523 case CLONE_NEWPID: 524 if (!current_in_namespace(to_pid_ns(ns))) { 525 owning_ns = to_pid_ns(ns)->user_ns; 526 } else if (!READ_ONCE(to_pid_ns(ns)->child_reaper)) { 527 ns->ops->put(ns); 528 return ERR_PTR(-EPERM); 529 } 530 break; 531 #endif 532 #ifdef CONFIG_TIME_NS 533 case CLONE_NEWTIME: 534 if (!current_in_namespace(to_time_ns(ns))) 535 owning_ns = to_time_ns(ns)->user_ns; 536 break; 537 #endif 538 #ifdef CONFIG_USER_NS 539 case CLONE_NEWUSER: 540 if (!current_in_namespace(to_user_ns(ns))) 541 owning_ns = to_user_ns(ns); 542 break; 543 #endif 544 #ifdef CONFIG_UTS_NS 545 case CLONE_NEWUTS: 546 if (!current_in_namespace(to_uts_ns(ns))) 547 owning_ns = to_uts_ns(ns)->user_ns; 548 break; 549 #endif 550 default: 551 return ERR_PTR(-EOPNOTSUPP); 552 } 553 554 if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) { 555 ns->ops->put(ns); 556 return ERR_PTR(-EPERM); 557 } 558 559 /* path_from_stashed() unconditionally consumes the reference. */ 560 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 561 if (ret) 562 return ERR_PTR(ret); 563 564 return no_free_ptr(path.dentry); 565 } 566 567 static int nsfs_export_permission(struct handle_to_path_ctx *ctx, 568 unsigned int oflags) 569 { 570 /* nsfs_fh_to_dentry() performs all permission checks. */ 571 return 0; 572 } 573 574 static struct file *nsfs_export_open(struct path *path, unsigned int oflags) 575 { 576 return file_open_root(path, "", oflags, 0); 577 } 578 579 static const struct export_operations nsfs_export_operations = { 580 .encode_fh = nsfs_encode_fh, 581 .fh_to_dentry = nsfs_fh_to_dentry, 582 .open = nsfs_export_open, 583 .permission = nsfs_export_permission, 584 }; 585 586 static int nsfs_init_fs_context(struct fs_context *fc) 587 { 588 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 589 if (!ctx) 590 return -ENOMEM; 591 ctx->ops = &nsfs_ops; 592 ctx->eops = &nsfs_export_operations; 593 ctx->dops = &ns_dentry_operations; 594 fc->s_fs_info = (void *)&nsfs_stashed_ops; 595 return 0; 596 } 597 598 static struct file_system_type nsfs = { 599 .name = "nsfs", 600 .init_fs_context = nsfs_init_fs_context, 601 .kill_sb = kill_anon_super, 602 }; 603 604 void __init nsfs_init(void) 605 { 606 nsfs_mnt = kern_mount(&nsfs); 607 if (IS_ERR(nsfs_mnt)) 608 panic("can't set nsfs up\n"); 609 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 610 nsfs_root_path.mnt = nsfs_mnt; 611 nsfs_root_path.dentry = nsfs_mnt->mnt_root; 612 } 613