1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 #include <linux/ipc_namespace.h> 17 #include <linux/time_namespace.h> 18 #include <linux/utsname.h> 19 #include <linux/exportfs.h> 20 #include <linux/nstree.h> 21 #include <net/net_namespace.h> 22 23 #include "mount.h" 24 #include "internal.h" 25 26 static struct vfsmount *nsfs_mnt; 27 28 static struct path nsfs_root_path = {}; 29 30 void nsfs_get_root(struct path *path) 31 { 32 *path = nsfs_root_path; 33 path_get(path); 34 } 35 36 static long ns_ioctl(struct file *filp, unsigned int ioctl, 37 unsigned long arg); 38 static const struct file_operations ns_file_operations = { 39 .unlocked_ioctl = ns_ioctl, 40 .compat_ioctl = compat_ptr_ioctl, 41 }; 42 43 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 44 { 45 struct inode *inode = d_inode(dentry); 46 struct ns_common *ns = inode->i_private; 47 const struct proc_ns_operations *ns_ops = ns->ops; 48 49 return dynamic_dname(buffer, buflen, "%s:[%lu]", 50 ns_ops->name, inode->i_ino); 51 } 52 53 const struct dentry_operations ns_dentry_operations = { 54 .d_dname = ns_dname, 55 .d_prune = stashed_dentry_prune, 56 }; 57 58 static void nsfs_evict(struct inode *inode) 59 { 60 struct ns_common *ns = inode->i_private; 61 clear_inode(inode); 62 ns->ops->put(ns); 63 } 64 65 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 66 void *private_data) 67 { 68 struct ns_common *ns; 69 70 ns = ns_get_cb(private_data); 71 if (!ns) 72 return -ENOENT; 73 74 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 75 } 76 77 struct ns_get_path_task_args { 78 const struct proc_ns_operations *ns_ops; 79 struct task_struct *task; 80 }; 81 82 static struct ns_common *ns_get_path_task(void *private_data) 83 { 84 struct ns_get_path_task_args *args = private_data; 85 86 return args->ns_ops->get(args->task); 87 } 88 89 int ns_get_path(struct path *path, struct task_struct *task, 90 const struct proc_ns_operations *ns_ops) 91 { 92 struct ns_get_path_task_args args = { 93 .ns_ops = ns_ops, 94 .task = task, 95 }; 96 97 return ns_get_path_cb(path, ns_get_path_task, &args); 98 } 99 100 /** 101 * open_namespace - open a namespace 102 * @ns: the namespace to open 103 * 104 * This will consume a reference to @ns indendent of success or failure. 105 * 106 * Return: A file descriptor on success or a negative error code on failure. 107 */ 108 int open_namespace(struct ns_common *ns) 109 { 110 struct path path __free(path_put) = {}; 111 int err; 112 113 /* call first to consume reference */ 114 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 115 if (err < 0) 116 return err; 117 118 return FD_ADD(O_CLOEXEC, dentry_open(&path, O_RDONLY, current_cred())); 119 } 120 121 int open_related_ns(struct ns_common *ns, 122 struct ns_common *(*get_ns)(struct ns_common *ns)) 123 { 124 struct ns_common *relative; 125 126 relative = get_ns(ns); 127 if (IS_ERR(relative)) 128 return PTR_ERR(relative); 129 130 return open_namespace(relative); 131 } 132 EXPORT_SYMBOL_GPL(open_related_ns); 133 134 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 135 struct mnt_ns_info __user *uinfo, size_t usize, 136 struct mnt_ns_info *kinfo) 137 { 138 /* 139 * If userspace and the kernel have the same struct size it can just 140 * be copied. If userspace provides an older struct, only the bits that 141 * userspace knows about will be copied. If userspace provides a new 142 * struct, only the bits that the kernel knows aobut will be copied and 143 * the size value will be set to the size the kernel knows about. 144 */ 145 kinfo->size = min(usize, sizeof(*kinfo)); 146 kinfo->mnt_ns_id = mnt_ns->ns.ns_id; 147 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 148 /* Subtract the root mount of the mount namespace. */ 149 if (kinfo->nr_mounts) 150 kinfo->nr_mounts--; 151 152 if (copy_to_user(uinfo, kinfo, kinfo->size)) 153 return -EFAULT; 154 155 return 0; 156 } 157 158 static bool nsfs_ioctl_valid(unsigned int cmd) 159 { 160 switch (cmd) { 161 case NS_GET_USERNS: 162 case NS_GET_PARENT: 163 case NS_GET_NSTYPE: 164 case NS_GET_OWNER_UID: 165 case NS_GET_MNTNS_ID: 166 case NS_GET_PID_FROM_PIDNS: 167 case NS_GET_TGID_FROM_PIDNS: 168 case NS_GET_PID_IN_PIDNS: 169 case NS_GET_TGID_IN_PIDNS: 170 case NS_GET_ID: 171 return true; 172 } 173 174 /* Extensible ioctls require some extra handling. */ 175 switch (_IOC_NR(cmd)) { 176 case _IOC_NR(NS_MNT_GET_INFO): 177 return extensible_ioctl_valid(cmd, NS_MNT_GET_INFO, MNT_NS_INFO_SIZE_VER0); 178 case _IOC_NR(NS_MNT_GET_NEXT): 179 return extensible_ioctl_valid(cmd, NS_MNT_GET_NEXT, MNT_NS_INFO_SIZE_VER0); 180 case _IOC_NR(NS_MNT_GET_PREV): 181 return extensible_ioctl_valid(cmd, NS_MNT_GET_PREV, MNT_NS_INFO_SIZE_VER0); 182 } 183 184 return false; 185 } 186 187 static long ns_ioctl(struct file *filp, unsigned int ioctl, 188 unsigned long arg) 189 { 190 struct user_namespace *user_ns; 191 struct pid_namespace *pid_ns; 192 struct task_struct *tsk; 193 struct ns_common *ns; 194 struct mnt_namespace *mnt_ns; 195 bool previous = false; 196 uid_t __user *argp; 197 uid_t uid; 198 int ret; 199 200 if (!nsfs_ioctl_valid(ioctl)) 201 return -ENOIOCTLCMD; 202 203 ns = get_proc_ns(file_inode(filp)); 204 switch (ioctl) { 205 case NS_GET_USERNS: 206 return open_related_ns(ns, ns_get_owner); 207 case NS_GET_PARENT: 208 if (!ns->ops->get_parent) 209 return -EINVAL; 210 return open_related_ns(ns, ns->ops->get_parent); 211 case NS_GET_NSTYPE: 212 return ns->ns_type; 213 case NS_GET_OWNER_UID: 214 if (ns->ns_type != CLONE_NEWUSER) 215 return -EINVAL; 216 user_ns = container_of(ns, struct user_namespace, ns); 217 argp = (uid_t __user *) arg; 218 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 219 return put_user(uid, argp); 220 case NS_GET_PID_FROM_PIDNS: 221 fallthrough; 222 case NS_GET_TGID_FROM_PIDNS: 223 fallthrough; 224 case NS_GET_PID_IN_PIDNS: 225 fallthrough; 226 case NS_GET_TGID_IN_PIDNS: { 227 if (ns->ns_type != CLONE_NEWPID) 228 return -EINVAL; 229 230 ret = -ESRCH; 231 pid_ns = container_of(ns, struct pid_namespace, ns); 232 233 guard(rcu)(); 234 235 if (ioctl == NS_GET_PID_IN_PIDNS || 236 ioctl == NS_GET_TGID_IN_PIDNS) 237 tsk = find_task_by_vpid(arg); 238 else 239 tsk = find_task_by_pid_ns(arg, pid_ns); 240 if (!tsk) 241 break; 242 243 switch (ioctl) { 244 case NS_GET_PID_FROM_PIDNS: 245 ret = task_pid_vnr(tsk); 246 break; 247 case NS_GET_TGID_FROM_PIDNS: 248 ret = task_tgid_vnr(tsk); 249 break; 250 case NS_GET_PID_IN_PIDNS: 251 ret = task_pid_nr_ns(tsk, pid_ns); 252 break; 253 case NS_GET_TGID_IN_PIDNS: 254 ret = task_tgid_nr_ns(tsk, pid_ns); 255 break; 256 default: 257 ret = 0; 258 break; 259 } 260 261 if (!ret) 262 ret = -ESRCH; 263 return ret; 264 } 265 case NS_GET_MNTNS_ID: 266 if (ns->ns_type != CLONE_NEWNS) 267 return -EINVAL; 268 fallthrough; 269 case NS_GET_ID: { 270 __u64 __user *idp; 271 __u64 id; 272 273 idp = (__u64 __user *)arg; 274 id = ns->ns_id; 275 return put_user(id, idp); 276 } 277 } 278 279 /* extensible ioctls */ 280 switch (_IOC_NR(ioctl)) { 281 case _IOC_NR(NS_MNT_GET_INFO): { 282 struct mnt_ns_info kinfo = {}; 283 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 284 size_t usize = _IOC_SIZE(ioctl); 285 286 if (ns->ns_type != CLONE_NEWNS) 287 return -EINVAL; 288 289 if (!uinfo) 290 return -EINVAL; 291 292 if (usize < MNT_NS_INFO_SIZE_VER0) 293 return -EINVAL; 294 295 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 296 } 297 case _IOC_NR(NS_MNT_GET_PREV): 298 previous = true; 299 fallthrough; 300 case _IOC_NR(NS_MNT_GET_NEXT): { 301 struct mnt_ns_info kinfo = {}; 302 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 303 struct path path __free(path_put) = {}; 304 size_t usize = _IOC_SIZE(ioctl); 305 306 if (ns->ns_type != CLONE_NEWNS) 307 return -EINVAL; 308 309 if (usize < MNT_NS_INFO_SIZE_VER0) 310 return -EINVAL; 311 312 mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous); 313 if (IS_ERR(mnt_ns)) 314 return PTR_ERR(mnt_ns); 315 316 ns = to_ns_common(mnt_ns); 317 /* Transfer ownership of @mnt_ns reference to @path. */ 318 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 319 if (ret) 320 return ret; 321 322 FD_PREPARE(fdf, O_CLOEXEC, dentry_open(&path, O_RDONLY, current_cred())); 323 if (fdf.err) 324 return fdf.err; 325 /* 326 * If @uinfo is passed return all information about the 327 * mount namespace as well. 328 */ 329 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 330 if (ret) 331 return ret; 332 ret = fd_publish(fdf); 333 break; 334 } 335 default: 336 ret = -ENOTTY; 337 } 338 339 return ret; 340 } 341 342 int ns_get_name(char *buf, size_t size, struct task_struct *task, 343 const struct proc_ns_operations *ns_ops) 344 { 345 struct ns_common *ns; 346 int res = -ENOENT; 347 const char *name; 348 ns = ns_ops->get(task); 349 if (ns) { 350 name = ns_ops->real_ns_name ? : ns_ops->name; 351 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 352 ns_ops->put(ns); 353 } 354 return res; 355 } 356 357 bool proc_ns_file(const struct file *file) 358 { 359 return file->f_op == &ns_file_operations; 360 } 361 362 /** 363 * ns_match() - Returns true if current namespace matches dev/ino provided. 364 * @ns: current namespace 365 * @dev: dev_t from nsfs that will be matched against current nsfs 366 * @ino: ino_t from nsfs that will be matched against current nsfs 367 * 368 * Return: true if dev and ino matches the current nsfs. 369 */ 370 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 371 { 372 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 373 } 374 375 376 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 377 { 378 struct inode *inode = d_inode(dentry); 379 const struct ns_common *ns = inode->i_private; 380 const struct proc_ns_operations *ns_ops = ns->ops; 381 382 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 383 return 0; 384 } 385 386 static const struct super_operations nsfs_ops = { 387 .statfs = simple_statfs, 388 .evict_inode = nsfs_evict, 389 .show_path = nsfs_show_path, 390 }; 391 392 static int nsfs_init_inode(struct inode *inode, void *data) 393 { 394 struct ns_common *ns = data; 395 396 inode->i_private = data; 397 inode->i_mode |= S_IRUGO; 398 inode->i_fop = &ns_file_operations; 399 inode->i_ino = ns->inum; 400 return 0; 401 } 402 403 static void nsfs_put_data(void *data) 404 { 405 struct ns_common *ns = data; 406 ns->ops->put(ns); 407 } 408 409 static const struct stashed_operations nsfs_stashed_ops = { 410 .init_inode = nsfs_init_inode, 411 .put_data = nsfs_put_data, 412 }; 413 414 #define NSFS_FID_SIZE_U32_VER0 (NSFS_FILE_HANDLE_SIZE_VER0 / sizeof(u32)) 415 #define NSFS_FID_SIZE_U32_LATEST (NSFS_FILE_HANDLE_SIZE_LATEST / sizeof(u32)) 416 417 static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, 418 struct inode *parent) 419 { 420 struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh; 421 struct ns_common *ns = inode->i_private; 422 int len = *max_len; 423 424 if (parent) 425 return FILEID_INVALID; 426 427 if (len < NSFS_FID_SIZE_U32_VER0) { 428 *max_len = NSFS_FID_SIZE_U32_LATEST; 429 return FILEID_INVALID; 430 } else if (len > NSFS_FID_SIZE_U32_LATEST) { 431 *max_len = NSFS_FID_SIZE_U32_LATEST; 432 } 433 434 fid->ns_id = ns->ns_id; 435 fid->ns_type = ns->ns_type; 436 fid->ns_inum = inode->i_ino; 437 return FILEID_NSFS; 438 } 439 440 static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh, 441 int fh_len, int fh_type) 442 { 443 struct path path __free(path_put) = {}; 444 struct nsfs_file_handle *fid = (struct nsfs_file_handle *)fh; 445 struct user_namespace *owning_ns = NULL; 446 struct ns_common *ns; 447 int ret; 448 449 if (fh_len < NSFS_FID_SIZE_U32_VER0) 450 return NULL; 451 452 /* Check that any trailing bytes are zero. */ 453 if ((fh_len > NSFS_FID_SIZE_U32_LATEST) && 454 memchr_inv((void *)fid + NSFS_FID_SIZE_U32_LATEST, 0, 455 fh_len - NSFS_FID_SIZE_U32_LATEST)) 456 return NULL; 457 458 switch (fh_type) { 459 case FILEID_NSFS: 460 break; 461 default: 462 return NULL; 463 } 464 465 scoped_guard(rcu) { 466 ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type); 467 if (!ns) 468 return NULL; 469 470 VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id); 471 VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type); 472 VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum); 473 474 if (!__ns_ref_get(ns)) 475 return NULL; 476 } 477 478 switch (ns->ns_type) { 479 #ifdef CONFIG_CGROUPS 480 case CLONE_NEWCGROUP: 481 if (!current_in_namespace(to_cg_ns(ns))) 482 owning_ns = to_cg_ns(ns)->user_ns; 483 break; 484 #endif 485 #ifdef CONFIG_IPC_NS 486 case CLONE_NEWIPC: 487 if (!current_in_namespace(to_ipc_ns(ns))) 488 owning_ns = to_ipc_ns(ns)->user_ns; 489 break; 490 #endif 491 case CLONE_NEWNS: 492 if (!current_in_namespace(to_mnt_ns(ns))) 493 owning_ns = to_mnt_ns(ns)->user_ns; 494 break; 495 #ifdef CONFIG_NET_NS 496 case CLONE_NEWNET: 497 if (!current_in_namespace(to_net_ns(ns))) 498 owning_ns = to_net_ns(ns)->user_ns; 499 break; 500 #endif 501 #ifdef CONFIG_PID_NS 502 case CLONE_NEWPID: 503 if (!current_in_namespace(to_pid_ns(ns))) { 504 owning_ns = to_pid_ns(ns)->user_ns; 505 } else if (!READ_ONCE(to_pid_ns(ns)->child_reaper)) { 506 ns->ops->put(ns); 507 return ERR_PTR(-EPERM); 508 } 509 break; 510 #endif 511 #ifdef CONFIG_TIME_NS 512 case CLONE_NEWTIME: 513 if (!current_in_namespace(to_time_ns(ns))) 514 owning_ns = to_time_ns(ns)->user_ns; 515 break; 516 #endif 517 #ifdef CONFIG_USER_NS 518 case CLONE_NEWUSER: 519 if (!current_in_namespace(to_user_ns(ns))) 520 owning_ns = to_user_ns(ns); 521 break; 522 #endif 523 #ifdef CONFIG_UTS_NS 524 case CLONE_NEWUTS: 525 if (!current_in_namespace(to_uts_ns(ns))) 526 owning_ns = to_uts_ns(ns)->user_ns; 527 break; 528 #endif 529 default: 530 return ERR_PTR(-EOPNOTSUPP); 531 } 532 533 if (owning_ns && !ns_capable(owning_ns, CAP_SYS_ADMIN)) { 534 ns->ops->put(ns); 535 return ERR_PTR(-EPERM); 536 } 537 538 /* path_from_stashed() unconditionally consumes the reference. */ 539 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 540 if (ret) 541 return ERR_PTR(ret); 542 543 return no_free_ptr(path.dentry); 544 } 545 546 static int nsfs_export_permission(struct handle_to_path_ctx *ctx, 547 unsigned int oflags) 548 { 549 /* nsfs_fh_to_dentry() performs all permission checks. */ 550 return 0; 551 } 552 553 static struct file *nsfs_export_open(const struct path *path, unsigned int oflags) 554 { 555 return file_open_root(path, "", oflags, 0); 556 } 557 558 static const struct export_operations nsfs_export_operations = { 559 .encode_fh = nsfs_encode_fh, 560 .fh_to_dentry = nsfs_fh_to_dentry, 561 .open = nsfs_export_open, 562 .permission = nsfs_export_permission, 563 }; 564 565 static int nsfs_init_fs_context(struct fs_context *fc) 566 { 567 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 568 if (!ctx) 569 return -ENOMEM; 570 ctx->ops = &nsfs_ops; 571 ctx->eops = &nsfs_export_operations; 572 ctx->dops = &ns_dentry_operations; 573 fc->s_fs_info = (void *)&nsfs_stashed_ops; 574 return 0; 575 } 576 577 static struct file_system_type nsfs = { 578 .name = "nsfs", 579 .init_fs_context = nsfs_init_fs_context, 580 .kill_sb = kill_anon_super, 581 }; 582 583 void __init nsfs_init(void) 584 { 585 nsfs_mnt = kern_mount(&nsfs); 586 if (IS_ERR(nsfs_mnt)) 587 panic("can't set nsfs up\n"); 588 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 589 nsfs_root_path.mnt = nsfs_mnt; 590 nsfs_root_path.dentry = nsfs_mnt->mnt_root; 591 } 592