1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 17 #include "mount.h" 18 #include "internal.h" 19 20 static struct vfsmount *nsfs_mnt; 21 22 static long ns_ioctl(struct file *filp, unsigned int ioctl, 23 unsigned long arg); 24 static const struct file_operations ns_file_operations = { 25 .unlocked_ioctl = ns_ioctl, 26 .compat_ioctl = compat_ptr_ioctl, 27 }; 28 29 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 30 { 31 struct inode *inode = d_inode(dentry); 32 struct ns_common *ns = inode->i_private; 33 const struct proc_ns_operations *ns_ops = ns->ops; 34 35 return dynamic_dname(buffer, buflen, "%s:[%lu]", 36 ns_ops->name, inode->i_ino); 37 } 38 39 const struct dentry_operations ns_dentry_operations = { 40 .d_delete = always_delete_dentry, 41 .d_dname = ns_dname, 42 .d_prune = stashed_dentry_prune, 43 }; 44 45 static void nsfs_evict(struct inode *inode) 46 { 47 struct ns_common *ns = inode->i_private; 48 clear_inode(inode); 49 ns->ops->put(ns); 50 } 51 52 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 53 void *private_data) 54 { 55 struct ns_common *ns; 56 57 ns = ns_get_cb(private_data); 58 if (!ns) 59 return -ENOENT; 60 61 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 62 } 63 64 struct ns_get_path_task_args { 65 const struct proc_ns_operations *ns_ops; 66 struct task_struct *task; 67 }; 68 69 static struct ns_common *ns_get_path_task(void *private_data) 70 { 71 struct ns_get_path_task_args *args = private_data; 72 73 return args->ns_ops->get(args->task); 74 } 75 76 int ns_get_path(struct path *path, struct task_struct *task, 77 const struct proc_ns_operations *ns_ops) 78 { 79 struct ns_get_path_task_args args = { 80 .ns_ops = ns_ops, 81 .task = task, 82 }; 83 84 return ns_get_path_cb(path, ns_get_path_task, &args); 85 } 86 87 /** 88 * open_namespace - open a namespace 89 * @ns: the namespace to open 90 * 91 * This will consume a reference to @ns indendent of success or failure. 92 * 93 * Return: A file descriptor on success or a negative error code on failure. 94 */ 95 int open_namespace(struct ns_common *ns) 96 { 97 struct path path __free(path_put) = {}; 98 struct file *f; 99 int err; 100 101 /* call first to consume reference */ 102 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 103 if (err < 0) 104 return err; 105 106 CLASS(get_unused_fd, fd)(O_CLOEXEC); 107 if (fd < 0) 108 return fd; 109 110 f = dentry_open(&path, O_RDONLY, current_cred()); 111 if (IS_ERR(f)) 112 return PTR_ERR(f); 113 114 fd_install(fd, f); 115 return take_fd(fd); 116 } 117 118 int open_related_ns(struct ns_common *ns, 119 struct ns_common *(*get_ns)(struct ns_common *ns)) 120 { 121 struct ns_common *relative; 122 123 relative = get_ns(ns); 124 if (IS_ERR(relative)) 125 return PTR_ERR(relative); 126 127 return open_namespace(relative); 128 } 129 EXPORT_SYMBOL_GPL(open_related_ns); 130 131 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 132 struct mnt_ns_info __user *uinfo, size_t usize, 133 struct mnt_ns_info *kinfo) 134 { 135 /* 136 * If userspace and the kernel have the same struct size it can just 137 * be copied. If userspace provides an older struct, only the bits that 138 * userspace knows about will be copied. If userspace provides a new 139 * struct, only the bits that the kernel knows aobut will be copied and 140 * the size value will be set to the size the kernel knows about. 141 */ 142 kinfo->size = min(usize, sizeof(*kinfo)); 143 kinfo->mnt_ns_id = mnt_ns->seq; 144 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 145 /* Subtract the root mount of the mount namespace. */ 146 if (kinfo->nr_mounts) 147 kinfo->nr_mounts--; 148 149 if (copy_to_user(uinfo, kinfo, kinfo->size)) 150 return -EFAULT; 151 152 return 0; 153 } 154 155 static bool nsfs_ioctl_valid(unsigned int cmd) 156 { 157 switch (cmd) { 158 case NS_GET_USERNS: 159 case NS_GET_PARENT: 160 case NS_GET_NSTYPE: 161 case NS_GET_OWNER_UID: 162 case NS_GET_MNTNS_ID: 163 case NS_GET_PID_FROM_PIDNS: 164 case NS_GET_TGID_FROM_PIDNS: 165 case NS_GET_PID_IN_PIDNS: 166 case NS_GET_TGID_IN_PIDNS: 167 return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); 168 } 169 170 /* Extensible ioctls require some extra handling. */ 171 switch (_IOC_NR(cmd)) { 172 case _IOC_NR(NS_MNT_GET_INFO): 173 case _IOC_NR(NS_MNT_GET_NEXT): 174 case _IOC_NR(NS_MNT_GET_PREV): 175 return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); 176 } 177 178 return false; 179 } 180 181 static long ns_ioctl(struct file *filp, unsigned int ioctl, 182 unsigned long arg) 183 { 184 struct user_namespace *user_ns; 185 struct pid_namespace *pid_ns; 186 struct task_struct *tsk; 187 struct ns_common *ns; 188 struct mnt_namespace *mnt_ns; 189 bool previous = false; 190 uid_t __user *argp; 191 uid_t uid; 192 int ret; 193 194 if (!nsfs_ioctl_valid(ioctl)) 195 return -ENOIOCTLCMD; 196 197 ns = get_proc_ns(file_inode(filp)); 198 switch (ioctl) { 199 case NS_GET_USERNS: 200 return open_related_ns(ns, ns_get_owner); 201 case NS_GET_PARENT: 202 if (!ns->ops->get_parent) 203 return -EINVAL; 204 return open_related_ns(ns, ns->ops->get_parent); 205 case NS_GET_NSTYPE: 206 return ns->ops->type; 207 case NS_GET_OWNER_UID: 208 if (ns->ops->type != CLONE_NEWUSER) 209 return -EINVAL; 210 user_ns = container_of(ns, struct user_namespace, ns); 211 argp = (uid_t __user *) arg; 212 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 213 return put_user(uid, argp); 214 case NS_GET_MNTNS_ID: { 215 __u64 __user *idp; 216 __u64 id; 217 218 if (ns->ops->type != CLONE_NEWNS) 219 return -EINVAL; 220 221 mnt_ns = container_of(ns, struct mnt_namespace, ns); 222 idp = (__u64 __user *)arg; 223 id = mnt_ns->seq; 224 return put_user(id, idp); 225 } 226 case NS_GET_PID_FROM_PIDNS: 227 fallthrough; 228 case NS_GET_TGID_FROM_PIDNS: 229 fallthrough; 230 case NS_GET_PID_IN_PIDNS: 231 fallthrough; 232 case NS_GET_TGID_IN_PIDNS: { 233 if (ns->ops->type != CLONE_NEWPID) 234 return -EINVAL; 235 236 ret = -ESRCH; 237 pid_ns = container_of(ns, struct pid_namespace, ns); 238 239 guard(rcu)(); 240 241 if (ioctl == NS_GET_PID_IN_PIDNS || 242 ioctl == NS_GET_TGID_IN_PIDNS) 243 tsk = find_task_by_vpid(arg); 244 else 245 tsk = find_task_by_pid_ns(arg, pid_ns); 246 if (!tsk) 247 break; 248 249 switch (ioctl) { 250 case NS_GET_PID_FROM_PIDNS: 251 ret = task_pid_vnr(tsk); 252 break; 253 case NS_GET_TGID_FROM_PIDNS: 254 ret = task_tgid_vnr(tsk); 255 break; 256 case NS_GET_PID_IN_PIDNS: 257 ret = task_pid_nr_ns(tsk, pid_ns); 258 break; 259 case NS_GET_TGID_IN_PIDNS: 260 ret = task_tgid_nr_ns(tsk, pid_ns); 261 break; 262 default: 263 ret = 0; 264 break; 265 } 266 267 if (!ret) 268 ret = -ESRCH; 269 return ret; 270 } 271 } 272 273 /* extensible ioctls */ 274 switch (_IOC_NR(ioctl)) { 275 case _IOC_NR(NS_MNT_GET_INFO): { 276 struct mnt_ns_info kinfo = {}; 277 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 278 size_t usize = _IOC_SIZE(ioctl); 279 280 if (ns->ops->type != CLONE_NEWNS) 281 return -EINVAL; 282 283 if (!uinfo) 284 return -EINVAL; 285 286 if (usize < MNT_NS_INFO_SIZE_VER0) 287 return -EINVAL; 288 289 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 290 } 291 case _IOC_NR(NS_MNT_GET_PREV): 292 previous = true; 293 fallthrough; 294 case _IOC_NR(NS_MNT_GET_NEXT): { 295 struct mnt_ns_info kinfo = {}; 296 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 297 struct path path __free(path_put) = {}; 298 struct file *f __free(fput) = NULL; 299 size_t usize = _IOC_SIZE(ioctl); 300 301 if (ns->ops->type != CLONE_NEWNS) 302 return -EINVAL; 303 304 if (usize < MNT_NS_INFO_SIZE_VER0) 305 return -EINVAL; 306 307 mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous); 308 if (IS_ERR(mnt_ns)) 309 return PTR_ERR(mnt_ns); 310 311 ns = to_ns_common(mnt_ns); 312 /* Transfer ownership of @mnt_ns reference to @path. */ 313 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 314 if (ret) 315 return ret; 316 317 CLASS(get_unused_fd, fd)(O_CLOEXEC); 318 if (fd < 0) 319 return fd; 320 321 f = dentry_open(&path, O_RDONLY, current_cred()); 322 if (IS_ERR(f)) 323 return PTR_ERR(f); 324 325 if (uinfo) { 326 /* 327 * If @uinfo is passed return all information about the 328 * mount namespace as well. 329 */ 330 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 331 if (ret) 332 return ret; 333 } 334 335 /* Transfer reference of @f to caller's fdtable. */ 336 fd_install(fd, no_free_ptr(f)); 337 /* File descriptor is live so hand it off to the caller. */ 338 return take_fd(fd); 339 } 340 default: 341 ret = -ENOTTY; 342 } 343 344 return ret; 345 } 346 347 int ns_get_name(char *buf, size_t size, struct task_struct *task, 348 const struct proc_ns_operations *ns_ops) 349 { 350 struct ns_common *ns; 351 int res = -ENOENT; 352 const char *name; 353 ns = ns_ops->get(task); 354 if (ns) { 355 name = ns_ops->real_ns_name ? : ns_ops->name; 356 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 357 ns_ops->put(ns); 358 } 359 return res; 360 } 361 362 bool proc_ns_file(const struct file *file) 363 { 364 return file->f_op == &ns_file_operations; 365 } 366 367 /** 368 * ns_match() - Returns true if current namespace matches dev/ino provided. 369 * @ns: current namespace 370 * @dev: dev_t from nsfs that will be matched against current nsfs 371 * @ino: ino_t from nsfs that will be matched against current nsfs 372 * 373 * Return: true if dev and ino matches the current nsfs. 374 */ 375 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 376 { 377 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 378 } 379 380 381 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 382 { 383 struct inode *inode = d_inode(dentry); 384 const struct ns_common *ns = inode->i_private; 385 const struct proc_ns_operations *ns_ops = ns->ops; 386 387 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 388 return 0; 389 } 390 391 static const struct super_operations nsfs_ops = { 392 .statfs = simple_statfs, 393 .evict_inode = nsfs_evict, 394 .show_path = nsfs_show_path, 395 }; 396 397 static int nsfs_init_inode(struct inode *inode, void *data) 398 { 399 struct ns_common *ns = data; 400 401 inode->i_private = data; 402 inode->i_mode |= S_IRUGO; 403 inode->i_fop = &ns_file_operations; 404 inode->i_ino = ns->inum; 405 return 0; 406 } 407 408 static void nsfs_put_data(void *data) 409 { 410 struct ns_common *ns = data; 411 ns->ops->put(ns); 412 } 413 414 static const struct stashed_operations nsfs_stashed_ops = { 415 .init_inode = nsfs_init_inode, 416 .put_data = nsfs_put_data, 417 }; 418 419 static int nsfs_init_fs_context(struct fs_context *fc) 420 { 421 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 422 if (!ctx) 423 return -ENOMEM; 424 ctx->ops = &nsfs_ops; 425 ctx->dops = &ns_dentry_operations; 426 fc->s_fs_info = (void *)&nsfs_stashed_ops; 427 return 0; 428 } 429 430 static struct file_system_type nsfs = { 431 .name = "nsfs", 432 .init_fs_context = nsfs_init_fs_context, 433 .kill_sb = kill_anon_super, 434 }; 435 436 void __init nsfs_init(void) 437 { 438 nsfs_mnt = kern_mount(&nsfs); 439 if (IS_ERR(nsfs_mnt)) 440 panic("can't set nsfs up\n"); 441 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 442 } 443