1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 17 #include "mount.h" 18 #include "internal.h" 19 20 static struct vfsmount *nsfs_mnt; 21 22 static long ns_ioctl(struct file *filp, unsigned int ioctl, 23 unsigned long arg); 24 static const struct file_operations ns_file_operations = { 25 .llseek = no_llseek, 26 .unlocked_ioctl = ns_ioctl, 27 .compat_ioctl = compat_ptr_ioctl, 28 }; 29 30 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 31 { 32 struct inode *inode = d_inode(dentry); 33 struct ns_common *ns = inode->i_private; 34 const struct proc_ns_operations *ns_ops = ns->ops; 35 36 return dynamic_dname(buffer, buflen, "%s:[%lu]", 37 ns_ops->name, inode->i_ino); 38 } 39 40 const struct dentry_operations ns_dentry_operations = { 41 .d_delete = always_delete_dentry, 42 .d_dname = ns_dname, 43 .d_prune = stashed_dentry_prune, 44 }; 45 46 static void nsfs_evict(struct inode *inode) 47 { 48 struct ns_common *ns = inode->i_private; 49 clear_inode(inode); 50 ns->ops->put(ns); 51 } 52 53 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 54 void *private_data) 55 { 56 struct ns_common *ns; 57 58 ns = ns_get_cb(private_data); 59 if (!ns) 60 return -ENOENT; 61 62 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 63 } 64 65 struct ns_get_path_task_args { 66 const struct proc_ns_operations *ns_ops; 67 struct task_struct *task; 68 }; 69 70 static struct ns_common *ns_get_path_task(void *private_data) 71 { 72 struct ns_get_path_task_args *args = private_data; 73 74 return args->ns_ops->get(args->task); 75 } 76 77 int ns_get_path(struct path *path, struct task_struct *task, 78 const struct proc_ns_operations *ns_ops) 79 { 80 struct ns_get_path_task_args args = { 81 .ns_ops = ns_ops, 82 .task = task, 83 }; 84 85 return ns_get_path_cb(path, ns_get_path_task, &args); 86 } 87 88 /** 89 * open_namespace - open a namespace 90 * @ns: the namespace to open 91 * 92 * This will consume a reference to @ns indendent of success or failure. 93 * 94 * Return: A file descriptor on success or a negative error code on failure. 95 */ 96 int open_namespace(struct ns_common *ns) 97 { 98 struct path path __free(path_put) = {}; 99 struct file *f; 100 int err; 101 102 /* call first to consume reference */ 103 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 104 if (err < 0) 105 return err; 106 107 CLASS(get_unused_fd, fd)(O_CLOEXEC); 108 if (fd < 0) 109 return fd; 110 111 f = dentry_open(&path, O_RDONLY, current_cred()); 112 if (IS_ERR(f)) 113 return PTR_ERR(f); 114 115 fd_install(fd, f); 116 return take_fd(fd); 117 } 118 119 int open_related_ns(struct ns_common *ns, 120 struct ns_common *(*get_ns)(struct ns_common *ns)) 121 { 122 struct ns_common *relative; 123 124 relative = get_ns(ns); 125 if (IS_ERR(relative)) 126 return PTR_ERR(relative); 127 128 return open_namespace(relative); 129 } 130 EXPORT_SYMBOL_GPL(open_related_ns); 131 132 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 133 struct mnt_ns_info __user *uinfo, size_t usize, 134 struct mnt_ns_info *kinfo) 135 { 136 /* 137 * If userspace and the kernel have the same struct size it can just 138 * be copied. If userspace provides an older struct, only the bits that 139 * userspace knows about will be copied. If userspace provides a new 140 * struct, only the bits that the kernel knows aobut will be copied and 141 * the size value will be set to the size the kernel knows about. 142 */ 143 kinfo->size = min(usize, sizeof(*kinfo)); 144 kinfo->mnt_ns_id = mnt_ns->seq; 145 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 146 /* Subtract the root mount of the mount namespace. */ 147 if (kinfo->nr_mounts) 148 kinfo->nr_mounts--; 149 150 if (copy_to_user(uinfo, kinfo, kinfo->size)) 151 return -EFAULT; 152 153 return 0; 154 } 155 156 static long ns_ioctl(struct file *filp, unsigned int ioctl, 157 unsigned long arg) 158 { 159 struct user_namespace *user_ns; 160 struct pid_namespace *pid_ns; 161 struct task_struct *tsk; 162 struct ns_common *ns = get_proc_ns(file_inode(filp)); 163 struct mnt_namespace *mnt_ns; 164 bool previous = false; 165 uid_t __user *argp; 166 uid_t uid; 167 int ret; 168 169 switch (ioctl) { 170 case NS_GET_USERNS: 171 return open_related_ns(ns, ns_get_owner); 172 case NS_GET_PARENT: 173 if (!ns->ops->get_parent) 174 return -EINVAL; 175 return open_related_ns(ns, ns->ops->get_parent); 176 case NS_GET_NSTYPE: 177 return ns->ops->type; 178 case NS_GET_OWNER_UID: 179 if (ns->ops->type != CLONE_NEWUSER) 180 return -EINVAL; 181 user_ns = container_of(ns, struct user_namespace, ns); 182 argp = (uid_t __user *) arg; 183 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 184 return put_user(uid, argp); 185 case NS_GET_MNTNS_ID: { 186 __u64 __user *idp; 187 __u64 id; 188 189 if (ns->ops->type != CLONE_NEWNS) 190 return -EINVAL; 191 192 mnt_ns = container_of(ns, struct mnt_namespace, ns); 193 idp = (__u64 __user *)arg; 194 id = mnt_ns->seq; 195 return put_user(id, idp); 196 } 197 case NS_GET_PID_FROM_PIDNS: 198 fallthrough; 199 case NS_GET_TGID_FROM_PIDNS: 200 fallthrough; 201 case NS_GET_PID_IN_PIDNS: 202 fallthrough; 203 case NS_GET_TGID_IN_PIDNS: { 204 if (ns->ops->type != CLONE_NEWPID) 205 return -EINVAL; 206 207 ret = -ESRCH; 208 pid_ns = container_of(ns, struct pid_namespace, ns); 209 210 guard(rcu)(); 211 212 if (ioctl == NS_GET_PID_IN_PIDNS || 213 ioctl == NS_GET_TGID_IN_PIDNS) 214 tsk = find_task_by_vpid(arg); 215 else 216 tsk = find_task_by_pid_ns(arg, pid_ns); 217 if (!tsk) 218 break; 219 220 switch (ioctl) { 221 case NS_GET_PID_FROM_PIDNS: 222 ret = task_pid_vnr(tsk); 223 break; 224 case NS_GET_TGID_FROM_PIDNS: 225 ret = task_tgid_vnr(tsk); 226 break; 227 case NS_GET_PID_IN_PIDNS: 228 ret = task_pid_nr_ns(tsk, pid_ns); 229 break; 230 case NS_GET_TGID_IN_PIDNS: 231 ret = task_tgid_nr_ns(tsk, pid_ns); 232 break; 233 default: 234 ret = 0; 235 break; 236 } 237 238 if (!ret) 239 ret = -ESRCH; 240 return ret; 241 } 242 } 243 244 /* extensible ioctls */ 245 switch (_IOC_NR(ioctl)) { 246 case _IOC_NR(NS_MNT_GET_INFO): { 247 struct mnt_ns_info kinfo = {}; 248 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 249 size_t usize = _IOC_SIZE(ioctl); 250 251 if (ns->ops->type != CLONE_NEWNS) 252 return -EINVAL; 253 254 if (!uinfo) 255 return -EINVAL; 256 257 if (usize < MNT_NS_INFO_SIZE_VER0) 258 return -EINVAL; 259 260 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 261 } 262 case _IOC_NR(NS_MNT_GET_PREV): 263 previous = true; 264 fallthrough; 265 case _IOC_NR(NS_MNT_GET_NEXT): { 266 struct mnt_ns_info kinfo = {}; 267 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 268 struct path path __free(path_put) = {}; 269 struct file *f __free(fput) = NULL; 270 size_t usize = _IOC_SIZE(ioctl); 271 272 if (ns->ops->type != CLONE_NEWNS) 273 return -EINVAL; 274 275 if (usize < MNT_NS_INFO_SIZE_VER0) 276 return -EINVAL; 277 278 if (previous) 279 mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns)); 280 else 281 mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns)); 282 if (IS_ERR(mnt_ns)) 283 return PTR_ERR(mnt_ns); 284 285 ns = to_ns_common(mnt_ns); 286 /* Transfer ownership of @mnt_ns reference to @path. */ 287 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 288 if (ret) 289 return ret; 290 291 CLASS(get_unused_fd, fd)(O_CLOEXEC); 292 if (fd < 0) 293 return fd; 294 295 f = dentry_open(&path, O_RDONLY, current_cred()); 296 if (IS_ERR(f)) 297 return PTR_ERR(f); 298 299 if (uinfo) { 300 /* 301 * If @uinfo is passed return all information about the 302 * mount namespace as well. 303 */ 304 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 305 if (ret) 306 return ret; 307 } 308 309 /* Transfer reference of @f to caller's fdtable. */ 310 fd_install(fd, no_free_ptr(f)); 311 /* File descriptor is live so hand it off to the caller. */ 312 return take_fd(fd); 313 } 314 default: 315 ret = -ENOTTY; 316 } 317 318 return ret; 319 } 320 321 int ns_get_name(char *buf, size_t size, struct task_struct *task, 322 const struct proc_ns_operations *ns_ops) 323 { 324 struct ns_common *ns; 325 int res = -ENOENT; 326 const char *name; 327 ns = ns_ops->get(task); 328 if (ns) { 329 name = ns_ops->real_ns_name ? : ns_ops->name; 330 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 331 ns_ops->put(ns); 332 } 333 return res; 334 } 335 336 bool proc_ns_file(const struct file *file) 337 { 338 return file->f_op == &ns_file_operations; 339 } 340 341 /** 342 * ns_match() - Returns true if current namespace matches dev/ino provided. 343 * @ns: current namespace 344 * @dev: dev_t from nsfs that will be matched against current nsfs 345 * @ino: ino_t from nsfs that will be matched against current nsfs 346 * 347 * Return: true if dev and ino matches the current nsfs. 348 */ 349 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 350 { 351 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 352 } 353 354 355 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 356 { 357 struct inode *inode = d_inode(dentry); 358 const struct ns_common *ns = inode->i_private; 359 const struct proc_ns_operations *ns_ops = ns->ops; 360 361 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 362 return 0; 363 } 364 365 static const struct super_operations nsfs_ops = { 366 .statfs = simple_statfs, 367 .evict_inode = nsfs_evict, 368 .show_path = nsfs_show_path, 369 }; 370 371 static int nsfs_init_inode(struct inode *inode, void *data) 372 { 373 struct ns_common *ns = data; 374 375 inode->i_private = data; 376 inode->i_mode |= S_IRUGO; 377 inode->i_fop = &ns_file_operations; 378 inode->i_ino = ns->inum; 379 return 0; 380 } 381 382 static void nsfs_put_data(void *data) 383 { 384 struct ns_common *ns = data; 385 ns->ops->put(ns); 386 } 387 388 static const struct stashed_operations nsfs_stashed_ops = { 389 .init_inode = nsfs_init_inode, 390 .put_data = nsfs_put_data, 391 }; 392 393 static int nsfs_init_fs_context(struct fs_context *fc) 394 { 395 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 396 if (!ctx) 397 return -ENOMEM; 398 ctx->ops = &nsfs_ops; 399 ctx->dops = &ns_dentry_operations; 400 fc->s_fs_info = (void *)&nsfs_stashed_ops; 401 return 0; 402 } 403 404 static struct file_system_type nsfs = { 405 .name = "nsfs", 406 .init_fs_context = nsfs_init_fs_context, 407 .kill_sb = kill_anon_super, 408 }; 409 410 void __init nsfs_init(void) 411 { 412 nsfs_mnt = kern_mount(&nsfs); 413 if (IS_ERR(nsfs_mnt)) 414 panic("can't set nsfs up\n"); 415 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 416 } 417