1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/mount.h> 3 #include <linux/pseudo_fs.h> 4 #include <linux/file.h> 5 #include <linux/fs.h> 6 #include <linux/proc_fs.h> 7 #include <linux/proc_ns.h> 8 #include <linux/magic.h> 9 #include <linux/ktime.h> 10 #include <linux/seq_file.h> 11 #include <linux/pid_namespace.h> 12 #include <linux/user_namespace.h> 13 #include <linux/nsfs.h> 14 #include <linux/uaccess.h> 15 #include <linux/mnt_namespace.h> 16 17 #include "mount.h" 18 #include "internal.h" 19 20 static struct vfsmount *nsfs_mnt; 21 22 static long ns_ioctl(struct file *filp, unsigned int ioctl, 23 unsigned long arg); 24 static const struct file_operations ns_file_operations = { 25 .unlocked_ioctl = ns_ioctl, 26 .compat_ioctl = compat_ptr_ioctl, 27 }; 28 29 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 30 { 31 struct inode *inode = d_inode(dentry); 32 struct ns_common *ns = inode->i_private; 33 const struct proc_ns_operations *ns_ops = ns->ops; 34 35 return dynamic_dname(buffer, buflen, "%s:[%lu]", 36 ns_ops->name, inode->i_ino); 37 } 38 39 const struct dentry_operations ns_dentry_operations = { 40 .d_delete = always_delete_dentry, 41 .d_dname = ns_dname, 42 .d_prune = stashed_dentry_prune, 43 }; 44 45 static void nsfs_evict(struct inode *inode) 46 { 47 struct ns_common *ns = inode->i_private; 48 clear_inode(inode); 49 ns->ops->put(ns); 50 } 51 52 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, 53 void *private_data) 54 { 55 struct ns_common *ns; 56 57 ns = ns_get_cb(private_data); 58 if (!ns) 59 return -ENOENT; 60 61 return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path); 62 } 63 64 struct ns_get_path_task_args { 65 const struct proc_ns_operations *ns_ops; 66 struct task_struct *task; 67 }; 68 69 static struct ns_common *ns_get_path_task(void *private_data) 70 { 71 struct ns_get_path_task_args *args = private_data; 72 73 return args->ns_ops->get(args->task); 74 } 75 76 int ns_get_path(struct path *path, struct task_struct *task, 77 const struct proc_ns_operations *ns_ops) 78 { 79 struct ns_get_path_task_args args = { 80 .ns_ops = ns_ops, 81 .task = task, 82 }; 83 84 return ns_get_path_cb(path, ns_get_path_task, &args); 85 } 86 87 /** 88 * open_namespace - open a namespace 89 * @ns: the namespace to open 90 * 91 * This will consume a reference to @ns indendent of success or failure. 92 * 93 * Return: A file descriptor on success or a negative error code on failure. 94 */ 95 int open_namespace(struct ns_common *ns) 96 { 97 struct path path __free(path_put) = {}; 98 struct file *f; 99 int err; 100 101 /* call first to consume reference */ 102 err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 103 if (err < 0) 104 return err; 105 106 CLASS(get_unused_fd, fd)(O_CLOEXEC); 107 if (fd < 0) 108 return fd; 109 110 f = dentry_open(&path, O_RDONLY, current_cred()); 111 if (IS_ERR(f)) 112 return PTR_ERR(f); 113 114 fd_install(fd, f); 115 return take_fd(fd); 116 } 117 118 int open_related_ns(struct ns_common *ns, 119 struct ns_common *(*get_ns)(struct ns_common *ns)) 120 { 121 struct ns_common *relative; 122 123 relative = get_ns(ns); 124 if (IS_ERR(relative)) 125 return PTR_ERR(relative); 126 127 return open_namespace(relative); 128 } 129 EXPORT_SYMBOL_GPL(open_related_ns); 130 131 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, 132 struct mnt_ns_info __user *uinfo, size_t usize, 133 struct mnt_ns_info *kinfo) 134 { 135 /* 136 * If userspace and the kernel have the same struct size it can just 137 * be copied. If userspace provides an older struct, only the bits that 138 * userspace knows about will be copied. If userspace provides a new 139 * struct, only the bits that the kernel knows aobut will be copied and 140 * the size value will be set to the size the kernel knows about. 141 */ 142 kinfo->size = min(usize, sizeof(*kinfo)); 143 kinfo->mnt_ns_id = mnt_ns->seq; 144 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 145 /* Subtract the root mount of the mount namespace. */ 146 if (kinfo->nr_mounts) 147 kinfo->nr_mounts--; 148 149 if (copy_to_user(uinfo, kinfo, kinfo->size)) 150 return -EFAULT; 151 152 return 0; 153 } 154 155 static long ns_ioctl(struct file *filp, unsigned int ioctl, 156 unsigned long arg) 157 { 158 struct user_namespace *user_ns; 159 struct pid_namespace *pid_ns; 160 struct task_struct *tsk; 161 struct ns_common *ns = get_proc_ns(file_inode(filp)); 162 struct mnt_namespace *mnt_ns; 163 bool previous = false; 164 uid_t __user *argp; 165 uid_t uid; 166 int ret; 167 168 switch (ioctl) { 169 case NS_GET_USERNS: 170 return open_related_ns(ns, ns_get_owner); 171 case NS_GET_PARENT: 172 if (!ns->ops->get_parent) 173 return -EINVAL; 174 return open_related_ns(ns, ns->ops->get_parent); 175 case NS_GET_NSTYPE: 176 return ns->ops->type; 177 case NS_GET_OWNER_UID: 178 if (ns->ops->type != CLONE_NEWUSER) 179 return -EINVAL; 180 user_ns = container_of(ns, struct user_namespace, ns); 181 argp = (uid_t __user *) arg; 182 uid = from_kuid_munged(current_user_ns(), user_ns->owner); 183 return put_user(uid, argp); 184 case NS_GET_MNTNS_ID: { 185 __u64 __user *idp; 186 __u64 id; 187 188 if (ns->ops->type != CLONE_NEWNS) 189 return -EINVAL; 190 191 mnt_ns = container_of(ns, struct mnt_namespace, ns); 192 idp = (__u64 __user *)arg; 193 id = mnt_ns->seq; 194 return put_user(id, idp); 195 } 196 case NS_GET_PID_FROM_PIDNS: 197 fallthrough; 198 case NS_GET_TGID_FROM_PIDNS: 199 fallthrough; 200 case NS_GET_PID_IN_PIDNS: 201 fallthrough; 202 case NS_GET_TGID_IN_PIDNS: { 203 if (ns->ops->type != CLONE_NEWPID) 204 return -EINVAL; 205 206 ret = -ESRCH; 207 pid_ns = container_of(ns, struct pid_namespace, ns); 208 209 guard(rcu)(); 210 211 if (ioctl == NS_GET_PID_IN_PIDNS || 212 ioctl == NS_GET_TGID_IN_PIDNS) 213 tsk = find_task_by_vpid(arg); 214 else 215 tsk = find_task_by_pid_ns(arg, pid_ns); 216 if (!tsk) 217 break; 218 219 switch (ioctl) { 220 case NS_GET_PID_FROM_PIDNS: 221 ret = task_pid_vnr(tsk); 222 break; 223 case NS_GET_TGID_FROM_PIDNS: 224 ret = task_tgid_vnr(tsk); 225 break; 226 case NS_GET_PID_IN_PIDNS: 227 ret = task_pid_nr_ns(tsk, pid_ns); 228 break; 229 case NS_GET_TGID_IN_PIDNS: 230 ret = task_tgid_nr_ns(tsk, pid_ns); 231 break; 232 default: 233 ret = 0; 234 break; 235 } 236 237 if (!ret) 238 ret = -ESRCH; 239 return ret; 240 } 241 } 242 243 /* extensible ioctls */ 244 switch (_IOC_NR(ioctl)) { 245 case _IOC_NR(NS_MNT_GET_INFO): { 246 struct mnt_ns_info kinfo = {}; 247 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 248 size_t usize = _IOC_SIZE(ioctl); 249 250 if (ns->ops->type != CLONE_NEWNS) 251 return -EINVAL; 252 253 if (!uinfo) 254 return -EINVAL; 255 256 if (usize < MNT_NS_INFO_SIZE_VER0) 257 return -EINVAL; 258 259 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 260 } 261 case _IOC_NR(NS_MNT_GET_PREV): 262 previous = true; 263 fallthrough; 264 case _IOC_NR(NS_MNT_GET_NEXT): { 265 struct mnt_ns_info kinfo = {}; 266 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; 267 struct path path __free(path_put) = {}; 268 struct file *f __free(fput) = NULL; 269 size_t usize = _IOC_SIZE(ioctl); 270 271 if (ns->ops->type != CLONE_NEWNS) 272 return -EINVAL; 273 274 if (usize < MNT_NS_INFO_SIZE_VER0) 275 return -EINVAL; 276 277 if (previous) 278 mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns)); 279 else 280 mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns)); 281 if (IS_ERR(mnt_ns)) 282 return PTR_ERR(mnt_ns); 283 284 ns = to_ns_common(mnt_ns); 285 /* Transfer ownership of @mnt_ns reference to @path. */ 286 ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); 287 if (ret) 288 return ret; 289 290 CLASS(get_unused_fd, fd)(O_CLOEXEC); 291 if (fd < 0) 292 return fd; 293 294 f = dentry_open(&path, O_RDONLY, current_cred()); 295 if (IS_ERR(f)) 296 return PTR_ERR(f); 297 298 if (uinfo) { 299 /* 300 * If @uinfo is passed return all information about the 301 * mount namespace as well. 302 */ 303 ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo); 304 if (ret) 305 return ret; 306 } 307 308 /* Transfer reference of @f to caller's fdtable. */ 309 fd_install(fd, no_free_ptr(f)); 310 /* File descriptor is live so hand it off to the caller. */ 311 return take_fd(fd); 312 } 313 default: 314 ret = -ENOTTY; 315 } 316 317 return ret; 318 } 319 320 int ns_get_name(char *buf, size_t size, struct task_struct *task, 321 const struct proc_ns_operations *ns_ops) 322 { 323 struct ns_common *ns; 324 int res = -ENOENT; 325 const char *name; 326 ns = ns_ops->get(task); 327 if (ns) { 328 name = ns_ops->real_ns_name ? : ns_ops->name; 329 res = snprintf(buf, size, "%s:[%u]", name, ns->inum); 330 ns_ops->put(ns); 331 } 332 return res; 333 } 334 335 bool proc_ns_file(const struct file *file) 336 { 337 return file->f_op == &ns_file_operations; 338 } 339 340 /** 341 * ns_match() - Returns true if current namespace matches dev/ino provided. 342 * @ns: current namespace 343 * @dev: dev_t from nsfs that will be matched against current nsfs 344 * @ino: ino_t from nsfs that will be matched against current nsfs 345 * 346 * Return: true if dev and ino matches the current nsfs. 347 */ 348 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 349 { 350 return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 351 } 352 353 354 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 355 { 356 struct inode *inode = d_inode(dentry); 357 const struct ns_common *ns = inode->i_private; 358 const struct proc_ns_operations *ns_ops = ns->ops; 359 360 seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); 361 return 0; 362 } 363 364 static const struct super_operations nsfs_ops = { 365 .statfs = simple_statfs, 366 .evict_inode = nsfs_evict, 367 .show_path = nsfs_show_path, 368 }; 369 370 static int nsfs_init_inode(struct inode *inode, void *data) 371 { 372 struct ns_common *ns = data; 373 374 inode->i_private = data; 375 inode->i_mode |= S_IRUGO; 376 inode->i_fop = &ns_file_operations; 377 inode->i_ino = ns->inum; 378 return 0; 379 } 380 381 static void nsfs_put_data(void *data) 382 { 383 struct ns_common *ns = data; 384 ns->ops->put(ns); 385 } 386 387 static const struct stashed_operations nsfs_stashed_ops = { 388 .init_inode = nsfs_init_inode, 389 .put_data = nsfs_put_data, 390 }; 391 392 static int nsfs_init_fs_context(struct fs_context *fc) 393 { 394 struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); 395 if (!ctx) 396 return -ENOMEM; 397 ctx->ops = &nsfs_ops; 398 ctx->dops = &ns_dentry_operations; 399 fc->s_fs_info = (void *)&nsfs_stashed_ops; 400 return 0; 401 } 402 403 static struct file_system_type nsfs = { 404 .name = "nsfs", 405 .init_fs_context = nsfs_init_fs_context, 406 .kill_sb = kill_anon_super, 407 }; 408 409 void __init nsfs_init(void) 410 { 411 nsfs_mnt = kern_mount(&nsfs); 412 if (IS_ERR(nsfs_mnt)) 413 panic("can't set nsfs up\n"); 414 nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; 415 } 416