xref: /linux/fs/nsfs.c (revision 540dcf0f44042fd9c6e14ae863efb67780ae0084)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/mount.h>
3 #include <linux/pseudo_fs.h>
4 #include <linux/file.h>
5 #include <linux/fs.h>
6 #include <linux/proc_fs.h>
7 #include <linux/proc_ns.h>
8 #include <linux/magic.h>
9 #include <linux/ktime.h>
10 #include <linux/seq_file.h>
11 #include <linux/pid_namespace.h>
12 #include <linux/user_namespace.h>
13 #include <linux/nsfs.h>
14 #include <linux/uaccess.h>
15 #include <linux/mnt_namespace.h>
16 
17 #include "mount.h"
18 #include "internal.h"
19 
20 static struct vfsmount *nsfs_mnt;
21 
22 static long ns_ioctl(struct file *filp, unsigned int ioctl,
23 			unsigned long arg);
24 static const struct file_operations ns_file_operations = {
25 	.unlocked_ioctl = ns_ioctl,
26 	.compat_ioctl   = compat_ptr_ioctl,
27 };
28 
29 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
30 {
31 	struct inode *inode = d_inode(dentry);
32 	struct ns_common *ns = inode->i_private;
33 	const struct proc_ns_operations *ns_ops = ns->ops;
34 
35 	return dynamic_dname(buffer, buflen, "%s:[%lu]",
36 		ns_ops->name, inode->i_ino);
37 }
38 
39 const struct dentry_operations ns_dentry_operations = {
40 	.d_delete	= always_delete_dentry,
41 	.d_dname	= ns_dname,
42 	.d_prune	= stashed_dentry_prune,
43 };
44 
45 static void nsfs_evict(struct inode *inode)
46 {
47 	struct ns_common *ns = inode->i_private;
48 	clear_inode(inode);
49 	ns->ops->put(ns);
50 }
51 
52 int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
53 		     void *private_data)
54 {
55 	struct ns_common *ns;
56 
57 	ns = ns_get_cb(private_data);
58 	if (!ns)
59 		return -ENOENT;
60 
61 	return path_from_stashed(&ns->stashed, nsfs_mnt, ns, path);
62 }
63 
/* Argument bundle passed from ns_get_path() to ns_get_path_task(). */
struct ns_get_path_task_args {
	/* Namespace operations whose ->get() is invoked. */
	const struct proc_ns_operations *ns_ops;
	/* Task handed to ->get(). */
	struct task_struct *task;
};
68 
69 static struct ns_common *ns_get_path_task(void *private_data)
70 {
71 	struct ns_get_path_task_args *args = private_data;
72 
73 	return args->ns_ops->get(args->task);
74 }
75 
76 int ns_get_path(struct path *path, struct task_struct *task,
77 		  const struct proc_ns_operations *ns_ops)
78 {
79 	struct ns_get_path_task_args args = {
80 		.ns_ops	= ns_ops,
81 		.task	= task,
82 	};
83 
84 	return ns_get_path_cb(path, ns_get_path_task, &args);
85 }
86 
87 /**
88  * open_namespace - open a namespace
89  * @ns: the namespace to open
90  *
91  * This will consume a reference to @ns indendent of success or failure.
92  *
93  * Return: A file descriptor on success or a negative error code on failure.
94  */
95 int open_namespace(struct ns_common *ns)
96 {
97 	struct path path __free(path_put) = {};
98 	struct file *f;
99 	int err;
100 
101 	/* call first to consume reference */
102 	err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
103 	if (err < 0)
104 		return err;
105 
106 	CLASS(get_unused_fd, fd)(O_CLOEXEC);
107 	if (fd < 0)
108 		return fd;
109 
110 	f = dentry_open(&path, O_RDONLY, current_cred());
111 	if (IS_ERR(f))
112 		return PTR_ERR(f);
113 
114 	fd_install(fd, f);
115 	return take_fd(fd);
116 }
117 
/*
 * Open the namespace that @get_ns derives from @ns.
 *
 * Return: the error encoded in @get_ns's result if it is an error pointer,
 * otherwise the result of open_namespace() on the derived namespace.
 */
int open_related_ns(struct ns_common *ns,
		   struct ns_common *(*get_ns)(struct ns_common *ns))
{
	struct ns_common *related = get_ns(ns);

	if (!IS_ERR(related))
		return open_namespace(related);

	return PTR_ERR(related);
}
EXPORT_SYMBOL_GPL(open_related_ns);
130 
131 static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns,
132 				struct mnt_ns_info __user *uinfo, size_t usize,
133 				struct mnt_ns_info *kinfo)
134 {
135 	/*
136 	 * If userspace and the kernel have the same struct size it can just
137 	 * be copied. If userspace provides an older struct, only the bits that
138 	 * userspace knows about will be copied. If userspace provides a new
139 	 * struct, only the bits that the kernel knows aobut will be copied and
140 	 * the size value will be set to the size the kernel knows about.
141 	 */
142 	kinfo->size		= min(usize, sizeof(*kinfo));
143 	kinfo->mnt_ns_id	= mnt_ns->seq;
144 	kinfo->nr_mounts	= READ_ONCE(mnt_ns->nr_mounts);
145 	/* Subtract the root mount of the mount namespace. */
146 	if (kinfo->nr_mounts)
147 		kinfo->nr_mounts--;
148 
149 	if (copy_to_user(uinfo, kinfo, kinfo->size))
150 		return -EFAULT;
151 
152 	return 0;
153 }
154 
155 static bool nsfs_ioctl_valid(unsigned int cmd)
156 {
157 	switch (cmd) {
158 	case NS_GET_USERNS:
159 	case NS_GET_PARENT:
160 	case NS_GET_NSTYPE:
161 	case NS_GET_OWNER_UID:
162 	case NS_GET_MNTNS_ID:
163 	case NS_GET_PID_FROM_PIDNS:
164 	case NS_GET_TGID_FROM_PIDNS:
165 	case NS_GET_PID_IN_PIDNS:
166 	case NS_GET_TGID_IN_PIDNS:
167 		return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
168 	}
169 
170 	/* Extensible ioctls require some extra handling. */
171 	switch (_IOC_NR(cmd)) {
172 	case _IOC_NR(NS_MNT_GET_INFO):
173 	case _IOC_NR(NS_MNT_GET_NEXT):
174 	case _IOC_NR(NS_MNT_GET_PREV):
175 		return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
176 	}
177 
178 	return false;
179 }
180 
181 static long ns_ioctl(struct file *filp, unsigned int ioctl,
182 			unsigned long arg)
183 {
184 	struct user_namespace *user_ns;
185 	struct pid_namespace *pid_ns;
186 	struct task_struct *tsk;
187 	struct ns_common *ns;
188 	struct mnt_namespace *mnt_ns;
189 	bool previous = false;
190 	uid_t __user *argp;
191 	uid_t uid;
192 	int ret;
193 
194 	if (!nsfs_ioctl_valid(ioctl))
195 		return -ENOIOCTLCMD;
196 
197 	ns = get_proc_ns(file_inode(filp));
198 	switch (ioctl) {
199 	case NS_GET_USERNS:
200 		return open_related_ns(ns, ns_get_owner);
201 	case NS_GET_PARENT:
202 		if (!ns->ops->get_parent)
203 			return -EINVAL;
204 		return open_related_ns(ns, ns->ops->get_parent);
205 	case NS_GET_NSTYPE:
206 		return ns->ops->type;
207 	case NS_GET_OWNER_UID:
208 		if (ns->ops->type != CLONE_NEWUSER)
209 			return -EINVAL;
210 		user_ns = container_of(ns, struct user_namespace, ns);
211 		argp = (uid_t __user *) arg;
212 		uid = from_kuid_munged(current_user_ns(), user_ns->owner);
213 		return put_user(uid, argp);
214 	case NS_GET_MNTNS_ID: {
215 		__u64 __user *idp;
216 		__u64 id;
217 
218 		if (ns->ops->type != CLONE_NEWNS)
219 			return -EINVAL;
220 
221 		mnt_ns = container_of(ns, struct mnt_namespace, ns);
222 		idp = (__u64 __user *)arg;
223 		id = mnt_ns->seq;
224 		return put_user(id, idp);
225 	}
226 	case NS_GET_PID_FROM_PIDNS:
227 		fallthrough;
228 	case NS_GET_TGID_FROM_PIDNS:
229 		fallthrough;
230 	case NS_GET_PID_IN_PIDNS:
231 		fallthrough;
232 	case NS_GET_TGID_IN_PIDNS: {
233 		if (ns->ops->type != CLONE_NEWPID)
234 			return -EINVAL;
235 
236 		ret = -ESRCH;
237 		pid_ns = container_of(ns, struct pid_namespace, ns);
238 
239 		guard(rcu)();
240 
241 		if (ioctl == NS_GET_PID_IN_PIDNS ||
242 		    ioctl == NS_GET_TGID_IN_PIDNS)
243 			tsk = find_task_by_vpid(arg);
244 		else
245 			tsk = find_task_by_pid_ns(arg, pid_ns);
246 		if (!tsk)
247 			break;
248 
249 		switch (ioctl) {
250 		case NS_GET_PID_FROM_PIDNS:
251 			ret = task_pid_vnr(tsk);
252 			break;
253 		case NS_GET_TGID_FROM_PIDNS:
254 			ret = task_tgid_vnr(tsk);
255 			break;
256 		case NS_GET_PID_IN_PIDNS:
257 			ret = task_pid_nr_ns(tsk, pid_ns);
258 			break;
259 		case NS_GET_TGID_IN_PIDNS:
260 			ret = task_tgid_nr_ns(tsk, pid_ns);
261 			break;
262 		default:
263 			ret = 0;
264 			break;
265 		}
266 
267 		if (!ret)
268 			ret = -ESRCH;
269 		return ret;
270 	}
271 	}
272 
273 	/* extensible ioctls */
274 	switch (_IOC_NR(ioctl)) {
275 	case _IOC_NR(NS_MNT_GET_INFO): {
276 		struct mnt_ns_info kinfo = {};
277 		struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
278 		size_t usize = _IOC_SIZE(ioctl);
279 
280 		if (ns->ops->type != CLONE_NEWNS)
281 			return -EINVAL;
282 
283 		if (!uinfo)
284 			return -EINVAL;
285 
286 		if (usize < MNT_NS_INFO_SIZE_VER0)
287 			return -EINVAL;
288 
289 		return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo);
290 	}
291 	case _IOC_NR(NS_MNT_GET_PREV):
292 		previous = true;
293 		fallthrough;
294 	case _IOC_NR(NS_MNT_GET_NEXT): {
295 		struct mnt_ns_info kinfo = {};
296 		struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
297 		struct path path __free(path_put) = {};
298 		struct file *f __free(fput) = NULL;
299 		size_t usize = _IOC_SIZE(ioctl);
300 
301 		if (ns->ops->type != CLONE_NEWNS)
302 			return -EINVAL;
303 
304 		if (usize < MNT_NS_INFO_SIZE_VER0)
305 			return -EINVAL;
306 
307 		mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous);
308 		if (IS_ERR(mnt_ns))
309 			return PTR_ERR(mnt_ns);
310 
311 		ns = to_ns_common(mnt_ns);
312 		/* Transfer ownership of @mnt_ns reference to @path. */
313 		ret = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
314 		if (ret)
315 			return ret;
316 
317 		CLASS(get_unused_fd, fd)(O_CLOEXEC);
318 		if (fd < 0)
319 			return fd;
320 
321 		f = dentry_open(&path, O_RDONLY, current_cred());
322 		if (IS_ERR(f))
323 			return PTR_ERR(f);
324 
325 		if (uinfo) {
326 			/*
327 			 * If @uinfo is passed return all information about the
328 			 * mount namespace as well.
329 			 */
330 			ret = copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo);
331 			if (ret)
332 				return ret;
333 		}
334 
335 		/* Transfer reference of @f to caller's fdtable. */
336 		fd_install(fd, no_free_ptr(f));
337 		/* File descriptor is live so hand it off to the caller. */
338 		return take_fd(fd);
339 	}
340 	default:
341 		ret = -ENOTTY;
342 	}
343 
344 	return ret;
345 }
346 
347 int ns_get_name(char *buf, size_t size, struct task_struct *task,
348 			const struct proc_ns_operations *ns_ops)
349 {
350 	struct ns_common *ns;
351 	int res = -ENOENT;
352 	const char *name;
353 	ns = ns_ops->get(task);
354 	if (ns) {
355 		name = ns_ops->real_ns_name ? : ns_ops->name;
356 		res = snprintf(buf, size, "%s:[%u]", name, ns->inum);
357 		ns_ops->put(ns);
358 	}
359 	return res;
360 }
361 
362 bool proc_ns_file(const struct file *file)
363 {
364 	return file->f_op == &ns_file_operations;
365 }
366 
367 /**
368  * ns_match() - Returns true if current namespace matches dev/ino provided.
369  * @ns: current namespace
370  * @dev: dev_t from nsfs that will be matched against current nsfs
371  * @ino: ino_t from nsfs that will be matched against current nsfs
372  *
373  * Return: true if dev and ino matches the current nsfs.
374  */
375 bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino)
376 {
377 	return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev);
378 }
379 
380 
381 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
382 {
383 	struct inode *inode = d_inode(dentry);
384 	const struct ns_common *ns = inode->i_private;
385 	const struct proc_ns_operations *ns_ops = ns->ops;
386 
387 	seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
388 	return 0;
389 }
390 
391 static const struct super_operations nsfs_ops = {
392 	.statfs = simple_statfs,
393 	.evict_inode = nsfs_evict,
394 	.show_path = nsfs_show_path,
395 };
396 
397 static int nsfs_init_inode(struct inode *inode, void *data)
398 {
399 	struct ns_common *ns = data;
400 
401 	inode->i_private = data;
402 	inode->i_mode |= S_IRUGO;
403 	inode->i_fop = &ns_file_operations;
404 	inode->i_ino = ns->inum;
405 	return 0;
406 }
407 
408 static void nsfs_put_data(void *data)
409 {
410 	struct ns_common *ns = data;
411 	ns->ops->put(ns);
412 }
413 
414 static const struct stashed_operations nsfs_stashed_ops = {
415 	.init_inode = nsfs_init_inode,
416 	.put_data = nsfs_put_data,
417 };
418 
419 static int nsfs_init_fs_context(struct fs_context *fc)
420 {
421 	struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
422 	if (!ctx)
423 		return -ENOMEM;
424 	ctx->ops = &nsfs_ops;
425 	ctx->dops = &ns_dentry_operations;
426 	fc->s_fs_info = (void *)&nsfs_stashed_ops;
427 	return 0;
428 }
429 
430 static struct file_system_type nsfs = {
431 	.name = "nsfs",
432 	.init_fs_context = nsfs_init_fs_context,
433 	.kill_sb = kill_anon_super,
434 };
435 
436 void __init nsfs_init(void)
437 {
438 	nsfs_mnt = kern_mount(&nsfs);
439 	if (IS_ERR(nsfs_mnt))
440 		panic("can't set nsfs up\n");
441 	nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER;
442 }
443