xref: /linux/fs/fhandle.c (revision 23b0f90ba871f096474e1c27c3d14f455189d2d9)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/syscalls.h>
3 #include <linux/slab.h>
4 #include <linux/fs.h>
5 #include <linux/file.h>
6 #include <linux/mount.h>
7 #include <linux/namei.h>
8 #include <linux/exportfs.h>
9 #include <linux/fs_struct.h>
10 #include <linux/fsnotify.h>
11 #include <linux/personality.h>
12 #include <linux/uaccess.h>
13 #include <linux/compat.h>
14 #include <linux/nsfs.h>
15 #include "internal.h"
16 #include "mount.h"
17 
18 static long do_sys_name_to_handle(const struct path *path,
19 				  struct file_handle __user *ufh,
20 				  void __user *mnt_id, bool unique_mntid,
21 				  int fh_flags)
22 {
23 	long retval;
24 	struct file_handle f_handle;
25 	int handle_dwords, handle_bytes;
26 	struct file_handle *handle = NULL;
27 
28 	/*
29 	 * We need to make sure whether the file system support decoding of
30 	 * the file handle if decodeable file handle was requested.
31 	 */
32 	if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags))
33 		return -EOPNOTSUPP;
34 
35 	/*
36 	 * A request to encode a connectable handle for a disconnected dentry
37 	 * is unexpected since AT_EMPTY_PATH is not allowed.
38 	 */
39 	if (fh_flags & EXPORT_FH_CONNECTABLE &&
40 	    WARN_ON(path->dentry->d_flags & DCACHE_DISCONNECTED))
41 		return -EINVAL;
42 
43 	if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
44 		return -EFAULT;
45 
46 	if (f_handle.handle_bytes > MAX_HANDLE_SZ)
47 		return -EINVAL;
48 
49 	handle = kzalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
50 			 GFP_KERNEL);
51 	if (!handle)
52 		return -ENOMEM;
53 
54 	/* convert handle size to multiple of sizeof(u32) */
55 	handle_dwords = f_handle.handle_bytes >> 2;
56 
57 	/* Encode a possibly decodeable/connectable file handle */
58 	retval = exportfs_encode_fh(path->dentry,
59 				    (struct fid *)handle->f_handle,
60 				    &handle_dwords, fh_flags);
61 	handle->handle_type = retval;
62 	/* convert handle size to bytes */
63 	handle_bytes = handle_dwords * sizeof(u32);
64 	handle->handle_bytes = handle_bytes;
65 	if ((handle->handle_bytes > f_handle.handle_bytes) ||
66 	    (retval == FILEID_INVALID) || (retval < 0)) {
67 		/* As per old exportfs_encode_fh documentation
68 		 * we could return ENOSPC to indicate overflow
69 		 * But file system returned 255 always. So handle
70 		 * both the values
71 		 */
72 		if (retval == FILEID_INVALID || retval == -ENOSPC)
73 			retval = -EOVERFLOW;
74 		/*
75 		 * set the handle size to zero so we copy only
76 		 * non variable part of the file_handle
77 		 */
78 		handle_bytes = 0;
79 	} else {
80 		/*
81 		 * When asked to encode a connectable file handle, encode this
82 		 * property in the file handle itself, so that we later know
83 		 * how to decode it.
84 		 * For sanity, also encode in the file handle if the encoded
85 		 * object is a directory and verify this during decode, because
86 		 * decoding directory file handles is quite different than
87 		 * decoding connectable non-directory file handles.
88 		 */
89 		if (fh_flags & EXPORT_FH_CONNECTABLE) {
90 			handle->handle_type |= FILEID_IS_CONNECTABLE;
91 			if (d_is_dir(path->dentry))
92 				handle->handle_type |= FILEID_IS_DIR;
93 		}
94 		retval = 0;
95 	}
96 	/* copy the mount id */
97 	if (unique_mntid) {
98 		if (put_user(real_mount(path->mnt)->mnt_id_unique,
99 			     (u64 __user *) mnt_id))
100 			retval = -EFAULT;
101 	} else {
102 		if (put_user(real_mount(path->mnt)->mnt_id,
103 			     (int __user *) mnt_id))
104 			retval = -EFAULT;
105 	}
106 	/* copy the handle */
107 	if (retval != -EFAULT &&
108 		copy_to_user(ufh, handle,
109 			     struct_size(handle, f_handle, handle_bytes)))
110 		retval = -EFAULT;
111 	kfree(handle);
112 	return retval;
113 }
114 
115 /**
116  * sys_name_to_handle_at: convert name to handle
117  * @dfd: directory relative to which name is interpreted if not absolute
118  * @name: name that should be converted to handle.
119  * @handle: resulting file handle
120  * @mnt_id: mount id of the file system containing the file
121  *          (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
122  * @flag: flag value to indicate whether to follow symlink or not
123  *        and whether a decodable file handle is required.
124  *
125  * @handle->handle_size indicate the space available to store the
126  * variable part of the file handle in bytes. If there is not
127  * enough space, the field is updated to return the minimum
128  * value required.
129  */
130 SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
131 		struct file_handle __user *, handle, void __user *, mnt_id,
132 		int, flag)
133 {
134 	struct path path;
135 	int lookup_flags;
136 	int fh_flags = 0;
137 	int err;
138 
139 	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
140 		     AT_HANDLE_MNT_ID_UNIQUE | AT_HANDLE_CONNECTABLE))
141 		return -EINVAL;
142 
143 	/*
144 	 * AT_HANDLE_FID means there is no intention to decode file handle
145 	 * AT_HANDLE_CONNECTABLE means there is an intention to decode a
146 	 * connected fd (with known path), so these flags are conflicting.
147 	 * AT_EMPTY_PATH could be used along with a dfd that refers to a
148 	 * disconnected non-directory, which cannot be used to encode a
149 	 * connectable file handle, because its parent is unknown.
150 	 */
151 	if (flag & AT_HANDLE_CONNECTABLE &&
152 	    flag & (AT_HANDLE_FID | AT_EMPTY_PATH))
153 		return -EINVAL;
154 	else if (flag & AT_HANDLE_FID)
155 		fh_flags |= EXPORT_FH_FID;
156 	else if (flag & AT_HANDLE_CONNECTABLE)
157 		fh_flags |= EXPORT_FH_CONNECTABLE;
158 
159 	lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
160 	CLASS(filename_uflags, filename)(name, flag);
161 	err = filename_lookup(dfd, filename, lookup_flags, &path, NULL);
162 	if (!err) {
163 		err = do_sys_name_to_handle(&path, handle, mnt_id,
164 					    flag & AT_HANDLE_MNT_ID_UNIQUE,
165 					    fh_flags);
166 		path_put(&path);
167 	}
168 	return err;
169 }
170 
171 static int get_path_anchor(int fd, struct path *root)
172 {
173 	if (fd >= 0) {
174 		CLASS(fd, f)(fd);
175 		if (fd_empty(f))
176 			return -EBADF;
177 		*root = fd_file(f)->f_path;
178 		path_get(root);
179 		return 0;
180 	}
181 
182 	if (fd == AT_FDCWD) {
183 		get_fs_pwd(current->fs, root);
184 		return 0;
185 	}
186 
187 	if (fd == FD_PIDFS_ROOT) {
188 		pidfs_get_root(root);
189 		return 0;
190 	}
191 
192 	if (fd == FD_NSFS_ROOT) {
193 		nsfs_get_root(root);
194 		return 0;
195 	}
196 
197 	return -EBADF;
198 }
199 
200 static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
201 {
202 	struct handle_to_path_ctx *ctx = context;
203 	struct user_namespace *user_ns = current_user_ns();
204 	struct dentry *d, *root = ctx->root.dentry;
205 	struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt);
206 	int retval = 0;
207 
208 	if (!root)
209 		return 1;
210 
211 	/* Old permission model with global CAP_DAC_READ_SEARCH. */
212 	if (!ctx->flags)
213 		return 1;
214 
215 	/*
216 	 * Verify that the decoded dentry itself has a valid id mapping.
217 	 * In case the decoded dentry is the mountfd root itself, this
218 	 * verifies that the mountfd inode itself has a valid id mapping.
219 	 */
220 	if (!privileged_wrt_inode_uidgid(user_ns, idmap, d_inode(dentry)))
221 		return 0;
222 
223 	/*
224 	 * It's racy as we're not taking rename_lock but we're able to ignore
225 	 * permissions and we just need an approximation whether we were able
226 	 * to follow a path to the file.
227 	 *
228 	 * It's also potentially expensive on some filesystems especially if
229 	 * there is a deep path.
230 	 */
231 	d = dget(dentry);
232 	while (d != root && !IS_ROOT(d)) {
233 		struct dentry *parent = dget_parent(d);
234 
235 		/*
236 		 * We know that we have the ability to override DAC permissions
237 		 * as we've verified this earlier via CAP_DAC_READ_SEARCH. But
238 		 * we also need to make sure that there aren't any unmapped
239 		 * inodes in the path that would prevent us from reaching the
240 		 * file.
241 		 */
242 		if (!privileged_wrt_inode_uidgid(user_ns, idmap,
243 						 d_inode(parent))) {
244 			dput(d);
245 			dput(parent);
246 			return retval;
247 		}
248 
249 		dput(d);
250 		d = parent;
251 	}
252 
253 	if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root)
254 		retval = 1;
255 	/*
256 	 * exportfs_decode_fh_raw() does not call acceptable() callback with
257 	 * a disconnected directory dentry, so we should have reached either
258 	 * mount fd directory or sb root.
259 	 */
260 	if (ctx->fh_flags & EXPORT_FH_DIR_ONLY)
261 		WARN_ON_ONCE(d != root && d != root->d_sb->s_root);
262 	dput(d);
263 	return retval;
264 }
265 
266 static int do_handle_to_path(struct file_handle *handle, struct path *path,
267 			     struct handle_to_path_ctx *ctx)
268 {
269 	int handle_dwords;
270 	struct vfsmount *mnt = ctx->root.mnt;
271 	struct dentry *dentry;
272 
273 	/* change the handle size to multiple of sizeof(u32) */
274 	handle_dwords = handle->handle_bytes >> 2;
275 	dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle,
276 					handle_dwords, handle->handle_type,
277 					ctx->fh_flags, vfs_dentry_acceptable,
278 					ctx);
279 	if (IS_ERR_OR_NULL(dentry)) {
280 		if (dentry == ERR_PTR(-ENOMEM))
281 			return -ENOMEM;
282 		return -ESTALE;
283 	}
284 	path->dentry = dentry;
285 	path->mnt = mntget(mnt);
286 	return 0;
287 }
288 
289 static inline int may_decode_fh(struct handle_to_path_ctx *ctx,
290 				unsigned int o_flags)
291 {
292 	struct path *root = &ctx->root;
293 
294 	if (capable(CAP_DAC_READ_SEARCH))
295 		return 0;
296 
297 	/*
298 	 * Allow relaxed permissions of file handles if the caller has
299 	 * the ability to mount the filesystem or create a bind-mount of
300 	 * the provided @mountdirfd.
301 	 *
302 	 * In both cases the caller may be able to get an unobstructed
303 	 * way to the encoded file handle. If the caller is only able to
304 	 * create a bind-mount we need to verify that there are no
305 	 * locked mounts on top of it that could prevent us from getting
306 	 * to the encoded file.
307 	 *
308 	 * In principle, locked mounts can prevent the caller from
309 	 * mounting the filesystem but that only applies to procfs and
310 	 * sysfs neither of which support decoding file handles.
311 	 *
312 	 * Restrict to O_DIRECTORY to provide a deterministic API that
313 	 * avoids a confusing api in the face of disconnected non-dir
314 	 * dentries.
315 	 *
316 	 * There's only one dentry for each directory inode (VFS rule)...
317 	 */
318 	if (!(o_flags & O_DIRECTORY))
319 		return -EPERM;
320 
321 	if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
322 		ctx->flags = HANDLE_CHECK_PERMS;
323 	else if (is_mounted(root->mnt) &&
324 		 ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
325 			    CAP_SYS_ADMIN) &&
326 		 !has_locked_children(real_mount(root->mnt), root->dentry))
327 		ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
328 	else
329 		return -EPERM;
330 
331 	/* Are we able to override DAC permissions? */
332 	if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
333 		return -EPERM;
334 
335 	ctx->fh_flags = EXPORT_FH_DIR_ONLY;
336 	return 0;
337 }
338 
339 static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
340 		   struct path *path, unsigned int o_flags)
341 {
342 	int retval = 0;
343 	struct file_handle f_handle;
344 	struct file_handle *handle __free(kfree) = NULL;
345 	struct handle_to_path_ctx ctx = {};
346 	const struct export_operations *eops;
347 
348 	if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
349 		return -EFAULT;
350 
351 	if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
352 	    (f_handle.handle_bytes == 0))
353 		return -EINVAL;
354 
355 	if (f_handle.handle_type < 0 ||
356 	    FILEID_USER_FLAGS(f_handle.handle_type) & ~FILEID_VALID_USER_FLAGS)
357 		return -EINVAL;
358 
359 	retval = get_path_anchor(mountdirfd, &ctx.root);
360 	if (retval)
361 		return retval;
362 
363 	eops = ctx.root.mnt->mnt_sb->s_export_op;
364 	if (eops && eops->permission)
365 		retval = eops->permission(&ctx, o_flags);
366 	else
367 		retval = may_decode_fh(&ctx, o_flags);
368 	if (retval)
369 		goto out_path;
370 
371 	handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
372 			 GFP_KERNEL);
373 	if (!handle) {
374 		retval = -ENOMEM;
375 		goto out_path;
376 	}
377 	/* copy the full handle */
378 	*handle = f_handle;
379 	if (copy_from_user(&handle->f_handle,
380 			   &ufh->f_handle,
381 			   f_handle.handle_bytes)) {
382 		retval = -EFAULT;
383 		goto out_path;
384 	}
385 
386 	/*
387 	 * If handle was encoded with AT_HANDLE_CONNECTABLE, verify that we
388 	 * are decoding an fd with connected path, which is accessible from
389 	 * the mount fd path.
390 	 */
391 	if (f_handle.handle_type & FILEID_IS_CONNECTABLE) {
392 		ctx.fh_flags |= EXPORT_FH_CONNECTABLE;
393 		ctx.flags |= HANDLE_CHECK_SUBTREE;
394 	}
395 	if (f_handle.handle_type & FILEID_IS_DIR)
396 		ctx.fh_flags |= EXPORT_FH_DIR_ONLY;
397 	/* Filesystem code should not be exposed to user flags */
398 	handle->handle_type &= ~FILEID_USER_FLAGS_MASK;
399 	retval = do_handle_to_path(handle, path, &ctx);
400 
401 out_path:
402 	path_put(&ctx.root);
403 	return retval;
404 }
405 
406 static struct file *file_open_handle(struct path *path, int open_flag)
407 {
408 	const struct export_operations *eops;
409 
410 	eops = path->mnt->mnt_sb->s_export_op;
411 	if (eops->open)
412 		return eops->open(path, open_flag);
413 
414 	return file_open_root(path, "", open_flag, 0);
415 }
416 
417 static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
418 			   int open_flag)
419 {
420 	long retval;
421 	struct path path __free(path_put) = {};
422 
423 	retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
424 	if (retval)
425 		return retval;
426 
427 	return FD_ADD(open_flag, file_open_handle(&path, open_flag));
428 }
429 
430 /**
431  * sys_open_by_handle_at: Open the file handle
432  * @mountdirfd: directory file descriptor
433  * @handle: file handle to be opened
434  * @flags: open flags.
435  *
436  * @mountdirfd indicate the directory file descriptor
437  * of the mount point. file handle is decoded relative
438  * to the vfsmount pointed by the @mountdirfd. @flags
439  * value is same as the open(2) flags.
440  */
441 SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
442 		struct file_handle __user *, handle,
443 		int, flags)
444 {
445 	long ret;
446 
447 	if (force_o_largefile())
448 		flags |= O_LARGEFILE;
449 
450 	ret = do_handle_open(mountdirfd, handle, flags);
451 	return ret;
452 }
453 
#ifdef CONFIG_COMPAT
/*
 * Compat entry point for open_by_handle_at(). It does the same as the
 * native syscall defined in this file, except that O_LARGEFILE is never
 * forced into @flags.
 */
COMPAT_SYSCALL_DEFINE3(open_by_handle_at,
		       int, mountdirfd,
		       struct file_handle __user *, handle,
		       int, flags)
{
	return do_handle_open(mountdirfd, handle, flags);
}
#endif
465