xref: /linux/fs/ioctl.c (revision 02680c23d7b3febe45ea3d4f9818c2b2dc89020a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/ioctl.c
4  *
5  *  Copyright (C) 1991, 1992  Linus Torvalds
6  */
7 
8 #include <linux/syscalls.h>
9 #include <linux/mm.h>
10 #include <linux/capability.h>
11 #include <linux/compat.h>
12 #include <linux/file.h>
13 #include <linux/fs.h>
14 #include <linux/security.h>
15 #include <linux/export.h>
16 #include <linux/uaccess.h>
17 #include <linux/writeback.h>
18 #include <linux/buffer_head.h>
19 #include <linux/falloc.h>
20 #include <linux/sched/signal.h>
21 #include <linux/fiemap.h>
22 #include <linux/mount.h>
23 #include <linux/fscrypt.h>
24 #include <linux/fileattr.h>
25 
26 #include "internal.h"
27 
28 #include <asm/ioctls.h>
29 
30 /* So that the fiemap access checks can't overflow on 32 bit machines. */
31 #define FIEMAP_MAX_EXTENTS	(UINT_MAX / sizeof(struct fiemap_extent))
32 
33 /**
34  * vfs_ioctl - call filesystem specific ioctl methods
35  * @filp:	open file to invoke ioctl method on
36  * @cmd:	ioctl command to execute
37  * @arg:	command-specific argument for ioctl
38  *
39  * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise
40  * returns -ENOTTY.
41  *
42  * Returns 0 on success, -errno on error.
43  */
44 long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
45 {
46 	int error = -ENOTTY;
47 
48 	if (!filp->f_op->unlocked_ioctl)
49 		goto out;
50 
51 	error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
52 	if (error == -ENOIOCTLCMD)
53 		error = -ENOTTY;
54  out:
55 	return error;
56 }
57 EXPORT_SYMBOL(vfs_ioctl);
58 
59 static int ioctl_fibmap(struct file *filp, int __user *p)
60 {
61 	struct inode *inode = file_inode(filp);
62 	struct super_block *sb = inode->i_sb;
63 	int error, ur_block;
64 	sector_t block;
65 
66 	if (!capable(CAP_SYS_RAWIO))
67 		return -EPERM;
68 
69 	error = get_user(ur_block, p);
70 	if (error)
71 		return error;
72 
73 	if (ur_block < 0)
74 		return -EINVAL;
75 
76 	block = ur_block;
77 	error = bmap(inode, &block);
78 
79 	if (block > INT_MAX) {
80 		error = -ERANGE;
81 		pr_warn_ratelimited("[%s/%d] FS: %s File: %pD4 would truncate fibmap result\n",
82 				    current->comm, task_pid_nr(current),
83 				    sb->s_id, filp);
84 	}
85 
86 	if (error)
87 		ur_block = 0;
88 	else
89 		ur_block = block;
90 
91 	if (put_user(ur_block, p))
92 		error = -EFAULT;
93 
94 	return error;
95 }
96 
97 /**
98  * fiemap_fill_next_extent - Fiemap helper function
99  * @fieinfo:	Fiemap context passed into ->fiemap
100  * @logical:	Extent logical start offset, in bytes
101  * @phys:	Extent physical start offset, in bytes
102  * @len:	Extent length, in bytes
103  * @flags:	FIEMAP_EXTENT flags that describe this extent
104  *
105  * Called from file system ->fiemap callback. Will populate extent
106  * info as passed in via arguments and copy to user memory. On
107  * success, extent count on fieinfo is incremented.
108  *
109  * Returns 0 on success, -errno on error, 1 if this was the last
110  * extent that will fit in user array.
111  */
112 #define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
113 #define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
114 #define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
115 int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
116 			    u64 phys, u64 len, u32 flags)
117 {
118 	struct fiemap_extent extent;
119 	struct fiemap_extent __user *dest = fieinfo->fi_extents_start;
120 
121 	/* only count the extents */
122 	if (fieinfo->fi_extents_max == 0) {
123 		fieinfo->fi_extents_mapped++;
124 		return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
125 	}
126 
127 	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
128 		return 1;
129 
130 	if (flags & SET_UNKNOWN_FLAGS)
131 		flags |= FIEMAP_EXTENT_UNKNOWN;
132 	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
133 		flags |= FIEMAP_EXTENT_ENCODED;
134 	if (flags & SET_NOT_ALIGNED_FLAGS)
135 		flags |= FIEMAP_EXTENT_NOT_ALIGNED;
136 
137 	memset(&extent, 0, sizeof(extent));
138 	extent.fe_logical = logical;
139 	extent.fe_physical = phys;
140 	extent.fe_length = len;
141 	extent.fe_flags = flags;
142 
143 	dest += fieinfo->fi_extents_mapped;
144 	if (copy_to_user(dest, &extent, sizeof(extent)))
145 		return -EFAULT;
146 
147 	fieinfo->fi_extents_mapped++;
148 	if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
149 		return 1;
150 	return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
151 }
152 EXPORT_SYMBOL(fiemap_fill_next_extent);
153 
154 /**
155  * fiemap_prep - check validity of requested flags for fiemap
156  * @inode:	Inode to operate on
157  * @fieinfo:	Fiemap context passed into ->fiemap
158  * @start:	Start of the mapped range
159  * @len:	Length of the mapped range, can be truncated by this function.
160  * @supported_flags:	Set of fiemap flags that the file system understands
161  *
162  * This function must be called from each ->fiemap instance to validate the
163  * fiemap request against the file system parameters.
164  *
165  * Returns 0 on success, or a negative error on failure.
166  */
167 int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
168 		u64 start, u64 *len, u32 supported_flags)
169 {
170 	u64 maxbytes = inode->i_sb->s_maxbytes;
171 	u32 incompat_flags;
172 	int ret = 0;
173 
174 	if (*len == 0)
175 		return -EINVAL;
176 	if (start > maxbytes)
177 		return -EFBIG;
178 
179 	/*
180 	 * Shrink request scope to what the fs can actually handle.
181 	 */
182 	if (*len > maxbytes || (maxbytes - *len) < start)
183 		*len = maxbytes - start;
184 
185 	supported_flags |= FIEMAP_FLAG_SYNC;
186 	supported_flags &= FIEMAP_FLAGS_COMPAT;
187 	incompat_flags = fieinfo->fi_flags & ~supported_flags;
188 	if (incompat_flags) {
189 		fieinfo->fi_flags = incompat_flags;
190 		return -EBADR;
191 	}
192 
193 	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
194 		ret = filemap_write_and_wait(inode->i_mapping);
195 	return ret;
196 }
197 EXPORT_SYMBOL(fiemap_prep);
198 
199 static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap)
200 {
201 	struct fiemap fiemap;
202 	struct fiemap_extent_info fieinfo = { 0, };
203 	struct inode *inode = file_inode(filp);
204 	int error;
205 
206 	if (!inode->i_op->fiemap)
207 		return -EOPNOTSUPP;
208 
209 	if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
210 		return -EFAULT;
211 
212 	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
213 		return -EINVAL;
214 
215 	fieinfo.fi_flags = fiemap.fm_flags;
216 	fieinfo.fi_extents_max = fiemap.fm_extent_count;
217 	fieinfo.fi_extents_start = ufiemap->fm_extents;
218 
219 	error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start,
220 			fiemap.fm_length);
221 
222 	fiemap.fm_flags = fieinfo.fi_flags;
223 	fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
224 	if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
225 		error = -EFAULT;
226 
227 	return error;
228 }
229 
230 static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
231 			     u64 off, u64 olen, u64 destoff)
232 {
233 	struct fd src_file = fdget(srcfd);
234 	loff_t cloned;
235 	int ret;
236 
237 	if (!src_file.file)
238 		return -EBADF;
239 	ret = -EXDEV;
240 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
241 		goto fdput;
242 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
243 				      olen, 0);
244 	if (cloned < 0)
245 		ret = cloned;
246 	else if (olen && cloned != olen)
247 		ret = -EINVAL;
248 	else
249 		ret = 0;
250 fdput:
251 	fdput(src_file);
252 	return ret;
253 }
254 
255 static long ioctl_file_clone_range(struct file *file,
256 				   struct file_clone_range __user *argp)
257 {
258 	struct file_clone_range args;
259 
260 	if (copy_from_user(&args, argp, sizeof(args)))
261 		return -EFAULT;
262 	return ioctl_file_clone(file, args.src_fd, args.src_offset,
263 				args.src_length, args.dest_offset);
264 }
265 
266 #ifdef CONFIG_BLOCK
267 
268 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
269 {
270 	return (offset >> inode->i_blkbits);
271 }
272 
273 static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
274 {
275 	return (blk << inode->i_blkbits);
276 }
277 
278 /**
279  * __generic_block_fiemap - FIEMAP for block based inodes (no locking)
280  * @inode: the inode to map
281  * @fieinfo: the fiemap info struct that will be passed back to userspace
282  * @start: where to start mapping in the inode
283  * @len: how much space to map
284  * @get_block: the fs's get_block function
285  *
286  * This does FIEMAP for block based inodes.  Basically it will just loop
287  * through get_block until we hit the number of extents we want to map, or we
288  * go past the end of the file and hit a hole.
289  *
290  * If it is possible to have data blocks beyond a hole past @inode->i_size, then
291  * please do not use this function, it will stop at the first unmapped block
292  * beyond i_size.
293  *
294  * If you use this function directly, you need to do your own locking. Use
295  * generic_block_fiemap if you want the locking done for you.
296  */
297 static int __generic_block_fiemap(struct inode *inode,
298 			   struct fiemap_extent_info *fieinfo, loff_t start,
299 			   loff_t len, get_block_t *get_block)
300 {
301 	struct buffer_head map_bh;
302 	sector_t start_blk, last_blk;
303 	loff_t isize = i_size_read(inode);
304 	u64 logical = 0, phys = 0, size = 0;
305 	u32 flags = FIEMAP_EXTENT_MERGED;
306 	bool past_eof = false, whole_file = false;
307 	int ret = 0;
308 
309 	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_SYNC);
310 	if (ret)
311 		return ret;
312 
313 	/*
314 	 * Either the i_mutex or other appropriate locking needs to be held
315 	 * since we expect isize to not change at all through the duration of
316 	 * this call.
317 	 */
318 	if (len >= isize) {
319 		whole_file = true;
320 		len = isize;
321 	}
322 
323 	/*
324 	 * Some filesystems can't deal with being asked to map less than
325 	 * blocksize, so make sure our len is at least block length.
326 	 */
327 	if (logical_to_blk(inode, len) == 0)
328 		len = blk_to_logical(inode, 1);
329 
330 	start_blk = logical_to_blk(inode, start);
331 	last_blk = logical_to_blk(inode, start + len - 1);
332 
333 	do {
334 		/*
335 		 * we set b_size to the total size we want so it will map as
336 		 * many contiguous blocks as possible at once
337 		 */
338 		memset(&map_bh, 0, sizeof(struct buffer_head));
339 		map_bh.b_size = len;
340 
341 		ret = get_block(inode, start_blk, &map_bh, 0);
342 		if (ret)
343 			break;
344 
345 		/* HOLE */
346 		if (!buffer_mapped(&map_bh)) {
347 			start_blk++;
348 
349 			/*
350 			 * We want to handle the case where there is an
351 			 * allocated block at the front of the file, and then
352 			 * nothing but holes up to the end of the file properly,
353 			 * to make sure that extent at the front gets properly
354 			 * marked with FIEMAP_EXTENT_LAST
355 			 */
356 			if (!past_eof &&
357 			    blk_to_logical(inode, start_blk) >= isize)
358 				past_eof = 1;
359 
360 			/*
361 			 * First hole after going past the EOF, this is our
362 			 * last extent
363 			 */
364 			if (past_eof && size) {
365 				flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
366 				ret = fiemap_fill_next_extent(fieinfo, logical,
367 							      phys, size,
368 							      flags);
369 			} else if (size) {
370 				ret = fiemap_fill_next_extent(fieinfo, logical,
371 							      phys, size, flags);
372 				size = 0;
373 			}
374 
375 			/* if we have holes up to/past EOF then we're done */
376 			if (start_blk > last_blk || past_eof || ret)
377 				break;
378 		} else {
379 			/*
380 			 * We have gone over the length of what we wanted to
381 			 * map, and it wasn't the entire file, so add the extent
382 			 * we got last time and exit.
383 			 *
384 			 * This is for the case where say we want to map all the
385 			 * way up to the second to the last block in a file, but
386 			 * the last block is a hole, making the second to last
387 			 * block FIEMAP_EXTENT_LAST.  In this case we want to
388 			 * see if there is a hole after the second to last block
389 			 * so we can mark it properly.  If we found data after
390 			 * we exceeded the length we were requesting, then we
391 			 * are good to go, just add the extent to the fieinfo
392 			 * and break
393 			 */
394 			if (start_blk > last_blk && !whole_file) {
395 				ret = fiemap_fill_next_extent(fieinfo, logical,
396 							      phys, size,
397 							      flags);
398 				break;
399 			}
400 
401 			/*
402 			 * if size != 0 then we know we already have an extent
403 			 * to add, so add it.
404 			 */
405 			if (size) {
406 				ret = fiemap_fill_next_extent(fieinfo, logical,
407 							      phys, size,
408 							      flags);
409 				if (ret)
410 					break;
411 			}
412 
413 			logical = blk_to_logical(inode, start_blk);
414 			phys = blk_to_logical(inode, map_bh.b_blocknr);
415 			size = map_bh.b_size;
416 			flags = FIEMAP_EXTENT_MERGED;
417 
418 			start_blk += logical_to_blk(inode, size);
419 
420 			/*
421 			 * If we are past the EOF, then we need to make sure as
422 			 * soon as we find a hole that the last extent we found
423 			 * is marked with FIEMAP_EXTENT_LAST
424 			 */
425 			if (!past_eof && logical + size >= isize)
426 				past_eof = true;
427 		}
428 		cond_resched();
429 		if (fatal_signal_pending(current)) {
430 			ret = -EINTR;
431 			break;
432 		}
433 
434 	} while (1);
435 
436 	/* If ret is 1 then we just hit the end of the extent array */
437 	if (ret == 1)
438 		ret = 0;
439 
440 	return ret;
441 }
442 
443 /**
444  * generic_block_fiemap - FIEMAP for block based inodes
445  * @inode: The inode to map
446  * @fieinfo: The mapping information
447  * @start: The initial block to map
448  * @len: The length of the extect to attempt to map
449  * @get_block: The block mapping function for the fs
450  *
451  * Calls __generic_block_fiemap to map the inode, after taking
452  * the inode's mutex lock.
453  */
454 
455 int generic_block_fiemap(struct inode *inode,
456 			 struct fiemap_extent_info *fieinfo, u64 start,
457 			 u64 len, get_block_t *get_block)
458 {
459 	int ret;
460 	inode_lock(inode);
461 	ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
462 	inode_unlock(inode);
463 	return ret;
464 }
465 EXPORT_SYMBOL(generic_block_fiemap);
466 
467 #endif  /*  CONFIG_BLOCK  */
468 
469 /*
470  * This provides compatibility with legacy XFS pre-allocation ioctls
471  * which predate the fallocate syscall.
472  *
473  * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
474  * are used here, rest are ignored.
475  */
476 static int ioctl_preallocate(struct file *filp, int mode, void __user *argp)
477 {
478 	struct inode *inode = file_inode(filp);
479 	struct space_resv sr;
480 
481 	if (copy_from_user(&sr, argp, sizeof(sr)))
482 		return -EFAULT;
483 
484 	switch (sr.l_whence) {
485 	case SEEK_SET:
486 		break;
487 	case SEEK_CUR:
488 		sr.l_start += filp->f_pos;
489 		break;
490 	case SEEK_END:
491 		sr.l_start += i_size_read(inode);
492 		break;
493 	default:
494 		return -EINVAL;
495 	}
496 
497 	return vfs_fallocate(filp, mode | FALLOC_FL_KEEP_SIZE, sr.l_start,
498 			sr.l_len);
499 }
500 
501 /* on ia32 l_start is on a 32-bit boundary */
502 #if defined CONFIG_COMPAT && defined(CONFIG_X86_64)
503 /* just account for different alignment */
504 static int compat_ioctl_preallocate(struct file *file, int mode,
505 				    struct space_resv_32 __user *argp)
506 {
507 	struct inode *inode = file_inode(file);
508 	struct space_resv_32 sr;
509 
510 	if (copy_from_user(&sr, argp, sizeof(sr)))
511 		return -EFAULT;
512 
513 	switch (sr.l_whence) {
514 	case SEEK_SET:
515 		break;
516 	case SEEK_CUR:
517 		sr.l_start += file->f_pos;
518 		break;
519 	case SEEK_END:
520 		sr.l_start += i_size_read(inode);
521 		break;
522 	default:
523 		return -EINVAL;
524 	}
525 
526 	return vfs_fallocate(file, mode | FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
527 }
528 #endif
529 
530 static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p)
531 {
532 	switch (cmd) {
533 	case FIBMAP:
534 		return ioctl_fibmap(filp, p);
535 	case FS_IOC_RESVSP:
536 	case FS_IOC_RESVSP64:
537 		return ioctl_preallocate(filp, 0, p);
538 	case FS_IOC_UNRESVSP:
539 	case FS_IOC_UNRESVSP64:
540 		return ioctl_preallocate(filp, FALLOC_FL_PUNCH_HOLE, p);
541 	case FS_IOC_ZERO_RANGE:
542 		return ioctl_preallocate(filp, FALLOC_FL_ZERO_RANGE, p);
543 	}
544 
545 	return -ENOIOCTLCMD;
546 }
547 
548 static int ioctl_fionbio(struct file *filp, int __user *argp)
549 {
550 	unsigned int flag;
551 	int on, error;
552 
553 	error = get_user(on, argp);
554 	if (error)
555 		return error;
556 	flag = O_NONBLOCK;
557 #ifdef __sparc__
558 	/* SunOS compatibility item. */
559 	if (O_NONBLOCK != O_NDELAY)
560 		flag |= O_NDELAY;
561 #endif
562 	spin_lock(&filp->f_lock);
563 	if (on)
564 		filp->f_flags |= flag;
565 	else
566 		filp->f_flags &= ~flag;
567 	spin_unlock(&filp->f_lock);
568 	return error;
569 }
570 
571 static int ioctl_fioasync(unsigned int fd, struct file *filp,
572 			  int __user *argp)
573 {
574 	unsigned int flag;
575 	int on, error;
576 
577 	error = get_user(on, argp);
578 	if (error)
579 		return error;
580 	flag = on ? FASYNC : 0;
581 
582 	/* Did FASYNC state change ? */
583 	if ((flag ^ filp->f_flags) & FASYNC) {
584 		if (filp->f_op->fasync)
585 			/* fasync() adjusts filp->f_flags */
586 			error = filp->f_op->fasync(fd, filp, on);
587 		else
588 			error = -ENOTTY;
589 	}
590 	return error < 0 ? error : 0;
591 }
592 
593 static int ioctl_fsfreeze(struct file *filp)
594 {
595 	struct super_block *sb = file_inode(filp)->i_sb;
596 
597 	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
598 		return -EPERM;
599 
600 	/* If filesystem doesn't support freeze feature, return. */
601 	if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL)
602 		return -EOPNOTSUPP;
603 
604 	/* Freeze */
605 	if (sb->s_op->freeze_super)
606 		return sb->s_op->freeze_super(sb);
607 	return freeze_super(sb);
608 }
609 
610 static int ioctl_fsthaw(struct file *filp)
611 {
612 	struct super_block *sb = file_inode(filp)->i_sb;
613 
614 	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
615 		return -EPERM;
616 
617 	/* Thaw */
618 	if (sb->s_op->thaw_super)
619 		return sb->s_op->thaw_super(sb);
620 	return thaw_super(sb);
621 }
622 
623 static int ioctl_file_dedupe_range(struct file *file,
624 				   struct file_dedupe_range __user *argp)
625 {
626 	struct file_dedupe_range *same = NULL;
627 	int ret;
628 	unsigned long size;
629 	u16 count;
630 
631 	if (get_user(count, &argp->dest_count)) {
632 		ret = -EFAULT;
633 		goto out;
634 	}
635 
636 	size = offsetof(struct file_dedupe_range __user, info[count]);
637 	if (size > PAGE_SIZE) {
638 		ret = -ENOMEM;
639 		goto out;
640 	}
641 
642 	same = memdup_user(argp, size);
643 	if (IS_ERR(same)) {
644 		ret = PTR_ERR(same);
645 		same = NULL;
646 		goto out;
647 	}
648 
649 	same->dest_count = count;
650 	ret = vfs_dedupe_file_range(file, same);
651 	if (ret)
652 		goto out;
653 
654 	ret = copy_to_user(argp, same, size);
655 	if (ret)
656 		ret = -EFAULT;
657 
658 out:
659 	kfree(same);
660 	return ret;
661 }
662 
663 /**
664  * fileattr_fill_xflags - initialize fileattr with xflags
665  * @fa:		fileattr pointer
666  * @xflags:	FS_XFLAG_* flags
667  *
668  * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags).  All
669  * other fields are zeroed.
670  */
671 void fileattr_fill_xflags(struct fileattr *fa, u32 xflags)
672 {
673 	memset(fa, 0, sizeof(*fa));
674 	fa->fsx_valid = true;
675 	fa->fsx_xflags = xflags;
676 	if (fa->fsx_xflags & FS_XFLAG_IMMUTABLE)
677 		fa->flags |= FS_IMMUTABLE_FL;
678 	if (fa->fsx_xflags & FS_XFLAG_APPEND)
679 		fa->flags |= FS_APPEND_FL;
680 	if (fa->fsx_xflags & FS_XFLAG_SYNC)
681 		fa->flags |= FS_SYNC_FL;
682 	if (fa->fsx_xflags & FS_XFLAG_NOATIME)
683 		fa->flags |= FS_NOATIME_FL;
684 	if (fa->fsx_xflags & FS_XFLAG_NODUMP)
685 		fa->flags |= FS_NODUMP_FL;
686 	if (fa->fsx_xflags & FS_XFLAG_DAX)
687 		fa->flags |= FS_DAX_FL;
688 	if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
689 		fa->flags |= FS_PROJINHERIT_FL;
690 }
691 EXPORT_SYMBOL(fileattr_fill_xflags);
692 
693 /**
694  * fileattr_fill_flags - initialize fileattr with flags
695  * @fa:		fileattr pointer
696  * @flags:	FS_*_FL flags
697  *
698  * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
699  * All other fields are zeroed.
700  */
701 void fileattr_fill_flags(struct fileattr *fa, u32 flags)
702 {
703 	memset(fa, 0, sizeof(*fa));
704 	fa->flags_valid = true;
705 	fa->flags = flags;
706 	if (fa->flags & FS_SYNC_FL)
707 		fa->fsx_xflags |= FS_XFLAG_SYNC;
708 	if (fa->flags & FS_IMMUTABLE_FL)
709 		fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
710 	if (fa->flags & FS_APPEND_FL)
711 		fa->fsx_xflags |= FS_XFLAG_APPEND;
712 	if (fa->flags & FS_NODUMP_FL)
713 		fa->fsx_xflags |= FS_XFLAG_NODUMP;
714 	if (fa->flags & FS_NOATIME_FL)
715 		fa->fsx_xflags |= FS_XFLAG_NOATIME;
716 	if (fa->flags & FS_DAX_FL)
717 		fa->fsx_xflags |= FS_XFLAG_DAX;
718 	if (fa->flags & FS_PROJINHERIT_FL)
719 		fa->fsx_xflags |= FS_XFLAG_PROJINHERIT;
720 }
721 EXPORT_SYMBOL(fileattr_fill_flags);
722 
723 /**
724  * vfs_fileattr_get - retrieve miscellaneous file attributes
725  * @dentry:	the object to retrieve from
726  * @fa:		fileattr pointer
727  *
728  * Call i_op->fileattr_get() callback, if exists.
729  *
730  * Return: 0 on success, or a negative error on failure.
731  */
732 int vfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
733 {
734 	struct inode *inode = d_inode(dentry);
735 
736 	if (!inode->i_op->fileattr_get)
737 		return -ENOIOCTLCMD;
738 
739 	return inode->i_op->fileattr_get(dentry, fa);
740 }
741 EXPORT_SYMBOL(vfs_fileattr_get);
742 
743 /**
744  * copy_fsxattr_to_user - copy fsxattr to userspace.
745  * @fa:		fileattr pointer
746  * @ufa:	fsxattr user pointer
747  *
748  * Return: 0 on success, or -EFAULT on failure.
749  */
750 int copy_fsxattr_to_user(const struct fileattr *fa, struct fsxattr __user *ufa)
751 {
752 	struct fsxattr xfa;
753 
754 	memset(&xfa, 0, sizeof(xfa));
755 	xfa.fsx_xflags = fa->fsx_xflags;
756 	xfa.fsx_extsize = fa->fsx_extsize;
757 	xfa.fsx_nextents = fa->fsx_nextents;
758 	xfa.fsx_projid = fa->fsx_projid;
759 	xfa.fsx_cowextsize = fa->fsx_cowextsize;
760 
761 	if (copy_to_user(ufa, &xfa, sizeof(xfa)))
762 		return -EFAULT;
763 
764 	return 0;
765 }
766 EXPORT_SYMBOL(copy_fsxattr_to_user);
767 
768 static int copy_fsxattr_from_user(struct fileattr *fa,
769 				  struct fsxattr __user *ufa)
770 {
771 	struct fsxattr xfa;
772 
773 	if (copy_from_user(&xfa, ufa, sizeof(xfa)))
774 		return -EFAULT;
775 
776 	fileattr_fill_xflags(fa, xfa.fsx_xflags);
777 	fa->fsx_extsize = xfa.fsx_extsize;
778 	fa->fsx_nextents = xfa.fsx_nextents;
779 	fa->fsx_projid = xfa.fsx_projid;
780 	fa->fsx_cowextsize = xfa.fsx_cowextsize;
781 
782 	return 0;
783 }
784 
785 /*
786  * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
787  * any invalid configurations.
788  *
789  * Note: must be called with inode lock held.
790  */
791 static int fileattr_set_prepare(struct inode *inode,
792 			      const struct fileattr *old_ma,
793 			      struct fileattr *fa)
794 {
795 	int err;
796 
797 	/*
798 	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
799 	 * the relevant capability.
800 	 */
801 	if ((fa->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
802 	    !capable(CAP_LINUX_IMMUTABLE))
803 		return -EPERM;
804 
805 	err = fscrypt_prepare_setflags(inode, old_ma->flags, fa->flags);
806 	if (err)
807 		return err;
808 
809 	/*
810 	 * Project Quota ID state is only allowed to change from within the init
811 	 * namespace. Enforce that restriction only if we are trying to change
812 	 * the quota ID state. Everything else is allowed in user namespaces.
813 	 */
814 	if (current_user_ns() != &init_user_ns) {
815 		if (old_ma->fsx_projid != fa->fsx_projid)
816 			return -EINVAL;
817 		if ((old_ma->fsx_xflags ^ fa->fsx_xflags) &
818 				FS_XFLAG_PROJINHERIT)
819 			return -EINVAL;
820 	}
821 
822 	/* Check extent size hints. */
823 	if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
824 		return -EINVAL;
825 
826 	if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
827 			!S_ISDIR(inode->i_mode))
828 		return -EINVAL;
829 
830 	if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
831 	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
832 		return -EINVAL;
833 
834 	/*
835 	 * It is only valid to set the DAX flag on regular files and
836 	 * directories on filesystems.
837 	 */
838 	if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
839 	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
840 		return -EINVAL;
841 
842 	/* Extent size hints of zero turn off the flags. */
843 	if (fa->fsx_extsize == 0)
844 		fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
845 	if (fa->fsx_cowextsize == 0)
846 		fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
847 
848 	return 0;
849 }
850 
851 /**
852  * vfs_fileattr_set - change miscellaneous file attributes
853  * @mnt_userns:	user namespace of the mount
854  * @dentry:	the object to change
855  * @fa:		fileattr pointer
856  *
857  * After verifying permissions, call i_op->fileattr_set() callback, if
858  * exists.
859  *
860  * Verifying attributes involves retrieving current attributes with
861  * i_op->fileattr_get(), this also allows initializing attributes that have
862  * not been set by the caller to current values.  Inode lock is held
863  * thoughout to prevent racing with another instance.
864  *
865  * Return: 0 on success, or a negative error on failure.
866  */
867 int vfs_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
868 		     struct fileattr *fa)
869 {
870 	struct inode *inode = d_inode(dentry);
871 	struct fileattr old_ma = {};
872 	int err;
873 
874 	if (!inode->i_op->fileattr_set)
875 		return -ENOIOCTLCMD;
876 
877 	if (!inode_owner_or_capable(mnt_userns, inode))
878 		return -EPERM;
879 
880 	inode_lock(inode);
881 	err = vfs_fileattr_get(dentry, &old_ma);
882 	if (!err) {
883 		/* initialize missing bits from old_ma */
884 		if (fa->flags_valid) {
885 			fa->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
886 			fa->fsx_extsize = old_ma.fsx_extsize;
887 			fa->fsx_nextents = old_ma.fsx_nextents;
888 			fa->fsx_projid = old_ma.fsx_projid;
889 			fa->fsx_cowextsize = old_ma.fsx_cowextsize;
890 		} else {
891 			fa->flags |= old_ma.flags & ~FS_COMMON_FL;
892 		}
893 		err = fileattr_set_prepare(inode, &old_ma, fa);
894 		if (!err)
895 			err = inode->i_op->fileattr_set(mnt_userns, dentry, fa);
896 	}
897 	inode_unlock(inode);
898 
899 	return err;
900 }
901 EXPORT_SYMBOL(vfs_fileattr_set);
902 
903 static int ioctl_getflags(struct file *file, unsigned int __user *argp)
904 {
905 	struct fileattr fa = { .flags_valid = true }; /* hint only */
906 	int err;
907 
908 	err = vfs_fileattr_get(file->f_path.dentry, &fa);
909 	if (!err)
910 		err = put_user(fa.flags, argp);
911 	return err;
912 }
913 
914 static int ioctl_setflags(struct file *file, unsigned int __user *argp)
915 {
916 	struct user_namespace *mnt_userns = file_mnt_user_ns(file);
917 	struct dentry *dentry = file->f_path.dentry;
918 	struct fileattr fa;
919 	unsigned int flags;
920 	int err;
921 
922 	err = get_user(flags, argp);
923 	if (!err) {
924 		err = mnt_want_write_file(file);
925 		if (!err) {
926 			fileattr_fill_flags(&fa, flags);
927 			err = vfs_fileattr_set(mnt_userns, dentry, &fa);
928 			mnt_drop_write_file(file);
929 		}
930 	}
931 	return err;
932 }
933 
934 static int ioctl_fsgetxattr(struct file *file, void __user *argp)
935 {
936 	struct fileattr fa = { .fsx_valid = true }; /* hint only */
937 	int err;
938 
939 	err = vfs_fileattr_get(file->f_path.dentry, &fa);
940 	if (!err)
941 		err = copy_fsxattr_to_user(&fa, argp);
942 
943 	return err;
944 }
945 
946 static int ioctl_fssetxattr(struct file *file, void __user *argp)
947 {
948 	struct user_namespace *mnt_userns = file_mnt_user_ns(file);
949 	struct dentry *dentry = file->f_path.dentry;
950 	struct fileattr fa;
951 	int err;
952 
953 	err = copy_fsxattr_from_user(&fa, argp);
954 	if (!err) {
955 		err = mnt_want_write_file(file);
956 		if (!err) {
957 			err = vfs_fileattr_set(mnt_userns, dentry, &fa);
958 			mnt_drop_write_file(file);
959 		}
960 	}
961 	return err;
962 }
963 
964 /*
965  * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
966  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
967  *
968  * When you add any new common ioctls to the switches above and below,
969  * please ensure they have compatible arguments in compat mode.
970  */
971 static int do_vfs_ioctl(struct file *filp, unsigned int fd,
972 			unsigned int cmd, unsigned long arg)
973 {
974 	void __user *argp = (void __user *)arg;
975 	struct inode *inode = file_inode(filp);
976 
977 	switch (cmd) {
978 	case FIOCLEX:
979 		set_close_on_exec(fd, 1);
980 		return 0;
981 
982 	case FIONCLEX:
983 		set_close_on_exec(fd, 0);
984 		return 0;
985 
986 	case FIONBIO:
987 		return ioctl_fionbio(filp, argp);
988 
989 	case FIOASYNC:
990 		return ioctl_fioasync(fd, filp, argp);
991 
992 	case FIOQSIZE:
993 		if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
994 		    S_ISLNK(inode->i_mode)) {
995 			loff_t res = inode_get_bytes(inode);
996 			return copy_to_user(argp, &res, sizeof(res)) ?
997 					    -EFAULT : 0;
998 		}
999 
1000 		return -ENOTTY;
1001 
1002 	case FIFREEZE:
1003 		return ioctl_fsfreeze(filp);
1004 
1005 	case FITHAW:
1006 		return ioctl_fsthaw(filp);
1007 
1008 	case FS_IOC_FIEMAP:
1009 		return ioctl_fiemap(filp, argp);
1010 
1011 	case FIGETBSZ:
1012 		/* anon_bdev filesystems may not have a block size */
1013 		if (!inode->i_sb->s_blocksize)
1014 			return -EINVAL;
1015 
1016 		return put_user(inode->i_sb->s_blocksize, (int __user *)argp);
1017 
1018 	case FICLONE:
1019 		return ioctl_file_clone(filp, arg, 0, 0, 0);
1020 
1021 	case FICLONERANGE:
1022 		return ioctl_file_clone_range(filp, argp);
1023 
1024 	case FIDEDUPERANGE:
1025 		return ioctl_file_dedupe_range(filp, argp);
1026 
1027 	case FIONREAD:
1028 		if (!S_ISREG(inode->i_mode))
1029 			return vfs_ioctl(filp, cmd, arg);
1030 
1031 		return put_user(i_size_read(inode) - filp->f_pos,
1032 				(int __user *)argp);
1033 
1034 	case FS_IOC_GETFLAGS:
1035 		return ioctl_getflags(filp, argp);
1036 
1037 	case FS_IOC_SETFLAGS:
1038 		return ioctl_setflags(filp, argp);
1039 
1040 	case FS_IOC_FSGETXATTR:
1041 		return ioctl_fsgetxattr(filp, argp);
1042 
1043 	case FS_IOC_FSSETXATTR:
1044 		return ioctl_fssetxattr(filp, argp);
1045 
1046 	default:
1047 		if (S_ISREG(inode->i_mode))
1048 			return file_ioctl(filp, cmd, argp);
1049 		break;
1050 	}
1051 
1052 	return -ENOIOCTLCMD;
1053 }
1054 
1055 SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
1056 {
1057 	struct fd f = fdget(fd);
1058 	int error;
1059 
1060 	if (!f.file)
1061 		return -EBADF;
1062 
1063 	error = security_file_ioctl(f.file, cmd, arg);
1064 	if (error)
1065 		goto out;
1066 
1067 	error = do_vfs_ioctl(f.file, fd, cmd, arg);
1068 	if (error == -ENOIOCTLCMD)
1069 		error = vfs_ioctl(f.file, cmd, arg);
1070 
1071 out:
1072 	fdput(f);
1073 	return error;
1074 }
1075 
1076 #ifdef CONFIG_COMPAT
1077 /**
1078  * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
1079  *
1080  * This is not normally called as a function, but instead set in struct
1081  * file_operations as
1082  *
1083  *     .compat_ioctl = compat_ptr_ioctl,
1084  *
1085  * On most architectures, the compat_ptr_ioctl() just passes all arguments
1086  * to the corresponding ->ioctl handler. The exception is arch/s390, where
1087  * compat_ptr() clears the top bit of a 32-bit pointer value, so user space
1088  * pointers to the second 2GB alias the first 2GB, as is the case for
1089  * native 32-bit s390 user space.
1090  *
1091  * The compat_ptr_ioctl() function must therefore be used only with ioctl
1092  * functions that either ignore the argument or pass a pointer to a
1093  * compatible data type.
1094  *
1095  * If any ioctl command handled by fops->unlocked_ioctl passes a plain
1096  * integer instead of a pointer, or any of the passed data types
1097  * is incompatible between 32-bit and 64-bit architectures, a proper
1098  * handler is required instead of compat_ptr_ioctl.
1099  */
1100 long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1101 {
1102 	if (!file->f_op->unlocked_ioctl)
1103 		return -ENOIOCTLCMD;
1104 
1105 	return file->f_op->unlocked_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
1106 }
1107 EXPORT_SYMBOL(compat_ptr_ioctl);
1108 
1109 COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
1110 		       compat_ulong_t, arg)
1111 {
1112 	struct fd f = fdget(fd);
1113 	int error;
1114 
1115 	if (!f.file)
1116 		return -EBADF;
1117 
1118 	/* RED-PEN how should LSM module know it's handling 32bit? */
1119 	error = security_file_ioctl(f.file, cmd, arg);
1120 	if (error)
1121 		goto out;
1122 
1123 	switch (cmd) {
1124 	/* FICLONE takes an int argument, so don't use compat_ptr() */
1125 	case FICLONE:
1126 		error = ioctl_file_clone(f.file, arg, 0, 0, 0);
1127 		break;
1128 
1129 #if defined(CONFIG_X86_64)
1130 	/* these get messy on amd64 due to alignment differences */
1131 	case FS_IOC_RESVSP_32:
1132 	case FS_IOC_RESVSP64_32:
1133 		error = compat_ioctl_preallocate(f.file, 0, compat_ptr(arg));
1134 		break;
1135 	case FS_IOC_UNRESVSP_32:
1136 	case FS_IOC_UNRESVSP64_32:
1137 		error = compat_ioctl_preallocate(f.file, FALLOC_FL_PUNCH_HOLE,
1138 				compat_ptr(arg));
1139 		break;
1140 	case FS_IOC_ZERO_RANGE_32:
1141 		error = compat_ioctl_preallocate(f.file, FALLOC_FL_ZERO_RANGE,
1142 				compat_ptr(arg));
1143 		break;
1144 #endif
1145 
1146 	/*
1147 	 * These access 32-bit values anyway so no further handling is
1148 	 * necessary.
1149 	 */
1150 	case FS_IOC32_GETFLAGS:
1151 	case FS_IOC32_SETFLAGS:
1152 		cmd = (cmd == FS_IOC32_GETFLAGS) ?
1153 			FS_IOC_GETFLAGS : FS_IOC_SETFLAGS;
1154 		fallthrough;
1155 	/*
1156 	 * everything else in do_vfs_ioctl() takes either a compatible
1157 	 * pointer argument or no argument -- call it with a modified
1158 	 * argument.
1159 	 */
1160 	default:
1161 		error = do_vfs_ioctl(f.file, fd, cmd,
1162 				     (unsigned long)compat_ptr(arg));
1163 		if (error != -ENOIOCTLCMD)
1164 			break;
1165 
1166 		if (f.file->f_op->compat_ioctl)
1167 			error = f.file->f_op->compat_ioctl(f.file, cmd, arg);
1168 		if (error == -ENOIOCTLCMD)
1169 			error = -ENOTTY;
1170 		break;
1171 	}
1172 
1173  out:
1174 	fdput(f);
1175 
1176 	return error;
1177 }
1178 #endif
1179