xref: /linux/fs/exfat/file.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
4  */
5 
6 #include <linux/slab.h>
7 #include <linux/compat.h>
8 #include <linux/cred.h>
9 #include <linux/buffer_head.h>
10 #include <linux/blkdev.h>
11 #include <linux/fsnotify.h>
12 #include <linux/security.h>
13 #include <linux/msdos_fs.h>
14 #include <linux/writeback.h>
15 #include <linux/filelock.h>
16 #include <linux/falloc.h>
17 #include <linux/fileattr.h>
18 #include <linux/iomap.h>
19 
20 #include "exfat_raw.h"
21 #include "exfat_fs.h"
22 #include "iomap.h"
23 
24 static int exfat_cont_expand(struct inode *inode, loff_t size)
25 {
26 	int ret;
27 	unsigned int num_clusters, new_num_clusters, last_clu;
28 	struct exfat_inode_info *ei = EXFAT_I(inode);
29 	struct super_block *sb = inode->i_sb;
30 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
31 	struct exfat_chain clu;
32 	loff_t oldsize = i_size_read(inode);
33 
34 	truncate_pagecache(inode, oldsize);
35 
36 	ret = inode_newsize_ok(inode, size);
37 	if (ret)
38 		return ret;
39 
40 	num_clusters = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
41 	/* integer overflow is already checked in inode_newsize_ok(). */
42 	new_num_clusters = exfat_bytes_to_cluster_round_up(sbi, size);
43 
44 	if (new_num_clusters == num_clusters)
45 		goto out;
46 
47 	if (num_clusters) {
48 		exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
49 		ret = exfat_find_last_cluster(sb, &clu, &last_clu);
50 		if (ret)
51 			return ret;
52 
53 		clu.dir = last_clu + 1;
54 	} else {
55 		last_clu = EXFAT_EOF_CLUSTER;
56 		clu.dir = EXFAT_EOF_CLUSTER;
57 	}
58 
59 	clu.size = 0;
60 	clu.flags = ei->flags;
61 
62 	ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters,
63 			&clu, inode_needs_sync(inode), false);
64 	if (ret)
65 		return ret;
66 
67 	/* Append new clusters to chain */
68 	if (num_clusters) {
69 		if (clu.flags != ei->flags)
70 			if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters))
71 				goto free_clu;
72 
73 		if (clu.flags == ALLOC_FAT_CHAIN)
74 			if (exfat_ent_set(sb, last_clu, clu.dir))
75 				goto free_clu;
76 	} else
77 		ei->start_clu = clu.dir;
78 
79 	ei->flags = clu.flags;
80 
81 out:
82 	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
83 	/* Expanded range not zeroed, do not update valid_size */
84 	i_size_write(inode, size);
85 	/*
86 	 * When extending file size, call truncate_pagecache() first,
87 	 * then update i_size, and call pagecache_isize_extended()
88 	 * to ensures the straddling folio is properly marked RO so
89 	 * page_mkwrite() is called and post-EOF area is zeroed.
90 	 */
91 	pagecache_isize_extended(inode, oldsize, inode->i_size);
92 
93 	inode->i_blocks = round_up(size, sbi->cluster_size) >> 9;
94 	mark_inode_dirty(inode);
95 
96 	if (IS_SYNC(inode))
97 		return write_inode_now(inode, 1);
98 
99 	return 0;
100 
101 free_clu:
102 	exfat_free_cluster(inode, &clu);
103 	return -EIO;
104 }
105 
106 /*
107  * Preallocate space for a file. This implements exfat's fallocate file
108  * operation, which gets called from sys_fallocate system call. User space
109  * requests len bytes at offset. In contrary to fat, we only support
110  * FALLOC_FL_ALLOCATE_RANGE because by leaving the valid data length(VDL)
111  * field, it is unnecessary to zero out the newly allocated clusters.
112  */
113 static long exfat_fallocate(struct file *file, int mode,
114 			  loff_t offset, loff_t len)
115 {
116 	struct inode *inode = file->f_mapping->host;
117 	loff_t newsize = offset + len;
118 	int err = 0;
119 
120 	/* No support for other modes */
121 	if (mode != FALLOC_FL_ALLOCATE_RANGE)
122 		return -EOPNOTSUPP;
123 
124 	/* No support for dir */
125 	if (!S_ISREG(inode->i_mode))
126 		return -EOPNOTSUPP;
127 
128 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
129 		return -EIO;
130 
131 	inode_lock(inode);
132 
133 	if (newsize <= i_size_read(inode))
134 		goto error;
135 
136 	/* This is just an expanding truncate */
137 	err = exfat_cont_expand(inode, newsize);
138 
139 error:
140 	inode_unlock(inode);
141 
142 	return err;
143 }
144 
145 static bool exfat_allow_set_time(struct mnt_idmap *idmap,
146 				 struct exfat_sb_info *sbi, struct inode *inode)
147 {
148 	mode_t allow_utime = sbi->options.allow_utime;
149 
150 	if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode),
151 			    current_fsuid())) {
152 		if (vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)))
153 			allow_utime >>= 3;
154 		if (allow_utime & MAY_WRITE)
155 			return true;
156 	}
157 
158 	/* use a default check */
159 	return false;
160 }
161 
162 static int exfat_sanitize_mode(const struct exfat_sb_info *sbi,
163 		struct inode *inode, umode_t *mode_ptr)
164 {
165 	mode_t i_mode, mask, perm;
166 
167 	i_mode = inode->i_mode;
168 
169 	mask = (S_ISREG(i_mode) || S_ISLNK(i_mode)) ?
170 		sbi->options.fs_fmask : sbi->options.fs_dmask;
171 	perm = *mode_ptr & ~(S_IFMT | mask);
172 
173 	/* Of the r and x bits, all (subject to umask) must be present.*/
174 	if ((perm & 0555) != (i_mode & 0555))
175 		return -EPERM;
176 
177 	if (exfat_mode_can_hold_ro(inode)) {
178 		/*
179 		 * Of the w bits, either all (subject to umask) or none must
180 		 * be present.
181 		 */
182 		if ((perm & 0222) && ((perm & 0222) != (0222 & ~mask)))
183 			return -EPERM;
184 	} else {
185 		/*
186 		 * If exfat_mode_can_hold_ro(inode) is false, can't change
187 		 * w bits.
188 		 */
189 		if ((perm & 0222) != (0222 & ~mask))
190 			return -EPERM;
191 	}
192 
193 	*mode_ptr &= S_IFMT | perm;
194 
195 	return 0;
196 }
197 
198 /* resize the file length */
199 int __exfat_truncate(struct inode *inode)
200 {
201 	unsigned int num_clusters_new, num_clusters_phys;
202 	unsigned int last_clu = EXFAT_FREE_CLUSTER;
203 	struct exfat_chain clu;
204 	struct super_block *sb = inode->i_sb;
205 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
206 	struct exfat_inode_info *ei = EXFAT_I(inode);
207 
208 	/* check if the given file ID is opened */
209 	if (ei->type != TYPE_FILE && ei->type != TYPE_DIR)
210 		return -EPERM;
211 
212 	exfat_set_volume_dirty(sb);
213 
214 	num_clusters_new = exfat_bytes_to_cluster_round_up(sbi, i_size_read(inode));
215 	num_clusters_phys = exfat_bytes_to_cluster(sbi, exfat_ondisk_size(inode));
216 
217 	exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
218 
219 	if (i_size_read(inode) > 0) {
220 		/*
221 		 * Truncate FAT chain num_clusters after the first cluster
222 		 * num_clusters = min(new, phys);
223 		 */
224 		unsigned int num_clusters =
225 			min(num_clusters_new, num_clusters_phys);
226 
227 		/*
228 		 * Follow FAT chain
229 		 * (defensive coding - works fine even with corrupted FAT table
230 		 */
231 		if (clu.flags == ALLOC_NO_FAT_CHAIN) {
232 			clu.dir += num_clusters;
233 			clu.size -= num_clusters;
234 		} else {
235 			while (num_clusters > 0) {
236 				last_clu = clu.dir;
237 				if (exfat_get_next_cluster(sb, &(clu.dir)))
238 					return -EIO;
239 
240 				num_clusters--;
241 				clu.size--;
242 			}
243 		}
244 	} else {
245 		ei->flags = ALLOC_NO_FAT_CHAIN;
246 		ei->start_clu = EXFAT_EOF_CLUSTER;
247 	}
248 
249 	if (i_size_read(inode) < ei->valid_size)
250 		ei->valid_size = ei->zeroed_size = i_size_read(inode);
251 
252 	if (ei->type == TYPE_FILE)
253 		ei->attr |= EXFAT_ATTR_ARCHIVE;
254 
255 	/*
256 	 * update the directory entry
257 	 *
258 	 * If the directory entry is updated by mark_inode_dirty(), the
259 	 * directory entry will be written after a writeback cycle of
260 	 * updating the bitmap/FAT, which may result in clusters being
261 	 * freed but referenced by the directory entry in the event of a
262 	 * sudden power failure.
263 	 * __exfat_write_inode() is called for directory entry, bitmap
264 	 * and FAT to be written in a same writeback.
265 	 */
266 	if (__exfat_write_inode(inode, inode_needs_sync(inode)))
267 		return -EIO;
268 
269 	/* cut off from the FAT chain */
270 	if (ei->flags == ALLOC_FAT_CHAIN && last_clu != EXFAT_FREE_CLUSTER &&
271 			last_clu != EXFAT_EOF_CLUSTER) {
272 		if (exfat_ent_set(sb, last_clu, EXFAT_EOF_CLUSTER))
273 			return -EIO;
274 	}
275 
276 	/* invalidate cache and free the clusters */
277 	/* clear exfat cache */
278 	exfat_cache_inval_inode(inode);
279 
280 	/* hint information */
281 	ei->hint_bmap.off = EXFAT_EOF_CLUSTER;
282 	ei->hint_bmap.clu = EXFAT_EOF_CLUSTER;
283 
284 	/* hint_stat will be used if this is directory. */
285 	ei->hint_stat.eidx = 0;
286 	ei->hint_stat.clu = ei->start_clu;
287 	ei->hint_femp.eidx = EXFAT_HINT_NONE;
288 
289 	/* free the clusters */
290 	if (exfat_free_cluster(inode, &clu))
291 		return -EIO;
292 
293 	return 0;
294 }
295 
296 static void exfat_truncate(struct inode *inode)
297 {
298 	struct super_block *sb = inode->i_sb;
299 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
300 	struct exfat_inode_info *ei = EXFAT_I(inode);
301 	int err;
302 
303 	mutex_lock(&sbi->s_lock);
304 	if (ei->start_clu == 0) {
305 		/*
306 		 * Empty start_clu != ~0 (not allocated)
307 		 */
308 		exfat_fs_error(sb, "tried to truncate zeroed cluster.");
309 		goto write_size;
310 	}
311 
312 	err = __exfat_truncate(inode);
313 	if (err)
314 		goto write_size;
315 
316 	inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
317 write_size:
318 	mutex_unlock(&sbi->s_lock);
319 }
320 
321 int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
322 		  struct kstat *stat, unsigned int request_mask,
323 		  unsigned int query_flags)
324 {
325 	struct inode *inode = d_backing_inode(path->dentry);
326 	struct exfat_inode_info *ei = EXFAT_I(inode);
327 
328 	generic_fillattr(idmap, request_mask, inode, stat);
329 	exfat_truncate_atime(&stat->atime);
330 	stat->result_mask |= STATX_BTIME;
331 	stat->btime.tv_sec = ei->i_crtime.tv_sec;
332 	stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
333 	stat->blksize = EXFAT_SB(inode->i_sb)->cluster_size;
334 	return 0;
335 }
336 
337 int exfat_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
338 {
339 	/*
340 	 * exFAT compares filenames through an upcase table, so lookup
341 	 * is always case-insensitive. Long names are stored in UTF-16
342 	 * with case intact; CASENONPRESERVING stays clear.
343 	 */
344 	fa->fsx_xflags |= FS_XFLAG_CASEFOLD;
345 	fa->flags |= FS_CASEFOLD_FL;
346 	return 0;
347 }
348 
349 int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
350 		  struct iattr *attr)
351 {
352 	struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb);
353 	struct inode *inode = dentry->d_inode;
354 	unsigned int ia_valid;
355 	int error;
356 
357 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
358 		return -EIO;
359 
360 	if ((attr->ia_valid & ATTR_SIZE) &&
361 	    attr->ia_size > i_size_read(inode)) {
362 		error = exfat_cont_expand(inode, attr->ia_size);
363 		if (error || attr->ia_valid == ATTR_SIZE)
364 			return error;
365 		attr->ia_valid &= ~ATTR_SIZE;
366 	}
367 
368 	/* Check for setting the inode time. */
369 	ia_valid = attr->ia_valid;
370 	if ((ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) &&
371 	    exfat_allow_set_time(idmap, sbi, inode)) {
372 		attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET |
373 				ATTR_TIMES_SET);
374 	}
375 
376 	error = setattr_prepare(idmap, dentry, attr);
377 	attr->ia_valid = ia_valid;
378 	if (error)
379 		goto out;
380 
381 	if (((attr->ia_valid & ATTR_UID) &&
382 	      (!uid_eq(from_vfsuid(idmap, i_user_ns(inode), attr->ia_vfsuid),
383 	       sbi->options.fs_uid))) ||
384 	    ((attr->ia_valid & ATTR_GID) &&
385 	      (!gid_eq(from_vfsgid(idmap, i_user_ns(inode), attr->ia_vfsgid),
386 	       sbi->options.fs_gid))) ||
387 	    ((attr->ia_valid & ATTR_MODE) &&
388 	     (attr->ia_mode & ~(S_IFREG | S_IFLNK | S_IFDIR | 0777)))) {
389 		error = -EPERM;
390 		goto out;
391 	}
392 
393 	/*
394 	 * We don't return -EPERM here. Yes, strange, but this is too
395 	 * old behavior.
396 	 */
397 	if (attr->ia_valid & ATTR_MODE) {
398 		if (exfat_sanitize_mode(sbi, inode, &attr->ia_mode) < 0)
399 			attr->ia_valid &= ~ATTR_MODE;
400 	}
401 
402 	if (attr->ia_valid & ATTR_SIZE)
403 		inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
404 
405 	setattr_copy(idmap, inode, attr);
406 	exfat_truncate_inode_atime(inode);
407 
408 	if (attr->ia_valid & ATTR_SIZE) {
409 		/*
410 		 * Wait for any in-flight DIO to finish before truncating to
411 		 * prevent a concurrent DIO from writing to clusters that are
412 		 * about to be freed.
413 		 */
414 		inode_dio_wait(inode);
415 		down_write(&EXFAT_I(inode)->truncate_lock);
416 		truncate_setsize(inode, attr->ia_size);
417 
418 		/*
419 		 * __exfat_write_inode() is called from exfat_truncate(), inode
420 		 * is already written by it, so mark_inode_dirty() is unneeded.
421 		 */
422 		exfat_truncate(inode);
423 		up_write(&EXFAT_I(inode)->truncate_lock);
424 	} else
425 		mark_inode_dirty(inode);
426 
427 out:
428 	return error;
429 }
430 
/*
 * modified ioctls from fat/file.c by Werner Almesberger
 */
434 static int exfat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
435 {
436 	u32 attr;
437 
438 	inode_lock_shared(inode);
439 	attr = exfat_make_attr(inode);
440 	inode_unlock_shared(inode);
441 
442 	return put_user(attr, user_attr);
443 }
444 
445 static int exfat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
446 {
447 	struct inode *inode = file_inode(file);
448 	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
449 	int is_dir = S_ISDIR(inode->i_mode);
450 	u32 attr, oldattr;
451 	struct iattr ia;
452 	int err;
453 
454 	err = get_user(attr, user_attr);
455 	if (err)
456 		goto out;
457 
458 	err = mnt_want_write_file(file);
459 	if (err)
460 		goto out;
461 	inode_lock(inode);
462 
463 	oldattr = exfat_make_attr(inode);
464 
465 	/*
466 	 * Mask attributes so we don't set reserved fields.
467 	 */
468 	attr &= (EXFAT_ATTR_READONLY | EXFAT_ATTR_HIDDEN | EXFAT_ATTR_SYSTEM |
469 		 EXFAT_ATTR_ARCHIVE);
470 	attr |= (is_dir ? EXFAT_ATTR_SUBDIR : 0);
471 
472 	/* Equivalent to a chmod() */
473 	ia.ia_valid = ATTR_MODE | ATTR_CTIME;
474 	ia.ia_ctime = current_time(inode);
475 	if (is_dir)
476 		ia.ia_mode = exfat_make_mode(sbi, attr, 0777);
477 	else
478 		ia.ia_mode = exfat_make_mode(sbi, attr, 0666 | (inode->i_mode & 0111));
479 
480 	/* The root directory has no attributes */
481 	if (inode->i_ino == EXFAT_ROOT_INO && attr != EXFAT_ATTR_SUBDIR) {
482 		err = -EINVAL;
483 		goto out_unlock_inode;
484 	}
485 
486 	if (((attr | oldattr) & EXFAT_ATTR_SYSTEM) &&
487 	    !capable(CAP_LINUX_IMMUTABLE)) {
488 		err = -EPERM;
489 		goto out_unlock_inode;
490 	}
491 
492 	/*
493 	 * The security check is questionable...  We single
494 	 * out the RO attribute for checking by the security
495 	 * module, just because it maps to a file mode.
496 	 */
497 	err = security_inode_setattr(file_mnt_idmap(file),
498 				     file->f_path.dentry, &ia);
499 	if (err)
500 		goto out_unlock_inode;
501 
502 	/* This MUST be done before doing anything irreversible... */
503 	err = exfat_setattr(file_mnt_idmap(file), file->f_path.dentry, &ia);
504 	if (err)
505 		goto out_unlock_inode;
506 
507 	fsnotify_change(file->f_path.dentry, ia.ia_valid);
508 
509 	exfat_save_attr(inode, attr);
510 	mark_inode_dirty(inode);
511 out_unlock_inode:
512 	inode_unlock(inode);
513 	mnt_drop_write_file(file);
514 out:
515 	return err;
516 }
517 
518 static int exfat_ioctl_fitrim(struct inode *inode, unsigned long arg)
519 {
520 	struct fstrim_range range;
521 	int ret = 0;
522 
523 	if (!capable(CAP_SYS_ADMIN))
524 		return -EPERM;
525 
526 	if (!bdev_max_discard_sectors(inode->i_sb->s_bdev))
527 		return -EOPNOTSUPP;
528 
529 	if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
530 		return -EFAULT;
531 
532 	range.minlen = max_t(unsigned int, range.minlen,
533 				bdev_discard_granularity(inode->i_sb->s_bdev));
534 
535 	ret = exfat_trim_fs(inode, &range);
536 	if (ret < 0)
537 		return ret;
538 
539 	if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range)))
540 		return -EFAULT;
541 
542 	return 0;
543 }
544 
545 static int exfat_ioctl_shutdown(struct super_block *sb, unsigned long arg)
546 {
547 	u32 flags;
548 
549 	if (!capable(CAP_SYS_ADMIN))
550 		return -EPERM;
551 
552 	if (get_user(flags, (__u32 __user *)arg))
553 		return -EFAULT;
554 
555 	return exfat_force_shutdown(sb, flags);
556 }
557 
558 static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long arg)
559 {
560 	int ret;
561 	char label[FSLABEL_MAX] = {0};
562 	struct exfat_uni_name uniname;
563 
564 	ret = exfat_read_volume_label(sb, &uniname);
565 	if (ret < 0)
566 		return ret;
567 
568 	ret = exfat_utf16_to_nls(sb, &uniname, label, uniname.name_len);
569 	if (ret < 0)
570 		return ret;
571 
572 	if (copy_to_user((char __user *)arg, label, ret + 1))
573 		return -EFAULT;
574 
575 	return 0;
576 }
577 
578 static int exfat_ioctl_set_volume_label(struct super_block *sb,
579 					unsigned long arg)
580 {
581 	int ret = 0, lossy, label_len;
582 	char label[FSLABEL_MAX] = {0};
583 	struct exfat_uni_name uniname;
584 
585 	if (!capable(CAP_SYS_ADMIN))
586 		return -EPERM;
587 
588 	if (copy_from_user(label, (char __user *)arg, FSLABEL_MAX))
589 		return -EFAULT;
590 
591 	memset(&uniname, 0, sizeof(uniname));
592 	label_len = strnlen(label, FSLABEL_MAX - 1);
593 	if (label[0]) {
594 		ret = exfat_nls_to_utf16(sb, label, label_len,
595 					 &uniname, &lossy);
596 		if (ret < 0)
597 			return ret;
598 		else if (lossy & NLS_NAME_LOSSY)
599 			return -EINVAL;
600 	}
601 
602 	uniname.name_len = ret;
603 
604 	return exfat_write_volume_label(sb, &uniname);
605 }
606 
607 long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
608 {
609 	struct inode *inode = file_inode(filp);
610 	u32 __user *user_attr = (u32 __user *)arg;
611 
612 	switch (cmd) {
613 	case FAT_IOCTL_GET_ATTRIBUTES:
614 		return exfat_ioctl_get_attributes(inode, user_attr);
615 	case FAT_IOCTL_SET_ATTRIBUTES:
616 		return exfat_ioctl_set_attributes(filp, user_attr);
617 	case EXFAT_IOC_SHUTDOWN:
618 		return exfat_ioctl_shutdown(inode->i_sb, arg);
619 	case FITRIM:
620 		return exfat_ioctl_fitrim(inode, arg);
621 	case FS_IOC_GETFSLABEL:
622 		return exfat_ioctl_get_volume_label(inode->i_sb, arg);
623 	case FS_IOC_SETFSLABEL:
624 		return exfat_ioctl_set_volume_label(inode->i_sb, arg);
625 	default:
626 		return -ENOTTY;
627 	}
628 }
629 
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert the compat user pointer with
 * compat_ptr() and forward to exfat_ioctl().
 */
long exfat_compat_ioctl(struct file *filp, unsigned int cmd,
				unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return exfat_ioctl(filp, cmd, native_arg);
}
#endif
637 
638 int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
639 {
640 	struct inode *inode = filp->f_mapping->host;
641 	int err;
642 
643 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
644 		return -EIO;
645 
646 	err = simple_fsync_noflush(filp, start, end, datasync);
647 	if (err)
648 		return err;
649 
650 	err = sync_blockdev(inode->i_sb->s_bdev);
651 	if (err)
652 		return err;
653 
654 	return blkdev_issue_flush(inode->i_sb->s_bdev);
655 }
656 
657 static int exfat_extend_valid_size(struct inode *inode, loff_t new_valid_size)
658 {
659 	struct exfat_inode_info *ei = EXFAT_I(inode);
660 	loff_t old_valid_size = ei->valid_size;
661 	int ret = 0;
662 
663 	if (old_valid_size < new_valid_size) {
664 		if (i_size_read(inode) < new_valid_size) {
665 			i_size_write(inode, new_valid_size);
666 			mark_inode_dirty(inode);
667 		}
668 
669 		ret = iomap_zero_range(inode, old_valid_size,
670 				new_valid_size - old_valid_size, NULL,
671 				&exfat_write_iomap_ops, NULL, NULL);
672 		if (ret) {
673 			truncate_setsize(inode, old_valid_size);
674 			exfat_truncate(inode);
675 		}
676 	}
677 
678 	return ret;
679 }
680 
681 static ssize_t exfat_fallback_buffered_write(struct kiocb *iocb,
682 		struct iov_iter *from)
683 {
684 	loff_t offset = iocb->ki_pos, end;
685 	ssize_t written;
686 	int ret;
687 
688 	iocb->ki_flags &= ~IOCB_DIRECT;
689 
690 	written = iomap_file_buffered_write(iocb, from, &exfat_write_iomap_ops,
691 			NULL, NULL);
692 	if (written < 0)
693 		return written;
694 
695 	end = iocb->ki_pos + written - 1;
696 	ret = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
697 			offset, end);
698 	if (ret)
699 		return -EIO;
700 
701 	invalidate_mapping_pages(iocb->ki_filp->f_mapping,
702 			offset >> PAGE_SHIFT,
703 			end >> PAGE_SHIFT);
704 
705 	return written;
706 }
707 
708 static ssize_t exfat_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
709 {
710 	ssize_t ret;
711 
712 	ret = iomap_dio_rw(iocb, from, &exfat_write_iomap_ops,
713 			&exfat_write_dio_ops, 0, NULL, 0);
714 	if (ret == -ENOTBLK)
715 		ret = 0;
716 	else if (ret < 0)
717 		return ret;
718 
719 	if (iov_iter_count(from)) {
720 		ssize_t written;
721 
722 		written = exfat_fallback_buffered_write(iocb, from);
723 		if (written < 0)
724 			return written;
725 		ret += written;
726 	}
727 
728 	return ret;
729 }
730 
731 static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
732 {
733 	ssize_t ret;
734 	struct file *file = iocb->ki_filp;
735 	struct inode *inode = file_inode(file);
736 	struct exfat_inode_info *ei = EXFAT_I(inode);
737 	loff_t pos = iocb->ki_pos;
738 	loff_t valid_size;
739 	int err;
740 
741 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
742 		return -EIO;
743 
744 	inode_lock(inode);
745 
746 	if (pos > i_size_read(inode))
747 		truncate_pagecache(inode, i_size_read(inode));
748 
749 	valid_size = ei->valid_size;
750 
751 	ret = generic_write_checks(iocb, iter);
752 	if (ret <= 0)
753 		goto unlock;
754 
755 	err = file_modified(iocb->ki_filp);
756 	if (err) {
757 		ret = err;
758 		goto unlock;
759 	}
760 
761 	if (pos > valid_size) {
762 		ret = exfat_extend_valid_size(inode, pos);
763 		if (ret < 0 && ret != -ENOSPC) {
764 			exfat_err(inode->i_sb,
765 				"write: fail to zero from %llu to %llu(%zd)",
766 				valid_size, pos, ret);
767 		}
768 		if (ret < 0)
769 			goto unlock;
770 	}
771 
772 	if (iocb->ki_flags & IOCB_DIRECT)
773 		ret = exfat_dio_write_iter(iocb, iter);
774 	else
775 		ret = iomap_file_buffered_write(iocb, iter,
776 				&exfat_write_iomap_ops, NULL, NULL);
777 	if (ret < 0)
778 		goto unlock;
779 
780 	inode_unlock(inode);
781 
782 	if (pos > valid_size)
783 		pos = valid_size;
784 
785 	if (iocb->ki_pos > pos) {
786 		ssize_t err = generic_write_sync(iocb, iocb->ki_pos - pos);
787 
788 		if (err < 0)
789 			return err;
790 	}
791 
792 	return ret;
793 
794 unlock:
795 	inode_unlock(inode);
796 
797 	return ret;
798 }
799 
800 static ssize_t exfat_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
801 {
802 	struct inode *inode = file_inode(iocb->ki_filp);
803 	ssize_t ret;
804 
805 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
806 		return -EIO;
807 
808 	inode_lock_shared(inode);
809 
810 	if (iocb->ki_flags & IOCB_DIRECT) {
811 		file_accessed(iocb->ki_filp);
812 		ret = iomap_dio_rw(iocb, iter, &exfat_iomap_ops, NULL, 0,
813 				NULL, 0);
814 	} else {
815 		ret = generic_file_read_iter(iocb, iter);
816 	}
817 
818 	inode_unlock_shared(inode);
819 
820 	return ret;
821 }
822 
823 static vm_fault_t exfat_page_mkwrite(struct vm_fault *vmf)
824 {
825 	struct inode *inode = file_inode(vmf->vma->vm_file);
826 	struct exfat_inode_info *ei = EXFAT_I(inode);
827 	vm_fault_t ret;
828 	loff_t new_valid_size, mmap_valid_size;
829 
830 	if (!inode_trylock(inode))
831 		return VM_FAULT_RETRY;
832 
833 	mmap_valid_size = ((loff_t)vmf->pgoff + 1) << PAGE_SHIFT;
834 	new_valid_size = min(mmap_valid_size, i_size_read(inode));
835 
836 	if (ei->valid_size < new_valid_size) {
837 		if (ei->zeroed_size < mmap_valid_size) {
838 			int err;
839 
840 			/*
841 			 * Only zero the range that hasn't been zeroed yet for
842 			 * this mmap write path. zeroed_size tracks the largest
843 			 * page-aligned offset that has already been zeroed.
844 			 *
845 			 * This prevents unnecessarily zeroing out the entire
846 			 * tail page on every page fault when userspace writes
847 			 * data byte-by-byte through mmap (after a small
848 			 * fallocate). It fixes data corruption in the tail page
849 			 * while preserving the existing valid_size semantics.
850 			 */
851 			err = iomap_zero_range(inode, ei->zeroed_size,
852 					mmap_valid_size - ei->zeroed_size, NULL,
853 					&exfat_iomap_ops, NULL, NULL);
854 			if (err < 0) {
855 				inode_unlock(inode);
856 				return vmf_fs_error(err);
857 			}
858 			ei->zeroed_size = mmap_valid_size;
859 		}
860 
861 		ei->valid_size = new_valid_size;
862 		mark_inode_dirty(inode);
863 	}
864 
865 	sb_start_pagefault(inode->i_sb);
866 	file_update_time(vmf->vma->vm_file);
867 
868 	filemap_invalidate_lock_shared(inode->i_mapping);
869 	ret = iomap_page_mkwrite(vmf, &exfat_write_iomap_ops, NULL);
870 	filemap_invalidate_unlock_shared(inode->i_mapping);
871 	sb_end_pagefault(inode->i_sb);
872 	inode_unlock(inode);
873 
874 	return ret;
875 }
876 
877 static const struct vm_operations_struct exfat_file_vm_ops = {
878 	.fault		= filemap_fault,
879 	.map_pages	= filemap_map_pages,
880 	.page_mkwrite	= exfat_page_mkwrite,
881 };
882 
883 static int exfat_file_mmap_prepare(struct vm_area_desc *desc)
884 {
885 	struct file *file = desc->file;
886 
887 	if (unlikely(exfat_forced_shutdown(file_inode(desc->file)->i_sb)))
888 		return -EIO;
889 
890 	if (vma_desc_test_all(desc, VMA_SHARED_BIT, VMA_MAYWRITE_BIT)) {
891 		struct inode *inode = file_inode(file);
892 		loff_t from, to;
893 		int err;
894 
895 		from = ((loff_t)desc->pgoff << PAGE_SHIFT);
896 		to = min_t(loff_t, i_size_read(inode),
897 				from + vma_desc_size(desc));
898 		if (EXFAT_I(inode)->valid_size < to) {
899 			err = exfat_extend_valid_size(inode, to);
900 			if (err)
901 				return err;
902 		}
903 	}
904 
905 	file_accessed(file);
906 	desc->vm_ops = &exfat_file_vm_ops;
907 	return 0;
908 }
909 
910 static ssize_t exfat_splice_read(struct file *in, loff_t *ppos,
911 		struct pipe_inode_info *pipe, size_t len, unsigned int flags)
912 {
913 	if (unlikely(exfat_forced_shutdown(file_inode(in)->i_sb)))
914 		return -EIO;
915 
916 	return filemap_splice_read(in, ppos, pipe, len, flags);
917 }
918 
919 static int exfat_file_open(struct inode *inode, struct file *filp)
920 {
921 	int err;
922 
923 	if (unlikely(exfat_forced_shutdown(inode->i_sb)))
924 		return -EIO;
925 
926 	err = generic_file_open(inode, filp);
927 	if (err)
928 		return err;
929 
930 	filp->f_mode |= FMODE_CAN_ODIRECT;
931 
932 	return 0;
933 }
934 
935 static loff_t exfat_file_llseek(struct file *file, loff_t offset, int whence)
936 {
937 	struct inode *inode = file->f_mapping->host;
938 
939 	switch (whence) {
940 	case SEEK_HOLE:
941 		inode_lock_shared(inode);
942 		offset = iomap_seek_hole(inode, offset, &exfat_iomap_ops);
943 		inode_unlock_shared(inode);
944 		break;
945 	case SEEK_DATA:
946 		inode_lock_shared(inode);
947 		offset = iomap_seek_data(inode, offset, &exfat_iomap_ops);
948 		inode_unlock_shared(inode);
949 		break;
950 	default:
951 		return generic_file_llseek(file, offset, whence);
952 	}
953 	if (offset < 0)
954 		return offset;
955 	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
956 }
957 
958 const struct file_operations exfat_file_operations = {
959 	.open		= exfat_file_open,
960 	.llseek		= exfat_file_llseek,
961 	.read_iter	= exfat_file_read_iter,
962 	.write_iter	= exfat_file_write_iter,
963 	.unlocked_ioctl = exfat_ioctl,
964 #ifdef CONFIG_COMPAT
965 	.compat_ioctl = exfat_compat_ioctl,
966 #endif
967 	.mmap_prepare	= exfat_file_mmap_prepare,
968 	.fsync		= exfat_file_fsync,
969 	.splice_read	= exfat_splice_read,
970 	.splice_write	= iter_file_splice_write,
971 	.fallocate	= exfat_fallocate,
972 	.setlease	= generic_setlease,
973 };
974 
975 const struct inode_operations exfat_file_inode_operations = {
976 	.setattr	= exfat_setattr,
977 	.getattr	= exfat_getattr,
978 	.fileattr_get	= exfat_fileattr_get,
979 };
980