// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/mman.h>
#include <linux/backing-dev.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "truncate.h"

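/*
 * Return true if the inode's current state allows the iomap direct I/O
 * path to be used. Encrypted, verity, data-journaled and inline-data
 * inodes all have to use the buffered I/O path instead.
 */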
static bool ext4_dio_supported(struct inode *inode)
{
	if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
		return false;
	if (fsverity_active(inode))
		return false;
	if (ext4_should_journal_data(inode))
		return false;
	if (ext4_has_inline_data(inode))
		return false;
	return true;
}

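/*
 * Direct I/O read path. The inode lock is taken shared (trylock only
 * for IOCB_NOWAIT), and we fall back to buffered I/O when the inode
 * cannot use direct I/O.
 */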
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	ssize_t ret;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	if (!ext4_dio_supported(inode)) {
		inode_unlock_shared(inode);
		/*
		 * Fall back to buffered I/O if the operation being performed
		 * on the inode is not supported by direct I/O. The IOCB_DIRECT
		 * flag must be cleared here to ensure that the direct I/O path
		 * within generic_file_read_iter() is not taken.
		 */
		iocb->ki_flags &= ~IOCB_DIRECT;
		return generic_file_read_iter(iocb, to);
	}

	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
			   is_sync_kiocb(iocb));
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}

#ifdef CONFIG_FS_DAX
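/*
 * DAX read path. Like the direct I/O path this takes the shared inode
 * lock, but the read itself goes through dax_iomap_rw().
 */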
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (!inode_trylock_shared(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock_shared(inode);
	}
	/*
	 * Recheck under the inode lock - at this point we are sure it cannot
	 * change anymore.
	 */
	if (!IS_DAX(inode)) {
		inode_unlock_shared(inode);
		/* Fall back to buffered I/O in case we cannot support DAX */
		return generic_file_read_iter(iocb, to);
	}
	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}
#endif

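/*
 * Top-level ->read_iter() handler: dispatch to the DAX, direct I/O or
 * buffered read path as appropriate.
 */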
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	if (!iov_iter_count(to))
		return 0; /* skip atime */

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return ext4_dax_read_iter(iocb, to);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext4_dio_read_iter(iocb, to);

	return generic_file_read_iter(iocb, to);
}

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
			(atomic_read(&inode->i_writecount) == 1) &&
			!EXT4_I(inode)->i_reserved_data_blocks) {
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;

	if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize))
		return 0;

	if ((pos | iov_iter_alignment(from)) & blockmask)
		return 1;

	return 0;
}

/* Is IO overwriting allocated and initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
{
	struct ext4_map_blocks map;
	unsigned int blkbits = inode->i_blkbits;
	int err, blklen;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = pos >> blkbits;
	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
	blklen = map.m_len;

	err = ext4_map_blocks(NULL, inode, &map, 0);
	/*
	 * 'err == blklen' means that all of the blocks have been preallocated,
	 * regardless of whether they have been initialized or not. To exclude
	 * unwritten extents, we need to check m_flags.
	 */
	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
}

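/*
 * Common checks performed before a write: refuse writes to immutable
 * inodes, apply the generic VFS write checks, enforce the smaller size
 * limit of bitmap-format (non-extent) files, and call file_modified().
 * Returns the number of bytes that may be written, 0, or a negative
 * error.
 */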
static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		return ret;

	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
			return -EFBIG;
		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	}

	ret = file_modified(iocb->ki_filp);
	if (ret)
		return ret;

	return iov_iter_count(from);
}

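/*
 * Buffered write path: write through the page cache while holding the
 * inode lock exclusive. IOCB_NOWAIT is not supported here.
 */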
static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
{
	ssize_t ret;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

	inode_lock(inode);
	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	current->backing_dev_info = inode_to_bdi(inode);
	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
	current->backing_dev_info = NULL;

out:
	inode_unlock(inode);
	if (likely(ret > 0)) {
		iocb->ki_pos += ret;
		ret = generic_write_sync(iocb, ret);
	}

	return ret;
}

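/*
 * Finish off a write that may have extended the file: update i_size,
 * drop the inode from the orphan list once the extension is safely on
 * disk, and truncate any blocks that were allocated beyond the bytes
 * actually written. @written is the return value of the I/O routine,
 * @count the number of bytes originally requested.
 */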
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
					   ssize_t written, size_t count)
{
	handle_t *handle;
	bool truncate = false;
	u8 blkbits = inode->i_blkbits;
	ext4_lblk_t written_blk, end_blk;

	/*
	 * Note that EXT4_I(inode)->i_disksize can get extended up to
	 * inode->i_size while the I/O was running due to writeback of delalloc
	 * blocks. But the code in ext4_iomap_alloc() is careful to use
	 * zeroed/unwritten extents if this is possible; thus we won't leave
	 * uninitialized blocks in a file even if we didn't succeed in writing
	 * as much as we intended.
	 */
	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
	if (offset + count <= EXT4_I(inode)->i_disksize) {
		/*
		 * We need to ensure that the inode is removed from the orphan
		 * list if it has been added prematurely, due to writeback of
		 * delalloc blocks.
		 */
		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

			if (IS_ERR(handle)) {
				ext4_orphan_del(NULL, inode);
				return PTR_ERR(handle);
			}

			ext4_orphan_del(handle, inode);
			ext4_journal_stop(handle);
		}

		return written;
	}

	if (written < 0)
		goto truncate;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (IS_ERR(handle)) {
		written = PTR_ERR(handle);
		goto truncate;
	}

	if (ext4_update_inode_size(inode, offset + written))
		ext4_mark_inode_dirty(handle, inode);

	/*
	 * We may need to truncate allocated but not written blocks beyond EOF.
	 */
	written_blk = ALIGN(offset + written, 1 << blkbits);
	end_blk = ALIGN(offset + count, 1 << blkbits);
	if (written_blk < end_blk && ext4_can_truncate(inode))
		truncate = true;

	/*
	 * Remove the inode from the orphan list if it has been extended and
	 * everything went OK.
	 */
	if (!truncate && inode->i_nlink)
		ext4_orphan_del(handle, inode);
	ext4_journal_stop(handle);

	if (truncate) {
truncate:
		ext4_truncate_failed_write(inode);
		/*
		 * If the truncate operation failed early, then the inode may
		 * still be on the orphan list. In that case, we need to try to
		 * remove the inode from the in-memory linked list.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	return written;
}

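/*
 * Completion handler for direct I/O writes: convert any unwritten
 * extents covering the written range to written state.
 */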
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	loff_t offset = iocb->ki_pos;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		return error;

	if (size && flags & IOMAP_DIO_UNWRITTEN)
		return ext4_convert_unwritten_extents(NULL, inode,
						      offset, size);

	return 0;
}

static const struct iomap_dio_ops ext4_dio_write_ops = {
	.end_io = ext4_dio_write_end_io,
};

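/*
 * Direct I/O write path. The inode lock is normally held exclusive, but
 * for overwrites of already allocated and initialized blocks it can be
 * downgraded to shared when dioread_nolock is in effect. Writes that
 * extend i_disksize add the inode to the orphan list before issuing the
 * I/O and finish via ext4_handle_inode_extension().
 */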
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;
	size_t count;
	loff_t offset;
	handle_t *handle;
	struct inode *inode = file_inode(iocb->ki_filp);
	bool extend = false, overwrite = false, unaligned_aio = false;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	if (!ext4_dio_supported(inode)) {
		inode_unlock(inode);
		/*
		 * Fall back to buffered I/O if the inode does not support
		 * direct I/O.
		 */
		return ext4_buffered_write_iter(iocb, from);
	}

	ret = ext4_write_checks(iocb, from);
	if (ret <= 0) {
		inode_unlock(inode);
		return ret;
	}

	/*
	 * Unaligned asynchronous direct I/O writes must be serialized against
	 * each other, as the zeroing of partial blocks by two competing
	 * unaligned asynchronous direct I/O writes can result in data
	 * corruption.
	 */
	offset = iocb->ki_pos;
	count = iov_iter_count(from);
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	    !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
		unaligned_aio = true;
		inode_dio_wait(inode);
	}

	/*
	 * Determine whether the I/O will overwrite allocated and initialized
	 * blocks. If so, check to see whether it is possible to take the
	 * dioread_nolock path.
	 */
	if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
	    ext4_should_dioread_nolock(inode)) {
		overwrite = true;
		downgrade_write(&inode->i_rwsem);
	}

	if (offset + count > EXT4_I(inode)->i_disksize) {
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}

		ret = ext4_orphan_add(handle, inode);
		if (ret) {
			ext4_journal_stop(handle);
			goto out;
		}

		extend = true;
		ext4_journal_stop(handle);
	}

	ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
			   is_sync_kiocb(iocb) || unaligned_aio || extend);

	if (extend)
		ret = ext4_handle_inode_extension(inode, offset, ret, count);

out:
	if (overwrite)
		inode_unlock_shared(inode);
	else
		inode_unlock(inode);

	if (ret >= 0 && iov_iter_count(from)) {
		ssize_t err;
		loff_t endbyte;

		offset = iocb->ki_pos;
		err = ext4_buffered_write_iter(iocb, from);
		if (err < 0)
			return err;

		/*
		 * We need to ensure that the pages within the page cache for
		 * the range covered by this I/O are written to disk and
		 * invalidated. This is an attempt to preserve the expected
		 * direct I/O semantics in the case we fall back to buffered
		 * I/O to complete the I/O request.
		 */
		ret += err;
		endbyte = offset + err - 1;
		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
						   offset, endbyte);
		if (!err)
			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
						 offset >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
	}

	return ret;
}

#ifdef CONFIG_FS_DAX
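/*
 * DAX write path: same i_disksize extension and orphan list handling
 * as the direct I/O write path, with the data written synchronously
 * through dax_iomap_rw().
 */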
static ssize_t
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t ret;
	size_t count;
	loff_t offset;
	handle_t *handle;
	bool extend = false;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock(inode);
	}

	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	offset = iocb->ki_pos;
	count = iov_iter_count(from);

	if (offset + count > EXT4_I(inode)->i_disksize) {
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
		if (IS_ERR(handle)) {
			ret = PTR_ERR(handle);
			goto out;
		}

		ret = ext4_orphan_add(handle, inode);
		if (ret) {
			ext4_journal_stop(handle);
			goto out;
		}

		extend = true;
		ext4_journal_stop(handle);
	}

	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);

	if (extend)
		ret = ext4_handle_inode_extension(inode, offset, ret, count);
out:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
#endif

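/*
 * Top-level ->write_iter() handler: dispatch to the DAX, direct I/O or
 * buffered write path as appropriate.
 */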
static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

#ifdef CONFIG_FS_DAX
	if (IS_DAX(inode))
		return ext4_dax_write_iter(iocb, from);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext4_dio_write_iter(iocb, from);

	return ext4_buffered_write_iter(iocb, from);
}

#ifdef CONFIG_FS_DAX
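/*
 * DAX page fault handler, shared by all fault sizes. Write faults run
 * under a journal handle so that block allocation is journalled, and
 * are retried if the allocation failed with ENOSPC.
 */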
static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	int error = 0;
	vm_fault_t result;
	int retries = 0;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;

	/*
	 * We have to distinguish real writes from writes which will result in a
	 * COW page; COW writes should *not* poke the journal (the file will not
	 * be changed). Doing so would cause unintended failures when mounted
	 * read-only.
	 *
	 * We check for VM_SHARED rather than vmf->cow_page since the latter is
	 * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
	 * other sizes, dax_iomap_fault will handle splitting / fallback so that
	 * we eventually come back with a COW page.
	 */
	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
		(vmf->vma->vm_flags & VM_SHARED);
	pfn_t pfn;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vmf->vma->vm_file);
		down_read(&EXT4_I(inode)->i_mmap_sem);
retry:
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
					       EXT4_DATA_TRANS_BLOCKS(sb));
		if (IS_ERR(handle)) {
			up_read(&EXT4_I(inode)->i_mmap_sem);
			sb_end_pagefault(sb);
			return VM_FAULT_SIGBUS;
		}
	} else {
		down_read(&EXT4_I(inode)->i_mmap_sem);
	}
	result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
	if (write) {
		ext4_journal_stop(handle);

		if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
		    ext4_should_retry_alloc(sb, &retries))
			goto retry;
		/* Handling synchronous page fault? */
		if (result & VM_FAULT_NEEDDSYNC)
			result = dax_finish_sync_fault(vmf, pe_size, pfn);
		up_read(&EXT4_I(inode)->i_mmap_sem);
		sb_end_pagefault(sb);
	} else {
		up_read(&EXT4_I(inode)->i_mmap_sem);
	}

	return result;
}

static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
	return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
	.fault		= ext4_dax_fault,
	.huge_fault	= ext4_dax_huge_fault,
	.page_mkwrite	= ext4_dax_fault,
	.pfn_mkwrite	= ext4_dax_fault,
};
#else
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif

static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= ext4_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite   = ext4_page_mkwrite,
};

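/*
 * ->mmap() handler: install the DAX vm_ops for DAX inodes and the
 * regular page-cache vm_ops otherwise.
 */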
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct dax_device *dax_dev = sbi->s_daxdev;

	if (unlikely(ext4_forced_shutdown(sbi)))
		return -EIO;

	/*
	 * We don't support synchronous mappings for non-DAX files, nor for
	 * DAX files if the underlying dax_device is not synchronous.
	 */
	if (!daxdev_mapping_supported(vma, dax_dev))
		return -EOPNOTSUPP;

	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
		vma->vm_ops = &ext4_dax_vm_ops;
		vma->vm_flags |= VM_HUGEPAGE;
	} else {
		vma->vm_ops = &ext4_file_vm_ops;
	}
	return 0;
}

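/*
 * On the first open after mount, record where the filesystem is
 * mounted in the superblock (see the comment in the body below).
 */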
static int ext4_sample_last_mounted(struct super_block *sb,
				    struct vfsmount *mnt)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct path path;
	char buf[64], *cp;
	handle_t *handle;
	int err;

	if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
		return 0;

	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
		return 0;

	sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
	/*
	 * Sample where the filesystem has been mounted and
	 * store it in the superblock for sysadmin convenience
	 * when trying to sort through large numbers of block
	 * devices or filesystem images.
	 */
	memset(buf, 0, sizeof(buf));
	path.mnt = mnt;
	path.dentry = mnt->mnt_root;
	cp = d_path(&path, buf, sizeof(buf));
	err = 0;
	if (IS_ERR(cp))
		goto out;

	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
	err = PTR_ERR(handle);
	if (IS_ERR(handle))
		goto out;
	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
	if (err)
		goto out_journal;
	strlcpy(sbi->s_es->s_last_mounted, cp,
		sizeof(sbi->s_es->s_last_mounted));
	ext4_handle_dirty_super(handle, sb);
out_journal:
	ext4_journal_stop(handle);
out:
	sb_end_intwrite(sb);
	return err;
}

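/*
 * ->open() handler: sample the mount point, run the fscrypt and
 * fsverity open checks, attach the jbd2 inode for writers, and flag
 * the file as supporting IOCB_NOWAIT.
 */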
static int ext4_file_open(struct inode *inode, struct file *filp)
{
	int ret;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
	if (ret)
		return ret;

	ret = fscrypt_file_open(inode, filp);
	if (ret)
		return ret;

	ret = fsverity_file_open(inode, filp);
	if (ret)
		return ret;

	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present.
	 */
	if (filp->f_mode & FMODE_WRITE) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}

	filp->f_mode |= FMODE_NOWAIT;
	return dquot_file_open(inode, filp);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes;

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	else
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	default:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_HOLE:
		inode_lock_shared(inode);
		offset = iomap_seek_hole(inode, offset,
					 &ext4_iomap_report_ops);
		inode_unlock_shared(inode);
		break;
	case SEEK_DATA:
		inode_lock_shared(inode);
		offset = iomap_seek_data(inode, offset,
					 &ext4_iomap_report_ops);
		inode_unlock_shared(inode);
		break;
	}

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, maxbytes);
}

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read_iter	= ext4_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_file_getattr,
	.listxattr	= ext4_listxattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};