xref: /linux/fs/ext2/file.c (revision ccdc2e0569f5ff83cd1c6a5c7bb214e33e21bdec)
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext2/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext2 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 * 	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/filelock.h>
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#include "trace.h"

#ifdef CONFIG_FS_DAX
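/*
 * Read from a DAX file: the copy is done synchronously by dax_iomap_rw()
 * under the shared inode lock, which serialises against DAX writes while
 * still allowing concurrent readers. Zero-length reads return early so
 * they do not touch atime.
 */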
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t ret;

	if (!iov_iter_count(to))
		return 0; /* skip atime */

	inode_lock_shared(inode);
	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}

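/*
 * Write to a DAX file: the exclusive inode lock is held across the write
 * checks, privilege stripping, timestamp update and the synchronous copy
 * in dax_iomap_rw(); i_size is extended afterwards if the write went past
 * the old end of file.
 */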
static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;
	ret = file_update_time(file);
	if (ret)
		goto out_unlock;

	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		mark_inode_dirty(inode);
	}

out_unlock:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

/*
 * The lock ordering for ext2 DAX fault paths is:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault (vfs, freeze)
 *     address_space->invalidate_lock
 *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
 *         ext2_inode_info->truncate_mutex
 *
 * The default page_lock and i_size verification done by non-DAX fault paths
 * is sufficient because ext2 doesn't support hole punching.
 */
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;
	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
		(vmf->vma->vm_flags & VM_SHARED);

	if (write) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}
	filemap_invalidate_lock_shared(inode->i_mapping);

	ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);

	filemap_invalidate_unlock_shared(inode->i_mapping);
	if (write)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
	.fault		= ext2_dax_fault,
	/*
	 * .huge_fault is not supported for DAX because allocation in ext2
	 * cannot be reliably aligned to huge page sizes and so pmd faults
	 * will always fail and fall back to regular faults.
	 */
	.page_mkwrite	= ext2_dax_fault,
	.pfn_mkwrite	= ext2_dax_fault,
};

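/*
 * Set up mmap(): DAX files get ext2_dax_vm_ops so faults are served
 * directly from the backing device; everything else takes the generic
 * page-cache mmap path.
 */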
static int ext2_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;

	if (!IS_DAX(file_inode(file)))
		return generic_file_mmap_prepare(desc);

	file_accessed(file);
	desc->vm_ops = &ext2_dax_vm_ops;
	return 0;
}
#else
#define ext2_file_mmap_prepare	generic_file_mmap_prepare
#endif

/*
 * Called when filp is released. This happens when all file descriptors
 * for a single struct file are closed. Note that different open() calls
 * for the same file yield different struct file structures.
 */
static int ext2_release_file (struct inode * inode, struct file * filp)
{
	if (filp->f_mode & FMODE_WRITE) {
		mutex_lock(&EXT2_I(inode)->truncate_mutex);
		ext2_discard_reservation(inode);
		mutex_unlock(&EXT2_I(inode)->truncate_mutex);
	}
	return 0;
}

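/*
 * fsync for ext2: write back the file's data and metadata buffers and
 * report an ext2_error() if the writeback hit an I/O error, since we
 * cannot tell exactly which buffer failed.
 */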
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int ret;
	struct super_block *sb = file->f_mapping->host->i_sb;

	ret = generic_buffers_fsync(file, start, end, datasync);
	if (ret == -EIO)
		/* We don't really know where the IO error happened... */
		ext2_error(sb, __func__,
			   "detected IO error when writing metadata buffers");
	return ret;
}

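/*
 * Direct I/O read via iomap. Only the shared inode lock is needed as the
 * read does not modify the inode.
 */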
static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	trace_ext2_dio_read_begin(iocb, to, 0);
	inode_lock_shared(inode);
	ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
	inode_unlock_shared(inode);
	trace_ext2_dio_read_end(iocb, to, ret);

	return ret;
}

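/*
 * Completion callback for direct I/O writes. It runs before the iomap
 * code invalidates the page cache over the written range, which is why
 * the i_size update for extending writes has to happen here.
 */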
static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		goto out;

	/*
	 * If we are extending the file, we have to update i_size here before
	 * page cache gets invalidated in iomap_dio_rw(). This prevents racing
	 * buffered reads from zeroing out too much from page cache pages.
	 * Note that all extending writes always happen synchronously with the
	 * inode lock held by ext2_dio_write_iter(). So it is safe to update
	 * inode size here for extending file writes.
	 */
	pos += size;
	if (pos > i_size_read(inode)) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
out:
	trace_ext2_dio_write_endio(iocb, size, error);
	return error;
}

static const struct iomap_dio_ops ext2_dio_write_ops = {
	.end_io = ext2_dio_write_end_io,
};

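/*
 * Direct I/O write path. Unaligned and size-extending writes are forced
 * to complete synchronously, and any bytes the direct path could not
 * handle (a short write or -ENOTBLK) are retried through the page cache
 * as a buffered fallback.
 */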
static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;
	unsigned int flags = 0;
	unsigned long blocksize = inode->i_sb->s_blocksize;
	loff_t offset = iocb->ki_pos;
	loff_t count = iov_iter_count(from);
	ssize_t status = 0;

	trace_ext2_dio_write_begin(iocb, from, 0);
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	ret = kiocb_modified(iocb);
	if (ret)
		goto out_unlock;

	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
	   (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
		flags |= IOMAP_DIO_FORCE_WAIT;

	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
			   flags, NULL, 0);

	/* -ENOTBLK is the magic return value for falling back to buffered I/O */
	if (ret == -ENOTBLK)
		ret = 0;

	if (ret < 0 && ret != -EIOCBQUEUED)
		ext2_write_failed(inode->i_mapping, offset + count);

	/* handle case for partial write and for fallback to buffered write */
	if (ret >= 0 && iov_iter_count(from)) {
		loff_t pos, endbyte;
		int ret2;

		iocb->ki_flags &= ~IOCB_DIRECT;
		pos = iocb->ki_pos;
		status = generic_perform_write(iocb, from);
		if (unlikely(status < 0)) {
			ret = status;
			goto out_unlock;
		}

		ret += status;
		endbyte = pos + status - 1;
		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
						    endbyte);
		if (!ret2)
			invalidate_mapping_pages(inode->i_mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		if (ret > 0)
			generic_write_sync(iocb, ret);
	}

out_unlock:
	inode_unlock(inode);
	if (status)
		trace_ext2_dio_write_buff_end(iocb, from, status);
	trace_ext2_dio_write_end(iocb, from, ret);
	return ret;
}

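/*
 * Dispatch reads to the DAX, direct I/O or buffered path depending on
 * the inode and kiocb flags.
 */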
static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_read_iter(iocb, to);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_read_iter(iocb, to);

	return generic_file_read_iter(iocb, to);
}

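/*
 * Dispatch writes the same way: DAX first, then direct I/O, otherwise
 * the generic buffered write path.
 */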
static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_write_iter(iocb, from);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_write_iter(iocb, from);

	return generic_file_write_iter(iocb, from);
}

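/*
 * Advertise O_DIRECT support on open and let dquot_file_open() initialise
 * quotas for files opened for writing.
 */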
static int ext2_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_CAN_ODIRECT;
	return dquot_file_open(inode, filp);
}

const struct file_operations ext2_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= ext2_file_read_iter,
	.write_iter	= ext2_file_write_iter,
	.unlocked_ioctl = ext2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext2_compat_ioctl,
#endif
	.mmap_prepare	= ext2_file_mmap_prepare,
	.open		= ext2_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_fsync,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
	.setlease	= generic_setlease,
};

const struct inode_operations ext2_file_inode_operations = {
	.listxattr	= ext2_listxattr,
	.getattr	= ext2_getattr,
	.setattr	= ext2_setattr,
	.get_inode_acl	= ext2_get_acl,
	.set_acl	= ext2_set_acl,
	.fiemap		= ext2_fiemap,
	.fileattr_get	= ext2_fileattr_get,
	.fileattr_set	= ext2_fileattr_set,
};