// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext2/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 * from
 *
 * linux/fs/minix/file.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * ext2 fs regular file handling primitives
 *
 * 64-bit file support on 64-bit platforms by Jakub Jelinek
 * (jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#include "trace.h"

#ifdef CONFIG_FS_DAX
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t ret;

	if (!iov_iter_count(to))
		return 0; /* skip atime */

	inode_lock_shared(inode);
	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}

static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;
	ret = file_update_time(file);
	if (ret)
		goto out_unlock;

	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		mark_inode_dirty(inode);
	}

out_unlock:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

/*
 * The lock ordering for ext2 DAX fault paths is:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault (vfs, freeze)
 *     address_space->invalidate_lock
 *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
 *         ext2_inode_info->truncate_mutex
 *
 * The default page_lock and i_size verification done by non-DAX fault paths
 * is sufficient because ext2 doesn't support hole punching.
 */
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;
	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
		(vmf->vma->vm_flags & VM_SHARED);

	if (write) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}
	filemap_invalidate_lock_shared(inode->i_mapping);

	ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);

	filemap_invalidate_unlock_shared(inode->i_mapping);
	if (write)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
	.fault		= ext2_dax_fault,
	/*
	 * .huge_fault is not supported for DAX because allocation in ext2
	 * cannot be reliably aligned to huge page sizes and so pmd faults
	 * will always fail and fall back to regular faults.
	 */
	.page_mkwrite	= ext2_dax_fault,
	.pfn_mkwrite	= ext2_dax_fault,
};
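
/*
 * For DAX inodes we install ext2_dax_vm_ops so faults map storage pfns
 * directly instead of going through the page cache; everything else takes
 * the generic page-cache mmap path.
 */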
static int ext2_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;

	if (!IS_DAX(file_inode(file)))
		return generic_file_mmap_prepare(desc);

	file_accessed(file);
	desc->vm_ops = &ext2_dax_vm_ops;
	return 0;
}
#else
#define ext2_file_mmap_prepare generic_file_mmap_prepare
#endif

/*
 * Called when filp is released. This happens when all file descriptors
 * for a single struct file are closed. Note that different open() calls
 * for the same file yield different struct file structures.
 */
static int ext2_release_file(struct inode *inode, struct file *filp)
{
	if (filp->f_mode & FMODE_WRITE) {
		mutex_lock(&EXT2_I(inode)->truncate_mutex);
		ext2_discard_reservation(inode);
		mutex_unlock(&EXT2_I(inode)->truncate_mutex);
	}
	return 0;
}

int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int ret;
	struct super_block *sb = file->f_mapping->host->i_sb;

	ret = generic_buffers_fsync(file, start, end, datasync);
	if (ret == -EIO)
		/* We don't really know where the IO error happened... */
		ext2_error(sb, __func__,
			   "detected IO error when writing metadata buffers");
	return ret;
}

static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	trace_ext2_dio_read_begin(iocb, to, 0);
	inode_lock_shared(inode);
	ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
	inode_unlock_shared(inode);
	trace_ext2_dio_read_end(iocb, to, ret);

	return ret;
}

static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		goto out;

	/*
	 * If we are extending the file, we have to update i_size here before
	 * the page cache gets invalidated in iomap_dio_rw(). This prevents
	 * racing buffered reads from zeroing out too much from page cache
	 * pages. Note that all extending writes always happen synchronously
	 * with the inode lock held by ext2_dio_write_iter(). So it is safe
	 * to update the inode size here for extending file writes.
	 */
	pos += size;
	if (pos > i_size_read(inode)) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
out:
	trace_ext2_dio_write_endio(iocb, size, error);
	return error;
}

static const struct iomap_dio_ops ext2_dio_write_ops = {
	.end_io = ext2_dio_write_end_io,
};
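
/*
 * Direct I/O write path. The inode lock is held exclusively for the whole
 * write. Unaligned and size-extending writes are forced to complete
 * synchronously (IOMAP_DIO_FORCE_WAIT), and a short or rejected direct
 * write falls back to buffered I/O, after which the affected page-cache
 * range is written back and invalidated so later direct reads see the data.
 */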
static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;
	unsigned int flags = 0;
	unsigned long blocksize = inode->i_sb->s_blocksize;
	loff_t offset = iocb->ki_pos;
	loff_t count = iov_iter_count(from);
	ssize_t status = 0;

	trace_ext2_dio_write_begin(iocb, from, 0);
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	ret = kiocb_modified(iocb);
	if (ret)
		goto out_unlock;

	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
	    (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
		flags |= IOMAP_DIO_FORCE_WAIT;

	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
			   flags, NULL, 0);

	/* ENOTBLK is the magic return value for falling back to buffered I/O */
	if (ret == -ENOTBLK)
		ret = 0;

	if (ret < 0 && ret != -EIOCBQUEUED)
		ext2_write_failed(inode->i_mapping, offset + count);

	/* handle partial writes and fallback to buffered write */
	if (ret >= 0 && iov_iter_count(from)) {
		loff_t pos, endbyte;
		int ret2;

		iocb->ki_flags &= ~IOCB_DIRECT;
		pos = iocb->ki_pos;
		status = generic_perform_write(iocb, from);
		if (unlikely(status < 0)) {
			ret = status;
			goto out_unlock;
		}

		ret += status;
		endbyte = pos + status - 1;
		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
						    endbyte);
		if (!ret2)
			invalidate_mapping_pages(inode->i_mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		if (ret > 0)
			generic_write_sync(iocb, ret);
	}

out_unlock:
	inode_unlock(inode);
	if (status)
		trace_ext2_dio_write_buff_end(iocb, from, status);
	trace_ext2_dio_write_end(iocb, from, ret);
	return ret;
}

static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_read_iter(iocb, to);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_read_iter(iocb, to);

	return generic_file_read_iter(iocb, to);
}

static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_write_iter(iocb, from);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_write_iter(iocb, from);

	return generic_file_write_iter(iocb, from);
}
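
/*
 * FMODE_CAN_ODIRECT advertises O_DIRECT support to the VFS;
 * dquot_file_open() initializes disk quota state for writable opens.
 */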
static int ext2_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_CAN_ODIRECT;
	return dquot_file_open(inode, filp);
}

const struct file_operations ext2_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= ext2_file_read_iter,
	.write_iter	= ext2_file_write_iter,
	.unlocked_ioctl = ext2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext2_compat_ioctl,
#endif
	.mmap_prepare	= ext2_file_mmap_prepare,
	.open		= ext2_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_fsync,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
};

const struct inode_operations ext2_file_inode_operations = {
	.listxattr	= ext2_listxattr,
	.getattr	= ext2_getattr,
	.setattr	= ext2_setattr,
	.get_inode_acl	= ext2_get_acl,
	.set_acl	= ext2_set_acl,
	.fiemap		= ext2_fiemap,
	.fileattr_get	= ext2_fileattr_get,
	.fileattr_set	= ext2_fileattr_set,
};