// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext2/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext2 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/filelock.h>
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#include "trace.h"

#ifdef CONFIG_FS_DAX
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t ret;

	if (!iov_iter_count(to))
		return 0; /* skip atime */

	inode_lock_shared(inode);
	ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
	inode_unlock_shared(inode);

	file_accessed(iocb->ki_filp);
	return ret;
}

static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;
	ret = file_update_time(file);
	if (ret)
		goto out_unlock;

	ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		mark_inode_dirty(inode);
	}

out_unlock:
	inode_unlock(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}

/*
 * The lock ordering for ext2 DAX fault paths is:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault (vfs, freeze)
 *     address_space->invalidate_lock
 *       address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
 *         ext2_inode_info->truncate_mutex
 *
 * The default page_lock and i_size verification done by non-DAX fault paths
 * is sufficient because ext2 doesn't support hole punching.
 */
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret;
	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
		(vmf->vma->vm_flags & VM_SHARED);

	if (write) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}
	filemap_invalidate_lock_shared(inode->i_mapping);

	ret = dax_iomap_fault(vmf, 0, NULL, NULL, &ext2_iomap_ops);

	filemap_invalidate_unlock_shared(inode->i_mapping);
	if (write)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

static const struct vm_operations_struct ext2_dax_vm_ops = {
	.fault		= ext2_dax_fault,
	/*
	 * .huge_fault is not supported for DAX because allocation in ext2
	 * cannot be reliably aligned to huge page sizes and so pmd faults
	 * will always fail and fall back to regular faults.
	 */
	.page_mkwrite	= ext2_dax_fault,
	.pfn_mkwrite	= ext2_dax_fault,
};
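
/*
 * mmap() setup: DAX inodes get the fault handlers above, everything else
 * takes the generic page cache mmap path. Write faults on shared DAX
 * mappings reach ext2_dax_fault() via .page_mkwrite/.pfn_mkwrite, so the
 * freeze protection and timestamp update there cover them as well.
 */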
static int ext2_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;

	if (!IS_DAX(file_inode(file)))
		return generic_file_mmap_prepare(desc);

	file_accessed(file);
	desc->vm_ops = &ext2_dax_vm_ops;
	return 0;
}
#else
#define ext2_file_mmap_prepare generic_file_mmap_prepare
#endif

/*
 * Called when filp is released. This happens when all file descriptors
 * for a single struct file are closed. Note that different open() calls
 * for the same file yield different struct file structures.
 */
static int ext2_release_file (struct inode * inode, struct file * filp)
{
	if (filp->f_mode & FMODE_WRITE) {
		mutex_lock(&EXT2_I(inode)->truncate_mutex);
		ext2_discard_reservation(inode);
		mutex_unlock(&EXT2_I(inode)->truncate_mutex);
	}
	return 0;
}

int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	int ret;
	struct inode *inode = file->f_mapping->host;
	struct super_block *sb = inode->i_sb;

	ret = generic_buffers_fsync(file, start, end, datasync);
	if (ret == -EIO)
		/* We don't really know where the IO error happened... */
		ext2_error(sb, __func__,
			   "detected IO error when writing metadata buffers");
	return ret;
}

static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	trace_ext2_dio_read_begin(iocb, to, 0);
	inode_lock_shared(inode);
	ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
	inode_unlock_shared(inode);
	trace_ext2_dio_read_end(iocb, to, ret);

	return ret;
}

static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
				 int error, unsigned int flags)
{
	loff_t pos = iocb->ki_pos;
	struct inode *inode = file_inode(iocb->ki_filp);

	if (error)
		goto out;

	/*
	 * If we are extending the file, we have to update i_size here before
	 * page cache gets invalidated in iomap_dio_rw(). This prevents racing
	 * buffered reads from zeroing out too much from page cache pages.
	 * Note that all extending writes always happen synchronously with the
	 * inode lock held by ext2_dio_write_iter(). So it is safe to update
	 * inode size here for extending file writes.
	 */
	pos += size;
	if (pos > i_size_read(inode)) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
out:
	trace_ext2_dio_write_endio(iocb, size, error);
	return error;
}

static const struct iomap_dio_ops ext2_dio_write_ops = {
	.end_io = ext2_dio_write_end_io,
};
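
/*
 * Direct IO write path. The whole operation runs under the inode lock:
 * unaligned or size-extending writes are forced synchronous
 * (IOMAP_DIO_FORCE_WAIT) so that ext2_dio_write_end_io() can safely grow
 * i_size, and a short or refused (-ENOTBLK) direct write falls back to
 * buffered IO, after which the dirtied page cache range is written out and
 * invalidated to keep it coherent with the direct write that preceded it.
 */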
static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;
	unsigned int flags = 0;
	unsigned long blocksize = inode->i_sb->s_blocksize;
	loff_t offset = iocb->ki_pos;
	loff_t count = iov_iter_count(from);
	ssize_t status = 0;

	trace_ext2_dio_write_begin(iocb, from, 0);
	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out_unlock;

	ret = kiocb_modified(iocb);
	if (ret)
		goto out_unlock;

	/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
	if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
	   (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
		flags |= IOMAP_DIO_FORCE_WAIT;

	ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
			   flags, NULL, 0);

	/* ENOTBLK is magic return value for fallback to buffered-io */
	if (ret == -ENOTBLK)
		ret = 0;

	if (ret < 0 && ret != -EIOCBQUEUED)
		ext2_write_failed(inode->i_mapping, offset + count);

	/* handle case for partial write and for fallback to buffered write */
	if (ret >= 0 && iov_iter_count(from)) {
		loff_t pos, endbyte;
		int ret2;

		iocb->ki_flags &= ~IOCB_DIRECT;
		pos = iocb->ki_pos;
		status = generic_perform_write(iocb, from);
		if (unlikely(status < 0)) {
			ret = status;
			goto out_unlock;
		}

		ret += status;
		endbyte = pos + status - 1;
		ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
						    endbyte);
		if (!ret2)
			invalidate_mapping_pages(inode->i_mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		if (ret > 0)
			generic_write_sync(iocb, ret);
	}

out_unlock:
	inode_unlock(inode);
	if (status)
		trace_ext2_dio_write_buff_end(iocb, from, status);
	trace_ext2_dio_write_end(iocb, from, ret);
	return ret;
}

static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_read_iter(iocb, to);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_read_iter(iocb, to);

	return generic_file_read_iter(iocb, to);
}

static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_write_iter(iocb, from);
#endif
	if (iocb->ki_flags & IOCB_DIRECT)
		return ext2_dio_write_iter(iocb, from);

	return generic_file_write_iter(iocb, from);
}

static int ext2_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_CAN_ODIRECT;
	return dquot_file_open(inode, filp);
}

const struct file_operations ext2_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= ext2_file_read_iter,
	.write_iter	= ext2_file_write_iter,
	.unlocked_ioctl	= ext2_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext2_compat_ioctl,
#endif
	.mmap_prepare	= ext2_file_mmap_prepare,
	.open		= ext2_file_open,
	.release	= ext2_release_file,
	.fsync		= ext2_fsync,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= filemap_splice_read,
	.splice_write	= iter_file_splice_write,
	.setlease	= generic_setlease,
};
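
/*
 * Inode operations for regular files. Together with ext2_file_operations
 * above, this table is installed on S_ISREG() inodes when they are read in
 * or created; directories and special files use their own operation tables.
 */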
const struct inode_operations ext2_file_inode_operations = {
	.listxattr	= ext2_listxattr,
	.getattr	= ext2_getattr,
	.setattr	= ext2_setattr,
	.get_inode_acl	= ext2_get_acl,
	.set_acl	= ext2_set_acl,
	.fiemap		= ext2_fiemap,
	.fileattr_get	= ext2_fileattr_get,
	.fileattr_set	= ext2_fileattr_set,
};