// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
 */

#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched/signal.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>

#include <asm/page.h>
#include <linux/uaccess.h>

#include "attrib.h"
#include "bitmap.h"
#include "inode.h"
#include "debug.h"
#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "ntfs.h"

/**
 * ntfs_file_open - called when an inode is about to be opened
 * @vi:		inode to be opened
 * @filp:	file structure describing the inode
 *
 * Limit file size to the page cache limit on architectures where unsigned long
 * is 32-bits. This is the most we can do for now without overflowing the page
 * cache page index. Doing it this way means we don't run into problems because
 * of existing too large files. It would be better to allow the user to read
 * the beginning of the file but I doubt very much anyone is going to hit this
 * check on a 32-bit architecture, so there is no point in adding the extra
 * complexity required to support this.
 *
 * On 64-bit architectures, the check is hopefully optimized away by the
 * compiler.
 *
 * After the check passes, just call generic_file_open() to do its work.
 */
static int ntfs_file_open(struct inode *vi, struct file *filp)
{
	if (sizeof(unsigned long) < 8) {
		if (i_size_read(vi) > MAX_LFS_FILESIZE)
			return -EOVERFLOW;
	}
	return generic_file_open(vi, filp);
}

#ifdef NTFS_RW

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes.
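 * (The initialized size marks how far valid data extends into the attribute;
 * for a non-resident attribute, reads beyond it but below the data size
 * return zeroes.)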
 * This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the page cache and on
 * disk (if relevant complete pages are already uptodate in the page cache then
 * these are simply marked dirty).
 *
 * As a side-effect, the file size (vfs inode->i_size) may be incremented as,
 * in the resident attribute case, it is tied to the initialized size and, in
 * the non-resident attribute case, it may not fall below the initialized size.
 *
 * Note that if the attribute is resident, we do not need to touch the page
 * cache at all. This is because if the page cache page is not uptodate we
 * bring it uptodate later, when doing the write to the mft record since we
 * then already have the page mapped. And if the page is uptodate, the
 * non-initialized region will already have been zeroed when the page was
 * brought uptodate and the region may in fact already have been overwritten
 * with new data via mmap() based writes, so we cannot just zero it. And since
 * POSIX specifies that the behaviour of resizing a file whilst it is mmap()ped
 * is unspecified, we choose not to do zeroing and thus we do not need to touch
 * the page at all. For a more detailed explanation see ntfs_truncate() in
 * fs/ntfs/inode.c.
 *
 * Return 0 on success and -errno on error. In the case that an error is
 * encountered it is possible that the initialized size will already have been
 * incremented some way towards @new_init_size but it is guaranteed that if
 * this is the case, the necessary zeroing will also have happened and that all
 * metadata is self-consistent.
 *
 * Locking: i_mutex on the vfs inode corresponding to the ntfs inode @ni must be
 *	    held by the caller.
 */
static int ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
{
	s64 old_init_size;
	loff_t old_i_size;
	pgoff_t index, end_index;
	unsigned long flags;
	struct inode *vi = VFS_I(ni);
	ntfs_inode *base_ni;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a;
	ntfs_attr_search_ctx *ctx = NULL;
	struct address_space *mapping;
	struct page *page = NULL;
	u8 *kattr;
	int err;
	u32 attr_len;

	read_lock_irqsave(&ni->size_lock, flags);
	old_init_size = ni->initialized_size;
	old_i_size = i_size_read(vi);
	BUG_ON(new_init_size > ni->allocated_size);
	read_unlock_irqrestore(&ni->size_lock, flags);
	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
			"old_initialized_size 0x%llx, "
			"new_initialized_size 0x%llx, i_size 0x%llx.",
			vi->i_ino, (unsigned)le32_to_cpu(ni->type),
			(unsigned long long)old_init_size,
			(unsigned long long)new_init_size, old_i_size);
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Use goto to reduce indentation and we need the label below anyway. */
	if (NInoNonResident(ni))
		goto do_non_resident_extend;
	BUG_ON(old_init_size != old_i_size);
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		goto err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto err_out;
	}
	m = ctx->mrec;
	a = ctx->attr;
	BUG_ON(a->non_resident);
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->data.resident.value_length);
	BUG_ON(old_i_size != (loff_t)attr_len);
	/*
	 * Do the zeroing in the mft record and update the attribute size in
	 * the mft record.
	 */
	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
	memset(kattr + attr_len, 0, new_init_size - attr_len);
	a->data.resident.value_length = cpu_to_le32((u32)new_init_size);
	/* Finally, update the sizes in the vfs and ntfs inodes. */
	write_lock_irqsave(&ni->size_lock, flags);
	i_size_write(vi, new_init_size);
	ni->initialized_size = new_init_size;
	write_unlock_irqrestore(&ni->size_lock, flags);
	goto done;
do_non_resident_extend:
	/*
	 * If the new initialized size @new_init_size exceeds the current file
	 * size (vfs inode->i_size), we need to extend the file size to the
	 * new initialized size.
	 */
	if (new_init_size > old_i_size) {
		m = map_mft_record(base_ni);
		if (IS_ERR(m)) {
			err = PTR_ERR(m);
			m = NULL;
			goto err_out;
		}
		ctx = ntfs_attr_get_search_ctx(base_ni, m);
		if (unlikely(!ctx)) {
			err = -ENOMEM;
			goto err_out;
		}
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				CASE_SENSITIVE, 0, NULL, 0, ctx);
		if (unlikely(err)) {
			if (err == -ENOENT)
				err = -EIO;
			goto err_out;
		}
		m = ctx->mrec;
		a = ctx->attr;
		BUG_ON(!a->non_resident);
		BUG_ON(old_i_size != (loff_t)
				sle64_to_cpu(a->data.non_resident.data_size));
		a->data.non_resident.data_size = cpu_to_sle64(new_init_size);
		flush_dcache_mft_record_page(ctx->ntfs_ino);
		mark_mft_record_dirty(ctx->ntfs_ino);
		/* Update the file size in the vfs inode. */
		i_size_write(vi, new_init_size);
		ntfs_attr_put_search_ctx(ctx);
		ctx = NULL;
		unmap_mft_record(base_ni);
		m = NULL;
	}
	mapping = vi->i_mapping;
	index = old_init_size >> PAGE_SHIFT;
	end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	do {
		/*
		 * Read the page. If the page is not present, this will zero
		 * the uninitialized regions for us.
		 */
		page = read_mapping_page(mapping, index, NULL);
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto init_err_out;
		}
		/*
		 * Update the initialized size in the ntfs inode. This is
		 * enough to make ntfs_writepage() work.
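		 * (Writeback consults the initialized size to decide how
		 * much of a page holds valid data, so it must be raised
		 * before the page is marked dirty below.)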
		 */
		write_lock_irqsave(&ni->size_lock, flags);
		ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
		if (ni->initialized_size > new_init_size)
			ni->initialized_size = new_init_size;
		write_unlock_irqrestore(&ni->size_lock, flags);
		/* Set the page dirty so it gets written out. */
		set_page_dirty(page);
		put_page(page);
		/*
		 * Play nice with the vm and the rest of the system. This is
		 * very much needed as we can potentially be modifying the
		 * initialised size from a very small value to a really huge
		 * value, e.g.
		 *	f = open(somefile, O_TRUNC);
		 *	truncate(f, 10GiB);
		 *	seek(f, 10GiB);
		 *	write(f, 1);
		 * And this would mean we would be marking dirty hundreds of
		 * thousands of pages or as in the above example more than
		 * two and a half million pages!
		 *
		 * TODO: For sparse pages could optimize this workload by using
		 * the FsMisc / MiscFs page bit as a "PageIsSparse" bit. This
		 * would be set in read_folio for sparse pages and here we would
		 * not need to mark dirty any pages which have this bit set.
		 * The only caveat is that we have to clear the bit everywhere
		 * where we allocate any clusters that lie in the page or that
		 * contain the page.
		 *
		 * TODO: An even greater optimization would be for us to only
		 * call read_folio() on pages which are not in sparse regions as
		 * determined from the runlist. This would greatly reduce the
		 * number of pages we read and make dirty in the case of sparse
		 * files.
		 */
		balance_dirty_pages_ratelimited(mapping);
		cond_resched();
	} while (++index < end_index);
	read_lock_irqsave(&ni->size_lock, flags);
	BUG_ON(ni->initialized_size != new_init_size);
	read_unlock_irqrestore(&ni->size_lock, flags);
	/*
	 * Now bring in sync the initialized_size in the mft record.
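	 * (The loop above only updated the in-memory value in the ntfs
	 * inode; the on-disk attribute record still holds the old
	 * initialized size until this update is written back.)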
	 */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		goto init_err_out;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto init_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT)
			err = -EIO;
		goto init_err_out;
	}
	m = ctx->mrec;
	a = ctx->attr;
	BUG_ON(!a->non_resident);
	a->data.non_resident.initialized_size = cpu_to_sle64(new_init_size);
done:
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	mark_mft_record_dirty(ctx->ntfs_ino);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	ntfs_debug("Done, initialized_size 0x%llx, i_size 0x%llx.",
			(unsigned long long)new_init_size, i_size_read(vi));
	return 0;
init_err_out:
	write_lock_irqsave(&ni->size_lock, flags);
	ni->initialized_size = old_init_size;
	write_unlock_irqrestore(&ni->size_lock, flags);
err_out:
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	ntfs_debug("Failed. Returning error code %i.", err);
	return err;
}

static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
		struct iov_iter *from)
{
	loff_t pos;
	s64 end, ll;
	ssize_t err;
	unsigned long flags;
	struct file *file = iocb->ki_filp;
	struct inode *vi = file_inode(file);
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;

	ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
			"0x%llx, count 0x%zx.", vi->i_ino,
			(unsigned)le32_to_cpu(ni->type),
			(unsigned long long)iocb->ki_pos,
			iov_iter_count(from));
	err = generic_write_checks(iocb, from);
	if (unlikely(err <= 0))
		goto out;
	/*
	 * All checks have passed. Before we start doing any writing we want
	 * to abort any totally illegal writes.
	 */
	BUG_ON(NInoMstProtected(ni));
	BUG_ON(ni->type != AT_DATA);
	/* If file is encrypted, deny access, just like NT4. */
	if (NInoEncrypted(ni)) {
		/* Only $DATA attributes can be encrypted. */
		/*
		 * Reminder for later: Encrypted files are _always_
		 * non-resident so that the content can always be encrypted.
		 */
		ntfs_debug("Denying write access to encrypted file.");
		err = -EACCES;
		goto out;
	}
	if (NInoCompressed(ni)) {
		/* Only unnamed $DATA attribute can be compressed. */
		BUG_ON(ni->name_len);
		/*
		 * Reminder for later: If resident, the data is not actually
		 * compressed. Only on the switch to non-resident does
		 * compression kick in. This is in contrast to encrypted files
		 * (see above).
		 */
		ntfs_error(vi->i_sb, "Writing to compressed files is not "
				"implemented yet. Sorry.");
		err = -EOPNOTSUPP;
		goto out;
	}
	err = file_remove_privs(file);
	if (unlikely(err))
		goto out;
	/*
	 * Our ->update_time method always succeeds thus file_update_time()
	 * cannot fail either so there is no need to check the return code.
	 */
	file_update_time(file);
	pos = iocb->ki_pos;
	/* The first byte after the last cluster being written to. */
	end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
			~(u64)vol->cluster_size_mask;
	/*
	 * If the write goes beyond the allocated size, extend the allocation
	 * to cover the whole of the write, rounded up to the nearest cluster.
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	ll = ni->allocated_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (end > ll) {
		/*
		 * Extend the allocation without changing the data size.
		 *
		 * Note we ensure the allocation is big enough to at least
		 * write some data but we do not require the allocation to be
		 * complete, i.e. it may be partial.
		 */
		ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
		if (likely(ll >= 0)) {
			BUG_ON(pos >= ll);
			/*
			 * If the extension was partial, truncate the write.
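			 * (iov_iter_truncate() below shrinks the iterator so
			 * that only the ll - pos bytes which fit inside the
			 * new allocation are copied.)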
			 */
			if (end > ll) {
				ntfs_debug("Truncating write to inode 0x%lx, "
						"attribute type 0x%x, because "
						"the allocation was only "
						"partially extended.",
						vi->i_ino, (unsigned)
						le32_to_cpu(ni->type));
				iov_iter_truncate(from, ll - pos);
			}
		} else {
			err = ll;
			read_lock_irqsave(&ni->size_lock, flags);
			ll = ni->allocated_size;
			read_unlock_irqrestore(&ni->size_lock, flags);
			/* Perform a partial write if possible or fail. */
			if (pos < ll) {
				ntfs_debug("Truncating write to inode 0x%lx "
						"attribute type 0x%x, because "
						"extending the allocation "
						"failed (error %d).",
						vi->i_ino, (unsigned)
						le32_to_cpu(ni->type),
						(int)-err);
				iov_iter_truncate(from, ll - pos);
			} else {
				if (err != -ENOSPC)
					ntfs_error(vi->i_sb, "Cannot perform "
							"write to inode "
							"0x%lx, attribute "
							"type 0x%x, because "
							"extending the "
							"allocation failed "
							"(error %ld).",
							vi->i_ino, (unsigned)
							le32_to_cpu(ni->type),
							(long)-err);
				else
					ntfs_debug("Cannot perform write to "
							"inode 0x%lx, "
							"attribute type 0x%x, "
							"because there is no "
							"space left.",
							vi->i_ino, (unsigned)
							le32_to_cpu(ni->type));
				goto out;
			}
		}
	}
	/*
	 * If the write starts beyond the initialized size, extend it up to the
	 * beginning of the write and initialize all non-sparse space between
	 * the old initialized size and the new one. This automatically also
	 * increments the vfs inode->i_size to keep it above or equal to the
	 * initialized_size.
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	ll = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (pos > ll) {
		/*
		 * Wait for ongoing direct i/o to complete before proceeding.
		 * New direct i/o cannot start as we hold i_mutex.
		 */
		inode_dio_wait(vi);
		err = ntfs_attr_extend_initialized(ni, pos);
		if (unlikely(err < 0))
			ntfs_error(vi->i_sb, "Cannot perform write to inode "
					"0x%lx, attribute type 0x%x, because "
					"extending the initialized size "
					"failed (error %d).", vi->i_ino,
					(unsigned)le32_to_cpu(ni->type),
					(int)-err);
	}
out:
	return err;
}

/**
 * __ntfs_grab_cache_pages - obtain a number of locked pages
 * @mapping:	address space mapping from which to obtain page cache pages
 * @index:	starting index in @mapping at which to begin obtaining pages
 * @nr_pages:	number of page cache pages to obtain
 * @pages:	array of pages in which to return the obtained page cache pages
 * @cached_page: allocated but as yet unused page
 *
 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
 * starting at index @index.
 *
 * If a page is newly created, add it to the lru list.
 *
 * Note, the page locks are obtained in ascending page index order.
 */
static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
		pgoff_t index, const unsigned nr_pages, struct page **pages,
		struct page **cached_page)
{
	int err, nr;

	BUG_ON(!nr_pages);
	err = nr = 0;
	do {
		pages[nr] = find_get_page_flags(mapping, index, FGP_LOCK |
				FGP_ACCESSED);
		if (!pages[nr]) {
			if (!*cached_page) {
				*cached_page = page_cache_alloc(mapping);
				if (unlikely(!*cached_page)) {
					err = -ENOMEM;
					goto err_out;
				}
			}
			err = add_to_page_cache_lru(*cached_page, mapping,
					index,
					mapping_gfp_constraint(mapping, GFP_KERNEL));
			if (unlikely(err)) {
				if (err == -EEXIST)
					continue;
				goto err_out;
			}
			pages[nr] = *cached_page;
			*cached_page = NULL;
		}
		index++;
		nr++;
	} while (nr < nr_pages);
out:
	return err;
err_out:
	while (nr > 0) {
		unlock_page(pages[--nr]);
		put_page(pages[nr]);
	}
	goto out;
}

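/**
 * ntfs_submit_bh_for_read - submit a buffer head for asynchronous read i/o
 * @bh:		buffer head to submit
 *
 * The buffer is locked and its reference count raised before submission;
 * end_buffer_read_sync() unlocks the buffer and drops the reference on i/o
 * completion, so callers wait with wait_on_buffer() and then check
 * buffer_uptodate() for the result.
 */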
static inline void ntfs_submit_bh_for_read(struct buffer_head *bh)
{
	lock_buffer(bh);
	get_bh(bh);
	bh->b_end_io = end_buffer_read_sync;
	submit_bh(REQ_OP_READ, bh);
}

/**
 * ntfs_prepare_pages_for_non_resident_write - prepare pages for receiving data
 * @pages:	array of destination pages
 * @nr_pages:	number of pages in @pages
 * @pos:	byte position in file at which the write begins
 * @bytes:	number of bytes to be written
 *
 * This is called for non-resident attributes from ntfs_file_buffered_write()
 * with i_mutex held on the inode (@pages[0]->mapping->host). There are
 * @nr_pages pages in @pages which are locked but not kmap()ped. The source
 * data has not yet been copied into the @pages.
 *
 * Need to fill any holes with actual clusters, allocate buffers if necessary,
 * ensure all the buffers are mapped, and bring uptodate any buffers that are
 * only partially being written to.
 *
 * If @nr_pages is greater than one, we are guaranteed that the cluster size is
 * greater than PAGE_SIZE, that all pages in @pages are entirely inside
 * the same cluster and that they are the entirety of that cluster, and that
 * the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
 *
 * i_size is not to be modified yet.
 *
 * Return 0 on success or -errno on error.
 */
static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
		unsigned nr_pages, s64 pos, size_t bytes)
{
	VCN vcn, highest_vcn = 0, cpos, cend, bh_cpos, bh_cend;
	LCN lcn;
	s64 bh_pos, vcn_len, end, initialized_size;
	sector_t lcn_block;
	struct folio *folio;
	struct inode *vi;
	ntfs_inode *ni, *base_ni = NULL;
	ntfs_volume *vol;
	runlist_element *rl, *rl2;
	struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	ATTR_RECORD *a = NULL;
	unsigned long flags;
	u32 attr_rec_len = 0;
	unsigned blocksize, u;
	int err, mp_size;
	bool rl_write_locked, was_hole, is_retry;
	unsigned char blocksize_bits;
	struct {
		u8 runlist_merged:1;
		u8 mft_attr_mapped:1;
		u8 mp_rebuilt:1;
		u8 attr_switched:1;
	} status = { 0, 0, 0, 0 };

	BUG_ON(!nr_pages);
	BUG_ON(!pages);
	BUG_ON(!*pages);
	vi = pages[0]->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;
	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page "
			"index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.",
			vi->i_ino, ni->type, pages[0]->index, nr_pages,
			(long long)pos, bytes);
	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;
	rl_write_locked = false;
	rl = NULL;
	err = 0;
	vcn = lcn = -1;
	vcn_len = 0;
	lcn_block = -1;
	was_hole = false;
	cpos = pos >> vol->cluster_size_bits;
	end = pos + bytes;
	cend = (end + vol->cluster_size - 1) >> vol->cluster_size_bits;
	/*
	 * Loop over each buffer in each folio. Use goto to
	 * reduce indentation.
	 */
	u = 0;
do_next_folio:
	folio = page_folio(pages[u]);
	bh_pos = folio_pos(folio);
	head = folio_buffers(folio);
	if (!head)
		/*
		 * create_empty_buffers() will create uptodate/dirty
		 * buffers if the folio is uptodate/dirty.
		 */
		head = create_empty_buffers(folio, blocksize, 0);
	bh = head;
	do {
		VCN cdelta;
		s64 bh_end;
		unsigned bh_cofs;

		/* Clear buffer_new on all buffers to reinitialise state. */
		if (buffer_new(bh))
			clear_buffer_new(bh);
		bh_end = bh_pos + blocksize;
		bh_cpos = bh_pos >> vol->cluster_size_bits;
		bh_cofs = bh_pos & vol->cluster_size_mask;
		if (buffer_mapped(bh)) {
			/*
			 * The buffer is already mapped. If it is uptodate,
			 * ignore it.
			 */
			if (buffer_uptodate(bh))
				continue;
			/*
			 * The buffer is not uptodate. If the folio is
			 * uptodate, set the buffer uptodate; otherwise
			 * ignore it.
			 */
			if (folio_test_uptodate(folio)) {
				set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * Neither the folio nor the buffer are uptodate. If
			 * the buffer is only partially being written to, we
			 * need to read it in before the write, i.e. now.
			 */
			if ((bh_pos < pos && bh_end > pos) ||
					(bh_pos < end && bh_end > end)) {
				/*
				 * If the buffer is fully or partially within
				 * the initialized size, do an actual read.
				 * Otherwise, simply zero the buffer.
				 */
				read_lock_irqsave(&ni->size_lock, flags);
				initialized_size = ni->initialized_size;
				read_unlock_irqrestore(&ni->size_lock, flags);
				if (bh_pos < initialized_size) {
					ntfs_submit_bh_for_read(bh);
					*wait_bh++ = bh;
				} else {
					folio_zero_range(folio, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
			}
			continue;
		}
		/* Unmapped buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;
		/*
		 * If the current buffer is in the same clusters as the map
		 * cache, there is no need to check the runlist again. The
		 * map cache is made up of @vcn, which is the first cached file
		 * cluster, @vcn_len which is the number of cached file
		 * clusters, @lcn is the device cluster corresponding to @vcn,
		 * and @lcn_block is the block number corresponding to @lcn.
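		 *
		 * For example, with 4096-byte clusters and 512-byte blocks
		 * (cluster_size_bits - blocksize_bits == 3), a buffer two
		 * clusters past @vcn at byte offset 1024 into its cluster
		 * maps to block lcn_block + (2 << 3) + (1024 >> 9), that is
		 * lcn_block + 18.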
		 */
		cdelta = bh_cpos - vcn;
		if (likely(!cdelta || (cdelta > 0 && cdelta < vcn_len))) {
map_buffer_cached:
			BUG_ON(lcn < 0);
			bh->b_blocknr = lcn_block +
					(cdelta << (vol->cluster_size_bits -
					blocksize_bits)) +
					(bh_cofs >> blocksize_bits);
			set_buffer_mapped(bh);
			/*
			 * If the folio is uptodate so is the buffer. If the
			 * buffer is fully outside the write, we ignore it if
			 * it was already allocated and we mark it dirty so it
			 * gets written out if we allocated it. On the other
			 * hand, if we allocated the buffer but we are not
			 * marking it dirty we set buffer_new so we can do
			 * error recovery.
			 */
			if (folio_test_uptodate(folio)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
				if (unlikely(was_hole)) {
					/* We allocated the buffer. */
					clean_bdev_bh_alias(bh);
					if (bh_end <= pos || bh_pos >= end)
						mark_buffer_dirty(bh);
					else
						set_buffer_new(bh);
				}
				continue;
			}
			/* Page is _not_ uptodate. */
			if (likely(!was_hole)) {
				/*
				 * Buffer was already allocated. If it is not
				 * uptodate and is only partially being written
				 * to, we need to read it in before the write,
				 * i.e. now.
				 */
				if (!buffer_uptodate(bh) && bh_pos < end &&
						bh_end > pos &&
						(bh_pos < pos ||
						bh_end > end)) {
					/*
					 * If the buffer is fully or partially
					 * within the initialized size, do an
					 * actual read. Otherwise, simply zero
					 * the buffer.
					 */
					read_lock_irqsave(&ni->size_lock,
							flags);
					initialized_size = ni->initialized_size;
					read_unlock_irqrestore(&ni->size_lock,
							flags);
					if (bh_pos < initialized_size) {
						ntfs_submit_bh_for_read(bh);
						*wait_bh++ = bh;
					} else {
						folio_zero_range(folio,
								bh_offset(bh),
								blocksize);
						set_buffer_uptodate(bh);
					}
				}
				continue;
			}
			/* We allocated the buffer. */
			clean_bdev_bh_alias(bh);
			/*
			 * If the buffer is fully outside the write, zero it,
			 * set it uptodate, and mark it dirty so it gets
			 * written out. If it is partially being written to,
			 * zero the region surrounding the write but leave it
			 * to commit write to do anything else. Finally, if
			 * the buffer is fully being overwritten, do nothing.
			 */
			if (bh_end <= pos || bh_pos >= end) {
				if (!buffer_uptodate(bh)) {
					folio_zero_range(folio, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
				mark_buffer_dirty(bh);
				continue;
			}
			set_buffer_new(bh);
			if (!buffer_uptodate(bh) &&
					(bh_pos < pos || bh_end > end)) {
				u8 *kaddr;
				unsigned pofs;

				kaddr = kmap_local_folio(folio, 0);
				if (bh_pos < pos) {
					pofs = bh_pos & ~PAGE_MASK;
					memset(kaddr + pofs, 0, pos - bh_pos);
				}
				if (bh_end > end) {
					pofs = end & ~PAGE_MASK;
					memset(kaddr + pofs, 0, bh_end - end);
				}
				kunmap_local(kaddr);
				flush_dcache_folio(folio);
			}
			continue;
		}
		/*
		 * Slow path: this is the first buffer in the cluster. If it
		 * is outside the allocated size and is not uptodate, zero it
		 * and set it uptodate.
		 */
		read_lock_irqsave(&ni->size_lock, flags);
		initialized_size = ni->allocated_size;
		read_unlock_irqrestore(&ni->size_lock, flags);
		if (bh_pos > initialized_size) {
			if (folio_test_uptodate(folio)) {
				if (!buffer_uptodate(bh))
					set_buffer_uptodate(bh);
			} else if (!buffer_uptodate(bh)) {
				folio_zero_range(folio, bh_offset(bh),
						blocksize);
				set_buffer_uptodate(bh);
			}
			continue;
		}
		is_retry = false;
		if (!rl) {
			down_read(&ni->runlist.lock);
retry_remap:
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target cluster. */
			while (rl->length && rl[1].vcn <= bh_cpos)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, bh_cpos);
			if (likely(lcn >= 0)) {
				/*
				 * Successful remap, setup the map cache and
				 * use that to deal with the buffer.
				 */
				was_hole = false;
				vcn = bh_cpos;
				vcn_len = rl[1].vcn - vcn;
				lcn_block = lcn << (vol->cluster_size_bits -
						blocksize_bits);
				cdelta = 0;
				/*
				 * If the number of remaining clusters touched
				 * by the write is smaller than or equal to the
				 * number of cached clusters, unlock the
				 * runlist as the map cache will be used from
				 * now on.
				 */
				if (likely(vcn + vcn_len >= cend)) {
					if (rl_write_locked) {
						up_write(&ni->runlist.lock);
						rl_write_locked = false;
					} else
						up_read(&ni->runlist.lock);
					rl = NULL;
				}
				goto map_buffer_cached;
			}
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/*
		 * If it is not a hole and not out of bounds, the runlist is
		 * probably unmapped so try to map it now.
		 */
		if (unlikely(lcn != LCN_HOLE && lcn != LCN_ENOENT)) {
			if (likely(!is_retry && lcn == LCN_RL_NOT_MAPPED)) {
				/* Attempt to map runlist. */
				if (!rl_write_locked) {
					/*
					 * We need the runlist locked for
					 * writing, so if it is locked for
					 * reading relock it now and retry in
					 * case it changed whilst we dropped
					 * the lock.
					 */
					up_read(&ni->runlist.lock);
					down_write(&ni->runlist.lock);
					rl_write_locked = true;
					goto retry_remap;
				}
				err = ntfs_map_runlist_nolock(ni, bh_cpos,
						NULL);
				if (likely(!err)) {
					is_retry = true;
					goto retry_remap;
				}
				/*
				 * If @vcn is out of bounds, pretend @lcn is
				 * LCN_ENOENT. As long as the buffer is out
				 * of bounds this will work fine.
				 */
				if (err == -ENOENT) {
					lcn = LCN_ENOENT;
					err = 0;
					goto rl_not_mapped_enoent;
				}
			} else
				err = -EIO;
			/* Failed to map the buffer, even after retrying. */
			bh->b_blocknr = -1;
			ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"vcn offset 0x%x, because its "
					"location on disk could not be "
					"determined%s (error code %i).",
					ni->mft_no, ni->type,
					(unsigned long long)bh_cpos,
					(unsigned)bh_pos &
					vol->cluster_size_mask,
					is_retry ? " even after retrying" : "",
					err);
			break;
		}
rl_not_mapped_enoent:
		/*
		 * The buffer is in a hole or out of bounds. We need to fill
		 * the hole, unless the buffer is in a cluster which is not
		 * touched by the write, in which case we just leave the buffer
		 * unmapped. This can only happen when the cluster size is
		 * less than the page cache size.
		 */
		if (unlikely(vol->cluster_size < PAGE_SIZE)) {
			bh_cend = (bh_end + vol->cluster_size - 1) >>
					vol->cluster_size_bits;
			if ((bh_cend <= cpos || bh_cpos >= cend)) {
				bh->b_blocknr = -1;
				/*
				 * If the buffer is uptodate we skip it. If it
				 * is not but the folio is uptodate, we can set
				 * the buffer uptodate. If the folio is not
				 * uptodate, we can clear the buffer and set it
				 * uptodate. Whether this is worthwhile is
				 * debatable and this could be removed.
				 */
				if (folio_test_uptodate(folio)) {
					if (!buffer_uptodate(bh))
						set_buffer_uptodate(bh);
				} else if (!buffer_uptodate(bh)) {
					folio_zero_range(folio, bh_offset(bh),
							blocksize);
					set_buffer_uptodate(bh);
				}
				continue;
			}
		}
		/*
		 * Out of bounds buffer is invalid if it was not really out of
		 * bounds.
		 */
		BUG_ON(lcn != LCN_HOLE);
		/*
		 * We need the runlist locked for writing, so if it is locked
		 * for reading relock it now and retry in case it changed
		 * whilst we dropped the lock.
		 */
		BUG_ON(!rl);
		if (!rl_write_locked) {
			up_read(&ni->runlist.lock);
			down_write(&ni->runlist.lock);
			rl_write_locked = true;
			goto retry_remap;
		}
		/*
		 * Find the previous last allocated cluster.
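		 * (Its lcn is passed to ntfs_cluster_alloc() below as the
		 * allocation hint so that the newly allocated cluster lands
		 * as close as possible to the file's existing data.)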
		 */
		BUG_ON(rl->lcn != LCN_HOLE);
		lcn = -1;
		rl2 = rl;
		while (--rl2 >= ni->runlist.rl) {
			if (rl2->lcn >= 0) {
				lcn = rl2->lcn + rl2->length;
				break;
			}
		}
		rl2 = ntfs_cluster_alloc(vol, bh_cpos, 1, lcn, DATA_ZONE,
				false);
		if (IS_ERR(rl2)) {
			err = PTR_ERR(rl2);
			ntfs_debug("Failed to allocate cluster, error code %i.",
					err);
			break;
		}
		lcn = rl2->lcn;
		rl = ntfs_runlists_merge(ni->runlist.rl, rl2);
		if (IS_ERR(rl)) {
			err = PTR_ERR(rl);
			if (err != -ENOMEM)
				err = -EIO;
			if (ntfs_cluster_free_from_rl(vol, rl2)) {
				ntfs_error(vol->sb, "Failed to release "
						"allocated cluster in error "
						"code path. Run chkdsk to "
						"recover the lost cluster.");
				NVolSetErrors(vol);
			}
			ntfs_free(rl2);
			break;
		}
		ni->runlist.rl = rl;
		status.runlist_merged = 1;
		ntfs_debug("Allocated cluster, lcn 0x%llx.",
				(unsigned long long)lcn);
		/* Map and lock the mft record and get the attribute record. */
		if (!NInoAttr(ni))
			base_ni = ni;
		else
			base_ni = ni->ext.base_ntfs_ino;
		m = map_mft_record(base_ni);
		if (IS_ERR(m)) {
			err = PTR_ERR(m);
			break;
		}
		ctx = ntfs_attr_get_search_ctx(base_ni, m);
		if (unlikely(!ctx)) {
			err = -ENOMEM;
			unmap_mft_record(base_ni);
			break;
		}
		status.mft_attr_mapped = 1;
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				CASE_SENSITIVE, bh_cpos, NULL, 0, ctx);
		if (unlikely(err)) {
			if (err == -ENOENT)
				err = -EIO;
			break;
		}
		m = ctx->mrec;
		a = ctx->attr;
		/*
		 * Find the runlist element with which the attribute extent
		 * starts. Note, we cannot use the _attr_ version because we
		 * have mapped the mft record. That is ok because we know the
		 * runlist fragment must be mapped already to have ever gotten
		 * here, so we can just use the _rl_ version.
		 */
		vcn = sle64_to_cpu(a->data.non_resident.lowest_vcn);
		rl2 = ntfs_rl_find_vcn_nolock(rl, vcn);
		BUG_ON(!rl2);
		BUG_ON(!rl2->length);
		BUG_ON(rl2->lcn < LCN_HOLE);
		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
		/*
		 * If @highest_vcn is zero, calculate the real highest_vcn
		 * (which can really be zero).
		 */
		if (!highest_vcn)
			highest_vcn = (sle64_to_cpu(
					a->data.non_resident.allocated_size) >>
					vol->cluster_size_bits) - 1;
		/*
		 * Determine the size of the mapping pairs array for the new
		 * extent, i.e. the old extent with the hole filled.
		 */
		mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, vcn,
				highest_vcn);
		if (unlikely(mp_size <= 0)) {
			if (!(err = mp_size))
				err = -EIO;
			ntfs_debug("Failed to get size for mapping pairs "
					"array, error code %i.", err);
			break;
		}
		/*
		 * Resize the attribute record to fit the new mapping pairs
		 * array.
		 */
		attr_rec_len = le32_to_cpu(a->length);
		err = ntfs_attr_record_resize(m, a, mp_size + le16_to_cpu(
				a->data.non_resident.mapping_pairs_offset));
		if (unlikely(err)) {
			BUG_ON(err != -ENOSPC);
			// TODO: Deal with this by using the current attribute
			// and fill it with as much of the mapping pairs
			// array as possible. Then loop over each attribute
			// extent rewriting the mapping pairs arrays as we go
			// along and if, when we reach the end, we still do
			// not have enough space, try to resize the last
			// attribute extent and if even that fails, add a new
			// attribute extent.
			// We could also try to resize at each step in the hope
			// that we will not need to rewrite every single extent.
			// Note, we may need to decompress some extents to fill
			// the runlist as we are walking the extents...
			ntfs_error(vol->sb, "Not enough space in the mft "
					"record for the extended attribute "
					"record. This case is not "
					"implemented yet.");
			err = -EOPNOTSUPP;
			break;
		}
		status.mp_rebuilt = 1;
		/*
		 * Generate the mapping pairs array directly into the attribute
		 * record.
1089*1e9ea7e0SNamjae Jeon */ 1090*1e9ea7e0SNamjae Jeon err = ntfs_mapping_pairs_build(vol, (u8*)a + le16_to_cpu( 1091*1e9ea7e0SNamjae Jeon a->data.non_resident.mapping_pairs_offset), 1092*1e9ea7e0SNamjae Jeon mp_size, rl2, vcn, highest_vcn, NULL); 1093*1e9ea7e0SNamjae Jeon if (unlikely(err)) { 1094*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Cannot fill hole in inode 0x%lx, " 1095*1e9ea7e0SNamjae Jeon "attribute type 0x%x, because building " 1096*1e9ea7e0SNamjae Jeon "the mapping pairs failed with error " 1097*1e9ea7e0SNamjae Jeon "code %i.", vi->i_ino, 1098*1e9ea7e0SNamjae Jeon (unsigned)le32_to_cpu(ni->type), err); 1099*1e9ea7e0SNamjae Jeon err = -EIO; 1100*1e9ea7e0SNamjae Jeon break; 1101*1e9ea7e0SNamjae Jeon } 1102*1e9ea7e0SNamjae Jeon /* Update the highest_vcn but only if it was not set. */ 1103*1e9ea7e0SNamjae Jeon if (unlikely(!a->data.non_resident.highest_vcn)) 1104*1e9ea7e0SNamjae Jeon a->data.non_resident.highest_vcn = 1105*1e9ea7e0SNamjae Jeon cpu_to_sle64(highest_vcn); 1106*1e9ea7e0SNamjae Jeon /* 1107*1e9ea7e0SNamjae Jeon * If the attribute is sparse/compressed, update the compressed 1108*1e9ea7e0SNamjae Jeon * size in the ntfs_inode structure and the attribute record. 1109*1e9ea7e0SNamjae Jeon */ 1110*1e9ea7e0SNamjae Jeon if (likely(NInoSparse(ni) || NInoCompressed(ni))) { 1111*1e9ea7e0SNamjae Jeon /* 1112*1e9ea7e0SNamjae Jeon * If we are not in the first attribute extent, switch 1113*1e9ea7e0SNamjae Jeon * to it, but first ensure the changes will make it to 1114*1e9ea7e0SNamjae Jeon * disk later. 1115*1e9ea7e0SNamjae Jeon */ 1116*1e9ea7e0SNamjae Jeon if (a->data.non_resident.lowest_vcn) { 1117*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1118*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1119*1e9ea7e0SNamjae Jeon ntfs_attr_reinit_search_ctx(ctx); 1120*1e9ea7e0SNamjae Jeon err = ntfs_attr_lookup(ni->type, ni->name, 1121*1e9ea7e0SNamjae Jeon ni->name_len, CASE_SENSITIVE, 1122*1e9ea7e0SNamjae Jeon 0, NULL, 0, ctx); 1123*1e9ea7e0SNamjae Jeon if (unlikely(err)) { 1124*1e9ea7e0SNamjae Jeon status.attr_switched = 1; 1125*1e9ea7e0SNamjae Jeon break; 1126*1e9ea7e0SNamjae Jeon } 1127*1e9ea7e0SNamjae Jeon /* @m is not used any more so do not set it. */ 1128*1e9ea7e0SNamjae Jeon a = ctx->attr; 1129*1e9ea7e0SNamjae Jeon } 1130*1e9ea7e0SNamjae Jeon write_lock_irqsave(&ni->size_lock, flags); 1131*1e9ea7e0SNamjae Jeon ni->itype.compressed.size += vol->cluster_size; 1132*1e9ea7e0SNamjae Jeon a->data.non_resident.compressed_size = 1133*1e9ea7e0SNamjae Jeon cpu_to_sle64(ni->itype.compressed.size); 1134*1e9ea7e0SNamjae Jeon write_unlock_irqrestore(&ni->size_lock, flags); 1135*1e9ea7e0SNamjae Jeon } 1136*1e9ea7e0SNamjae Jeon /* Ensure the changes make it to disk. */ 1137*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1138*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1139*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1140*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1141*1e9ea7e0SNamjae Jeon /* Successfully filled the hole. */ 1142*1e9ea7e0SNamjae Jeon status.runlist_merged = 0; 1143*1e9ea7e0SNamjae Jeon status.mft_attr_mapped = 0; 1144*1e9ea7e0SNamjae Jeon status.mp_rebuilt = 0; 1145*1e9ea7e0SNamjae Jeon /* Setup the map cache and use that to deal with the buffer. 
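			 * As a worked example of the conversion below (sizes assumed
			 * for illustration, not mandated by NTFS): with 4096-byte
			 * clusters (cluster_size_bits = 12) and 512-byte blocks
			 * (blocksize_bits = 9), lcn_block = lcn << (12 - 9) = lcn << 3,
			 * i.e. the first of the eight 512-byte blocks backing the
			 * just-allocated cluster, while vcn_len = 1 records that the
			 * cache covers a single cluster.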
*/ 1146*1e9ea7e0SNamjae Jeon was_hole = true; 1147*1e9ea7e0SNamjae Jeon vcn = bh_cpos; 1148*1e9ea7e0SNamjae Jeon vcn_len = 1; 1149*1e9ea7e0SNamjae Jeon lcn_block = lcn << (vol->cluster_size_bits - blocksize_bits); 1150*1e9ea7e0SNamjae Jeon cdelta = 0; 1151*1e9ea7e0SNamjae Jeon /* 1152*1e9ea7e0SNamjae Jeon * If the number of remaining clusters in the @pages is smaller 1153*1e9ea7e0SNamjae Jeon * or equal to the number of cached clusters, unlock the 1154*1e9ea7e0SNamjae Jeon * runlist as the map cache will be used from now on. 1155*1e9ea7e0SNamjae Jeon */ 1156*1e9ea7e0SNamjae Jeon if (likely(vcn + vcn_len >= cend)) { 1157*1e9ea7e0SNamjae Jeon up_write(&ni->runlist.lock); 1158*1e9ea7e0SNamjae Jeon rl_write_locked = false; 1159*1e9ea7e0SNamjae Jeon rl = NULL; 1160*1e9ea7e0SNamjae Jeon } 1161*1e9ea7e0SNamjae Jeon goto map_buffer_cached; 1162*1e9ea7e0SNamjae Jeon } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1163*1e9ea7e0SNamjae Jeon /* If there are no errors, do the next page. */ 1164*1e9ea7e0SNamjae Jeon if (likely(!err && ++u < nr_pages)) 1165*1e9ea7e0SNamjae Jeon goto do_next_folio; 1166*1e9ea7e0SNamjae Jeon /* If there are no errors, release the runlist lock if we took it. */ 1167*1e9ea7e0SNamjae Jeon if (likely(!err)) { 1168*1e9ea7e0SNamjae Jeon if (unlikely(rl_write_locked)) { 1169*1e9ea7e0SNamjae Jeon up_write(&ni->runlist.lock); 1170*1e9ea7e0SNamjae Jeon rl_write_locked = false; 1171*1e9ea7e0SNamjae Jeon } else if (unlikely(rl)) 1172*1e9ea7e0SNamjae Jeon up_read(&ni->runlist.lock); 1173*1e9ea7e0SNamjae Jeon rl = NULL; 1174*1e9ea7e0SNamjae Jeon } 1175*1e9ea7e0SNamjae Jeon /* If we issued read requests, let them complete. */ 1176*1e9ea7e0SNamjae Jeon read_lock_irqsave(&ni->size_lock, flags); 1177*1e9ea7e0SNamjae Jeon initialized_size = ni->initialized_size; 1178*1e9ea7e0SNamjae Jeon read_unlock_irqrestore(&ni->size_lock, flags); 1179*1e9ea7e0SNamjae Jeon while (wait_bh > wait) { 1180*1e9ea7e0SNamjae Jeon bh = *--wait_bh; 1181*1e9ea7e0SNamjae Jeon wait_on_buffer(bh); 1182*1e9ea7e0SNamjae Jeon if (likely(buffer_uptodate(bh))) { 1183*1e9ea7e0SNamjae Jeon folio = bh->b_folio; 1184*1e9ea7e0SNamjae Jeon bh_pos = folio_pos(folio) + bh_offset(bh); 1185*1e9ea7e0SNamjae Jeon /* 1186*1e9ea7e0SNamjae Jeon * If the buffer overflows the initialized size, need 1187*1e9ea7e0SNamjae Jeon * to zero the overflowing region. 1188*1e9ea7e0SNamjae Jeon */ 1189*1e9ea7e0SNamjae Jeon if (unlikely(bh_pos + blocksize > initialized_size)) { 1190*1e9ea7e0SNamjae Jeon int ofs = 0; 1191*1e9ea7e0SNamjae Jeon 1192*1e9ea7e0SNamjae Jeon if (likely(bh_pos < initialized_size)) 1193*1e9ea7e0SNamjae Jeon ofs = initialized_size - bh_pos; 1194*1e9ea7e0SNamjae Jeon folio_zero_segment(folio, bh_offset(bh) + ofs, 1195*1e9ea7e0SNamjae Jeon blocksize); 1196*1e9ea7e0SNamjae Jeon } 1197*1e9ea7e0SNamjae Jeon } else /* if (unlikely(!buffer_uptodate(bh))) */ 1198*1e9ea7e0SNamjae Jeon err = -EIO; 1199*1e9ea7e0SNamjae Jeon } 1200*1e9ea7e0SNamjae Jeon if (likely(!err)) { 1201*1e9ea7e0SNamjae Jeon /* Clear buffer_new on all buffers. 
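		 * The flag only existed so that the error path below could tell
		 * which blocks were allocated by this call and may still hold
		 * stale, not yet zeroed data; once the write has fully
		 * succeeded it has served its purpose and must not leak into
		 * later operations on these buffers.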
*/ 1202*1e9ea7e0SNamjae Jeon u = 0; 1203*1e9ea7e0SNamjae Jeon do { 1204*1e9ea7e0SNamjae Jeon bh = head = page_buffers(pages[u]); 1205*1e9ea7e0SNamjae Jeon do { 1206*1e9ea7e0SNamjae Jeon if (buffer_new(bh)) 1207*1e9ea7e0SNamjae Jeon clear_buffer_new(bh); 1208*1e9ea7e0SNamjae Jeon } while ((bh = bh->b_this_page) != head); 1209*1e9ea7e0SNamjae Jeon } while (++u < nr_pages); 1210*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1211*1e9ea7e0SNamjae Jeon return err; 1212*1e9ea7e0SNamjae Jeon } 1213*1e9ea7e0SNamjae Jeon if (status.attr_switched) { 1214*1e9ea7e0SNamjae Jeon /* Get back to the attribute extent we modified. */ 1215*1e9ea7e0SNamjae Jeon ntfs_attr_reinit_search_ctx(ctx); 1216*1e9ea7e0SNamjae Jeon if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1217*1e9ea7e0SNamjae Jeon CASE_SENSITIVE, bh_cpos, NULL, 0, ctx)) { 1218*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to find required " 1219*1e9ea7e0SNamjae Jeon "attribute extent of attribute in " 1220*1e9ea7e0SNamjae Jeon "error code path. Run chkdsk to " 1221*1e9ea7e0SNamjae Jeon "recover."); 1222*1e9ea7e0SNamjae Jeon write_lock_irqsave(&ni->size_lock, flags); 1223*1e9ea7e0SNamjae Jeon ni->itype.compressed.size += vol->cluster_size; 1224*1e9ea7e0SNamjae Jeon write_unlock_irqrestore(&ni->size_lock, flags); 1225*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1226*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1227*1e9ea7e0SNamjae Jeon /* 1228*1e9ea7e0SNamjae Jeon * The only thing that is now wrong is the compressed 1229*1e9ea7e0SNamjae Jeon * size of the base attribute extent which chkdsk 1230*1e9ea7e0SNamjae Jeon * should be able to fix. 1231*1e9ea7e0SNamjae Jeon */ 1232*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1233*1e9ea7e0SNamjae Jeon } else { 1234*1e9ea7e0SNamjae Jeon m = ctx->mrec; 1235*1e9ea7e0SNamjae Jeon a = ctx->attr; 1236*1e9ea7e0SNamjae Jeon status.attr_switched = 0; 1237*1e9ea7e0SNamjae Jeon } 1238*1e9ea7e0SNamjae Jeon } 1239*1e9ea7e0SNamjae Jeon /* 1240*1e9ea7e0SNamjae Jeon * If the runlist has been modified, need to restore it by punching a 1241*1e9ea7e0SNamjae Jeon * hole into it and we then need to deallocate the on-disk cluster as 1242*1e9ea7e0SNamjae Jeon * well. Note, we only modify the runlist if we are able to generate a 1243*1e9ea7e0SNamjae Jeon * new mapping pairs array, i.e. only when the mapped attribute extent 1244*1e9ea7e0SNamjae Jeon * is not switched. 1245*1e9ea7e0SNamjae Jeon */ 1246*1e9ea7e0SNamjae Jeon if (status.runlist_merged && !status.attr_switched) { 1247*1e9ea7e0SNamjae Jeon BUG_ON(!rl_write_locked); 1248*1e9ea7e0SNamjae Jeon /* Make the file cluster we allocated sparse in the runlist. */ 1249*1e9ea7e0SNamjae Jeon if (ntfs_rl_punch_nolock(vol, &ni->runlist, bh_cpos, 1)) { 1250*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to punch hole into " 1251*1e9ea7e0SNamjae Jeon "attribute runlist in error code " 1252*1e9ea7e0SNamjae Jeon "path. Run chkdsk to recover the " 1253*1e9ea7e0SNamjae Jeon "lost cluster."); 1254*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1255*1e9ea7e0SNamjae Jeon } else /* if (success) */ { 1256*1e9ea7e0SNamjae Jeon status.runlist_merged = 0; 1257*1e9ea7e0SNamjae Jeon /* 1258*1e9ea7e0SNamjae Jeon * Deallocate the on-disk cluster we allocated but only 1259*1e9ea7e0SNamjae Jeon * if we succeeded in punching its vcn out of the 1260*1e9ea7e0SNamjae Jeon * runlist. 
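			 * Freeing the cluster then amounts to clearing bit
			 * @lcn in the volume's cluster allocation bitmap
			 * ($Bitmap), which is what the ntfs_bitmap_clear_bit()
			 * call below does under lcnbmp_lock.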
1261*1e9ea7e0SNamjae Jeon */ 1262*1e9ea7e0SNamjae Jeon down_write(&vol->lcnbmp_lock); 1263*1e9ea7e0SNamjae Jeon if (ntfs_bitmap_clear_bit(vol->lcnbmp_ino, lcn)) { 1264*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to release " 1265*1e9ea7e0SNamjae Jeon "allocated cluster in error " 1266*1e9ea7e0SNamjae Jeon "code path. Run chkdsk to " 1267*1e9ea7e0SNamjae Jeon "recover the lost cluster."); 1268*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1269*1e9ea7e0SNamjae Jeon } 1270*1e9ea7e0SNamjae Jeon up_write(&vol->lcnbmp_lock); 1271*1e9ea7e0SNamjae Jeon } 1272*1e9ea7e0SNamjae Jeon } 1273*1e9ea7e0SNamjae Jeon /* 1274*1e9ea7e0SNamjae Jeon * Resize the attribute record to its old size and rebuild the mapping 1275*1e9ea7e0SNamjae Jeon * pairs array. Note, we only can do this if the runlist has been 1276*1e9ea7e0SNamjae Jeon * restored to its old state which also implies that the mapped 1277*1e9ea7e0SNamjae Jeon * attribute extent is not switched. 1278*1e9ea7e0SNamjae Jeon */ 1279*1e9ea7e0SNamjae Jeon if (status.mp_rebuilt && !status.runlist_merged) { 1280*1e9ea7e0SNamjae Jeon if (ntfs_attr_record_resize(m, a, attr_rec_len)) { 1281*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to restore attribute " 1282*1e9ea7e0SNamjae Jeon "record in error code path. Run " 1283*1e9ea7e0SNamjae Jeon "chkdsk to recover."); 1284*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1285*1e9ea7e0SNamjae Jeon } else /* if (success) */ { 1286*1e9ea7e0SNamjae Jeon if (ntfs_mapping_pairs_build(vol, (u8*)a + 1287*1e9ea7e0SNamjae Jeon le16_to_cpu(a->data.non_resident. 1288*1e9ea7e0SNamjae Jeon mapping_pairs_offset), attr_rec_len - 1289*1e9ea7e0SNamjae Jeon le16_to_cpu(a->data.non_resident. 1290*1e9ea7e0SNamjae Jeon mapping_pairs_offset), ni->runlist.rl, 1291*1e9ea7e0SNamjae Jeon vcn, highest_vcn, NULL)) { 1292*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed to restore " 1293*1e9ea7e0SNamjae Jeon "mapping pairs array in error " 1294*1e9ea7e0SNamjae Jeon "code path. Run chkdsk to " 1295*1e9ea7e0SNamjae Jeon "recover."); 1296*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1297*1e9ea7e0SNamjae Jeon } 1298*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1299*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1300*1e9ea7e0SNamjae Jeon } 1301*1e9ea7e0SNamjae Jeon } 1302*1e9ea7e0SNamjae Jeon /* Release the mft record and the attribute. */ 1303*1e9ea7e0SNamjae Jeon if (status.mft_attr_mapped) { 1304*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1305*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1306*1e9ea7e0SNamjae Jeon } 1307*1e9ea7e0SNamjae Jeon /* Release the runlist lock. */ 1308*1e9ea7e0SNamjae Jeon if (rl_write_locked) 1309*1e9ea7e0SNamjae Jeon up_write(&ni->runlist.lock); 1310*1e9ea7e0SNamjae Jeon else if (rl) 1311*1e9ea7e0SNamjae Jeon up_read(&ni->runlist.lock); 1312*1e9ea7e0SNamjae Jeon /* 1313*1e9ea7e0SNamjae Jeon * Zero out any newly allocated blocks to avoid exposing stale data. 1314*1e9ea7e0SNamjae Jeon * If BH_New is set, we know that the block was newly allocated above 1315*1e9ea7e0SNamjae Jeon * and that it has not been fully zeroed and marked dirty yet. 
1316*1e9ea7e0SNamjae Jeon */ 1317*1e9ea7e0SNamjae Jeon nr_pages = u; 1318*1e9ea7e0SNamjae Jeon u = 0; 1319*1e9ea7e0SNamjae Jeon end = bh_cpos << vol->cluster_size_bits; 1320*1e9ea7e0SNamjae Jeon do { 1321*1e9ea7e0SNamjae Jeon folio = page_folio(pages[u]); 1322*1e9ea7e0SNamjae Jeon bh = head = folio_buffers(folio); 1323*1e9ea7e0SNamjae Jeon do { 1324*1e9ea7e0SNamjae Jeon if (u == nr_pages && 1325*1e9ea7e0SNamjae Jeon folio_pos(folio) + bh_offset(bh) >= end) 1326*1e9ea7e0SNamjae Jeon break; 1327*1e9ea7e0SNamjae Jeon if (!buffer_new(bh)) 1328*1e9ea7e0SNamjae Jeon continue; 1329*1e9ea7e0SNamjae Jeon clear_buffer_new(bh); 1330*1e9ea7e0SNamjae Jeon if (!buffer_uptodate(bh)) { 1331*1e9ea7e0SNamjae Jeon if (folio_test_uptodate(folio)) 1332*1e9ea7e0SNamjae Jeon set_buffer_uptodate(bh); 1333*1e9ea7e0SNamjae Jeon else { 1334*1e9ea7e0SNamjae Jeon folio_zero_range(folio, bh_offset(bh), 1335*1e9ea7e0SNamjae Jeon blocksize); 1336*1e9ea7e0SNamjae Jeon set_buffer_uptodate(bh); 1337*1e9ea7e0SNamjae Jeon } 1338*1e9ea7e0SNamjae Jeon } 1339*1e9ea7e0SNamjae Jeon mark_buffer_dirty(bh); 1340*1e9ea7e0SNamjae Jeon } while ((bh = bh->b_this_page) != head); 1341*1e9ea7e0SNamjae Jeon } while (++u <= nr_pages); 1342*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Failed. Returning error code %i.", err); 1343*1e9ea7e0SNamjae Jeon return err; 1344*1e9ea7e0SNamjae Jeon } 1345*1e9ea7e0SNamjae Jeon 1346*1e9ea7e0SNamjae Jeon static inline void ntfs_flush_dcache_pages(struct page **pages, 1347*1e9ea7e0SNamjae Jeon unsigned nr_pages) 1348*1e9ea7e0SNamjae Jeon { 1349*1e9ea7e0SNamjae Jeon BUG_ON(!nr_pages); 1350*1e9ea7e0SNamjae Jeon /* 1351*1e9ea7e0SNamjae Jeon * Warning: Do not do the decrement at the same time as the call to 1352*1e9ea7e0SNamjae Jeon * flush_dcache_page() because it is a NULL macro on i386 and hence the 1353*1e9ea7e0SNamjae Jeon * decrement never happens so the loop never terminates. 1354*1e9ea7e0SNamjae Jeon */ 1355*1e9ea7e0SNamjae Jeon do { 1356*1e9ea7e0SNamjae Jeon --nr_pages; 1357*1e9ea7e0SNamjae Jeon flush_dcache_page(pages[nr_pages]); 1358*1e9ea7e0SNamjae Jeon } while (nr_pages > 0); 1359*1e9ea7e0SNamjae Jeon } 1360*1e9ea7e0SNamjae Jeon 1361*1e9ea7e0SNamjae Jeon /** 1362*1e9ea7e0SNamjae Jeon * ntfs_commit_pages_after_non_resident_write - commit the received data 1363*1e9ea7e0SNamjae Jeon * @pages: array of destination pages 1364*1e9ea7e0SNamjae Jeon * @nr_pages: number of pages in @pages 1365*1e9ea7e0SNamjae Jeon * @pos: byte position in file at which the write begins 1366*1e9ea7e0SNamjae Jeon * @bytes: number of bytes to be written 1367*1e9ea7e0SNamjae Jeon * 1368*1e9ea7e0SNamjae Jeon * See description of ntfs_commit_pages_after_write(), below. 
1369*1e9ea7e0SNamjae Jeon */ 1370*1e9ea7e0SNamjae Jeon static inline int ntfs_commit_pages_after_non_resident_write( 1371*1e9ea7e0SNamjae Jeon struct page **pages, const unsigned nr_pages, 1372*1e9ea7e0SNamjae Jeon s64 pos, size_t bytes) 1373*1e9ea7e0SNamjae Jeon { 1374*1e9ea7e0SNamjae Jeon s64 end, initialized_size; 1375*1e9ea7e0SNamjae Jeon struct inode *vi; 1376*1e9ea7e0SNamjae Jeon ntfs_inode *ni, *base_ni; 1377*1e9ea7e0SNamjae Jeon struct buffer_head *bh, *head; 1378*1e9ea7e0SNamjae Jeon ntfs_attr_search_ctx *ctx; 1379*1e9ea7e0SNamjae Jeon MFT_RECORD *m; 1380*1e9ea7e0SNamjae Jeon ATTR_RECORD *a; 1381*1e9ea7e0SNamjae Jeon unsigned long flags; 1382*1e9ea7e0SNamjae Jeon unsigned blocksize, u; 1383*1e9ea7e0SNamjae Jeon int err; 1384*1e9ea7e0SNamjae Jeon 1385*1e9ea7e0SNamjae Jeon vi = pages[0]->mapping->host; 1386*1e9ea7e0SNamjae Jeon ni = NTFS_I(vi); 1387*1e9ea7e0SNamjae Jeon blocksize = vi->i_sb->s_blocksize; 1388*1e9ea7e0SNamjae Jeon end = pos + bytes; 1389*1e9ea7e0SNamjae Jeon u = 0; 1390*1e9ea7e0SNamjae Jeon do { 1391*1e9ea7e0SNamjae Jeon s64 bh_pos; 1392*1e9ea7e0SNamjae Jeon struct page *page; 1393*1e9ea7e0SNamjae Jeon bool partial; 1394*1e9ea7e0SNamjae Jeon 1395*1e9ea7e0SNamjae Jeon page = pages[u]; 1396*1e9ea7e0SNamjae Jeon bh_pos = (s64)page->index << PAGE_SHIFT; 1397*1e9ea7e0SNamjae Jeon bh = head = page_buffers(page); 1398*1e9ea7e0SNamjae Jeon partial = false; 1399*1e9ea7e0SNamjae Jeon do { 1400*1e9ea7e0SNamjae Jeon s64 bh_end; 1401*1e9ea7e0SNamjae Jeon 1402*1e9ea7e0SNamjae Jeon bh_end = bh_pos + blocksize; 1403*1e9ea7e0SNamjae Jeon if (bh_end <= pos || bh_pos >= end) { 1404*1e9ea7e0SNamjae Jeon if (!buffer_uptodate(bh)) 1405*1e9ea7e0SNamjae Jeon partial = true; 1406*1e9ea7e0SNamjae Jeon } else { 1407*1e9ea7e0SNamjae Jeon set_buffer_uptodate(bh); 1408*1e9ea7e0SNamjae Jeon mark_buffer_dirty(bh); 1409*1e9ea7e0SNamjae Jeon } 1410*1e9ea7e0SNamjae Jeon } while (bh_pos += blocksize, (bh = bh->b_this_page) != head); 1411*1e9ea7e0SNamjae Jeon /* 1412*1e9ea7e0SNamjae Jeon * If all buffers are now uptodate but the page is not, set the 1413*1e9ea7e0SNamjae Jeon * page uptodate. 1414*1e9ea7e0SNamjae Jeon */ 1415*1e9ea7e0SNamjae Jeon if (!partial && !PageUptodate(page)) 1416*1e9ea7e0SNamjae Jeon SetPageUptodate(page); 1417*1e9ea7e0SNamjae Jeon } while (++u < nr_pages); 1418*1e9ea7e0SNamjae Jeon /* 1419*1e9ea7e0SNamjae Jeon * Finally, if we do not need to update initialized_size or i_size we 1420*1e9ea7e0SNamjae Jeon * are finished. 1421*1e9ea7e0SNamjae Jeon */ 1422*1e9ea7e0SNamjae Jeon read_lock_irqsave(&ni->size_lock, flags); 1423*1e9ea7e0SNamjae Jeon initialized_size = ni->initialized_size; 1424*1e9ea7e0SNamjae Jeon read_unlock_irqrestore(&ni->size_lock, flags); 1425*1e9ea7e0SNamjae Jeon if (end <= initialized_size) { 1426*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1427*1e9ea7e0SNamjae Jeon return 0; 1428*1e9ea7e0SNamjae Jeon } 1429*1e9ea7e0SNamjae Jeon /* 1430*1e9ea7e0SNamjae Jeon * Update initialized_size/i_size as appropriate, both in the inode and 1431*1e9ea7e0SNamjae Jeon * the mft record. 1432*1e9ea7e0SNamjae Jeon */ 1433*1e9ea7e0SNamjae Jeon if (!NInoAttr(ni)) 1434*1e9ea7e0SNamjae Jeon base_ni = ni; 1435*1e9ea7e0SNamjae Jeon else 1436*1e9ea7e0SNamjae Jeon base_ni = ni->ext.base_ntfs_ino; 1437*1e9ea7e0SNamjae Jeon /* Map, pin, and lock the mft record. 
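	 * This is the recurring pattern in this file: map_mft_record() pins
	 * the mft record, ntfs_attr_get_search_ctx() and ntfs_attr_lookup()
	 * locate the attribute inside it, and every exit path undoes both
	 * via ntfs_attr_put_search_ctx() and unmap_mft_record(), as the
	 * err_out label below does.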
*/ 1438*1e9ea7e0SNamjae Jeon m = map_mft_record(base_ni); 1439*1e9ea7e0SNamjae Jeon if (IS_ERR(m)) { 1440*1e9ea7e0SNamjae Jeon err = PTR_ERR(m); 1441*1e9ea7e0SNamjae Jeon m = NULL; 1442*1e9ea7e0SNamjae Jeon ctx = NULL; 1443*1e9ea7e0SNamjae Jeon goto err_out; 1444*1e9ea7e0SNamjae Jeon } 1445*1e9ea7e0SNamjae Jeon BUG_ON(!NInoNonResident(ni)); 1446*1e9ea7e0SNamjae Jeon ctx = ntfs_attr_get_search_ctx(base_ni, m); 1447*1e9ea7e0SNamjae Jeon if (unlikely(!ctx)) { 1448*1e9ea7e0SNamjae Jeon err = -ENOMEM; 1449*1e9ea7e0SNamjae Jeon goto err_out; 1450*1e9ea7e0SNamjae Jeon } 1451*1e9ea7e0SNamjae Jeon err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1452*1e9ea7e0SNamjae Jeon CASE_SENSITIVE, 0, NULL, 0, ctx); 1453*1e9ea7e0SNamjae Jeon if (unlikely(err)) { 1454*1e9ea7e0SNamjae Jeon if (err == -ENOENT) 1455*1e9ea7e0SNamjae Jeon err = -EIO; 1456*1e9ea7e0SNamjae Jeon goto err_out; 1457*1e9ea7e0SNamjae Jeon } 1458*1e9ea7e0SNamjae Jeon a = ctx->attr; 1459*1e9ea7e0SNamjae Jeon BUG_ON(!a->non_resident); 1460*1e9ea7e0SNamjae Jeon write_lock_irqsave(&ni->size_lock, flags); 1461*1e9ea7e0SNamjae Jeon BUG_ON(end > ni->allocated_size); 1462*1e9ea7e0SNamjae Jeon ni->initialized_size = end; 1463*1e9ea7e0SNamjae Jeon a->data.non_resident.initialized_size = cpu_to_sle64(end); 1464*1e9ea7e0SNamjae Jeon if (end > i_size_read(vi)) { 1465*1e9ea7e0SNamjae Jeon i_size_write(vi, end); 1466*1e9ea7e0SNamjae Jeon a->data.non_resident.data_size = 1467*1e9ea7e0SNamjae Jeon a->data.non_resident.initialized_size; 1468*1e9ea7e0SNamjae Jeon } 1469*1e9ea7e0SNamjae Jeon write_unlock_irqrestore(&ni->size_lock, flags); 1470*1e9ea7e0SNamjae Jeon /* Mark the mft record dirty, so it gets written back. */ 1471*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1472*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1473*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1474*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1475*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1476*1e9ea7e0SNamjae Jeon return 0; 1477*1e9ea7e0SNamjae Jeon err_out: 1478*1e9ea7e0SNamjae Jeon if (ctx) 1479*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1480*1e9ea7e0SNamjae Jeon if (m) 1481*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1482*1e9ea7e0SNamjae Jeon ntfs_error(vi->i_sb, "Failed to update initialized_size/i_size (error " 1483*1e9ea7e0SNamjae Jeon "code %i).", err); 1484*1e9ea7e0SNamjae Jeon if (err != -ENOMEM) 1485*1e9ea7e0SNamjae Jeon NVolSetErrors(ni->vol); 1486*1e9ea7e0SNamjae Jeon return err; 1487*1e9ea7e0SNamjae Jeon } 1488*1e9ea7e0SNamjae Jeon 1489*1e9ea7e0SNamjae Jeon /** 1490*1e9ea7e0SNamjae Jeon * ntfs_commit_pages_after_write - commit the received data 1491*1e9ea7e0SNamjae Jeon * @pages: array of destination pages 1492*1e9ea7e0SNamjae Jeon * @nr_pages: number of pages in @pages 1493*1e9ea7e0SNamjae Jeon * @pos: byte position in file at which the write begins 1494*1e9ea7e0SNamjae Jeon * @bytes: number of bytes to be written 1495*1e9ea7e0SNamjae Jeon * 1496*1e9ea7e0SNamjae Jeon * This is called from ntfs_file_buffered_write() with i_mutex held on the inode 1497*1e9ea7e0SNamjae Jeon * (@pages[0]->mapping->host). There are @nr_pages pages in @pages which are 1498*1e9ea7e0SNamjae Jeon * locked but not kmap()ped. The source data has already been copied into the 1499*1e9ea7e0SNamjae Jeon * @page. ntfs_prepare_pages_for_non_resident_write() has been called before 1500*1e9ea7e0SNamjae Jeon * the data was copied (for non-resident attributes only) and it returned 1501*1e9ea7e0SNamjae Jeon * success. 
1502*1e9ea7e0SNamjae Jeon * 1503*1e9ea7e0SNamjae Jeon * Need to set uptodate and mark dirty all buffers within the boundary of the 1504*1e9ea7e0SNamjae Jeon * write. If all buffers in a page are uptodate we set the page uptodate, too. 1505*1e9ea7e0SNamjae Jeon * 1506*1e9ea7e0SNamjae Jeon * Setting the buffers dirty ensures that they get written out later when 1507*1e9ea7e0SNamjae Jeon * ntfs_writepage() is invoked by the VM. 1508*1e9ea7e0SNamjae Jeon * 1509*1e9ea7e0SNamjae Jeon * Finally, we need to update i_size and initialized_size as appropriate both 1510*1e9ea7e0SNamjae Jeon * in the inode and the mft record. 1511*1e9ea7e0SNamjae Jeon * 1512*1e9ea7e0SNamjae Jeon * This is modelled after fs/buffer.c::generic_commit_write(), which marks 1513*1e9ea7e0SNamjae Jeon * buffers uptodate and dirty, sets the page uptodate if all buffers in the 1514*1e9ea7e0SNamjae Jeon * page are uptodate, and updates i_size if the end of io is beyond i_size. In 1515*1e9ea7e0SNamjae Jeon * that case, it also marks the inode dirty. 1516*1e9ea7e0SNamjae Jeon * 1517*1e9ea7e0SNamjae Jeon * If things have gone as outlined in 1518*1e9ea7e0SNamjae Jeon * ntfs_prepare_pages_for_non_resident_write(), we do not need to do any page 1519*1e9ea7e0SNamjae Jeon * content modifications here for non-resident attributes. For resident 1520*1e9ea7e0SNamjae Jeon * attributes we need to do the uptodate bringing here which we combine with 1521*1e9ea7e0SNamjae Jeon * the copying into the mft record which means we save one atomic kmap. 1522*1e9ea7e0SNamjae Jeon * 1523*1e9ea7e0SNamjae Jeon * Return 0 on success or -errno on error. 1524*1e9ea7e0SNamjae Jeon */ 1525*1e9ea7e0SNamjae Jeon static int ntfs_commit_pages_after_write(struct page **pages, 1526*1e9ea7e0SNamjae Jeon const unsigned nr_pages, s64 pos, size_t bytes) 1527*1e9ea7e0SNamjae Jeon { 1528*1e9ea7e0SNamjae Jeon s64 end, initialized_size; 1529*1e9ea7e0SNamjae Jeon loff_t i_size; 1530*1e9ea7e0SNamjae Jeon struct inode *vi; 1531*1e9ea7e0SNamjae Jeon ntfs_inode *ni, *base_ni; 1532*1e9ea7e0SNamjae Jeon struct page *page; 1533*1e9ea7e0SNamjae Jeon ntfs_attr_search_ctx *ctx; 1534*1e9ea7e0SNamjae Jeon MFT_RECORD *m; 1535*1e9ea7e0SNamjae Jeon ATTR_RECORD *a; 1536*1e9ea7e0SNamjae Jeon char *kattr, *kaddr; 1537*1e9ea7e0SNamjae Jeon unsigned long flags; 1538*1e9ea7e0SNamjae Jeon u32 attr_len; 1539*1e9ea7e0SNamjae Jeon int err; 1540*1e9ea7e0SNamjae Jeon 1541*1e9ea7e0SNamjae Jeon BUG_ON(!nr_pages); 1542*1e9ea7e0SNamjae Jeon BUG_ON(!pages); 1543*1e9ea7e0SNamjae Jeon page = pages[0]; 1544*1e9ea7e0SNamjae Jeon BUG_ON(!page); 1545*1e9ea7e0SNamjae Jeon vi = page->mapping->host; 1546*1e9ea7e0SNamjae Jeon ni = NTFS_I(vi); 1547*1e9ea7e0SNamjae Jeon ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, start page " 1548*1e9ea7e0SNamjae Jeon "index 0x%lx, nr_pages 0x%x, pos 0x%llx, bytes 0x%zx.", 1549*1e9ea7e0SNamjae Jeon vi->i_ino, ni->type, page->index, nr_pages, 1550*1e9ea7e0SNamjae Jeon (long long)pos, bytes); 1551*1e9ea7e0SNamjae Jeon if (NInoNonResident(ni)) 1552*1e9ea7e0SNamjae Jeon return ntfs_commit_pages_after_non_resident_write(pages, 1553*1e9ea7e0SNamjae Jeon nr_pages, pos, bytes); 1554*1e9ea7e0SNamjae Jeon BUG_ON(nr_pages > 1); 1555*1e9ea7e0SNamjae Jeon /* 1556*1e9ea7e0SNamjae Jeon * Attribute is resident, implying it is not compressed, encrypted, or 1557*1e9ea7e0SNamjae Jeon * sparse. 
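	 * Because a resident attribute's value lives entirely inside the
	 * mft record, committing the write in this case is a memcpy() from
	 * the page into the record rather than a matter of marking buffers
	 * dirty.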
1558*1e9ea7e0SNamjae Jeon */ 1559*1e9ea7e0SNamjae Jeon if (!NInoAttr(ni)) 1560*1e9ea7e0SNamjae Jeon base_ni = ni; 1561*1e9ea7e0SNamjae Jeon else 1562*1e9ea7e0SNamjae Jeon base_ni = ni->ext.base_ntfs_ino; 1563*1e9ea7e0SNamjae Jeon BUG_ON(NInoNonResident(ni)); 1564*1e9ea7e0SNamjae Jeon /* Map, pin, and lock the mft record. */ 1565*1e9ea7e0SNamjae Jeon m = map_mft_record(base_ni); 1566*1e9ea7e0SNamjae Jeon if (IS_ERR(m)) { 1567*1e9ea7e0SNamjae Jeon err = PTR_ERR(m); 1568*1e9ea7e0SNamjae Jeon m = NULL; 1569*1e9ea7e0SNamjae Jeon ctx = NULL; 1570*1e9ea7e0SNamjae Jeon goto err_out; 1571*1e9ea7e0SNamjae Jeon } 1572*1e9ea7e0SNamjae Jeon ctx = ntfs_attr_get_search_ctx(base_ni, m); 1573*1e9ea7e0SNamjae Jeon if (unlikely(!ctx)) { 1574*1e9ea7e0SNamjae Jeon err = -ENOMEM; 1575*1e9ea7e0SNamjae Jeon goto err_out; 1576*1e9ea7e0SNamjae Jeon } 1577*1e9ea7e0SNamjae Jeon err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 1578*1e9ea7e0SNamjae Jeon CASE_SENSITIVE, 0, NULL, 0, ctx); 1579*1e9ea7e0SNamjae Jeon if (unlikely(err)) { 1580*1e9ea7e0SNamjae Jeon if (err == -ENOENT) 1581*1e9ea7e0SNamjae Jeon err = -EIO; 1582*1e9ea7e0SNamjae Jeon goto err_out; 1583*1e9ea7e0SNamjae Jeon } 1584*1e9ea7e0SNamjae Jeon a = ctx->attr; 1585*1e9ea7e0SNamjae Jeon BUG_ON(a->non_resident); 1586*1e9ea7e0SNamjae Jeon /* The total length of the attribute value. */ 1587*1e9ea7e0SNamjae Jeon attr_len = le32_to_cpu(a->data.resident.value_length); 1588*1e9ea7e0SNamjae Jeon i_size = i_size_read(vi); 1589*1e9ea7e0SNamjae Jeon BUG_ON(attr_len != i_size); 1590*1e9ea7e0SNamjae Jeon BUG_ON(pos > attr_len); 1591*1e9ea7e0SNamjae Jeon end = pos + bytes; 1592*1e9ea7e0SNamjae Jeon BUG_ON(end > le32_to_cpu(a->length) - 1593*1e9ea7e0SNamjae Jeon le16_to_cpu(a->data.resident.value_offset)); 1594*1e9ea7e0SNamjae Jeon kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset); 1595*1e9ea7e0SNamjae Jeon kaddr = kmap_atomic(page); 1596*1e9ea7e0SNamjae Jeon /* Copy the received data from the page to the mft record. */ 1597*1e9ea7e0SNamjae Jeon memcpy(kattr + pos, kaddr + pos, bytes); 1598*1e9ea7e0SNamjae Jeon /* Update the attribute length if necessary. */ 1599*1e9ea7e0SNamjae Jeon if (end > attr_len) { 1600*1e9ea7e0SNamjae Jeon attr_len = end; 1601*1e9ea7e0SNamjae Jeon a->data.resident.value_length = cpu_to_le32(attr_len); 1602*1e9ea7e0SNamjae Jeon } 1603*1e9ea7e0SNamjae Jeon /* 1604*1e9ea7e0SNamjae Jeon * If the page is not uptodate, bring the out of bounds area(s) 1605*1e9ea7e0SNamjae Jeon * uptodate by copying data from the mft record to the page. 1606*1e9ea7e0SNamjae Jeon */ 1607*1e9ea7e0SNamjae Jeon if (!PageUptodate(page)) { 1608*1e9ea7e0SNamjae Jeon if (pos > 0) 1609*1e9ea7e0SNamjae Jeon memcpy(kaddr, kattr, pos); 1610*1e9ea7e0SNamjae Jeon if (end < attr_len) 1611*1e9ea7e0SNamjae Jeon memcpy(kaddr + end, kattr + end, attr_len - end); 1612*1e9ea7e0SNamjae Jeon /* Zero the region outside the end of the attribute value. */ 1613*1e9ea7e0SNamjae Jeon memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len); 1614*1e9ea7e0SNamjae Jeon flush_dcache_page(page); 1615*1e9ea7e0SNamjae Jeon SetPageUptodate(page); 1616*1e9ea7e0SNamjae Jeon } 1617*1e9ea7e0SNamjae Jeon kunmap_atomic(kaddr); 1618*1e9ea7e0SNamjae Jeon /* Update initialized_size/i_size if necessary. 
*/ 1619*1e9ea7e0SNamjae Jeon read_lock_irqsave(&ni->size_lock, flags); 1620*1e9ea7e0SNamjae Jeon initialized_size = ni->initialized_size; 1621*1e9ea7e0SNamjae Jeon BUG_ON(end > ni->allocated_size); 1622*1e9ea7e0SNamjae Jeon read_unlock_irqrestore(&ni->size_lock, flags); 1623*1e9ea7e0SNamjae Jeon BUG_ON(initialized_size != i_size); 1624*1e9ea7e0SNamjae Jeon if (end > initialized_size) { 1625*1e9ea7e0SNamjae Jeon write_lock_irqsave(&ni->size_lock, flags); 1626*1e9ea7e0SNamjae Jeon ni->initialized_size = end; 1627*1e9ea7e0SNamjae Jeon i_size_write(vi, end); 1628*1e9ea7e0SNamjae Jeon write_unlock_irqrestore(&ni->size_lock, flags); 1629*1e9ea7e0SNamjae Jeon } 1630*1e9ea7e0SNamjae Jeon /* Mark the mft record dirty, so it gets written back. */ 1631*1e9ea7e0SNamjae Jeon flush_dcache_mft_record_page(ctx->ntfs_ino); 1632*1e9ea7e0SNamjae Jeon mark_mft_record_dirty(ctx->ntfs_ino); 1633*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1634*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1635*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1636*1e9ea7e0SNamjae Jeon return 0; 1637*1e9ea7e0SNamjae Jeon err_out: 1638*1e9ea7e0SNamjae Jeon if (err == -ENOMEM) { 1639*1e9ea7e0SNamjae Jeon ntfs_warning(vi->i_sb, "Error allocating memory required to " 1640*1e9ea7e0SNamjae Jeon "commit the write."); 1641*1e9ea7e0SNamjae Jeon if (PageUptodate(page)) { 1642*1e9ea7e0SNamjae Jeon ntfs_warning(vi->i_sb, "Page is uptodate, setting " 1643*1e9ea7e0SNamjae Jeon "dirty so the write will be retried " 1644*1e9ea7e0SNamjae Jeon "later on by the VM."); 1645*1e9ea7e0SNamjae Jeon /* 1646*1e9ea7e0SNamjae Jeon * Put the page on mapping->dirty_pages, but leave its 1647*1e9ea7e0SNamjae Jeon * buffers' dirty state as-is. 1648*1e9ea7e0SNamjae Jeon */ 1649*1e9ea7e0SNamjae Jeon __set_page_dirty_nobuffers(page); 1650*1e9ea7e0SNamjae Jeon err = 0; 1651*1e9ea7e0SNamjae Jeon } else 1652*1e9ea7e0SNamjae Jeon ntfs_error(vi->i_sb, "Page is not uptodate. Written " 1653*1e9ea7e0SNamjae Jeon "data has been lost."); 1654*1e9ea7e0SNamjae Jeon } else { 1655*1e9ea7e0SNamjae Jeon ntfs_error(vi->i_sb, "Resident attribute commit write failed " 1656*1e9ea7e0SNamjae Jeon "with error %i.", err); 1657*1e9ea7e0SNamjae Jeon NVolSetErrors(ni->vol); 1658*1e9ea7e0SNamjae Jeon } 1659*1e9ea7e0SNamjae Jeon if (ctx) 1660*1e9ea7e0SNamjae Jeon ntfs_attr_put_search_ctx(ctx); 1661*1e9ea7e0SNamjae Jeon if (m) 1662*1e9ea7e0SNamjae Jeon unmap_mft_record(base_ni); 1663*1e9ea7e0SNamjae Jeon return err; 1664*1e9ea7e0SNamjae Jeon } 1665*1e9ea7e0SNamjae Jeon 1666*1e9ea7e0SNamjae Jeon /* 1667*1e9ea7e0SNamjae Jeon * Copy as much as we can into the pages and return the number of bytes which 1668*1e9ea7e0SNamjae Jeon * were successfully copied. If a fault is encountered then clear the pages 1669*1e9ea7e0SNamjae Jeon * out to (ofs + bytes) and return the number of bytes which were copied. 
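 * (copy_page_from_iter_atomic() may legitimately copy less than asked for
 * if the user buffer faults; zeroing the remainder keeps the destination
 * pages in a defined state, mirroring what __copy_from_user() guarantees
 * for its destination, so the caller can simply shorten the write and
 * retry.)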
1670*1e9ea7e0SNamjae Jeon */ 1671*1e9ea7e0SNamjae Jeon static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages, 1672*1e9ea7e0SNamjae Jeon unsigned ofs, struct iov_iter *i, size_t bytes) 1673*1e9ea7e0SNamjae Jeon { 1674*1e9ea7e0SNamjae Jeon struct page **last_page = pages + nr_pages; 1675*1e9ea7e0SNamjae Jeon size_t total = 0; 1676*1e9ea7e0SNamjae Jeon unsigned len, copied; 1677*1e9ea7e0SNamjae Jeon 1678*1e9ea7e0SNamjae Jeon do { 1679*1e9ea7e0SNamjae Jeon len = PAGE_SIZE - ofs; 1680*1e9ea7e0SNamjae Jeon if (len > bytes) 1681*1e9ea7e0SNamjae Jeon len = bytes; 1682*1e9ea7e0SNamjae Jeon copied = copy_page_from_iter_atomic(*pages, ofs, len, i); 1683*1e9ea7e0SNamjae Jeon total += copied; 1684*1e9ea7e0SNamjae Jeon bytes -= copied; 1685*1e9ea7e0SNamjae Jeon if (!bytes) 1686*1e9ea7e0SNamjae Jeon break; 1687*1e9ea7e0SNamjae Jeon if (copied < len) 1688*1e9ea7e0SNamjae Jeon goto err; 1689*1e9ea7e0SNamjae Jeon ofs = 0; 1690*1e9ea7e0SNamjae Jeon } while (++pages < last_page); 1691*1e9ea7e0SNamjae Jeon out: 1692*1e9ea7e0SNamjae Jeon return total; 1693*1e9ea7e0SNamjae Jeon err: 1694*1e9ea7e0SNamjae Jeon /* Zero the rest of the target like __copy_from_user(). */ 1695*1e9ea7e0SNamjae Jeon len = PAGE_SIZE - copied; 1696*1e9ea7e0SNamjae Jeon do { 1697*1e9ea7e0SNamjae Jeon if (len > bytes) 1698*1e9ea7e0SNamjae Jeon len = bytes; 1699*1e9ea7e0SNamjae Jeon zero_user(*pages, copied, len); 1700*1e9ea7e0SNamjae Jeon bytes -= len; 1701*1e9ea7e0SNamjae Jeon copied = 0; 1702*1e9ea7e0SNamjae Jeon len = PAGE_SIZE; 1703*1e9ea7e0SNamjae Jeon } while (++pages < last_page); 1704*1e9ea7e0SNamjae Jeon goto out; 1705*1e9ea7e0SNamjae Jeon } 1706*1e9ea7e0SNamjae Jeon 1707*1e9ea7e0SNamjae Jeon /** 1708*1e9ea7e0SNamjae Jeon * ntfs_perform_write - perform buffered write to a file 1709*1e9ea7e0SNamjae Jeon * @file: file to write to 1710*1e9ea7e0SNamjae Jeon * @i: iov_iter with data to write 1711*1e9ea7e0SNamjae Jeon * @pos: byte offset in file at which to begin writing to 1712*1e9ea7e0SNamjae Jeon */ 1713*1e9ea7e0SNamjae Jeon static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i, 1714*1e9ea7e0SNamjae Jeon loff_t pos) 1715*1e9ea7e0SNamjae Jeon { 1716*1e9ea7e0SNamjae Jeon struct address_space *mapping = file->f_mapping; 1717*1e9ea7e0SNamjae Jeon struct inode *vi = mapping->host; 1718*1e9ea7e0SNamjae Jeon ntfs_inode *ni = NTFS_I(vi); 1719*1e9ea7e0SNamjae Jeon ntfs_volume *vol = ni->vol; 1720*1e9ea7e0SNamjae Jeon struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER]; 1721*1e9ea7e0SNamjae Jeon struct page *cached_page = NULL; 1722*1e9ea7e0SNamjae Jeon VCN last_vcn; 1723*1e9ea7e0SNamjae Jeon LCN lcn; 1724*1e9ea7e0SNamjae Jeon size_t bytes; 1725*1e9ea7e0SNamjae Jeon ssize_t status, written = 0; 1726*1e9ea7e0SNamjae Jeon unsigned nr_pages; 1727*1e9ea7e0SNamjae Jeon 1728*1e9ea7e0SNamjae Jeon ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " 1729*1e9ea7e0SNamjae Jeon "0x%llx, count 0x%lx.", vi->i_ino, 1730*1e9ea7e0SNamjae Jeon (unsigned)le32_to_cpu(ni->type), 1731*1e9ea7e0SNamjae Jeon (unsigned long long)pos, 1732*1e9ea7e0SNamjae Jeon (unsigned long)iov_iter_count(i)); 1733*1e9ea7e0SNamjae Jeon /* 1734*1e9ea7e0SNamjae Jeon * If a previous ntfs_truncate() failed, repeat it and abort if it 1735*1e9ea7e0SNamjae Jeon * fails again. 
1736*1e9ea7e0SNamjae Jeon */ 1737*1e9ea7e0SNamjae Jeon if (unlikely(NInoTruncateFailed(ni))) { 1738*1e9ea7e0SNamjae Jeon int err; 1739*1e9ea7e0SNamjae Jeon 1740*1e9ea7e0SNamjae Jeon inode_dio_wait(vi); 1741*1e9ea7e0SNamjae Jeon err = ntfs_truncate(vi); 1742*1e9ea7e0SNamjae Jeon if (err || NInoTruncateFailed(ni)) { 1743*1e9ea7e0SNamjae Jeon if (!err) 1744*1e9ea7e0SNamjae Jeon err = -EIO; 1745*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Cannot perform write to inode " 1746*1e9ea7e0SNamjae Jeon "0x%lx, attribute type 0x%x, because " 1747*1e9ea7e0SNamjae Jeon "ntfs_truncate() failed (error code " 1748*1e9ea7e0SNamjae Jeon "%i).", vi->i_ino, 1749*1e9ea7e0SNamjae Jeon (unsigned)le32_to_cpu(ni->type), err); 1750*1e9ea7e0SNamjae Jeon return err; 1751*1e9ea7e0SNamjae Jeon } 1752*1e9ea7e0SNamjae Jeon } 1753*1e9ea7e0SNamjae Jeon /* 1754*1e9ea7e0SNamjae Jeon * Determine the number of pages per cluster for non-resident 1755*1e9ea7e0SNamjae Jeon * attributes. 1756*1e9ea7e0SNamjae Jeon */ 1757*1e9ea7e0SNamjae Jeon nr_pages = 1; 1758*1e9ea7e0SNamjae Jeon if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni)) 1759*1e9ea7e0SNamjae Jeon nr_pages = vol->cluster_size >> PAGE_SHIFT; 1760*1e9ea7e0SNamjae Jeon last_vcn = -1; 1761*1e9ea7e0SNamjae Jeon do { 1762*1e9ea7e0SNamjae Jeon VCN vcn; 1763*1e9ea7e0SNamjae Jeon pgoff_t start_idx; 1764*1e9ea7e0SNamjae Jeon unsigned ofs, do_pages, u; 1765*1e9ea7e0SNamjae Jeon size_t copied; 1766*1e9ea7e0SNamjae Jeon 1767*1e9ea7e0SNamjae Jeon start_idx = pos >> PAGE_SHIFT; 1768*1e9ea7e0SNamjae Jeon ofs = pos & ~PAGE_MASK; 1769*1e9ea7e0SNamjae Jeon bytes = PAGE_SIZE - ofs; 1770*1e9ea7e0SNamjae Jeon do_pages = 1; 1771*1e9ea7e0SNamjae Jeon if (nr_pages > 1) { 1772*1e9ea7e0SNamjae Jeon vcn = pos >> vol->cluster_size_bits; 1773*1e9ea7e0SNamjae Jeon if (vcn != last_vcn) { 1774*1e9ea7e0SNamjae Jeon last_vcn = vcn; 1775*1e9ea7e0SNamjae Jeon /* 1776*1e9ea7e0SNamjae Jeon * Get the lcn of the vcn the write is in. If 1777*1e9ea7e0SNamjae Jeon * it is a hole, need to lock down all pages in 1778*1e9ea7e0SNamjae Jeon * the cluster. 
1779*1e9ea7e0SNamjae Jeon */ 1780*1e9ea7e0SNamjae Jeon down_read(&ni->runlist.lock); 1781*1e9ea7e0SNamjae Jeon lcn = ntfs_attr_vcn_to_lcn_nolock(ni, pos >> 1782*1e9ea7e0SNamjae Jeon vol->cluster_size_bits, false); 1783*1e9ea7e0SNamjae Jeon up_read(&ni->runlist.lock); 1784*1e9ea7e0SNamjae Jeon if (unlikely(lcn < LCN_HOLE)) { 1785*1e9ea7e0SNamjae Jeon if (lcn == LCN_ENOMEM) 1786*1e9ea7e0SNamjae Jeon status = -ENOMEM; 1787*1e9ea7e0SNamjae Jeon else { 1788*1e9ea7e0SNamjae Jeon status = -EIO; 1789*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "Cannot " 1790*1e9ea7e0SNamjae Jeon "perform write to " 1791*1e9ea7e0SNamjae Jeon "inode 0x%lx, " 1792*1e9ea7e0SNamjae Jeon "attribute type 0x%x, " 1793*1e9ea7e0SNamjae Jeon "because the attribute " 1794*1e9ea7e0SNamjae Jeon "is corrupt.", 1795*1e9ea7e0SNamjae Jeon vi->i_ino, (unsigned) 1796*1e9ea7e0SNamjae Jeon le32_to_cpu(ni->type)); 1797*1e9ea7e0SNamjae Jeon } 1798*1e9ea7e0SNamjae Jeon break; 1799*1e9ea7e0SNamjae Jeon } 1800*1e9ea7e0SNamjae Jeon if (lcn == LCN_HOLE) { 1801*1e9ea7e0SNamjae Jeon start_idx = (pos & ~(s64) 1802*1e9ea7e0SNamjae Jeon vol->cluster_size_mask) 1803*1e9ea7e0SNamjae Jeon >> PAGE_SHIFT; 1804*1e9ea7e0SNamjae Jeon bytes = vol->cluster_size - (pos & 1805*1e9ea7e0SNamjae Jeon vol->cluster_size_mask); 1806*1e9ea7e0SNamjae Jeon do_pages = nr_pages; 1807*1e9ea7e0SNamjae Jeon } 1808*1e9ea7e0SNamjae Jeon } 1809*1e9ea7e0SNamjae Jeon } 1810*1e9ea7e0SNamjae Jeon if (bytes > iov_iter_count(i)) 1811*1e9ea7e0SNamjae Jeon bytes = iov_iter_count(i); 1812*1e9ea7e0SNamjae Jeon again: 1813*1e9ea7e0SNamjae Jeon /* 1814*1e9ea7e0SNamjae Jeon * Bring in the user page(s) that we will copy from _first_. 1815*1e9ea7e0SNamjae Jeon * Otherwise there is a nasty deadlock on copying from the same 1816*1e9ea7e0SNamjae Jeon * page(s) as we are writing to, without it/them being marked 1817*1e9ea7e0SNamjae Jeon * up-to-date. Note, at present there is nothing to stop the 1818*1e9ea7e0SNamjae Jeon * pages being swapped out between us bringing them into memory 1819*1e9ea7e0SNamjae Jeon * and doing the actual copying. 1820*1e9ea7e0SNamjae Jeon */ 1821*1e9ea7e0SNamjae Jeon if (unlikely(fault_in_iov_iter_readable(i, bytes))) { 1822*1e9ea7e0SNamjae Jeon status = -EFAULT; 1823*1e9ea7e0SNamjae Jeon break; 1824*1e9ea7e0SNamjae Jeon } 1825*1e9ea7e0SNamjae Jeon /* Get and lock @do_pages starting at index @start_idx. */ 1826*1e9ea7e0SNamjae Jeon status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages, 1827*1e9ea7e0SNamjae Jeon pages, &cached_page); 1828*1e9ea7e0SNamjae Jeon if (unlikely(status)) 1829*1e9ea7e0SNamjae Jeon break; 1830*1e9ea7e0SNamjae Jeon /* 1831*1e9ea7e0SNamjae Jeon * For non-resident attributes, we need to fill any holes with 1832*1e9ea7e0SNamjae Jeon * actual clusters and ensure all bufferes are mapped. We also 1833*1e9ea7e0SNamjae Jeon * need to bring uptodate any buffers that are only partially 1834*1e9ea7e0SNamjae Jeon * being written to. 
1835*1e9ea7e0SNamjae Jeon */ 1836*1e9ea7e0SNamjae Jeon if (NInoNonResident(ni)) { 1837*1e9ea7e0SNamjae Jeon status = ntfs_prepare_pages_for_non_resident_write( 1838*1e9ea7e0SNamjae Jeon pages, do_pages, pos, bytes); 1839*1e9ea7e0SNamjae Jeon if (unlikely(status)) { 1840*1e9ea7e0SNamjae Jeon do { 1841*1e9ea7e0SNamjae Jeon unlock_page(pages[--do_pages]); 1842*1e9ea7e0SNamjae Jeon put_page(pages[do_pages]); 1843*1e9ea7e0SNamjae Jeon } while (do_pages); 1844*1e9ea7e0SNamjae Jeon break; 1845*1e9ea7e0SNamjae Jeon } 1846*1e9ea7e0SNamjae Jeon } 1847*1e9ea7e0SNamjae Jeon u = (pos >> PAGE_SHIFT) - pages[0]->index; 1848*1e9ea7e0SNamjae Jeon copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs, 1849*1e9ea7e0SNamjae Jeon i, bytes); 1850*1e9ea7e0SNamjae Jeon ntfs_flush_dcache_pages(pages + u, do_pages - u); 1851*1e9ea7e0SNamjae Jeon status = 0; 1852*1e9ea7e0SNamjae Jeon if (likely(copied == bytes)) { 1853*1e9ea7e0SNamjae Jeon status = ntfs_commit_pages_after_write(pages, do_pages, 1854*1e9ea7e0SNamjae Jeon pos, bytes); 1855*1e9ea7e0SNamjae Jeon } 1856*1e9ea7e0SNamjae Jeon do { 1857*1e9ea7e0SNamjae Jeon unlock_page(pages[--do_pages]); 1858*1e9ea7e0SNamjae Jeon put_page(pages[do_pages]); 1859*1e9ea7e0SNamjae Jeon } while (do_pages); 1860*1e9ea7e0SNamjae Jeon if (unlikely(status < 0)) { 1861*1e9ea7e0SNamjae Jeon iov_iter_revert(i, copied); 1862*1e9ea7e0SNamjae Jeon break; 1863*1e9ea7e0SNamjae Jeon } 1864*1e9ea7e0SNamjae Jeon cond_resched(); 1865*1e9ea7e0SNamjae Jeon if (unlikely(copied < bytes)) { 1866*1e9ea7e0SNamjae Jeon iov_iter_revert(i, copied); 1867*1e9ea7e0SNamjae Jeon if (copied) 1868*1e9ea7e0SNamjae Jeon bytes = copied; 1869*1e9ea7e0SNamjae Jeon else if (bytes > PAGE_SIZE - ofs) 1870*1e9ea7e0SNamjae Jeon bytes = PAGE_SIZE - ofs; 1871*1e9ea7e0SNamjae Jeon goto again; 1872*1e9ea7e0SNamjae Jeon } 1873*1e9ea7e0SNamjae Jeon pos += copied; 1874*1e9ea7e0SNamjae Jeon written += copied; 1875*1e9ea7e0SNamjae Jeon balance_dirty_pages_ratelimited(mapping); 1876*1e9ea7e0SNamjae Jeon if (fatal_signal_pending(current)) { 1877*1e9ea7e0SNamjae Jeon status = -EINTR; 1878*1e9ea7e0SNamjae Jeon break; 1879*1e9ea7e0SNamjae Jeon } 1880*1e9ea7e0SNamjae Jeon } while (iov_iter_count(i)); 1881*1e9ea7e0SNamjae Jeon if (cached_page) 1882*1e9ea7e0SNamjae Jeon put_page(cached_page); 1883*1e9ea7e0SNamjae Jeon ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 1884*1e9ea7e0SNamjae Jeon written ? "written" : "status", (unsigned long)written, 1885*1e9ea7e0SNamjae Jeon (long)status); 1886*1e9ea7e0SNamjae Jeon return written ? written : status; 1887*1e9ea7e0SNamjae Jeon } 1888*1e9ea7e0SNamjae Jeon 1889*1e9ea7e0SNamjae Jeon /** 1890*1e9ea7e0SNamjae Jeon * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() 1891*1e9ea7e0SNamjae Jeon * @iocb: IO state structure 1892*1e9ea7e0SNamjae Jeon * @from: iov_iter with data to write 1893*1e9ea7e0SNamjae Jeon * 1894*1e9ea7e0SNamjae Jeon * Basically the same as generic_file_write_iter() except that it ends up 1895*1e9ea7e0SNamjae Jeon * up calling ntfs_perform_write() instead of generic_perform_write() and that 1896*1e9ea7e0SNamjae Jeon * O_DIRECT is not implemented. 
1897*1e9ea7e0SNamjae Jeon */ 1898*1e9ea7e0SNamjae Jeon static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1899*1e9ea7e0SNamjae Jeon { 1900*1e9ea7e0SNamjae Jeon struct file *file = iocb->ki_filp; 1901*1e9ea7e0SNamjae Jeon struct inode *vi = file_inode(file); 1902*1e9ea7e0SNamjae Jeon ssize_t written = 0; 1903*1e9ea7e0SNamjae Jeon ssize_t err; 1904*1e9ea7e0SNamjae Jeon 1905*1e9ea7e0SNamjae Jeon inode_lock(vi); 1906*1e9ea7e0SNamjae Jeon /* We can write back this queue in page reclaim. */ 1907*1e9ea7e0SNamjae Jeon err = ntfs_prepare_file_for_write(iocb, from); 1908*1e9ea7e0SNamjae Jeon if (iov_iter_count(from) && !err) 1909*1e9ea7e0SNamjae Jeon written = ntfs_perform_write(file, from, iocb->ki_pos); 1910*1e9ea7e0SNamjae Jeon inode_unlock(vi); 1911*1e9ea7e0SNamjae Jeon iocb->ki_pos += written; 1912*1e9ea7e0SNamjae Jeon if (likely(written > 0)) 1913*1e9ea7e0SNamjae Jeon written = generic_write_sync(iocb, written); 1914*1e9ea7e0SNamjae Jeon return written ? written : err; 1915*1e9ea7e0SNamjae Jeon } 1916*1e9ea7e0SNamjae Jeon 1917*1e9ea7e0SNamjae Jeon /** 1918*1e9ea7e0SNamjae Jeon * ntfs_file_fsync - sync a file to disk 1919*1e9ea7e0SNamjae Jeon * @filp: file to be synced 1920*1e9ea7e0SNamjae Jeon * @datasync: if non-zero only flush user data and not metadata 1921*1e9ea7e0SNamjae Jeon * 1922*1e9ea7e0SNamjae Jeon * Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync 1923*1e9ea7e0SNamjae Jeon * system calls. This function is inspired by fs/buffer.c::file_fsync(). 1924*1e9ea7e0SNamjae Jeon * 1925*1e9ea7e0SNamjae Jeon * If @datasync is false, write the mft record and all associated extent mft 1926*1e9ea7e0SNamjae Jeon * records as well as the $DATA attribute and then sync the block device. 1927*1e9ea7e0SNamjae Jeon * 1928*1e9ea7e0SNamjae Jeon * If @datasync is true and the attribute is non-resident, we skip the writing 1929*1e9ea7e0SNamjae Jeon * of the mft record and all associated extent mft records (this might still 1930*1e9ea7e0SNamjae Jeon * happen due to the write_inode_now() call). 1931*1e9ea7e0SNamjae Jeon * 1932*1e9ea7e0SNamjae Jeon * Also, if @datasync is true, we do not wait on the inode to be written out 1933*1e9ea7e0SNamjae Jeon * but we always wait on the page cache pages to be written out. 1934*1e9ea7e0SNamjae Jeon * 1935*1e9ea7e0SNamjae Jeon * Locking: Caller must hold i_mutex on the inode. 1936*1e9ea7e0SNamjae Jeon * 1937*1e9ea7e0SNamjae Jeon * TODO: We should probably also write all attribute/index inodes associated 1938*1e9ea7e0SNamjae Jeon * with this inode but since we have no simple way of getting to them we ignore 1939*1e9ea7e0SNamjae Jeon * this problem for now. 
1940*1e9ea7e0SNamjae Jeon */ 1941*1e9ea7e0SNamjae Jeon static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, 1942*1e9ea7e0SNamjae Jeon int datasync) 1943*1e9ea7e0SNamjae Jeon { 1944*1e9ea7e0SNamjae Jeon struct inode *vi = filp->f_mapping->host; 1945*1e9ea7e0SNamjae Jeon int err, ret = 0; 1946*1e9ea7e0SNamjae Jeon 1947*1e9ea7e0SNamjae Jeon ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 1948*1e9ea7e0SNamjae Jeon 1949*1e9ea7e0SNamjae Jeon err = file_write_and_wait_range(filp, start, end); 1950*1e9ea7e0SNamjae Jeon if (err) 1951*1e9ea7e0SNamjae Jeon return err; 1952*1e9ea7e0SNamjae Jeon inode_lock(vi); 1953*1e9ea7e0SNamjae Jeon 1954*1e9ea7e0SNamjae Jeon BUG_ON(S_ISDIR(vi->i_mode)); 1955*1e9ea7e0SNamjae Jeon if (!datasync || !NInoNonResident(NTFS_I(vi))) 1956*1e9ea7e0SNamjae Jeon ret = __ntfs_write_inode(vi, 1); 1957*1e9ea7e0SNamjae Jeon write_inode_now(vi, !datasync); 1958*1e9ea7e0SNamjae Jeon /* 1959*1e9ea7e0SNamjae Jeon * NOTE: If we were to use mapping->private_list (see ext2 and 1960*1e9ea7e0SNamjae Jeon * fs/buffer.c) for dirty blocks then we could optimize the below to be 1961*1e9ea7e0SNamjae Jeon * sync_mapping_buffers(vi->i_mapping). 1962*1e9ea7e0SNamjae Jeon */ 1963*1e9ea7e0SNamjae Jeon err = sync_blockdev(vi->i_sb->s_bdev); 1964*1e9ea7e0SNamjae Jeon if (unlikely(err && !ret)) 1965*1e9ea7e0SNamjae Jeon ret = err; 1966*1e9ea7e0SNamjae Jeon if (likely(!ret)) 1967*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1968*1e9ea7e0SNamjae Jeon else 1969*1e9ea7e0SNamjae Jeon ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 1970*1e9ea7e0SNamjae Jeon "%u.", datasync ? "data" : "", vi->i_ino, -ret); 1971*1e9ea7e0SNamjae Jeon inode_unlock(vi); 1972*1e9ea7e0SNamjae Jeon return ret; 1973*1e9ea7e0SNamjae Jeon } 1974*1e9ea7e0SNamjae Jeon 1975*1e9ea7e0SNamjae Jeon #endif /* NTFS_RW */ 1976*1e9ea7e0SNamjae Jeon 1977*1e9ea7e0SNamjae Jeon const struct file_operations ntfs_file_ops = { 1978*1e9ea7e0SNamjae Jeon .llseek = generic_file_llseek, 1979*1e9ea7e0SNamjae Jeon .read_iter = generic_file_read_iter, 1980*1e9ea7e0SNamjae Jeon #ifdef NTFS_RW 1981*1e9ea7e0SNamjae Jeon .write_iter = ntfs_file_write_iter, 1982*1e9ea7e0SNamjae Jeon .fsync = ntfs_file_fsync, 1983*1e9ea7e0SNamjae Jeon #endif /* NTFS_RW */ 1984*1e9ea7e0SNamjae Jeon .mmap = generic_file_mmap, 1985*1e9ea7e0SNamjae Jeon .open = ntfs_file_open, 1986*1e9ea7e0SNamjae Jeon .splice_read = filemap_splice_read, 1987*1e9ea7e0SNamjae Jeon }; 1988*1e9ea7e0SNamjae Jeon 1989*1e9ea7e0SNamjae Jeon const struct inode_operations ntfs_file_inode_ops = { 1990*1e9ea7e0SNamjae Jeon #ifdef NTFS_RW 1991*1e9ea7e0SNamjae Jeon .setattr = ntfs_setattr, 1992*1e9ea7e0SNamjae Jeon #endif /* NTFS_RW */ 1993*1e9ea7e0SNamjae Jeon }; 1994*1e9ea7e0SNamjae Jeon 1995*1e9ea7e0SNamjae Jeon const struct file_operations ntfs_empty_file_ops = {}; 1996*1e9ea7e0SNamjae Jeon 1997*1e9ea7e0SNamjae Jeon const struct inode_operations ntfs_empty_inode_ops = {}; 1998