// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * aops.c - NTFS kernel address space operations and page cache handling.
 *
 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
 * Copyright (c) 2002 Richard Russon
 */

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/bio.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"

/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh:		buffer head on which io is completed
 * @uptodate:	whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
 * record size.
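 *
 * For example, with the common 1024 byte mft record size and a 4096 byte
 * page this means four mst fixups are applied per page, one per record.
 * (The 1024 byte record size is the usual case, not a guarantee.)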
 */
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first, *tmp;
	struct page *page;
	struct inode *vi;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	vi = page->mapping->host;
	ni = NTFS_I(vi);

	if (likely(uptodate)) {
		loff_t i_size;
		s64 file_ofs, init_size;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_SHIFT) +
				bh_offset(bh);
		read_lock_irqsave(&ni->size_lock, flags);
		init_size = ni->initialized_size;
		i_size = i_size_read(vi);
		read_unlock_irqrestore(&ni->size_lock, flags);
		if (unlikely(init_size > i_size)) {
			/* Race with shrinking truncate. */
			init_size = i_size;
		}
		/* Check for the current buffer head overflowing. */
		if (unlikely(file_ofs + bh->b_size > init_size)) {
			int ofs;
			void *kaddr;

			ofs = 0;
			if (file_ofs < init_size)
				ofs = init_size - file_ofs;
			kaddr = kmap_atomic(page);
			memset(kaddr + bh_offset(bh) + ofs, 0,
					bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr);
		}
	} else {
		clear_buffer_uptodate(bh);
		SetPageError(page);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
				"0x%llx.", (unsigned long long)bh->b_blocknr);
	}
	first = page_buffers(page);
	spin_lock_irqsave(&first->b_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
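	 * (The fixups restore the last two bytes of each 512 byte ntfs block
	 * of a record from the record's update sequence array, undoing the
	 * multi sector transfer protection that was applied at write time.)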
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		u8 *kaddr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		kaddr = kmap_atomic(page);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
					i * rec_size), rec_size);
		kunmap_atomic(kaddr);
		flush_dcache_page(page);
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	return;
}

/**
 * ntfs_read_block - fill a @folio of an address space with data
 * @folio:	page cache folio to fill with data
 *
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the folio before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_folio().
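 *
 * Unlike the generic helper we have no get_block() callback: each block is
 * located on disk by converting its index into a vcn and offset and walking
 * the attribute's runlist to translate the vcn into an lcn.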
 */
static int ntfs_read_block(struct folio *folio)
{
	loff_t i_size;
	VCN vcn;
	LCN lcn;
	s64 init_size;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	vi = folio->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;

	head = folio_buffers(folio);
	if (!head)
		head = create_empty_buffers(folio, blocksize, 0);
	bh = head;

	/*
	 * We may be racing with truncate. To avoid some of the problems we
	 * now take a snapshot of the various sizes and use those for the whole
	 * of the function. In case of an extending truncate it just means we
	 * may leave some buffers unmapped which are now allocated. This is
	 * not a problem since these buffers will just get mapped when a write
	 * occurs. In case of a shrinking truncate, we will detect this later
	 * on due to the runlist being incomplete and if the folio is being
	 * fully truncated, truncate will throw it away as soon as we unlock
	 * it so no need to worry what we do with it.
	 */
	iblock = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);
	read_lock_irqsave(&ni->size_lock, flags);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	init_size = ni->initialized_size;
	i_size = i_size_read(vi);
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (unlikely(init_size > i_size)) {
		/* Race with shrinking truncate. */
		init_size = i_size;
	}
	zblock = (init_size + blocksize - 1) >> blocksize_bits;

	/* Loop through all the buffers in the folio. */
	rl = NULL;
	nr = i = 0;
	do {
		int err = 0;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			bool is_retry = false;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				is_retry = true;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
			} else if (!rl)
				up_read(&ni->runlist.lock);
			/*
			 * If buffer is outside the runlist, treat it as a
			 * hole. This can happen due to concurrent truncate
			 * for example.
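			 * In that case we just zero the buffer below; if the
			 * folio is being fully truncated it will be discarded
			 * as soon as we unlock it anyway.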
			 */
			if (err == -ENOENT || lcn == LCN_ENOENT) {
				err = 0;
				goto handle_hole;
			}
			/* Hard error, zero out region. */
			if (!err)
				err = -EIO;
			bh->b_blocknr = -1;
			folio_set_error(folio);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %i).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", err);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the folio and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		folio_zero_range(folio, i * blocksize, blocksize);
		if (likely(!err))
			set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(REQ_OP_READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}
	/* No i/o was scheduled on any of the buffers. */
	if (likely(!folio_test_error(folio)))
		folio_mark_uptodate(folio);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	folio_unlock(folio);
	return nr;
}

/**
 * ntfs_read_folio - fill a @folio of a @file with data from the device
 * @file:	open file to which the folio @folio belongs or NULL
 * @folio:	page cache folio to fill with data
 *
 * For non-resident attributes, ntfs_read_folio() fills the @folio of the open
 * file @file by calling the ntfs version of the generic block_read_full_folio()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the folio asynchronously.
 *
 * For resident attributes, OTOH, ntfs_read_folio() fills @folio by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	loff_t i_size;
	struct inode *vi;
	ntfs_inode *ni, *base_ni;
	u8 *addr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	unsigned long flags;
	u32 attr_len;
	int err = 0;

retry_readpage:
	BUG_ON(!PageLocked(page));
	vi = page->mapping->host;
	i_size = i_size_read(vi);
	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
			PAGE_SHIFT)) {
		zero_user(page, 0, PAGE_SIZE);
		ntfs_debug("Read outside i_size - truncated?");
		goto done;
	}
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(vi);
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted.
	 * Note we need to check for AT_INDEX_ALLOCATION since this is the
	 * type of both directory and index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* If attribute is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			BUG_ON(ni->type != AT_DATA);
			err = -EACCES;
			goto err_out;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			return ntfs_read_compressed_block(page);
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* Normal, non-resident data stream. */
		return ntfs_read_block(folio);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. Note the attribute can actually be marked compressed
	 * but if it is resident the actual data is not compressed so we are
	 * ok to ignore the compressed flag here.
	 */
	if (unlikely(page->index > 0)) {
		zero_user(page, 0, PAGE_SIZE);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the read_folio.
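	 * (The retry then takes the non-resident path via ntfs_read_block().)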
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_readpage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	read_lock_irqsave(&ni->size_lock, flags);
	if (unlikely(attr_len > ni->initialized_size))
		attr_len = ni->initialized_size;
	i_size = i_size_read(vi);
	read_unlock_irqrestore(&ni->size_lock, flags);
	if (unlikely(attr_len > i_size)) {
		/* Race with shrinking truncate. */
		attr_len = i_size;
	}
	addr = kmap_atomic(page);
	/* Copy the data to the page. */
	memcpy(addr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(addr);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}

#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @folio to the backing store
 * @folio:	page cache folio to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing folios belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a folio with buffers, map and write the dirty buffers asynchronously
 * under folio writeback. For a folio without buffers, create buffers for the
 * folio, then proceed as above.
 *
 * If a folio doesn't have buffers the folio dirty state is definitive. If
 * a folio does have buffers, the folio dirty state is just a hint,
 * and the buffer dirty state is definitive. (A hint which has rules:
 * dirty buffers against a clean folio is illegal. Other combinations are
 * legal and need to be handled.
 * In particular a dirty folio containing clean buffers for example.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_folio().
 */
static int ntfs_write_block(struct folio *folio, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	s64 initialized_size;
	loff_t i_size;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int err;
	bool need_end_writeback;
	unsigned char blocksize_bits;

	vi = folio->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, folio->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));
	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;
	head = folio_buffers(folio);
	if (!head) {
		BUG_ON(!folio_test_uptodate(folio));
		head = create_empty_buffers(folio, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head;

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the folio. */
	block = (s64)folio->index << (PAGE_SHIFT - blocksize_bits);

	read_lock_irqsave(&ni->size_lock, flags);
	i_size = i_size_read(vi);
	initialized_size = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);

	/* The first out of bounds block for the data size. */
	dblock = (i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from block_dirty_folio
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the folio stays dirty.
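	 * That is harmless: the folio will simply be picked up again by a
	 * later writeback pass.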
	 *
	 * Buffers outside i_size may be dirtied by block_dirty_folio;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the folio, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		bool is_retry = false;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this folio can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the folio was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(initialized_size < i_size))) {
			/*
			 * If this folio is fully outside initialized
			 * size, zero out all folios between the current
			 * initialized size and the current folio. Just
			 * use ntfs_read_folio() to do the zeroing
			 * transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each folio do:
				// - read_cache_folio()
				// Again for each folio do:
				// - wait_on_folio_locked()
				// - Check (folio_test_uptodate(folio) &&
				//   !folio_test_error(folio))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each folio do:
				//	block_dirty_folio();
				//	folio_put()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current folio straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the folio is uptodate.
			 * FIXME: For an uptodate folio, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!folio_test_uptodate(folio)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}
		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			u8 *kaddr;
			unsigned long *bpos, *bend;

			/* Check if the buffer is zero. */
			kaddr = kmap_local_folio(folio, bh_offset(bh));
			bpos = (unsigned long *)kaddr;
			bend = (unsigned long *)(kaddr + blocksize);
			do {
				if (unlikely(*bpos))
					break;
			} while (likely(++bpos < bend));
			kunmap_local(kaddr);
			if (bpos == bend) {
				/*
				 * Buffer is zero and sparse, no need to write
				 * it.
				 */
				bh->b_blocknr = -1;
				clear_buffer_dirty(bh);
				continue;
			}
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// clean_bdev_bh_alias(bh);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}
		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = true;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
		} else if (!rl)
			up_read(&ni->runlist.lock);
		/*
		 * If buffer is outside the runlist, truncate has cut it out
		 * of the runlist. Just clean and clear the buffer and set it
		 * uptodate so it can get discarded by the VM.
		 */
		if (err == -ENOENT || lcn == LCN_ENOENT) {
			bh->b_blocknr = -1;
			clear_buffer_dirty(bh);
			folio_zero_range(folio, bh_offset(bh), blocksize);
			set_buffer_uptodate(bh);
			err = 0;
			continue;
		}
		/* Failed to map the buffer, even after retrying. */
		if (!err)
			err = -EIO;
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %i).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", err);
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->read_folio() is not called later. */
	if (unlikely(!folio_test_uptodate(folio))) {
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			folio_mark_uptodate(folio);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty folio.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying folio so we try again "
					"later.");
			/*
			 * Put the folio back on mapping->dirty_pages, but
			 * leave its buffer's dirty state as-is.
			 */
			folio_redirty_for_writepage(wbc, folio);
			err = 0;
		} else
			folio_set_error(folio);
	}

	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);	/* Keeps try_to_free_buffers() away. */

	/* Submit the prepared buffers for i/o. */
	need_end_writeback = true;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(REQ_OP_WRITE, bh);
			need_end_writeback = false;
		}
		bh = next;
	} while (bh != head);
	folio_unlock(folio);

	/* If no i/o was started, need to end writeback here. */
	if (unlikely(need_end_writeback))
		folio_end_writeback(folio);

	ntfs_debug("Done.");
	return err;
}

/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
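 *
 * Note the i/o here is synchronous (end_buffer_write_sync() and
 * wait_on_buffer()), precisely because the fixups must be undone before the
 * page can be unlocked, and for mft records with number below
 * vol->mftmirr_size the mft mirror is synchronized as well.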
 */
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_SIZE / NTFS_BLOCK_SIZE];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
	unsigned bh_size, rec_size_bits;
	bool sync, is_mft, page_is_dirty, rec_is_dirty;
	unsigned char bh_size_bits;

	if (WARN_ON(rec_size < NTFS_BLOCK_SIZE))
		return -EINVAL;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However this should
	 * never happen with the current driver and considering we do not
	 * handle this case here we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	bh_size = vol->sb->s_blocksize;
	bh_size_bits = vol->sb->s_blocksize_bits;
	max_bhs = PAGE_SIZE / bh_size;
	BUG_ON(!max_bhs);
	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = false;
	rec_start_bh = NULL;
	do {
		bool is_retry = false;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = false;
				continue;
			}
			rec_is_dirty = true;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			bh->b_bdev = vol->sb->s_bdev;
			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = true;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = true;
					lcn = err2;
				} else {
					err2 = -EIO;
					if (!rl)
						up_read(&ni->runlist.lock);
				}
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).",
						(long long)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = true;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
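			 * The buffers are also dropped from bhs[] so the
			 * record is neither submitted for i/o nor fixup
			 * reverted below.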
1147*1e9ea7e0SNamjae Jeon */ 1148*1e9ea7e0SNamjae Jeon do { 1149*1e9ea7e0SNamjae Jeon clear_buffer_dirty(bhs[i]); 1150*1e9ea7e0SNamjae Jeon bhs[i] = NULL; 1151*1e9ea7e0SNamjae Jeon } while (++i % bhs_per_rec); 1152*1e9ea7e0SNamjae Jeon continue; 1153*1e9ea7e0SNamjae Jeon } 1154*1e9ea7e0SNamjae Jeon nr_recs++; 1155*1e9ea7e0SNamjae Jeon } 1156*1e9ea7e0SNamjae Jeon /* If no records are to be written out, we are done. */ 1157*1e9ea7e0SNamjae Jeon if (!nr_recs) 1158*1e9ea7e0SNamjae Jeon goto unm_done; 1159*1e9ea7e0SNamjae Jeon flush_dcache_page(page); 1160*1e9ea7e0SNamjae Jeon /* Lock buffers and start synchronous write i/o on them. */ 1161*1e9ea7e0SNamjae Jeon for (i = 0; i < nr_bhs; i++) { 1162*1e9ea7e0SNamjae Jeon tbh = bhs[i]; 1163*1e9ea7e0SNamjae Jeon if (!tbh) 1164*1e9ea7e0SNamjae Jeon continue; 1165*1e9ea7e0SNamjae Jeon if (!trylock_buffer(tbh)) 1166*1e9ea7e0SNamjae Jeon BUG(); 1167*1e9ea7e0SNamjae Jeon /* The buffer dirty state is now irrelevant, just clean it. */ 1168*1e9ea7e0SNamjae Jeon clear_buffer_dirty(tbh); 1169*1e9ea7e0SNamjae Jeon BUG_ON(!buffer_uptodate(tbh)); 1170*1e9ea7e0SNamjae Jeon BUG_ON(!buffer_mapped(tbh)); 1171*1e9ea7e0SNamjae Jeon get_bh(tbh); 1172*1e9ea7e0SNamjae Jeon tbh->b_end_io = end_buffer_write_sync; 1173*1e9ea7e0SNamjae Jeon submit_bh(REQ_OP_WRITE, tbh); 1174*1e9ea7e0SNamjae Jeon } 1175*1e9ea7e0SNamjae Jeon /* Synchronize the mft mirror now if not @sync. */ 1176*1e9ea7e0SNamjae Jeon if (is_mft && !sync) 1177*1e9ea7e0SNamjae Jeon goto do_mirror; 1178*1e9ea7e0SNamjae Jeon do_wait: 1179*1e9ea7e0SNamjae Jeon /* Wait on i/o completion of buffers. */ 1180*1e9ea7e0SNamjae Jeon for (i = 0; i < nr_bhs; i++) { 1181*1e9ea7e0SNamjae Jeon tbh = bhs[i]; 1182*1e9ea7e0SNamjae Jeon if (!tbh) 1183*1e9ea7e0SNamjae Jeon continue; 1184*1e9ea7e0SNamjae Jeon wait_on_buffer(tbh); 1185*1e9ea7e0SNamjae Jeon if (unlikely(!buffer_uptodate(tbh))) { 1186*1e9ea7e0SNamjae Jeon ntfs_error(vol->sb, "I/O error while writing ntfs " 1187*1e9ea7e0SNamjae Jeon "record buffer (inode 0x%lx, " 1188*1e9ea7e0SNamjae Jeon "attribute type 0x%x, page index " 1189*1e9ea7e0SNamjae Jeon "0x%lx, page offset 0x%lx)! Unmount " 1190*1e9ea7e0SNamjae Jeon "and run chkdsk.", vi->i_ino, ni->type, 1191*1e9ea7e0SNamjae Jeon page->index, bh_offset(tbh)); 1192*1e9ea7e0SNamjae Jeon if (!err || err == -ENOMEM) 1193*1e9ea7e0SNamjae Jeon err = -EIO; 1194*1e9ea7e0SNamjae Jeon /* 1195*1e9ea7e0SNamjae Jeon * Set the buffer uptodate so the page and buffer 1196*1e9ea7e0SNamjae Jeon * states do not become out of sync. 1197*1e9ea7e0SNamjae Jeon */ 1198*1e9ea7e0SNamjae Jeon set_buffer_uptodate(tbh); 1199*1e9ea7e0SNamjae Jeon } 1200*1e9ea7e0SNamjae Jeon } 1201*1e9ea7e0SNamjae Jeon /* If @sync, now synchronize the mft mirror. */ 1202*1e9ea7e0SNamjae Jeon if (is_mft && sync) { 1203*1e9ea7e0SNamjae Jeon do_mirror: 1204*1e9ea7e0SNamjae Jeon for (i = 0; i < nr_bhs; i++) { 1205*1e9ea7e0SNamjae Jeon unsigned long mft_no; 1206*1e9ea7e0SNamjae Jeon unsigned int ofs; 1207*1e9ea7e0SNamjae Jeon 1208*1e9ea7e0SNamjae Jeon /* 1209*1e9ea7e0SNamjae Jeon * Skip buffers which are not at the beginning of 1210*1e9ea7e0SNamjae Jeon * records. 1211*1e9ea7e0SNamjae Jeon */ 1212*1e9ea7e0SNamjae Jeon if (i % bhs_per_rec) 1213*1e9ea7e0SNamjae Jeon continue; 1214*1e9ea7e0SNamjae Jeon tbh = bhs[i]; 1215*1e9ea7e0SNamjae Jeon /* Skip removed buffers (and hence records). */ 1216*1e9ea7e0SNamjae Jeon if (!tbh) 1217*1e9ea7e0SNamjae Jeon continue; 1218*1e9ea7e0SNamjae Jeon ofs = bh_offset(tbh); 1219*1e9ea7e0SNamjae Jeon /* Get the mft record number. 
*/ 1220*1e9ea7e0SNamjae Jeon mft_no = (((s64)page->index << PAGE_SHIFT) + ofs) 1221*1e9ea7e0SNamjae Jeon >> rec_size_bits; 1222*1e9ea7e0SNamjae Jeon if (mft_no < vol->mftmirr_size) 1223*1e9ea7e0SNamjae Jeon ntfs_sync_mft_mirror(vol, mft_no, 1224*1e9ea7e0SNamjae Jeon (MFT_RECORD*)(kaddr + ofs), 1225*1e9ea7e0SNamjae Jeon sync); 1226*1e9ea7e0SNamjae Jeon } 1227*1e9ea7e0SNamjae Jeon if (!sync) 1228*1e9ea7e0SNamjae Jeon goto do_wait; 1229*1e9ea7e0SNamjae Jeon } 1230*1e9ea7e0SNamjae Jeon /* Remove the mst protection fixups again. */ 1231*1e9ea7e0SNamjae Jeon for (i = 0; i < nr_bhs; i++) { 1232*1e9ea7e0SNamjae Jeon if (!(i % bhs_per_rec)) { 1233*1e9ea7e0SNamjae Jeon tbh = bhs[i]; 1234*1e9ea7e0SNamjae Jeon if (!tbh) 1235*1e9ea7e0SNamjae Jeon continue; 1236*1e9ea7e0SNamjae Jeon post_write_mst_fixup((NTFS_RECORD*)(kaddr + 1237*1e9ea7e0SNamjae Jeon bh_offset(tbh))); 1238*1e9ea7e0SNamjae Jeon } 1239*1e9ea7e0SNamjae Jeon } 1240*1e9ea7e0SNamjae Jeon flush_dcache_page(page); 1241*1e9ea7e0SNamjae Jeon unm_done: 1242*1e9ea7e0SNamjae Jeon /* Unlock any locked inodes. */ 1243*1e9ea7e0SNamjae Jeon while (nr_locked_nis-- > 0) { 1244*1e9ea7e0SNamjae Jeon ntfs_inode *tni, *base_tni; 1245*1e9ea7e0SNamjae Jeon 1246*1e9ea7e0SNamjae Jeon tni = locked_nis[nr_locked_nis]; 1247*1e9ea7e0SNamjae Jeon /* Get the base inode. */ 1248*1e9ea7e0SNamjae Jeon mutex_lock(&tni->extent_lock); 1249*1e9ea7e0SNamjae Jeon if (tni->nr_extents >= 0) 1250*1e9ea7e0SNamjae Jeon base_tni = tni; 1251*1e9ea7e0SNamjae Jeon else { 1252*1e9ea7e0SNamjae Jeon base_tni = tni->ext.base_ntfs_ino; 1253*1e9ea7e0SNamjae Jeon BUG_ON(!base_tni); 1254*1e9ea7e0SNamjae Jeon } 1255*1e9ea7e0SNamjae Jeon mutex_unlock(&tni->extent_lock); 1256*1e9ea7e0SNamjae Jeon ntfs_debug("Unlocking %s inode 0x%lx.", 1257*1e9ea7e0SNamjae Jeon tni == base_tni ? "base" : "extent", 1258*1e9ea7e0SNamjae Jeon tni->mft_no); 1259*1e9ea7e0SNamjae Jeon mutex_unlock(&tni->mrec_lock); 1260*1e9ea7e0SNamjae Jeon atomic_dec(&tni->count); 1261*1e9ea7e0SNamjae Jeon iput(VFS_I(base_tni)); 1262*1e9ea7e0SNamjae Jeon } 1263*1e9ea7e0SNamjae Jeon SetPageUptodate(page); 1264*1e9ea7e0SNamjae Jeon kunmap(page); 1265*1e9ea7e0SNamjae Jeon done: 1266*1e9ea7e0SNamjae Jeon if (unlikely(err && err != -ENOMEM)) { 1267*1e9ea7e0SNamjae Jeon /* 1268*1e9ea7e0SNamjae Jeon * Set page error if there is only one ntfs record in the page. 1269*1e9ea7e0SNamjae Jeon * Otherwise we would loose per-record granularity. 1270*1e9ea7e0SNamjae Jeon */ 1271*1e9ea7e0SNamjae Jeon if (ni->itype.index.block_size == PAGE_SIZE) 1272*1e9ea7e0SNamjae Jeon SetPageError(page); 1273*1e9ea7e0SNamjae Jeon NVolSetErrors(vol); 1274*1e9ea7e0SNamjae Jeon } 1275*1e9ea7e0SNamjae Jeon if (page_is_dirty) { 1276*1e9ea7e0SNamjae Jeon ntfs_debug("Page still contains one or more dirty ntfs " 1277*1e9ea7e0SNamjae Jeon "records. Redirtying the page starting at " 1278*1e9ea7e0SNamjae Jeon "record 0x%lx.", page->index << 1279*1e9ea7e0SNamjae Jeon (PAGE_SHIFT - rec_size_bits)); 1280*1e9ea7e0SNamjae Jeon redirty_page_for_writepage(wbc, page); 1281*1e9ea7e0SNamjae Jeon unlock_page(page); 1282*1e9ea7e0SNamjae Jeon } else { 1283*1e9ea7e0SNamjae Jeon /* 1284*1e9ea7e0SNamjae Jeon * Keep the VM happy. This must be done otherwise the 1285*1e9ea7e0SNamjae Jeon * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though 1286*1e9ea7e0SNamjae Jeon * the page is clean. 
1287*1e9ea7e0SNamjae Jeon */ 1288*1e9ea7e0SNamjae Jeon BUG_ON(PageWriteback(page)); 1289*1e9ea7e0SNamjae Jeon set_page_writeback(page); 1290*1e9ea7e0SNamjae Jeon unlock_page(page); 1291*1e9ea7e0SNamjae Jeon end_page_writeback(page); 1292*1e9ea7e0SNamjae Jeon } 1293*1e9ea7e0SNamjae Jeon if (likely(!err)) 1294*1e9ea7e0SNamjae Jeon ntfs_debug("Done."); 1295*1e9ea7e0SNamjae Jeon return err; 1296*1e9ea7e0SNamjae Jeon } 1297*1e9ea7e0SNamjae Jeon 1298*1e9ea7e0SNamjae Jeon /** 1299*1e9ea7e0SNamjae Jeon * ntfs_writepage - write a @page to the backing store 1300*1e9ea7e0SNamjae Jeon * @page: page cache page to write out 1301*1e9ea7e0SNamjae Jeon * @wbc: writeback control structure 1302*1e9ea7e0SNamjae Jeon * 1303*1e9ea7e0SNamjae Jeon * This is called from the VM when it wants to have a dirty ntfs page cache 1304*1e9ea7e0SNamjae Jeon * page cleaned. The VM has already locked the page and marked it clean. 1305*1e9ea7e0SNamjae Jeon * 1306*1e9ea7e0SNamjae Jeon * For non-resident attributes, ntfs_writepage() writes the @page by calling 1307*1e9ea7e0SNamjae Jeon * the ntfs version of the generic block_write_full_folio() function, 1308*1e9ea7e0SNamjae Jeon * ntfs_write_block(), which in turn if necessary creates and writes the 1309*1e9ea7e0SNamjae Jeon * buffers associated with the page asynchronously. 1310*1e9ea7e0SNamjae Jeon * 1311*1e9ea7e0SNamjae Jeon * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying 1312*1e9ea7e0SNamjae Jeon * the data to the mft record (which at this stage is most likely in memory). 1313*1e9ea7e0SNamjae Jeon * The mft record is then marked dirty and written out asynchronously via the 1314*1e9ea7e0SNamjae Jeon * vfs inode dirty code path for the inode the mft record belongs to or via the 1315*1e9ea7e0SNamjae Jeon * vm page dirty code path for the page the mft record is in. 1316*1e9ea7e0SNamjae Jeon * 1317*1e9ea7e0SNamjae Jeon * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_folio(). 1318*1e9ea7e0SNamjae Jeon * 1319*1e9ea7e0SNamjae Jeon * Return 0 on success and -errno on error. 1320*1e9ea7e0SNamjae Jeon */ 1321*1e9ea7e0SNamjae Jeon static int ntfs_writepage(struct page *page, struct writeback_control *wbc) 1322*1e9ea7e0SNamjae Jeon { 1323*1e9ea7e0SNamjae Jeon struct folio *folio = page_folio(page); 1324*1e9ea7e0SNamjae Jeon loff_t i_size; 1325*1e9ea7e0SNamjae Jeon struct inode *vi = folio->mapping->host; 1326*1e9ea7e0SNamjae Jeon ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); 1327*1e9ea7e0SNamjae Jeon char *addr; 1328*1e9ea7e0SNamjae Jeon ntfs_attr_search_ctx *ctx = NULL; 1329*1e9ea7e0SNamjae Jeon MFT_RECORD *m = NULL; 1330*1e9ea7e0SNamjae Jeon u32 attr_len; 1331*1e9ea7e0SNamjae Jeon int err; 1332*1e9ea7e0SNamjae Jeon 1333*1e9ea7e0SNamjae Jeon retry_writepage: 1334*1e9ea7e0SNamjae Jeon BUG_ON(!folio_test_locked(folio)); 1335*1e9ea7e0SNamjae Jeon i_size = i_size_read(vi); 1336*1e9ea7e0SNamjae Jeon /* Is the folio fully outside i_size? (truncate in progress) */ 1337*1e9ea7e0SNamjae Jeon if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >> 1338*1e9ea7e0SNamjae Jeon PAGE_SHIFT)) { 1339*1e9ea7e0SNamjae Jeon /* 1340*1e9ea7e0SNamjae Jeon * The folio may have dirty, unmapped buffers. Make them 1341*1e9ea7e0SNamjae Jeon * freeable here, so the page does not leak. 
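/*
 * Illustrative sketch (not compiled; ntfs_write_one_mst_record() is a
 * hypothetical helper, not part of the driver): the per-record pattern
 * implemented above. A multi sector transfer protected record is only
 * ever written with the fixups applied, and they are removed again
 * straight after the i/o so the in-memory copy stays usable:
 */
#if 0
static int ntfs_write_one_mst_record(NTFS_RECORD *rec, const u32 rec_size)
{
	int err = pre_write_mst_fixup(rec, rec_size);

	if (unlikely(err))
		return err;	/* Record is corrupt, do not write it. */
	/* ... lock, submit, and wait on the buffers backing @rec ... */
	post_write_mst_fixup(rec);	/* Undo fixups in the in-memory copy. */
	return 0;
}
#endif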

/**
 * ntfs_writepage - write a @page to the backing store
 * @page: page cache page to write out
 * @wbc: writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_folio() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_read_folio() and fs/buffer.c::block_write_full_folio().
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	loff_t i_size;
	struct inode *vi = folio->mapping->host;
	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
	char *addr;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	u32 attr_len;
	int err;

retry_writepage:
	BUG_ON(!folio_test_locked(folio));
	i_size = i_size_read(vi);
	/* Is the folio fully outside i_size? (truncate in progress) */
	if (unlikely(folio->index >= (i_size + PAGE_SIZE - 1) >>
			PAGE_SHIFT)) {
		/*
		 * The folio may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidate_folio(folio, 0, folio_size(folio));
		folio_unlock(folio);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted. Note we need to check for
	 * AT_INDEX_ALLOCATION since this is the type of both directory and
	 * index inodes.
	 */
	if (ni->type != AT_INDEX_ALLOCATION) {
		/* If file is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			folio_unlock(folio);
			BUG_ON(ni->type != AT_DATA);
			ntfs_debug("Denying write access to encrypted file.");
			return -EACCES;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			// TODO: Implement and replace this with
			// return ntfs_write_compressed_block(page);
			folio_unlock(folio);
			ntfs_error(vi->i_sb, "Writing to compressed files is "
					"not supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
		// TODO: Implement and remove this check.
		if (NInoNonResident(ni) && NInoSparse(ni)) {
			folio_unlock(folio);
			ntfs_error(vi->i_sb, "Writing to sparse files is not "
					"supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (folio->index >= (i_size >> PAGE_SHIFT)) {
			/* The folio straddles i_size. */
			unsigned int ofs = i_size & (folio_size(folio) - 1);
			folio_zero_segment(folio, ofs, folio_size(folio));
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal, non-resident data stream. */
		return ntfs_write_block(folio, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * mst protected. This also means the attribute is smaller than an mft
	 * record and hence smaller than a folio, so we can simply return an
	 * error on any folio with index above 0. Note the attribute can
	 * actually be marked compressed but if it is resident the actual data
	 * is not compressed so we are ok to ignore the compressed flag here.
	 */
	BUG_ON(folio_buffers(folio));
	BUG_ON(!folio_test_uptodate(folio));
	if (unlikely(folio->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! folio->index (0x%lx) > 0. "
				"Aborting write.", folio->index);
		BUG_ON(folio_test_writeback(folio));
		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the writepage.
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_writepage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise
	 * PAGECACHE_TAG_DIRTY remains set even though the folio is clean.
	 */
	BUG_ON(folio_test_writeback(folio));
	folio_start_writeback(folio);
	folio_unlock(folio);
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(vi);
	if (unlikely(attr_len > i_size)) {
		/* Race with shrinking truncate or a failed truncate. */
		attr_len = i_size;
		/*
		 * If the truncate failed, fix it up now. If it is a
		 * concurrent truncate, we do its job, so it does not have to
		 * do anything.
		 */
		err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
				attr_len);
		/* Shrinking cannot fail. */
		BUG_ON(err);
	}
	addr = kmap_local_folio(folio, 0);
	/* Copy the data from the folio to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			addr, attr_len);
	/* Zero out of bounds area in the page cache folio. */
	memset(addr + attr_len, 0, folio_size(folio) - attr_len);
	kunmap_local(addr);
	flush_dcache_folio(folio);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* We are done with the folio. */
	folio_end_writeback(folio);
	/* Finally, mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the folio back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		folio_redirty_for_writepage(wbc, folio);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i.", err);
		folio_set_error(folio);
		NVolSetErrors(ni->vol);
	}
	folio_unlock(folio);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}
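/*
 * Illustrative sketch, not driver code: the resident write path above
 * degenerates to a memcpy() into the mft record followed by marking the
 * record dirty. Using the locals of ntfs_writepage() (@ctx, @addr,
 * @attr_len), the core of it is just:
 */
#if 0
	u8 *value = (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset);

	memcpy(value, addr, attr_len);		/* folio -> mft record */
	mark_mft_record_dirty(ctx->ntfs_ino);	/* queue record writeback */
#endif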

#endif /* NTFS_RW */

/**
 * ntfs_bmap - map logical file block to physical device block
 * @mapping: address space mapping to which the block to be mapped belongs
 * @block: logical block to map to its physical device block
 *
 * For regular, non-resident files (i.e. not compressed and not encrypted), map
 * the logical @block belonging to the file described by the address space
 * mapping @mapping to its physical device block.
 *
 * The size of the block is equal to the @s_blocksize field of the super block
 * of the mounted file system, which is guaranteed to be smaller than or equal
 * to the cluster size, thus the block is guaranteed to fit entirely inside the
 * cluster, which means we do not need to care how many contiguous bytes are
 * available after the beginning of the block.
 *
 * Return the physical device block if the mapping succeeded or 0 if the block
 * is sparse or there was an error.
 *
 * Note: This is a problem if someone tries to run bmap() on the $Boot system
 * file as that really is in block zero but there is nothing we can do. bmap()
 * is just broken in that respect (just like it cannot distinguish sparse from
 * not available or error).
 */
static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
{
	s64 ofs, size;
	loff_t i_size;
	LCN lcn;
	unsigned long blocksize, flags;
	ntfs_inode *ni = NTFS_I(mapping->host);
	ntfs_volume *vol = ni->vol;
	unsigned delta;
	unsigned char blocksize_bits, cluster_size_shift;

	ntfs_debug("Entering for mft_no 0x%lx, logical block 0x%llx.",
			ni->mft_no, (unsigned long long)block);
	if (ni->type != AT_DATA || !NInoNonResident(ni) || NInoEncrypted(ni)) {
		ntfs_error(vol->sb, "BMAP does not make sense for %s "
				"attributes, returning 0.",
				(ni->type != AT_DATA) ? "non-data" :
				(!NInoNonResident(ni) ? "resident" :
				"encrypted"));
		return 0;
	}
	/* None of these can happen. */
	BUG_ON(NInoCompressed(ni));
	BUG_ON(NInoMstProtected(ni));
	blocksize = vol->sb->s_blocksize;
	blocksize_bits = vol->sb->s_blocksize_bits;
	ofs = (s64)block << blocksize_bits;
	read_lock_irqsave(&ni->size_lock, flags);
	size = ni->initialized_size;
	i_size = i_size_read(VFS_I(ni));
	read_unlock_irqrestore(&ni->size_lock, flags);
	/*
	 * If the offset is outside the initialized size or the block straddles
	 * the initialized size then pretend it is a hole unless the
	 * initialized size equals the file size.
	 */
	if (unlikely(ofs >= size || (ofs + blocksize > size && size < i_size)))
		goto hole;
	cluster_size_shift = vol->cluster_size_bits;
	down_read(&ni->runlist.lock);
	lcn = ntfs_attr_vcn_to_lcn_nolock(ni, ofs >> cluster_size_shift, false);
	up_read(&ni->runlist.lock);
	if (unlikely(lcn < LCN_HOLE)) {
		/*
		 * Step down to an integer to avoid gcc doing a long long
		 * comparison in the switch when we know @lcn is between
		 * LCN_HOLE and LCN_EIO (i.e. -1 to -5).
		 *
		 * Otherwise older gcc (at least on some architectures) will
		 * try to use __cmpdi2() which is of course not available in
		 * the kernel.
		 */
		switch ((int)lcn) {
		case LCN_ENOENT:
			/*
			 * If the offset is out of bounds then pretend it is a
			 * hole.
			 */
			goto hole;
		case LCN_ENOMEM:
			ntfs_error(vol->sb, "Not enough memory to complete "
					"mapping for inode 0x%lx. "
					"Returning 0.", ni->mft_no);
			break;
		default:
			ntfs_error(vol->sb, "Failed to complete mapping for "
					"inode 0x%lx. Run chkdsk. "
					"Returning 0.", ni->mft_no);
			break;
		}
		return 0;
	}
	if (lcn < 0) {
		/* It is a hole. */
hole:
		ntfs_debug("Done (returning hole).");
		return 0;
	}
	/*
	 * The block is really allocated and fulfils all our criteria.
	 * Convert the cluster to units of block size and return the result.
	 */
	delta = ofs & vol->cluster_size_mask;
	if (unlikely(sizeof(block) < sizeof(lcn))) {
		block = lcn = ((lcn << cluster_size_shift) + delta) >>
				blocksize_bits;
		/* If the block number was truncated return 0. */
		if (unlikely(block != lcn)) {
			ntfs_error(vol->sb, "Physical block 0x%llx is too "
					"large to be returned, returning 0.",
					(long long)lcn);
			return 0;
		}
	} else
		block = ((lcn << cluster_size_shift) + delta) >>
				blocksize_bits;
	ntfs_debug("Done (returning block 0x%llx).",
			(unsigned long long)block);
	return block;
}
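/*
 * Worked example for the conversion above (hypothetical geometry): with
 * 4096 byte clusters (cluster_size_shift == 12) and 512 byte device
 * blocks (blocksize_bits == 9), logical block 9 gives
 *
 *	ofs   = 9 << 9 = 0x1200		(byte offset in the file)
 *	vcn   = 0x1200 >> 12 = 1	(cluster within the file)
 *	delta = 0x1200 & 0xfff = 0x200	(byte offset within the cluster)
 *
 * and if vcn 1 maps to lcn 0x101, the returned device block is
 * ((0x101 << 12) + 0x200) >> 9 = 0x809.
 */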

/*
 * ntfs_normal_aops - address space operations for normal inodes and attributes
 *
 * Note these are not used for compressed or mst protected inodes and
 * attributes.
 */
const struct address_space_operations ntfs_normal_aops = {
	.read_folio = ntfs_read_folio,
#ifdef NTFS_RW
	.writepage = ntfs_writepage,
	.dirty_folio = block_dirty_folio,
#endif /* NTFS_RW */
	.bmap = ntfs_bmap,
	.migrate_folio = buffer_migrate_folio,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_folio = generic_error_remove_folio,
};

/*
 * ntfs_compressed_aops - address space operations for compressed inodes
 */
const struct address_space_operations ntfs_compressed_aops = {
	.read_folio = ntfs_read_folio,
#ifdef NTFS_RW
	.writepage = ntfs_writepage,
	.dirty_folio = block_dirty_folio,
#endif /* NTFS_RW */
	.migrate_folio = buffer_migrate_folio,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_folio = generic_error_remove_folio,
};

/*
 * ntfs_mst_aops - general address space operations for mst protected inodes
 * and attributes
 */
const struct address_space_operations ntfs_mst_aops = {
	.read_folio = ntfs_read_folio,	/* Fill page with data. */
#ifdef NTFS_RW
	.writepage = ntfs_writepage,	/* Write dirty page to disk. */
	.dirty_folio = filemap_dirty_folio,
#endif /* NTFS_RW */
	.migrate_folio = buffer_migrate_folio,
	.is_partially_uptodate = block_is_partially_uptodate,
	.error_remove_folio = generic_error_remove_folio,
};
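/*
 * Illustrative sketch (the real assignments live in inode.c; this is
 * shown only to make the split between the tables concrete): when an
 * ntfs inode is set up, its mapping is wired to exactly one of the
 * tables above, e.g.:
 */
#if 0
	if (NInoMstProtected(ni))
		vi->i_mapping->a_ops = &ntfs_mst_aops;
	else if (NInoCompressed(ni))
		vi->i_mapping->a_ops = &ntfs_compressed_aops;
	else
		vi->i_mapping->a_ops = &ntfs_normal_aops;
#endif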

#ifdef NTFS_RW

/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page: page containing the ntfs record to mark dirty
 * @ofs: byte offset within @page at which the ntfs record begins
 *
 * Set the buffers and the page in which the ntfs record is located dirty.
 *
 * The latter also marks the vfs inode the ntfs record belongs to dirty
 * (I_DIRTY_PAGES only).
 *
 * If the page does not have buffers, we create them and set them uptodate.
 * The page may not be locked which is why we need to handle the buffers under
 * the mapping->i_private_lock. Once the buffers are marked dirty we no longer
 * need the lock since try_to_free_buffers() does not free dirty buffers.
 */
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs)
{
	struct address_space *mapping = page->mapping;
	ntfs_inode *ni = NTFS_I(mapping->host);
	struct buffer_head *bh, *head, *buffers_to_free = NULL;
	unsigned int end, bh_size, bh_ofs;

	BUG_ON(!PageUptodate(page));
	end = ofs + ni->itype.index.block_size;
	bh_size = VFS_I(ni)->i_sb->s_blocksize;
	spin_lock(&mapping->i_private_lock);
	if (unlikely(!page_has_buffers(page))) {
		spin_unlock(&mapping->i_private_lock);
		bh = head = alloc_page_buffers(page, bh_size, true);
		spin_lock(&mapping->i_private_lock);
		if (likely(!page_has_buffers(page))) {
			struct buffer_head *tail;

			do {
				set_buffer_uptodate(bh);
				tail = bh;
				bh = bh->b_this_page;
			} while (bh);
			tail->b_this_page = head;
			attach_page_private(page, head);
		} else
			buffers_to_free = bh;
	}
	bh = head = page_buffers(page);
	BUG_ON(!bh);
	do {
		bh_ofs = bh_offset(bh);
		if (bh_ofs + bh_size <= ofs)
			continue;
		if (unlikely(bh_ofs >= end))
			break;
		set_buffer_dirty(bh);
	} while ((bh = bh->b_this_page) != head);
	spin_unlock(&mapping->i_private_lock);
	filemap_dirty_folio(mapping, page_folio(page));
	if (unlikely(buffers_to_free)) {
		do {
			bh = buffers_to_free->b_this_page;
			free_buffer_head(buffers_to_free);
			buffers_to_free = bh;
		} while (buffers_to_free);
	}
}

#endif /* NTFS_RW */
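/*
 * Illustrative sketch, not driver code: a typical caller has just
 * modified an ntfs record inside an uptodate page and wants both the
 * backing buffers and the page flagged dirty so writeback picks the
 * record up (the record offset 0x800 is a hypothetical value):
 */
#if 0
	/* ... modify the ntfs record at byte offset 0x800 in @page ... */
	flush_dcache_page(page);
	mark_ntfs_record_dirty(page, 0x800);
#endif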