168988114SDave Chinner /* 268988114SDave Chinner * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3c24b5dfaSDave Chinner * Copyright (c) 2012 Red Hat, Inc. 468988114SDave Chinner * All Rights Reserved. 568988114SDave Chinner * 668988114SDave Chinner * This program is free software; you can redistribute it and/or 768988114SDave Chinner * modify it under the terms of the GNU General Public License as 868988114SDave Chinner * published by the Free Software Foundation. 968988114SDave Chinner * 1068988114SDave Chinner * This program is distributed in the hope that it would be useful, 1168988114SDave Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 1268988114SDave Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1368988114SDave Chinner * GNU General Public License for more details. 1468988114SDave Chinner * 1568988114SDave Chinner * You should have received a copy of the GNU General Public License 1668988114SDave Chinner * along with this program; if not, write the Free Software Foundation, 1768988114SDave Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 1868988114SDave Chinner */ 1968988114SDave Chinner #include "xfs.h" 2068988114SDave Chinner #include "xfs_fs.h" 2170a9883cSDave Chinner #include "xfs_shared.h" 22239880efSDave Chinner #include "xfs_format.h" 23239880efSDave Chinner #include "xfs_log_format.h" 24239880efSDave Chinner #include "xfs_trans_resv.h" 2568988114SDave Chinner #include "xfs_bit.h" 2668988114SDave Chinner #include "xfs_mount.h" 2757062787SDave Chinner #include "xfs_da_format.h" 2868988114SDave Chinner #include "xfs_inode.h" 2968988114SDave Chinner #include "xfs_btree.h" 30239880efSDave Chinner #include "xfs_trans.h" 3168988114SDave Chinner #include "xfs_extfree_item.h" 3268988114SDave Chinner #include "xfs_alloc.h" 3368988114SDave Chinner #include "xfs_bmap.h" 3468988114SDave Chinner #include "xfs_bmap_util.h" 35a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 3668988114SDave 
Chinner #include "xfs_rtalloc.h" 3768988114SDave Chinner #include "xfs_error.h" 3868988114SDave Chinner #include "xfs_quota.h" 3968988114SDave Chinner #include "xfs_trans_space.h" 4068988114SDave Chinner #include "xfs_trace.h" 41c24b5dfaSDave Chinner #include "xfs_icache.h" 42239880efSDave Chinner #include "xfs_log.h" 4368988114SDave Chinner 4468988114SDave Chinner /* Kernel only BMAP related definitions and functions */ 4568988114SDave Chinner 4668988114SDave Chinner /* 4768988114SDave Chinner * Convert the given file system block to a disk block. We have to treat it 4868988114SDave Chinner * differently based on whether the file is a real time file or not, because the 4968988114SDave Chinner * bmap code does. 5068988114SDave Chinner */ 5168988114SDave Chinner xfs_daddr_t 5268988114SDave Chinner xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) 5368988114SDave Chinner { 5468988114SDave Chinner return (XFS_IS_REALTIME_INODE(ip) ? \ 5568988114SDave Chinner (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ 5668988114SDave Chinner XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); 5768988114SDave Chinner } 5868988114SDave Chinner 5968988114SDave Chinner /* 603fbbbea3SDave Chinner * Routine to zero an extent on disk allocated to the specific inode. 613fbbbea3SDave Chinner * 623fbbbea3SDave Chinner * The VFS functions take a linearised filesystem block offset, so we have to 633fbbbea3SDave Chinner * convert the sparse xfs fsb to the right format first. 643fbbbea3SDave Chinner * VFS types are real funky, too. 
653fbbbea3SDave Chinner */ 663fbbbea3SDave Chinner int 673fbbbea3SDave Chinner xfs_zero_extent( 683fbbbea3SDave Chinner struct xfs_inode *ip, 693fbbbea3SDave Chinner xfs_fsblock_t start_fsb, 703fbbbea3SDave Chinner xfs_off_t count_fsb) 713fbbbea3SDave Chinner { 723fbbbea3SDave Chinner struct xfs_mount *mp = ip->i_mount; 733fbbbea3SDave Chinner xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); 743fbbbea3SDave Chinner sector_t block = XFS_BB_TO_FSBT(mp, sector); 753fbbbea3SDave Chinner ssize_t size = XFS_FSB_TO_B(mp, count_fsb); 763fbbbea3SDave Chinner 773fbbbea3SDave Chinner if (IS_DAX(VFS_I(ip))) 7820a90f58SRoss Zwisler return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), 7920a90f58SRoss Zwisler sector, size); 803fbbbea3SDave Chinner 813fbbbea3SDave Chinner /* 823fbbbea3SDave Chinner * let the block layer decide on the fastest method of 833fbbbea3SDave Chinner * implementing the zeroing. 843fbbbea3SDave Chinner */ 853fbbbea3SDave Chinner return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); 863fbbbea3SDave Chinner 873fbbbea3SDave Chinner } 883fbbbea3SDave Chinner 893fbbbea3SDave Chinner /* 9068988114SDave Chinner * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 9168988114SDave Chinner * caller. Frees all the extents that need freeing, which must be done 9268988114SDave Chinner * last due to locking considerations. We never free any extents in 9368988114SDave Chinner * the first transaction. 9468988114SDave Chinner * 95f6106efaSEric Sandeen * If an inode *ip is provided, rejoin it to the transaction if 96f6106efaSEric Sandeen * the transaction was committed. 
9768988114SDave Chinner */ 9868988114SDave Chinner int /* error */ 9968988114SDave Chinner xfs_bmap_finish( 1008d99fe92SBrian Foster struct xfs_trans **tp, /* transaction pointer addr */ 1018d99fe92SBrian Foster struct xfs_bmap_free *flist, /* i/o: list extents to free */ 102f6106efaSEric Sandeen struct xfs_inode *ip) 10368988114SDave Chinner { 1048d99fe92SBrian Foster struct xfs_efd_log_item *efd; /* extent free data */ 1058d99fe92SBrian Foster struct xfs_efi_log_item *efi; /* extent free intention */ 10668988114SDave Chinner int error; /* error return value */ 107f6106efaSEric Sandeen int committed;/* xact committed or not */ 1088d99fe92SBrian Foster struct xfs_bmap_free_item *free; /* free extent item */ 1098d99fe92SBrian Foster struct xfs_bmap_free_item *next; /* next item on free list */ 11068988114SDave Chinner 11168988114SDave Chinner ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 112f6106efaSEric Sandeen if (flist->xbf_count == 0) 11368988114SDave Chinner return 0; 114f6106efaSEric Sandeen 1152e6db6c4SChristoph Hellwig efi = xfs_trans_get_efi(*tp, flist->xbf_count); 11668988114SDave Chinner for (free = flist->xbf_first; free; free = free->xbfi_next) 1172e6db6c4SChristoph Hellwig xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, 11868988114SDave Chinner free->xbfi_blockcount); 1193d3c8b52SJie Liu 120f6106efaSEric Sandeen error = __xfs_trans_roll(tp, ip, &committed); 1218d99fe92SBrian Foster if (error) { 12268988114SDave Chinner /* 1238d99fe92SBrian Foster * If the transaction was committed, drop the EFD reference 1248d99fe92SBrian Foster * since we're bailing out of here. The other reference is 1258d99fe92SBrian Foster * dropped when the EFI hits the AIL. 1268d99fe92SBrian Foster * 1278d99fe92SBrian Foster * If the transaction was not committed, the EFI is freed by the 1288d99fe92SBrian Foster * EFI item unlock handler on abort. 
Also, we have a new 1298d99fe92SBrian Foster * transaction so we should return committed=1 even though we're 1308d99fe92SBrian Foster * returning an error. 13168988114SDave Chinner */ 132f6106efaSEric Sandeen if (committed) { 1338d99fe92SBrian Foster xfs_efi_release(efi); 1348d99fe92SBrian Foster xfs_force_shutdown((*tp)->t_mountp, 1358d99fe92SBrian Foster (error == -EFSCORRUPTED) ? 1368d99fe92SBrian Foster SHUTDOWN_CORRUPT_INCORE : 1378d99fe92SBrian Foster SHUTDOWN_META_IO_ERROR); 1388d99fe92SBrian Foster } 13968988114SDave Chinner return error; 1408d99fe92SBrian Foster } 14168988114SDave Chinner 1426bc43af3SBrian Foster /* 1436bc43af3SBrian Foster * Get an EFD and free each extent in the list, logging to the EFD in 1446bc43af3SBrian Foster * the process. The remaining bmap free list is cleaned up by the caller 1456bc43af3SBrian Foster * on error. 1466bc43af3SBrian Foster */ 1472e6db6c4SChristoph Hellwig efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count); 14868988114SDave Chinner for (free = flist->xbf_first; free != NULL; free = next) { 14968988114SDave Chinner next = free->xbfi_next; 1508d99fe92SBrian Foster 1516bc43af3SBrian Foster error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock, 15268988114SDave Chinner free->xbfi_blockcount); 1538d99fe92SBrian Foster if (error) 1548d99fe92SBrian Foster return error; 1558d99fe92SBrian Foster 15668988114SDave Chinner xfs_bmap_del_free(flist, NULL, free); 15768988114SDave Chinner } 1588d99fe92SBrian Foster 15968988114SDave Chinner return 0; 16068988114SDave Chinner } 16168988114SDave Chinner 16268988114SDave Chinner int 16368988114SDave Chinner xfs_bmap_rtalloc( 16468988114SDave Chinner struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 16568988114SDave Chinner { 16668988114SDave Chinner xfs_alloctype_t atype = 0; /* type for allocation routines */ 16768988114SDave Chinner int error; /* error return value */ 16868988114SDave Chinner xfs_mount_t *mp; /* mount point structure */ 16968988114SDave Chinner 
xfs_extlen_t prod = 0; /* product factor for allocators */ 17068988114SDave Chinner xfs_extlen_t ralen = 0; /* realtime allocation length */ 17168988114SDave Chinner xfs_extlen_t align; /* minimum allocation alignment */ 17268988114SDave Chinner xfs_rtblock_t rtb; 17368988114SDave Chinner 17468988114SDave Chinner mp = ap->ip->i_mount; 17568988114SDave Chinner align = xfs_get_extsz_hint(ap->ip); 17668988114SDave Chinner prod = align / mp->m_sb.sb_rextsize; 17768988114SDave Chinner error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 17868988114SDave Chinner align, 1, ap->eof, 0, 17968988114SDave Chinner ap->conv, &ap->offset, &ap->length); 18068988114SDave Chinner if (error) 18168988114SDave Chinner return error; 18268988114SDave Chinner ASSERT(ap->length); 18368988114SDave Chinner ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); 18468988114SDave Chinner 18568988114SDave Chinner /* 18668988114SDave Chinner * If the offset & length are not perfectly aligned 18768988114SDave Chinner * then kill prod, it will just get us in trouble. 18868988114SDave Chinner */ 18968988114SDave Chinner if (do_mod(ap->offset, align) || ap->length % align) 19068988114SDave Chinner prod = 1; 19168988114SDave Chinner /* 19268988114SDave Chinner * Set ralen to be the actual requested length in rtextents. 19368988114SDave Chinner */ 19468988114SDave Chinner ralen = ap->length / mp->m_sb.sb_rextsize; 19568988114SDave Chinner /* 19668988114SDave Chinner * If the old value was close enough to MAXEXTLEN that 19768988114SDave Chinner * we rounded up to it, cut it back so it's valid again. 19868988114SDave Chinner * Note that if it's a really large request (bigger than 19968988114SDave Chinner * MAXEXTLEN), we don't hear about that number, and can't 20068988114SDave Chinner * adjust the starting point to match it. 
20168988114SDave Chinner */ 20268988114SDave Chinner if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 20368988114SDave Chinner ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 20468988114SDave Chinner 20568988114SDave Chinner /* 2064b680afbSDave Chinner * Lock out modifications to both the RT bitmap and summary inodes 20768988114SDave Chinner */ 20868988114SDave Chinner xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 20968988114SDave Chinner xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 2104b680afbSDave Chinner xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); 2114b680afbSDave Chinner xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); 21268988114SDave Chinner 21368988114SDave Chinner /* 21468988114SDave Chinner * If it's an allocation to an empty file at offset 0, 21568988114SDave Chinner * pick an extent that will space things out in the rt area. 21668988114SDave Chinner */ 21768988114SDave Chinner if (ap->eof && ap->offset == 0) { 21868988114SDave Chinner xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ 21968988114SDave Chinner 22068988114SDave Chinner error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); 22168988114SDave Chinner if (error) 22268988114SDave Chinner return error; 22368988114SDave Chinner ap->blkno = rtx * mp->m_sb.sb_rextsize; 22468988114SDave Chinner } else { 22568988114SDave Chinner ap->blkno = 0; 22668988114SDave Chinner } 22768988114SDave Chinner 22868988114SDave Chinner xfs_bmap_adjacent(ap); 22968988114SDave Chinner 23068988114SDave Chinner /* 23168988114SDave Chinner * Realtime allocation, done through xfs_rtallocate_extent. 23268988114SDave Chinner */ 23368988114SDave Chinner atype = ap->blkno == 0 ? 
XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; 23468988114SDave Chinner do_div(ap->blkno, mp->m_sb.sb_rextsize); 23568988114SDave Chinner rtb = ap->blkno; 23668988114SDave Chinner ap->length = ralen; 23768988114SDave Chinner if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, 23868988114SDave Chinner &ralen, atype, ap->wasdel, prod, &rtb))) 23968988114SDave Chinner return error; 24068988114SDave Chinner if (rtb == NULLFSBLOCK && prod > 1 && 24168988114SDave Chinner (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, 24268988114SDave Chinner ap->length, &ralen, atype, 24368988114SDave Chinner ap->wasdel, 1, &rtb))) 24468988114SDave Chinner return error; 24568988114SDave Chinner ap->blkno = rtb; 24668988114SDave Chinner if (ap->blkno != NULLFSBLOCK) { 24768988114SDave Chinner ap->blkno *= mp->m_sb.sb_rextsize; 24868988114SDave Chinner ralen *= mp->m_sb.sb_rextsize; 24968988114SDave Chinner ap->length = ralen; 25068988114SDave Chinner ap->ip->i_d.di_nblocks += ralen; 25168988114SDave Chinner xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 25268988114SDave Chinner if (ap->wasdel) 25368988114SDave Chinner ap->ip->i_delayed_blks -= ralen; 25468988114SDave Chinner /* 25568988114SDave Chinner * Adjust the disk quota also. This was reserved 25668988114SDave Chinner * earlier. 25768988114SDave Chinner */ 25868988114SDave Chinner xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 25968988114SDave Chinner ap->wasdel ? 
XFS_TRANS_DQ_DELRTBCOUNT : 26068988114SDave Chinner XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 2613fbbbea3SDave Chinner 2623fbbbea3SDave Chinner /* Zero the extent if we were asked to do so */ 2633fbbbea3SDave Chinner if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { 2643fbbbea3SDave Chinner error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); 2653fbbbea3SDave Chinner if (error) 2663fbbbea3SDave Chinner return error; 2673fbbbea3SDave Chinner } 26868988114SDave Chinner } else { 26968988114SDave Chinner ap->length = 0; 27068988114SDave Chinner } 27168988114SDave Chinner return 0; 27268988114SDave Chinner } 27368988114SDave Chinner 27468988114SDave Chinner /* 27568988114SDave Chinner * Check if the endoff is outside the last extent. If so the caller will grow 27668988114SDave Chinner * the allocation to a stripe unit boundary. All offsets are considered outside 27768988114SDave Chinner * the end of file for an empty fork, so 1 is returned in *eof in that case. 27868988114SDave Chinner */ 27968988114SDave Chinner int 28068988114SDave Chinner xfs_bmap_eof( 28168988114SDave Chinner struct xfs_inode *ip, 28268988114SDave Chinner xfs_fileoff_t endoff, 28368988114SDave Chinner int whichfork, 28468988114SDave Chinner int *eof) 28568988114SDave Chinner { 28668988114SDave Chinner struct xfs_bmbt_irec rec; 28768988114SDave Chinner int error; 28868988114SDave Chinner 28968988114SDave Chinner error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); 29068988114SDave Chinner if (error || *eof) 29168988114SDave Chinner return error; 29268988114SDave Chinner 29368988114SDave Chinner *eof = endoff >= rec.br_startoff + rec.br_blockcount; 29468988114SDave Chinner return 0; 29568988114SDave Chinner } 29668988114SDave Chinner 29768988114SDave Chinner /* 29868988114SDave Chinner * Extent tree block counting routines. 29968988114SDave Chinner */ 30068988114SDave Chinner 30168988114SDave Chinner /* 30268988114SDave Chinner * Count leaf blocks given a range of extent records. 
30368988114SDave Chinner */ 30468988114SDave Chinner STATIC void 30568988114SDave Chinner xfs_bmap_count_leaves( 30668988114SDave Chinner xfs_ifork_t *ifp, 30768988114SDave Chinner xfs_extnum_t idx, 30868988114SDave Chinner int numrecs, 30968988114SDave Chinner int *count) 31068988114SDave Chinner { 31168988114SDave Chinner int b; 31268988114SDave Chinner 31368988114SDave Chinner for (b = 0; b < numrecs; b++) { 31468988114SDave Chinner xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); 31568988114SDave Chinner *count += xfs_bmbt_get_blockcount(frp); 31668988114SDave Chinner } 31768988114SDave Chinner } 31868988114SDave Chinner 31968988114SDave Chinner /* 32068988114SDave Chinner * Count leaf blocks given a range of extent records originally 32168988114SDave Chinner * in btree format. 32268988114SDave Chinner */ 32368988114SDave Chinner STATIC void 32468988114SDave Chinner xfs_bmap_disk_count_leaves( 32568988114SDave Chinner struct xfs_mount *mp, 32668988114SDave Chinner struct xfs_btree_block *block, 32768988114SDave Chinner int numrecs, 32868988114SDave Chinner int *count) 32968988114SDave Chinner { 33068988114SDave Chinner int b; 33168988114SDave Chinner xfs_bmbt_rec_t *frp; 33268988114SDave Chinner 33368988114SDave Chinner for (b = 1; b <= numrecs; b++) { 33468988114SDave Chinner frp = XFS_BMBT_REC_ADDR(mp, block, b); 33568988114SDave Chinner *count += xfs_bmbt_disk_get_blockcount(frp); 33668988114SDave Chinner } 33768988114SDave Chinner } 33868988114SDave Chinner 33968988114SDave Chinner /* 34068988114SDave Chinner * Recursively walks each level of a btree 3418be11e92SZhi Yong Wu * to count total fsblocks in use. 
34268988114SDave Chinner */ 34368988114SDave Chinner STATIC int /* error */ 34468988114SDave Chinner xfs_bmap_count_tree( 34568988114SDave Chinner xfs_mount_t *mp, /* file system mount point */ 34668988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 34768988114SDave Chinner xfs_ifork_t *ifp, /* inode fork pointer */ 34868988114SDave Chinner xfs_fsblock_t blockno, /* file system block number */ 34968988114SDave Chinner int levelin, /* level in btree */ 35068988114SDave Chinner int *count) /* Count of blocks */ 35168988114SDave Chinner { 35268988114SDave Chinner int error; 35368988114SDave Chinner xfs_buf_t *bp, *nbp; 35468988114SDave Chinner int level = levelin; 35568988114SDave Chinner __be64 *pp; 35668988114SDave Chinner xfs_fsblock_t bno = blockno; 35768988114SDave Chinner xfs_fsblock_t nextbno; 35868988114SDave Chinner struct xfs_btree_block *block, *nextblock; 35968988114SDave Chinner int numrecs; 36068988114SDave Chinner 36168988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 36268988114SDave Chinner &xfs_bmbt_buf_ops); 36368988114SDave Chinner if (error) 36468988114SDave Chinner return error; 36568988114SDave Chinner *count += 1; 36668988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 36768988114SDave Chinner 36868988114SDave Chinner if (--level) { 36968988114SDave Chinner /* Not at node above leaves, count this level of nodes */ 37068988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 37168988114SDave Chinner while (nextbno != NULLFSBLOCK) { 37268988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 37368988114SDave Chinner XFS_BMAP_BTREE_REF, 37468988114SDave Chinner &xfs_bmbt_buf_ops); 37568988114SDave Chinner if (error) 37668988114SDave Chinner return error; 37768988114SDave Chinner *count += 1; 37868988114SDave Chinner nextblock = XFS_BUF_TO_BLOCK(nbp); 37968988114SDave Chinner nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); 38068988114SDave Chinner 
xfs_trans_brelse(tp, nbp); 38168988114SDave Chinner } 38268988114SDave Chinner 38368988114SDave Chinner /* Dive to the next level */ 38468988114SDave Chinner pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 38568988114SDave Chinner bno = be64_to_cpu(*pp); 38668988114SDave Chinner if (unlikely((error = 38768988114SDave Chinner xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 38868988114SDave Chinner xfs_trans_brelse(tp, bp); 38968988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", 39068988114SDave Chinner XFS_ERRLEVEL_LOW, mp); 3912451337dSDave Chinner return -EFSCORRUPTED; 39268988114SDave Chinner } 39368988114SDave Chinner xfs_trans_brelse(tp, bp); 39468988114SDave Chinner } else { 39568988114SDave Chinner /* count all level 1 nodes and their leaves */ 39668988114SDave Chinner for (;;) { 39768988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 39868988114SDave Chinner numrecs = be16_to_cpu(block->bb_numrecs); 39968988114SDave Chinner xfs_bmap_disk_count_leaves(mp, block, numrecs, count); 40068988114SDave Chinner xfs_trans_brelse(tp, bp); 40168988114SDave Chinner if (nextbno == NULLFSBLOCK) 40268988114SDave Chinner break; 40368988114SDave Chinner bno = nextbno; 40468988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 40568988114SDave Chinner XFS_BMAP_BTREE_REF, 40668988114SDave Chinner &xfs_bmbt_buf_ops); 40768988114SDave Chinner if (error) 40868988114SDave Chinner return error; 40968988114SDave Chinner *count += 1; 41068988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 41168988114SDave Chinner } 41268988114SDave Chinner } 41368988114SDave Chinner return 0; 41468988114SDave Chinner } 41568988114SDave Chinner 41668988114SDave Chinner /* 41768988114SDave Chinner * Count fsblocks of the given fork. 
41868988114SDave Chinner */ 41968988114SDave Chinner int /* error */ 42068988114SDave Chinner xfs_bmap_count_blocks( 42168988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 42268988114SDave Chinner xfs_inode_t *ip, /* incore inode */ 42368988114SDave Chinner int whichfork, /* data or attr fork */ 42468988114SDave Chinner int *count) /* out: count of blocks */ 42568988114SDave Chinner { 42668988114SDave Chinner struct xfs_btree_block *block; /* current btree block */ 42768988114SDave Chinner xfs_fsblock_t bno; /* block # of "block" */ 42868988114SDave Chinner xfs_ifork_t *ifp; /* fork structure */ 42968988114SDave Chinner int level; /* btree level, for checking */ 43068988114SDave Chinner xfs_mount_t *mp; /* file system mount structure */ 43168988114SDave Chinner __be64 *pp; /* pointer to block address */ 43268988114SDave Chinner 43368988114SDave Chinner bno = NULLFSBLOCK; 43468988114SDave Chinner mp = ip->i_mount; 43568988114SDave Chinner ifp = XFS_IFORK_PTR(ip, whichfork); 43668988114SDave Chinner if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 43768988114SDave Chinner xfs_bmap_count_leaves(ifp, 0, 43868988114SDave Chinner ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 43968988114SDave Chinner count); 44068988114SDave Chinner return 0; 44168988114SDave Chinner } 44268988114SDave Chinner 44368988114SDave Chinner /* 44468988114SDave Chinner * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 
44568988114SDave Chinner */ 44668988114SDave Chinner block = ifp->if_broot; 44768988114SDave Chinner level = be16_to_cpu(block->bb_level); 44868988114SDave Chinner ASSERT(level > 0); 44968988114SDave Chinner pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 45068988114SDave Chinner bno = be64_to_cpu(*pp); 451d5cf09baSChristoph Hellwig ASSERT(bno != NULLFSBLOCK); 45268988114SDave Chinner ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 45368988114SDave Chinner ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 45468988114SDave Chinner 45568988114SDave Chinner if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { 45668988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, 45768988114SDave Chinner mp); 4582451337dSDave Chinner return -EFSCORRUPTED; 45968988114SDave Chinner } 46068988114SDave Chinner 46168988114SDave Chinner return 0; 46268988114SDave Chinner } 46368988114SDave Chinner 46468988114SDave Chinner /* 46568988114SDave Chinner * returns 1 for success, 0 if we failed to map the extent. 
46668988114SDave Chinner */ 46768988114SDave Chinner STATIC int 46868988114SDave Chinner xfs_getbmapx_fix_eof_hole( 46968988114SDave Chinner xfs_inode_t *ip, /* xfs incore inode pointer */ 47068988114SDave Chinner struct getbmapx *out, /* output structure */ 47168988114SDave Chinner int prealloced, /* this is a file with 47268988114SDave Chinner * preallocated data space */ 47368988114SDave Chinner __int64_t end, /* last block requested */ 47468988114SDave Chinner xfs_fsblock_t startblock) 47568988114SDave Chinner { 47668988114SDave Chinner __int64_t fixlen; 47768988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 47868988114SDave Chinner xfs_ifork_t *ifp; /* inode fork pointer */ 47968988114SDave Chinner xfs_extnum_t lastx; /* last extent pointer */ 48068988114SDave Chinner xfs_fileoff_t fileblock; 48168988114SDave Chinner 48268988114SDave Chinner if (startblock == HOLESTARTBLOCK) { 48368988114SDave Chinner mp = ip->i_mount; 48468988114SDave Chinner out->bmv_block = -1; 48568988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 48668988114SDave Chinner fixlen -= out->bmv_offset; 48768988114SDave Chinner if (prealloced && out->bmv_offset + out->bmv_length == end) { 48868988114SDave Chinner /* Came to hole at EOF. Trim it. 
*/ 48968988114SDave Chinner if (fixlen <= 0) 49068988114SDave Chinner return 0; 49168988114SDave Chinner out->bmv_length = fixlen; 49268988114SDave Chinner } 49368988114SDave Chinner } else { 49468988114SDave Chinner if (startblock == DELAYSTARTBLOCK) 49568988114SDave Chinner out->bmv_block = -2; 49668988114SDave Chinner else 49768988114SDave Chinner out->bmv_block = xfs_fsb_to_db(ip, startblock); 49868988114SDave Chinner fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); 49968988114SDave Chinner ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 50068988114SDave Chinner if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && 50168988114SDave Chinner (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) 50268988114SDave Chinner out->bmv_oflags |= BMV_OF_LAST; 50368988114SDave Chinner } 50468988114SDave Chinner 50568988114SDave Chinner return 1; 50668988114SDave Chinner } 50768988114SDave Chinner 50868988114SDave Chinner /* 50968988114SDave Chinner * Get inode's extents as described in bmv, and format for output. 51068988114SDave Chinner * Calls formatter to fill the user's buffer until all extents 51168988114SDave Chinner * are mapped, until the passed-in bmv->bmv_count slots have 51268988114SDave Chinner * been filled, or until the formatter short-circuits the loop, 51368988114SDave Chinner * if it is tracking filled-in extents on its own. 
51468988114SDave Chinner */ 51568988114SDave Chinner int /* error code */ 51668988114SDave Chinner xfs_getbmap( 51768988114SDave Chinner xfs_inode_t *ip, 51868988114SDave Chinner struct getbmapx *bmv, /* user bmap structure */ 51968988114SDave Chinner xfs_bmap_format_t formatter, /* format to user */ 52068988114SDave Chinner void *arg) /* formatter arg */ 52168988114SDave Chinner { 52268988114SDave Chinner __int64_t bmvend; /* last block requested */ 52368988114SDave Chinner int error = 0; /* return value */ 52468988114SDave Chinner __int64_t fixlen; /* length for -1 case */ 52568988114SDave Chinner int i; /* extent number */ 52668988114SDave Chinner int lock; /* lock state */ 52768988114SDave Chinner xfs_bmbt_irec_t *map; /* buffer for user's data */ 52868988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 52968988114SDave Chinner int nex; /* # of user extents can do */ 53068988114SDave Chinner int nexleft; /* # of user extents left */ 53168988114SDave Chinner int subnex; /* # of bmapi's can do */ 53268988114SDave Chinner int nmap; /* number of map entries */ 53368988114SDave Chinner struct getbmapx *out; /* output structure */ 53468988114SDave Chinner int whichfork; /* data or attr fork */ 53568988114SDave Chinner int prealloced; /* this is a file with 53668988114SDave Chinner * preallocated data space */ 53768988114SDave Chinner int iflags; /* interface flags */ 53868988114SDave Chinner int bmapi_flags; /* flags for xfs_bmapi */ 53968988114SDave Chinner int cur_ext = 0; 54068988114SDave Chinner 54168988114SDave Chinner mp = ip->i_mount; 54268988114SDave Chinner iflags = bmv->bmv_iflags; 54368988114SDave Chinner whichfork = iflags & BMV_IF_ATTRFORK ? 
XFS_ATTR_FORK : XFS_DATA_FORK; 54468988114SDave Chinner 54568988114SDave Chinner if (whichfork == XFS_ATTR_FORK) { 54668988114SDave Chinner if (XFS_IFORK_Q(ip)) { 54768988114SDave Chinner if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 54868988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && 54968988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 5502451337dSDave Chinner return -EINVAL; 55168988114SDave Chinner } else if (unlikely( 55268988114SDave Chinner ip->i_d.di_aformat != 0 && 55368988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { 55468988114SDave Chinner XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, 55568988114SDave Chinner ip->i_mount); 5562451337dSDave Chinner return -EFSCORRUPTED; 55768988114SDave Chinner } 55868988114SDave Chinner 55968988114SDave Chinner prealloced = 0; 56068988114SDave Chinner fixlen = 1LL << 32; 56168988114SDave Chinner } else { 56268988114SDave Chinner if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 56368988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 56468988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5652451337dSDave Chinner return -EINVAL; 56668988114SDave Chinner 56768988114SDave Chinner if (xfs_get_extsz_hint(ip) || 56868988114SDave Chinner ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 56968988114SDave Chinner prealloced = 1; 57068988114SDave Chinner fixlen = mp->m_super->s_maxbytes; 57168988114SDave Chinner } else { 57268988114SDave Chinner prealloced = 0; 57368988114SDave Chinner fixlen = XFS_ISIZE(ip); 57468988114SDave Chinner } 57568988114SDave Chinner } 57668988114SDave Chinner 57768988114SDave Chinner if (bmv->bmv_length == -1) { 57868988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 57968988114SDave Chinner bmv->bmv_length = 58068988114SDave Chinner max_t(__int64_t, fixlen - bmv->bmv_offset, 0); 58168988114SDave Chinner } else if (bmv->bmv_length == 0) { 58268988114SDave Chinner bmv->bmv_entries 
= 0; 58368988114SDave Chinner return 0; 58468988114SDave Chinner } else if (bmv->bmv_length < 0) { 5852451337dSDave Chinner return -EINVAL; 58668988114SDave Chinner } 58768988114SDave Chinner 58868988114SDave Chinner nex = bmv->bmv_count - 1; 58968988114SDave Chinner if (nex <= 0) 5902451337dSDave Chinner return -EINVAL; 59168988114SDave Chinner bmvend = bmv->bmv_offset + bmv->bmv_length; 59268988114SDave Chinner 59368988114SDave Chinner 59468988114SDave Chinner if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 5952451337dSDave Chinner return -ENOMEM; 596fdd3cceeSDave Chinner out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); 59768988114SDave Chinner if (!out) 5982451337dSDave Chinner return -ENOMEM; 59968988114SDave Chinner 60068988114SDave Chinner xfs_ilock(ip, XFS_IOLOCK_SHARED); 601efa70be1SChristoph Hellwig if (whichfork == XFS_DATA_FORK) { 602efa70be1SChristoph Hellwig if (!(iflags & BMV_IF_DELALLOC) && 603efa70be1SChristoph Hellwig (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { 6042451337dSDave Chinner error = filemap_write_and_wait(VFS_I(ip)->i_mapping); 60568988114SDave Chinner if (error) 60668988114SDave Chinner goto out_unlock_iolock; 607efa70be1SChristoph Hellwig 60868988114SDave Chinner /* 609efa70be1SChristoph Hellwig * Even after flushing the inode, there can still be 610efa70be1SChristoph Hellwig * delalloc blocks on the inode beyond EOF due to 611efa70be1SChristoph Hellwig * speculative preallocation. These are not removed 612efa70be1SChristoph Hellwig * until the release function is called or the inode 613efa70be1SChristoph Hellwig * is inactivated. Hence we cannot assert here that 614efa70be1SChristoph Hellwig * ip->i_delayed_blks == 0. 
61568988114SDave Chinner */ 61668988114SDave Chinner } 61768988114SDave Chinner 618309ecac8SChristoph Hellwig lock = xfs_ilock_data_map_shared(ip); 619efa70be1SChristoph Hellwig } else { 620efa70be1SChristoph Hellwig lock = xfs_ilock_attr_map_shared(ip); 621efa70be1SChristoph Hellwig } 62268988114SDave Chinner 62368988114SDave Chinner /* 62468988114SDave Chinner * Don't let nex be bigger than the number of extents 62568988114SDave Chinner * we can have assuming alternating holes and real extents. 62668988114SDave Chinner */ 62768988114SDave Chinner if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 62868988114SDave Chinner nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 62968988114SDave Chinner 63068988114SDave Chinner bmapi_flags = xfs_bmapi_aflag(whichfork); 63168988114SDave Chinner if (!(iflags & BMV_IF_PREALLOC)) 63268988114SDave Chinner bmapi_flags |= XFS_BMAPI_IGSTATE; 63368988114SDave Chinner 63468988114SDave Chinner /* 63568988114SDave Chinner * Allocate enough space to handle "subnex" maps at a time. 63668988114SDave Chinner */ 6372451337dSDave Chinner error = -ENOMEM; 63868988114SDave Chinner subnex = 16; 63968988114SDave Chinner map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); 64068988114SDave Chinner if (!map) 64168988114SDave Chinner goto out_unlock_ilock; 64268988114SDave Chinner 64368988114SDave Chinner bmv->bmv_entries = 0; 64468988114SDave Chinner 64568988114SDave Chinner if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && 64668988114SDave Chinner (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { 64768988114SDave Chinner error = 0; 64868988114SDave Chinner goto out_free_map; 64968988114SDave Chinner } 65068988114SDave Chinner 65168988114SDave Chinner nexleft = nex; 65268988114SDave Chinner 65368988114SDave Chinner do { 65468988114SDave Chinner nmap = (nexleft > subnex) ? 
subnex : nexleft; 65568988114SDave Chinner error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 65668988114SDave Chinner XFS_BB_TO_FSB(mp, bmv->bmv_length), 65768988114SDave Chinner map, &nmap, bmapi_flags); 65868988114SDave Chinner if (error) 65968988114SDave Chinner goto out_free_map; 66068988114SDave Chinner ASSERT(nmap <= subnex); 66168988114SDave Chinner 66268988114SDave Chinner for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 66368988114SDave Chinner out[cur_ext].bmv_oflags = 0; 66468988114SDave Chinner if (map[i].br_state == XFS_EXT_UNWRITTEN) 66568988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; 66668988114SDave Chinner else if (map[i].br_startblock == DELAYSTARTBLOCK) 66768988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; 66868988114SDave Chinner out[cur_ext].bmv_offset = 66968988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_startoff); 67068988114SDave Chinner out[cur_ext].bmv_length = 67168988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_blockcount); 67268988114SDave Chinner out[cur_ext].bmv_unused1 = 0; 67368988114SDave Chinner out[cur_ext].bmv_unused2 = 0; 67468988114SDave Chinner 67568988114SDave Chinner /* 67668988114SDave Chinner * delayed allocation extents that start beyond EOF can 67768988114SDave Chinner * occur due to speculative EOF allocation when the 67868988114SDave Chinner * delalloc extent is larger than the largest freespace 67968988114SDave Chinner * extent at conversion time. These extents cannot be 68068988114SDave Chinner * converted by data writeback, so can exist here even 68168988114SDave Chinner * if we are not supposed to be finding delalloc 68268988114SDave Chinner * extents. 
68368988114SDave Chinner */ 68468988114SDave Chinner if (map[i].br_startblock == DELAYSTARTBLOCK && 68568988114SDave Chinner map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) 68668988114SDave Chinner ASSERT((iflags & BMV_IF_DELALLOC) != 0); 68768988114SDave Chinner 68868988114SDave Chinner if (map[i].br_startblock == HOLESTARTBLOCK && 68968988114SDave Chinner whichfork == XFS_ATTR_FORK) { 69068988114SDave Chinner /* came to the end of attribute fork */ 69168988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_LAST; 69268988114SDave Chinner goto out_free_map; 69368988114SDave Chinner } 69468988114SDave Chinner 69568988114SDave Chinner if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], 69668988114SDave Chinner prealloced, bmvend, 69768988114SDave Chinner map[i].br_startblock)) 69868988114SDave Chinner goto out_free_map; 69968988114SDave Chinner 70068988114SDave Chinner bmv->bmv_offset = 70168988114SDave Chinner out[cur_ext].bmv_offset + 70268988114SDave Chinner out[cur_ext].bmv_length; 70368988114SDave Chinner bmv->bmv_length = 70468988114SDave Chinner max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 70568988114SDave Chinner 70668988114SDave Chinner /* 70768988114SDave Chinner * In case we don't want to return the hole, 70868988114SDave Chinner * don't increase cur_ext so that we can reuse 70968988114SDave Chinner * it in the next loop. 
71068988114SDave Chinner */ 71168988114SDave Chinner if ((iflags & BMV_IF_NO_HOLES) && 71268988114SDave Chinner map[i].br_startblock == HOLESTARTBLOCK) { 71368988114SDave Chinner memset(&out[cur_ext], 0, sizeof(out[cur_ext])); 71468988114SDave Chinner continue; 71568988114SDave Chinner } 71668988114SDave Chinner 71768988114SDave Chinner nexleft--; 71868988114SDave Chinner bmv->bmv_entries++; 71968988114SDave Chinner cur_ext++; 72068988114SDave Chinner } 72168988114SDave Chinner } while (nmap && nexleft && bmv->bmv_length); 72268988114SDave Chinner 72368988114SDave Chinner out_free_map: 72468988114SDave Chinner kmem_free(map); 72568988114SDave Chinner out_unlock_ilock: 72601f4f327SChristoph Hellwig xfs_iunlock(ip, lock); 72768988114SDave Chinner out_unlock_iolock: 72868988114SDave Chinner xfs_iunlock(ip, XFS_IOLOCK_SHARED); 72968988114SDave Chinner 73068988114SDave Chinner for (i = 0; i < cur_ext; i++) { 73168988114SDave Chinner int full = 0; /* user array is full */ 73268988114SDave Chinner 73368988114SDave Chinner /* format results & advance arg */ 73468988114SDave Chinner error = formatter(&arg, &out[i], &full); 73568988114SDave Chinner if (error || full) 73668988114SDave Chinner break; 73768988114SDave Chinner } 73868988114SDave Chinner 73968988114SDave Chinner kmem_free(out); 74068988114SDave Chinner return error; 74168988114SDave Chinner } 74268988114SDave Chinner 74368988114SDave Chinner /* 74468988114SDave Chinner * dead simple method of punching delalyed allocation blocks from a range in 74568988114SDave Chinner * the inode. Walks a block at a time so will be slow, but is only executed in 746ad4809bfSZhi Yong Wu * rare error cases so the overhead is not critical. This will always punch out 74768988114SDave Chinner * both the start and end blocks, even if the ranges only partially overlap 74868988114SDave Chinner * them, so it is up to the caller to ensure that partial blocks are not 74968988114SDave Chinner * passed in. 
75068988114SDave Chinner */ 75168988114SDave Chinner int 75268988114SDave Chinner xfs_bmap_punch_delalloc_range( 75368988114SDave Chinner struct xfs_inode *ip, 75468988114SDave Chinner xfs_fileoff_t start_fsb, 75568988114SDave Chinner xfs_fileoff_t length) 75668988114SDave Chinner { 75768988114SDave Chinner xfs_fileoff_t remaining = length; 75868988114SDave Chinner int error = 0; 75968988114SDave Chinner 76068988114SDave Chinner ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 76168988114SDave Chinner 76268988114SDave Chinner do { 76368988114SDave Chinner int done; 76468988114SDave Chinner xfs_bmbt_irec_t imap; 76568988114SDave Chinner int nimaps = 1; 76668988114SDave Chinner xfs_fsblock_t firstblock; 76768988114SDave Chinner xfs_bmap_free_t flist; 76868988114SDave Chinner 76968988114SDave Chinner /* 77068988114SDave Chinner * Map the range first and check that it is a delalloc extent 77168988114SDave Chinner * before trying to unmap the range. Otherwise we will be 77268988114SDave Chinner * trying to remove a real extent (which requires a 77368988114SDave Chinner * transaction) or a hole, which is probably a bad idea... 
77468988114SDave Chinner */ 77568988114SDave Chinner error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps, 77668988114SDave Chinner XFS_BMAPI_ENTIRE); 77768988114SDave Chinner 77868988114SDave Chinner if (error) { 77968988114SDave Chinner /* something screwed, just bail */ 78068988114SDave Chinner if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 78168988114SDave Chinner xfs_alert(ip->i_mount, 78268988114SDave Chinner "Failed delalloc mapping lookup ino %lld fsb %lld.", 78368988114SDave Chinner ip->i_ino, start_fsb); 78468988114SDave Chinner } 78568988114SDave Chinner break; 78668988114SDave Chinner } 78768988114SDave Chinner if (!nimaps) { 78868988114SDave Chinner /* nothing there */ 78968988114SDave Chinner goto next_block; 79068988114SDave Chinner } 79168988114SDave Chinner if (imap.br_startblock != DELAYSTARTBLOCK) { 79268988114SDave Chinner /* been converted, ignore */ 79368988114SDave Chinner goto next_block; 79468988114SDave Chinner } 79568988114SDave Chinner WARN_ON(imap.br_blockcount == 0); 79668988114SDave Chinner 79768988114SDave Chinner /* 79868988114SDave Chinner * Note: while we initialise the firstblock/flist pair, they 79968988114SDave Chinner * should never be used because blocks should never be 80068988114SDave Chinner * allocated or freed for a delalloc extent and hence we need 80168988114SDave Chinner * don't cancel or finish them after the xfs_bunmapi() call. 
80268988114SDave Chinner */ 80368988114SDave Chinner xfs_bmap_init(&flist, &firstblock); 80468988114SDave Chinner error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, 80568988114SDave Chinner &flist, &done); 80668988114SDave Chinner if (error) 80768988114SDave Chinner break; 80868988114SDave Chinner 80968988114SDave Chinner ASSERT(!flist.xbf_count && !flist.xbf_first); 81068988114SDave Chinner next_block: 81168988114SDave Chinner start_fsb++; 81268988114SDave Chinner remaining--; 81368988114SDave Chinner } while(remaining > 0); 81468988114SDave Chinner 81568988114SDave Chinner return error; 81668988114SDave Chinner } 817c24b5dfaSDave Chinner 818c24b5dfaSDave Chinner /* 819c24b5dfaSDave Chinner * Test whether it is appropriate to check an inode for and free post EOF 820c24b5dfaSDave Chinner * blocks. The 'force' parameter determines whether we should also consider 821c24b5dfaSDave Chinner * regular files that are marked preallocated or append-only. 822c24b5dfaSDave Chinner */ 823c24b5dfaSDave Chinner bool 824c24b5dfaSDave Chinner xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) 825c24b5dfaSDave Chinner { 826c24b5dfaSDave Chinner /* prealloc/delalloc exists only on regular files */ 827c19b3b05SDave Chinner if (!S_ISREG(VFS_I(ip)->i_mode)) 828c24b5dfaSDave Chinner return false; 829c24b5dfaSDave Chinner 830c24b5dfaSDave Chinner /* 831c24b5dfaSDave Chinner * Zero sized files with no cached pages and delalloc blocks will not 832c24b5dfaSDave Chinner * have speculative prealloc/delalloc blocks to remove. 833c24b5dfaSDave Chinner */ 834c24b5dfaSDave Chinner if (VFS_I(ip)->i_size == 0 && 8352667c6f9SDave Chinner VFS_I(ip)->i_mapping->nrpages == 0 && 836c24b5dfaSDave Chinner ip->i_delayed_blks == 0) 837c24b5dfaSDave Chinner return false; 838c24b5dfaSDave Chinner 839c24b5dfaSDave Chinner /* If we haven't read in the extent list, then don't do it now. 
*/ 840c24b5dfaSDave Chinner if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) 841c24b5dfaSDave Chinner return false; 842c24b5dfaSDave Chinner 843c24b5dfaSDave Chinner /* 844c24b5dfaSDave Chinner * Do not free real preallocated or append-only files unless the file 845c24b5dfaSDave Chinner * has delalloc blocks and we are forced to remove them. 846c24b5dfaSDave Chinner */ 847c24b5dfaSDave Chinner if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) 848c24b5dfaSDave Chinner if (!force || ip->i_delayed_blks == 0) 849c24b5dfaSDave Chinner return false; 850c24b5dfaSDave Chinner 851c24b5dfaSDave Chinner return true; 852c24b5dfaSDave Chinner } 853c24b5dfaSDave Chinner 854c24b5dfaSDave Chinner /* 855c24b5dfaSDave Chinner * This is called by xfs_inactive to free any blocks beyond eof 856c24b5dfaSDave Chinner * when the link count isn't zero and by xfs_dm_punch_hole() when 857c24b5dfaSDave Chinner * punching a hole to EOF. 858c24b5dfaSDave Chinner */ 859c24b5dfaSDave Chinner int 860c24b5dfaSDave Chinner xfs_free_eofblocks( 861c24b5dfaSDave Chinner xfs_mount_t *mp, 862c24b5dfaSDave Chinner xfs_inode_t *ip, 863c24b5dfaSDave Chinner bool need_iolock) 864c24b5dfaSDave Chinner { 865c24b5dfaSDave Chinner xfs_trans_t *tp; 866c24b5dfaSDave Chinner int error; 867c24b5dfaSDave Chinner xfs_fileoff_t end_fsb; 868c24b5dfaSDave Chinner xfs_fileoff_t last_fsb; 869c24b5dfaSDave Chinner xfs_filblks_t map_len; 870c24b5dfaSDave Chinner int nimaps; 871c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 872c24b5dfaSDave Chinner 873c24b5dfaSDave Chinner /* 874c24b5dfaSDave Chinner * Figure out if there are any blocks beyond the end 875c24b5dfaSDave Chinner * of the file. If not, then there is nothing to do. 
876c24b5dfaSDave Chinner */ 877c24b5dfaSDave Chinner end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); 878c24b5dfaSDave Chinner last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 879c24b5dfaSDave Chinner if (last_fsb <= end_fsb) 880c24b5dfaSDave Chinner return 0; 881c24b5dfaSDave Chinner map_len = last_fsb - end_fsb; 882c24b5dfaSDave Chinner 883c24b5dfaSDave Chinner nimaps = 1; 884c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_SHARED); 885c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); 886c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_SHARED); 887c24b5dfaSDave Chinner 888c24b5dfaSDave Chinner if (!error && (nimaps != 0) && 889c24b5dfaSDave Chinner (imap.br_startblock != HOLESTARTBLOCK || 890c24b5dfaSDave Chinner ip->i_delayed_blks)) { 891c24b5dfaSDave Chinner /* 892c24b5dfaSDave Chinner * Attach the dquots to the inode up front. 893c24b5dfaSDave Chinner */ 894c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 895c24b5dfaSDave Chinner if (error) 896c24b5dfaSDave Chinner return error; 897c24b5dfaSDave Chinner 898c24b5dfaSDave Chinner /* 899c24b5dfaSDave Chinner * There are blocks after the end of file. 900c24b5dfaSDave Chinner * Free them up now by truncating the file to 901c24b5dfaSDave Chinner * its current size. 
902c24b5dfaSDave Chinner */ 903c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 904c24b5dfaSDave Chinner 905c24b5dfaSDave Chinner if (need_iolock) { 906c24b5dfaSDave Chinner if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 9074906e215SChristoph Hellwig xfs_trans_cancel(tp); 9082451337dSDave Chinner return -EAGAIN; 909c24b5dfaSDave Chinner } 910c24b5dfaSDave Chinner } 911c24b5dfaSDave Chinner 9123d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 913c24b5dfaSDave Chinner if (error) { 914c24b5dfaSDave Chinner ASSERT(XFS_FORCED_SHUTDOWN(mp)); 9154906e215SChristoph Hellwig xfs_trans_cancel(tp); 916c24b5dfaSDave Chinner if (need_iolock) 917c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 918c24b5dfaSDave Chinner return error; 919c24b5dfaSDave Chinner } 920c24b5dfaSDave Chinner 921c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 922c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 923c24b5dfaSDave Chinner 924c24b5dfaSDave Chinner /* 925c24b5dfaSDave Chinner * Do not update the on-disk file size. If we update the 926c24b5dfaSDave Chinner * on-disk file size and then the system crashes before the 927c24b5dfaSDave Chinner * contents of the file are flushed to disk then the files 928c24b5dfaSDave Chinner * may be full of holes (ie NULL files bug). 929c24b5dfaSDave Chinner */ 930c24b5dfaSDave Chinner error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 931c24b5dfaSDave Chinner XFS_ISIZE(ip)); 932c24b5dfaSDave Chinner if (error) { 933c24b5dfaSDave Chinner /* 934c24b5dfaSDave Chinner * If we get an error at this point we simply don't 935c24b5dfaSDave Chinner * bother truncating the file. 
936c24b5dfaSDave Chinner */ 9374906e215SChristoph Hellwig xfs_trans_cancel(tp); 938c24b5dfaSDave Chinner } else { 93970393313SChristoph Hellwig error = xfs_trans_commit(tp); 940c24b5dfaSDave Chinner if (!error) 941c24b5dfaSDave Chinner xfs_inode_clear_eofblocks_tag(ip); 942c24b5dfaSDave Chinner } 943c24b5dfaSDave Chinner 944c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 945c24b5dfaSDave Chinner if (need_iolock) 946c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 947c24b5dfaSDave Chinner } 948c24b5dfaSDave Chinner return error; 949c24b5dfaSDave Chinner } 950c24b5dfaSDave Chinner 95183aee9e4SChristoph Hellwig int 952c24b5dfaSDave Chinner xfs_alloc_file_space( 95383aee9e4SChristoph Hellwig struct xfs_inode *ip, 954c24b5dfaSDave Chinner xfs_off_t offset, 955c24b5dfaSDave Chinner xfs_off_t len, 9565f8aca8bSChristoph Hellwig int alloc_type) 957c24b5dfaSDave Chinner { 958c24b5dfaSDave Chinner xfs_mount_t *mp = ip->i_mount; 959c24b5dfaSDave Chinner xfs_off_t count; 960c24b5dfaSDave Chinner xfs_filblks_t allocated_fsb; 961c24b5dfaSDave Chinner xfs_filblks_t allocatesize_fsb; 962c24b5dfaSDave Chinner xfs_extlen_t extsz, temp; 963c24b5dfaSDave Chinner xfs_fileoff_t startoffset_fsb; 964c24b5dfaSDave Chinner xfs_fsblock_t firstfsb; 965c24b5dfaSDave Chinner int nimaps; 966c24b5dfaSDave Chinner int quota_flag; 967c24b5dfaSDave Chinner int rt; 968c24b5dfaSDave Chinner xfs_trans_t *tp; 969c24b5dfaSDave Chinner xfs_bmbt_irec_t imaps[1], *imapp; 970c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 971c24b5dfaSDave Chinner uint qblocks, resblks, resrtextents; 972c24b5dfaSDave Chinner int error; 973c24b5dfaSDave Chinner 974c24b5dfaSDave Chinner trace_xfs_alloc_file_space(ip); 975c24b5dfaSDave Chinner 976c24b5dfaSDave Chinner if (XFS_FORCED_SHUTDOWN(mp)) 9772451337dSDave Chinner return -EIO; 978c24b5dfaSDave Chinner 979c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 980c24b5dfaSDave Chinner if (error) 981c24b5dfaSDave Chinner return error; 982c24b5dfaSDave Chinner 
983c24b5dfaSDave Chinner if (len <= 0) 9842451337dSDave Chinner return -EINVAL; 985c24b5dfaSDave Chinner 986c24b5dfaSDave Chinner rt = XFS_IS_REALTIME_INODE(ip); 987c24b5dfaSDave Chinner extsz = xfs_get_extsz_hint(ip); 988c24b5dfaSDave Chinner 989c24b5dfaSDave Chinner count = len; 990c24b5dfaSDave Chinner imapp = &imaps[0]; 991c24b5dfaSDave Chinner nimaps = 1; 992c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 993c24b5dfaSDave Chinner allocatesize_fsb = XFS_B_TO_FSB(mp, count); 994c24b5dfaSDave Chinner 995c24b5dfaSDave Chinner /* 996c24b5dfaSDave Chinner * Allocate file space until done or until there is an error 997c24b5dfaSDave Chinner */ 998c24b5dfaSDave Chinner while (allocatesize_fsb && !error) { 999c24b5dfaSDave Chinner xfs_fileoff_t s, e; 1000c24b5dfaSDave Chinner 1001c24b5dfaSDave Chinner /* 1002c24b5dfaSDave Chinner * Determine space reservations for data/realtime. 1003c24b5dfaSDave Chinner */ 1004c24b5dfaSDave Chinner if (unlikely(extsz)) { 1005c24b5dfaSDave Chinner s = startoffset_fsb; 1006c24b5dfaSDave Chinner do_div(s, extsz); 1007c24b5dfaSDave Chinner s *= extsz; 1008c24b5dfaSDave Chinner e = startoffset_fsb + allocatesize_fsb; 1009c24b5dfaSDave Chinner if ((temp = do_mod(startoffset_fsb, extsz))) 1010c24b5dfaSDave Chinner e += temp; 1011c24b5dfaSDave Chinner if ((temp = do_mod(e, extsz))) 1012c24b5dfaSDave Chinner e += extsz - temp; 1013c24b5dfaSDave Chinner } else { 1014c24b5dfaSDave Chinner s = 0; 1015c24b5dfaSDave Chinner e = allocatesize_fsb; 1016c24b5dfaSDave Chinner } 1017c24b5dfaSDave Chinner 1018c24b5dfaSDave Chinner /* 1019c24b5dfaSDave Chinner * The transaction reservation is limited to a 32-bit block 1020c24b5dfaSDave Chinner * count, hence we need to limit the number of blocks we are 1021c24b5dfaSDave Chinner * trying to reserve to avoid an overflow. 
We can't allocate 1022c24b5dfaSDave Chinner * more than @nimaps extents, and an extent is limited on disk 1023c24b5dfaSDave Chinner * to MAXEXTLEN (21 bits), so use that to enforce the limit. 1024c24b5dfaSDave Chinner */ 1025c24b5dfaSDave Chinner resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); 1026c24b5dfaSDave Chinner if (unlikely(rt)) { 1027c24b5dfaSDave Chinner resrtextents = qblocks = resblks; 1028c24b5dfaSDave Chinner resrtextents /= mp->m_sb.sb_rextsize; 1029c24b5dfaSDave Chinner resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1030c24b5dfaSDave Chinner quota_flag = XFS_QMOPT_RES_RTBLKS; 1031c24b5dfaSDave Chinner } else { 1032c24b5dfaSDave Chinner resrtextents = 0; 1033c24b5dfaSDave Chinner resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); 1034c24b5dfaSDave Chinner quota_flag = XFS_QMOPT_RES_REGBLKS; 1035c24b5dfaSDave Chinner } 1036c24b5dfaSDave Chinner 1037c24b5dfaSDave Chinner /* 1038c24b5dfaSDave Chinner * Allocate and setup the transaction. 1039c24b5dfaSDave Chinner */ 1040c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 10413d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 10423d3c8b52SJie Liu resblks, resrtextents); 1043c24b5dfaSDave Chinner /* 1044c24b5dfaSDave Chinner * Check for running out of space 1045c24b5dfaSDave Chinner */ 1046c24b5dfaSDave Chinner if (error) { 1047c24b5dfaSDave Chinner /* 1048c24b5dfaSDave Chinner * Free the transaction structure. 
1049c24b5dfaSDave Chinner */ 10502451337dSDave Chinner ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 10514906e215SChristoph Hellwig xfs_trans_cancel(tp); 1052c24b5dfaSDave Chinner break; 1053c24b5dfaSDave Chinner } 1054c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 1055c24b5dfaSDave Chinner error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 1056c24b5dfaSDave Chinner 0, quota_flag); 1057c24b5dfaSDave Chinner if (error) 1058c24b5dfaSDave Chinner goto error1; 1059c24b5dfaSDave Chinner 1060c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 1061c24b5dfaSDave Chinner 1062c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &firstfsb); 1063c24b5dfaSDave Chinner error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1064c24b5dfaSDave Chinner allocatesize_fsb, alloc_type, &firstfsb, 1065dbd5c8c9SBrian Foster resblks, imapp, &nimaps, &free_list); 1066f6106efaSEric Sandeen if (error) 1067c24b5dfaSDave Chinner goto error0; 1068c24b5dfaSDave Chinner 1069c24b5dfaSDave Chinner /* 1070c24b5dfaSDave Chinner * Complete the transaction 1071c24b5dfaSDave Chinner */ 1072f6106efaSEric Sandeen error = xfs_bmap_finish(&tp, &free_list, NULL); 1073f6106efaSEric Sandeen if (error) 1074c24b5dfaSDave Chinner goto error0; 1075c24b5dfaSDave Chinner 107670393313SChristoph Hellwig error = xfs_trans_commit(tp); 1077c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1078f6106efaSEric Sandeen if (error) 1079c24b5dfaSDave Chinner break; 1080c24b5dfaSDave Chinner 1081c24b5dfaSDave Chinner allocated_fsb = imapp->br_blockcount; 1082c24b5dfaSDave Chinner 1083c24b5dfaSDave Chinner if (nimaps == 0) { 10842451337dSDave Chinner error = -ENOSPC; 1085c24b5dfaSDave Chinner break; 1086c24b5dfaSDave Chinner } 1087c24b5dfaSDave Chinner 1088c24b5dfaSDave Chinner startoffset_fsb += allocated_fsb; 1089c24b5dfaSDave Chinner allocatesize_fsb -= allocated_fsb; 1090c24b5dfaSDave Chinner } 1091c24b5dfaSDave Chinner 1092c24b5dfaSDave Chinner return error; 1093c24b5dfaSDave Chinner 1094c24b5dfaSDave Chinner error0: 
/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1095c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1096c24b5dfaSDave Chinner xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1097c24b5dfaSDave Chinner 1098c24b5dfaSDave Chinner error1: /* Just cancel transaction */ 10994906e215SChristoph Hellwig xfs_trans_cancel(tp); 1100c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1101c24b5dfaSDave Chinner return error; 1102c24b5dfaSDave Chinner } 1103c24b5dfaSDave Chinner 1104c24b5dfaSDave Chinner /* 1105c24b5dfaSDave Chinner * Zero file bytes between startoff and endoff inclusive. 1106c24b5dfaSDave Chinner * The iolock is held exclusive and no blocks are buffered. 1107c24b5dfaSDave Chinner * 1108c24b5dfaSDave Chinner * This function is used by xfs_free_file_space() to zero 1109c24b5dfaSDave Chinner * partial blocks when the range to free is not block aligned. 1110c24b5dfaSDave Chinner * When unreserving space with boundaries that are not block 1111c24b5dfaSDave Chinner * aligned we round up the start and round down the end 1112c24b5dfaSDave Chinner * boundaries and then use this function to zero the parts of 1113c24b5dfaSDave Chinner * the blocks that got dropped during the rounding. 
1114c24b5dfaSDave Chinner */ 1115c24b5dfaSDave Chinner STATIC int 1116c24b5dfaSDave Chinner xfs_zero_remaining_bytes( 1117c24b5dfaSDave Chinner xfs_inode_t *ip, 1118c24b5dfaSDave Chinner xfs_off_t startoff, 1119c24b5dfaSDave Chinner xfs_off_t endoff) 1120c24b5dfaSDave Chinner { 1121c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 1122c24b5dfaSDave Chinner xfs_fileoff_t offset_fsb; 1123c24b5dfaSDave Chinner xfs_off_t lastoffset; 1124c24b5dfaSDave Chinner xfs_off_t offset; 1125c24b5dfaSDave Chinner xfs_buf_t *bp; 1126c24b5dfaSDave Chinner xfs_mount_t *mp = ip->i_mount; 1127c24b5dfaSDave Chinner int nimap; 1128c24b5dfaSDave Chinner int error = 0; 1129c24b5dfaSDave Chinner 1130c24b5dfaSDave Chinner /* 1131c24b5dfaSDave Chinner * Avoid doing I/O beyond eof - it's not necessary 1132c24b5dfaSDave Chinner * since nothing can read beyond eof. The space will 1133c24b5dfaSDave Chinner * be zeroed when the file is extended anyway. 1134c24b5dfaSDave Chinner */ 1135c24b5dfaSDave Chinner if (startoff >= XFS_ISIZE(ip)) 1136c24b5dfaSDave Chinner return 0; 1137c24b5dfaSDave Chinner 1138c24b5dfaSDave Chinner if (endoff > XFS_ISIZE(ip)) 1139c24b5dfaSDave Chinner endoff = XFS_ISIZE(ip); 1140c24b5dfaSDave Chinner 1141c24b5dfaSDave Chinner for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 11424f317369SChristoph Hellwig uint lock_mode; 11434f317369SChristoph Hellwig 1144c24b5dfaSDave Chinner offset_fsb = XFS_B_TO_FSBT(mp, offset); 1145c24b5dfaSDave Chinner nimap = 1; 11464f317369SChristoph Hellwig 11474f317369SChristoph Hellwig lock_mode = xfs_ilock_data_map_shared(ip); 1148c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); 11494f317369SChristoph Hellwig xfs_iunlock(ip, lock_mode); 11504f317369SChristoph Hellwig 1151c24b5dfaSDave Chinner if (error || nimap < 1) 1152c24b5dfaSDave Chinner break; 1153c24b5dfaSDave Chinner ASSERT(imap.br_blockcount >= 1); 1154c24b5dfaSDave Chinner ASSERT(imap.br_startoff == offset_fsb); 11554f69f578SDave Chinner 
ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 11564f69f578SDave Chinner 11574f69f578SDave Chinner if (imap.br_startblock == HOLESTARTBLOCK || 11584f69f578SDave Chinner imap.br_state == XFS_EXT_UNWRITTEN) { 11594f69f578SDave Chinner /* skip the entire extent */ 11604f69f578SDave Chinner lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 11614f69f578SDave Chinner imap.br_blockcount) - 1; 11624f69f578SDave Chinner continue; 11634f69f578SDave Chinner } 11644f69f578SDave Chinner 1165c24b5dfaSDave Chinner lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 1166c24b5dfaSDave Chinner if (lastoffset > endoff) 1167c24b5dfaSDave Chinner lastoffset = endoff; 11684f69f578SDave Chinner 11694f69f578SDave Chinner /* DAX can just zero the backing device directly */ 11704f69f578SDave Chinner if (IS_DAX(VFS_I(ip))) { 11714f69f578SDave Chinner error = dax_zero_page_range(VFS_I(ip), offset, 11724f69f578SDave Chinner lastoffset - offset + 1, 11734f69f578SDave Chinner xfs_get_blocks_direct); 11744f69f578SDave Chinner if (error) 11754f69f578SDave Chinner return error; 1176c24b5dfaSDave Chinner continue; 11774f69f578SDave Chinner } 117883a0adc3SChristoph Hellwig 11798c156125SChristoph Hellwig error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? 
11808c156125SChristoph Hellwig mp->m_rtdev_targp : mp->m_ddev_targp, 11818c156125SChristoph Hellwig xfs_fsb_to_db(ip, imap.br_startblock), 11828c156125SChristoph Hellwig BTOBB(mp->m_sb.sb_blocksize), 11838c156125SChristoph Hellwig 0, &bp, NULL); 11848c156125SChristoph Hellwig if (error) 11858c156125SChristoph Hellwig return error; 11868c156125SChristoph Hellwig 1187c24b5dfaSDave Chinner memset(bp->b_addr + 1188c24b5dfaSDave Chinner (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 1189c24b5dfaSDave Chinner 0, lastoffset - offset + 1); 119083a0adc3SChristoph Hellwig 11918c156125SChristoph Hellwig error = xfs_bwrite(bp); 11928c156125SChristoph Hellwig xfs_buf_relse(bp); 11938c156125SChristoph Hellwig if (error) 11948c156125SChristoph Hellwig return error; 119583a0adc3SChristoph Hellwig } 1196c24b5dfaSDave Chinner return error; 1197c24b5dfaSDave Chinner } 1198c24b5dfaSDave Chinner 119983aee9e4SChristoph Hellwig int 1200c24b5dfaSDave Chinner xfs_free_file_space( 120183aee9e4SChristoph Hellwig struct xfs_inode *ip, 1202c24b5dfaSDave Chinner xfs_off_t offset, 12035f8aca8bSChristoph Hellwig xfs_off_t len) 1204c24b5dfaSDave Chinner { 1205c24b5dfaSDave Chinner int done; 1206c24b5dfaSDave Chinner xfs_fileoff_t endoffset_fsb; 1207c24b5dfaSDave Chinner int error; 1208c24b5dfaSDave Chinner xfs_fsblock_t firstfsb; 1209c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 1210c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 1211c24b5dfaSDave Chinner xfs_off_t ioffset; 12128b5279e3SBrian Foster xfs_off_t iendoffset; 1213c24b5dfaSDave Chinner xfs_extlen_t mod=0; 1214c24b5dfaSDave Chinner xfs_mount_t *mp; 1215c24b5dfaSDave Chinner int nimap; 1216c24b5dfaSDave Chinner uint resblks; 1217c24b5dfaSDave Chinner xfs_off_t rounding; 1218c24b5dfaSDave Chinner int rt; 1219c24b5dfaSDave Chinner xfs_fileoff_t startoffset_fsb; 1220c24b5dfaSDave Chinner xfs_trans_t *tp; 1221c24b5dfaSDave Chinner 1222c24b5dfaSDave Chinner mp = ip->i_mount; 1223c24b5dfaSDave Chinner 1224c24b5dfaSDave Chinner 
trace_xfs_free_file_space(ip); 1225c24b5dfaSDave Chinner 1226c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 1227c24b5dfaSDave Chinner if (error) 1228c24b5dfaSDave Chinner return error; 1229c24b5dfaSDave Chinner 1230c24b5dfaSDave Chinner error = 0; 1231c24b5dfaSDave Chinner if (len <= 0) /* if nothing being freed */ 1232c24b5dfaSDave Chinner return error; 1233c24b5dfaSDave Chinner rt = XFS_IS_REALTIME_INODE(ip); 1234c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSB(mp, offset); 1235c24b5dfaSDave Chinner endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); 1236c24b5dfaSDave Chinner 1237c24b5dfaSDave Chinner /* wait for the completion of any pending DIOs */ 1238c24b5dfaSDave Chinner inode_dio_wait(VFS_I(ip)); 1239c24b5dfaSDave Chinner 1240*09cbfeafSKirill A. Shutemov rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); 12418b5279e3SBrian Foster ioffset = round_down(offset, rounding); 12428b5279e3SBrian Foster iendoffset = round_up(offset + len, rounding) - 1; 12438b5279e3SBrian Foster error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, 12448b5279e3SBrian Foster iendoffset); 1245c24b5dfaSDave Chinner if (error) 12465f8aca8bSChristoph Hellwig goto out; 12478b5279e3SBrian Foster truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset); 1248c24b5dfaSDave Chinner 1249c24b5dfaSDave Chinner /* 1250c24b5dfaSDave Chinner * Need to zero the stuff we're not freeing, on disk. 1251c24b5dfaSDave Chinner * If it's a realtime file & can't use unwritten extents then we 1252c24b5dfaSDave Chinner * actually need to zero the extent edges. Otherwise xfs_bunmapi 1253c24b5dfaSDave Chinner * will take care of it for us. 
1254c24b5dfaSDave Chinner */ 1255c24b5dfaSDave Chinner if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 1256c24b5dfaSDave Chinner nimap = 1; 1257c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, startoffset_fsb, 1, 1258c24b5dfaSDave Chinner &imap, &nimap, 0); 1259c24b5dfaSDave Chinner if (error) 12605f8aca8bSChristoph Hellwig goto out; 1261c24b5dfaSDave Chinner ASSERT(nimap == 0 || nimap == 1); 1262c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1263c24b5dfaSDave Chinner xfs_daddr_t block; 1264c24b5dfaSDave Chinner 1265c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1266c24b5dfaSDave Chinner block = imap.br_startblock; 1267c24b5dfaSDave Chinner mod = do_div(block, mp->m_sb.sb_rextsize); 1268c24b5dfaSDave Chinner if (mod) 1269c24b5dfaSDave Chinner startoffset_fsb += mp->m_sb.sb_rextsize - mod; 1270c24b5dfaSDave Chinner } 1271c24b5dfaSDave Chinner nimap = 1; 1272c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, 1273c24b5dfaSDave Chinner &imap, &nimap, 0); 1274c24b5dfaSDave Chinner if (error) 12755f8aca8bSChristoph Hellwig goto out; 1276c24b5dfaSDave Chinner ASSERT(nimap == 0 || nimap == 1); 1277c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1278c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1279c24b5dfaSDave Chinner mod++; 1280c24b5dfaSDave Chinner if (mod && (mod != mp->m_sb.sb_rextsize)) 1281c24b5dfaSDave Chinner endoffset_fsb -= mod; 1282c24b5dfaSDave Chinner } 1283c24b5dfaSDave Chinner } 1284c24b5dfaSDave Chinner if ((done = (endoffset_fsb <= startoffset_fsb))) 1285c24b5dfaSDave Chinner /* 1286c24b5dfaSDave Chinner * One contiguous piece to clear 1287c24b5dfaSDave Chinner */ 1288c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 1289c24b5dfaSDave Chinner else { 1290c24b5dfaSDave Chinner /* 1291c24b5dfaSDave Chinner * Some full blocks, possibly two pieces to clear 1292c24b5dfaSDave Chinner */ 1293c24b5dfaSDave 
Chinner if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 1294c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, offset, 1295c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 1296c24b5dfaSDave Chinner if (!error && 1297c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 1298c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, 1299c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, endoffset_fsb), 1300c24b5dfaSDave Chinner offset + len - 1); 1301c24b5dfaSDave Chinner } 1302c24b5dfaSDave Chinner 1303c24b5dfaSDave Chinner /* 1304c24b5dfaSDave Chinner * free file space until done or until there is an error 1305c24b5dfaSDave Chinner */ 1306c24b5dfaSDave Chinner resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1307c24b5dfaSDave Chinner while (!error && !done) { 1308c24b5dfaSDave Chinner 1309c24b5dfaSDave Chinner /* 1310c24b5dfaSDave Chinner * allocate and setup the transaction. Allow this 1311c24b5dfaSDave Chinner * transaction to dip into the reserve blocks to ensure 1312c24b5dfaSDave Chinner * the freeing of the space succeeds at ENOSPC. 1313c24b5dfaSDave Chinner */ 1314c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 13153d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1316c24b5dfaSDave Chinner 1317c24b5dfaSDave Chinner /* 1318c24b5dfaSDave Chinner * check for running out of space 1319c24b5dfaSDave Chinner */ 1320c24b5dfaSDave Chinner if (error) { 1321c24b5dfaSDave Chinner /* 1322c24b5dfaSDave Chinner * Free the transaction structure. 
1323c24b5dfaSDave Chinner */ 13242451337dSDave Chinner ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 13254906e215SChristoph Hellwig xfs_trans_cancel(tp); 1326c24b5dfaSDave Chinner break; 1327c24b5dfaSDave Chinner } 1328c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 1329c24b5dfaSDave Chinner error = xfs_trans_reserve_quota(tp, mp, 1330c24b5dfaSDave Chinner ip->i_udquot, ip->i_gdquot, ip->i_pdquot, 1331c24b5dfaSDave Chinner resblks, 0, XFS_QMOPT_RES_REGBLKS); 1332c24b5dfaSDave Chinner if (error) 1333c24b5dfaSDave Chinner goto error1; 1334c24b5dfaSDave Chinner 1335c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 1336c24b5dfaSDave Chinner 1337c24b5dfaSDave Chinner /* 1338c24b5dfaSDave Chinner * issue the bunmapi() call to free the blocks 1339c24b5dfaSDave Chinner */ 1340c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &firstfsb); 1341c24b5dfaSDave Chinner error = xfs_bunmapi(tp, ip, startoffset_fsb, 1342c24b5dfaSDave Chinner endoffset_fsb - startoffset_fsb, 1343c24b5dfaSDave Chinner 0, 2, &firstfsb, &free_list, &done); 1344f6106efaSEric Sandeen if (error) 1345c24b5dfaSDave Chinner goto error0; 1346c24b5dfaSDave Chinner 1347c24b5dfaSDave Chinner /* 1348c24b5dfaSDave Chinner * complete the transaction 1349c24b5dfaSDave Chinner */ 1350f6106efaSEric Sandeen error = xfs_bmap_finish(&tp, &free_list, NULL); 1351f6106efaSEric Sandeen if (error) 1352c24b5dfaSDave Chinner goto error0; 1353c24b5dfaSDave Chinner 135470393313SChristoph Hellwig error = xfs_trans_commit(tp); 1355c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1356c24b5dfaSDave Chinner } 1357c24b5dfaSDave Chinner 13585f8aca8bSChristoph Hellwig out: 1359c24b5dfaSDave Chinner return error; 1360c24b5dfaSDave Chinner 1361c24b5dfaSDave Chinner error0: 1362c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1363c24b5dfaSDave Chinner error1: 13644906e215SChristoph Hellwig xfs_trans_cancel(tp); 13655f8aca8bSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 13665f8aca8bSChristoph Hellwig goto out; 
1367c24b5dfaSDave Chinner } 1368c24b5dfaSDave Chinner 13695d11fb4bSBrian Foster /* 13705d11fb4bSBrian Foster * Preallocate and zero a range of a file. This mechanism has the allocation 13715d11fb4bSBrian Foster * semantics of fallocate and in addition converts data in the range to zeroes. 13725d11fb4bSBrian Foster */ 1373865e9446SChristoph Hellwig int 1374c24b5dfaSDave Chinner xfs_zero_file_space( 1375c24b5dfaSDave Chinner struct xfs_inode *ip, 1376c24b5dfaSDave Chinner xfs_off_t offset, 13775f8aca8bSChristoph Hellwig xfs_off_t len) 1378c24b5dfaSDave Chinner { 1379c24b5dfaSDave Chinner struct xfs_mount *mp = ip->i_mount; 13805d11fb4bSBrian Foster uint blksize; 1381c24b5dfaSDave Chinner int error; 1382c24b5dfaSDave Chinner 1383897b73b6SDave Chinner trace_xfs_zero_file_space(ip); 1384897b73b6SDave Chinner 13855d11fb4bSBrian Foster blksize = 1 << mp->m_sb.sb_blocklog; 1386c24b5dfaSDave Chinner 1387c24b5dfaSDave Chinner /* 13885d11fb4bSBrian Foster * Punch a hole and prealloc the range. We use hole punch rather than 13895d11fb4bSBrian Foster * unwritten extent conversion for two reasons: 13905d11fb4bSBrian Foster * 13915d11fb4bSBrian Foster * 1.) Hole punch handles partial block zeroing for us. 13925d11fb4bSBrian Foster * 13935d11fb4bSBrian Foster * 2.) If prealloc returns ENOSPC, the file range is still zero-valued 13945d11fb4bSBrian Foster * by virtue of the hole punch. 
1395c24b5dfaSDave Chinner */ 13965d11fb4bSBrian Foster error = xfs_free_file_space(ip, offset, len); 1397c24b5dfaSDave Chinner if (error) 13985f8aca8bSChristoph Hellwig goto out; 1399c24b5dfaSDave Chinner 14005d11fb4bSBrian Foster error = xfs_alloc_file_space(ip, round_down(offset, blksize), 14015d11fb4bSBrian Foster round_up(offset + len, blksize) - 14025d11fb4bSBrian Foster round_down(offset, blksize), 14035d11fb4bSBrian Foster XFS_BMAPI_PREALLOC); 14045f8aca8bSChristoph Hellwig out: 1405c24b5dfaSDave Chinner return error; 1406c24b5dfaSDave Chinner 1407c24b5dfaSDave Chinner } 1408c24b5dfaSDave Chinner 1409c24b5dfaSDave Chinner /* 1410a904b1caSNamjae Jeon * @next_fsb will keep track of the extent currently undergoing shift. 1411a904b1caSNamjae Jeon * @stop_fsb will keep track of the extent at which we have to stop. 1412a904b1caSNamjae Jeon * If we are shifting left, we will start with block (offset + len) and 1413a904b1caSNamjae Jeon * shift each extent till last extent. 1414a904b1caSNamjae Jeon * If we are shifting right, we will start with last extent inside file space 1415a904b1caSNamjae Jeon * and continue until we reach the block corresponding to offset. 
/*
 * Shift file extents by @len bytes, either left (collapse range) or right
 * (insert range), depending on @direction.
 *
 * @next_fsb tracks the extent currently undergoing shift and @stop_fsb the
 * extent at which we have to stop.  When shifting left we start at block
 * (offset + len) and walk towards the last extent; when shifting right we
 * start from the last allocated extent and walk back towards @offset.
 *
 * Returns 0 on success or a negative errno.
 */
static int
xfs_shift_file_space(
	struct xfs_inode        *ip,
	xfs_off_t               offset,
	xfs_off_t               len,
	enum shift_direction	direction)
{
	int			done = 0;	/* set by xfs_bmap_shift_extents() when finished */
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	struct xfs_bmap_free	free_list;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		stop_fsb;
	xfs_fileoff_t		next_fsb;
	xfs_fileoff_t		shift_fsb;

	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);

	if (direction == SHIFT_LEFT) {
		next_fsb = XFS_B_TO_FSB(mp, offset + len);
		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
	} else {
		/*
		 * If right shift, delegate the work of initialization of
		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
		 */
		next_fsb = NULLFSBLOCK;
		stop_fsb = XFS_B_TO_FSB(mp, offset);
	}

	shift_fsb = XFS_B_TO_FSB(mp, len);

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip, true)) {
		error = xfs_free_eofblocks(mp, ip, false);
		if (error)
			return error;
	}

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
					     offset, -1);
	if (error)
		return error;
	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					offset >> PAGE_SHIFT, -1);
	if (error)
		return error;

	/*
	 * The extent shifting code works on extent granularity. So, if
	 * stop_fsb is not the starting block of extent, we need to split
	 * the extent at stop_fsb.
	 */
	if (direction == SHIFT_RIGHT) {
		error = xfs_bmap_split_extent(ip, stop_fsb);
		if (error)
			return error;
	}

	/* One transaction per batch of shifted extents until done or error. */
	while (!error && !done) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
		/*
		 * We would need to reserve permanent block for transaction.
		 * This will come into picture when after shifting extent into
		 * hole we found that adjacent extents can be merged which
		 * may lead to freeing of a block during record update.
		 */
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
		if (error) {
			xfs_trans_cancel(tp);
			break;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
				ip->i_gdquot, ip->i_pdquot,
				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
				XFS_QMOPT_RES_REGBLKS);
		if (error)
			goto out_trans_cancel;

		/* ILOCK ownership passes to the transaction from here on. */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

		xfs_bmap_init(&free_list, &first_block);

		/*
		 * We are using the write transaction in which max 2 bmbt
		 * updates are allowed
		 */
		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
				&done, stop_fsb, &first_block, &free_list,
				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
		if (error)
			goto out_bmap_cancel;

		error = xfs_bmap_finish(&tp, &free_list, NULL);
		if (error)
			goto out_bmap_cancel;

		error = xfs_trans_commit(tp);
	}

	return error;

out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
out_trans_cancel:
	xfs_trans_cancel(tp);
	return error;
}

/*
 * xfs_collapse_file_space()
 *
 * Free the disk space in [offset, offset+len) and then shift all following
 * extents left by @len to close the hole.  xfs_free_file_space() also syncs
 * dirty data and invalidates the page cache over the affected region.
 *
 * The caller must hold the iolock exclusively.
 *
 * RETURNS:
 * 0 on success
 * errno on error
 */
int
xfs_collapse_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	int error;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	trace_xfs_collapse_file_space(ip);

	error = xfs_free_file_space(ip, offset, len);
	if (error)
		return error;

	return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
}

/*
 * xfs_insert_file_space()
 * This routine create hole space by shifting extents for the given file.
 * The first thing we do is to sync dirty data and invalidate page cache
 * over the region on which insert range is working.
And split an extent 1572a904b1caSNamjae Jeon * to two extents at given offset by calling xfs_bmap_split_extent. 1573a904b1caSNamjae Jeon * And shift all extent records which are laying between [offset, 1574a904b1caSNamjae Jeon * last allocated extent] to the right to reserve hole range. 1575a904b1caSNamjae Jeon * RETURNS: 1576a904b1caSNamjae Jeon * 0 on success 1577a904b1caSNamjae Jeon * errno on error 1578a904b1caSNamjae Jeon */ 1579a904b1caSNamjae Jeon int 1580a904b1caSNamjae Jeon xfs_insert_file_space( 1581a904b1caSNamjae Jeon struct xfs_inode *ip, 1582a904b1caSNamjae Jeon loff_t offset, 1583a904b1caSNamjae Jeon loff_t len) 1584a904b1caSNamjae Jeon { 1585a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1586a904b1caSNamjae Jeon trace_xfs_insert_file_space(ip); 1587a904b1caSNamjae Jeon 1588a904b1caSNamjae Jeon return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); 1589a904b1caSNamjae Jeon } 1590a904b1caSNamjae Jeon 1591a904b1caSNamjae Jeon /* 1592a133d952SDave Chinner * We need to check that the format of the data fork in the temporary inode is 1593a133d952SDave Chinner * valid for the target inode before doing the swap. This is not a problem with 1594a133d952SDave Chinner * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1595a133d952SDave Chinner * data fork depending on the space the attribute fork is taking so we can get 1596a133d952SDave Chinner * invalid formats on the target inode. 1597a133d952SDave Chinner * 1598a133d952SDave Chinner * E.g. target has space for 7 extents in extent format, temp inode only has 1599a133d952SDave Chinner * space for 6. If we defragment down to 7 extents, then the tmp format is a 1600a133d952SDave Chinner * btree, but when swapped it needs to be in extent format. Hence we can't just 1601a133d952SDave Chinner * blindly swap data forks on attr2 filesystems. 
/*
 * Validate that the data fork formats of the target inode (@ip) and the
 * temporary inode (@tip) can legally be exchanged.  With attr2 the data fork
 * is dynamically sized depending on the attribute fork, so a fork that is in
 * btree format in one inode may not fit in the other.  We check the swap in
 * both directions so that we don't end up with a corrupt temporary inode
 * either.  Fixing the way xfs_fsr sets up the attribute fork in the source
 * inode would prevent this situation, so all we do here is reject and log the
 * attempt — the responsibility for getting this right is on userspace.
 *
 * Returns 0 if the swap is acceptable, -EINVAL otherwise.
 */
static int
xfs_swap_extents_check_format(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip)	/* tmp inode */
{

	/* Should never get a local format */
	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	/*
	 * if the target inode has less extents that then temporary inode then
	 * why did userspace call us?
	 */
	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
		return -EINVAL;

	/*
	 * if the target inode is in extent form and the temp inode is in btree
	 * form then we will end up with the target inode in the wrong format
	 * as we already know there are less extents in the temp inode.
	 */
	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
		return -EINVAL;

	/* Check temp in extent form to max in target */
	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
		return -EINVAL;

	/* Check target in extent form to max in temp */
	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
		return -EINVAL;

	/*
	 * If we are in a btree format, check that the temp root block will fit
	 * in the target and that it has enough extents to be in btree format
	 * in the target.
	 *
	 * Note that we have to be careful to allow btree->extent conversions
	 * (a common defrag case) which will occur when the temp inode is in
	 * extent format...
	 */
	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		if (XFS_IFORK_BOFF(ip) &&
		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
			return -EINVAL;
		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			return -EINVAL;
	}

	/* Reciprocal target->temp btree format checks */
	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		if (XFS_IFORK_BOFF(tip) &&
		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
			return -EINVAL;
		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			return -EINVAL;
	}

	return 0;
}

/*
 * Flush all dirty data for @ip and toss its page cache so that the extent
 * swap sees the on-disk state.  Fails with -EINVAL if pages remain in the
 * mapping afterwards (i.e. the file is not effectively O_DIRECT clean).
 */
static int
xfs_swap_extent_flush(
	struct xfs_inode	*ip)
{
	int	error;

	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
	if (error)
		return error;
	truncate_pagecache_range(VFS_I(ip), 0, -1);

	/* Verify O_DIRECT for ftmp */
	if (VFS_I(ip)->i_mapping->nrpages)
		return -EINVAL;
	return 0;
}

/*
 * Swap the data forks of @ip (the target inode) and @tip (the temporary
 * inode built by xfs_fsr for defragmentation), after verifying that @sxp's
 * snapshot of the target's size and timestamps still matches — the caller's
 * guarantee that the file was not changed underneath it.
 *
 * Returns 0 on success or a negative errno; on any failure both files are
 * left untouched.
 */
int
xfs_swap_extents(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip,	/* tmp inode */
	xfs_swapext_t	*sxp)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_trans_t	*tp;
	xfs_bstat_t	*sbp = &sxp->sx_stat;
	xfs_ifork_t	*tempifp, *ifp, *tifp;
	int		src_log_flags, target_log_flags;
	int		error = 0;
	int		aforkblks = 0;
	int		taforkblks = 0;
	__uint64_t	tmp;
	int		lock_flags;

	/* Scratch fork used for the three-way struct swap below. */
	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
	if (!tempifp) {
		error = -ENOMEM;
		goto out;
	}

	/*
	 * Lock the inodes against other IO, page faults and truncate to
	 * begin with. Then we can ensure the inodes are flushed and have no
	 * page cache safely. Once we have done this we can take the ilocks and
	 * do the rest of the checks.
	 */
	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);

	/* Verify that both files have the same format */
	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
		error = -EINVAL;
		goto out_unlock;
	}

	/* Verify both files are either real-time or non-realtime */
	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
		error = -EINVAL;
		goto out_unlock;
	}

	/* Flush dirty data and empty the page cache on both inodes. */
	error = xfs_swap_extent_flush(ip);
	if (error)
		goto out_unlock;
	error = xfs_swap_extent_flush(tip);
	if (error)
		goto out_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);
		goto out_unlock;
	}

	/*
	 * Lock and join the inodes to the transaction so that transaction
	 * commit or cancel will unlock the inodes from this point onwards.
	 */
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
	lock_flags |= XFS_ILOCK_EXCL;
	xfs_trans_ijoin(tp, ip, lock_flags);
	xfs_trans_ijoin(tp, tip, lock_flags);


	/* Verify all data are being swapped */
	if (sxp->sx_offset != 0 ||
	    sxp->sx_length != ip->i_d.di_size ||
	    sxp->sx_length != tip->i_d.di_size) {
		error = -EFAULT;
		goto out_trans_cancel;
	}

	trace_xfs_swap_extent_before(ip, 0);
	trace_xfs_swap_extent_before(tip, 1);

	/* check inode formats now that data is flushed */
	error = xfs_swap_extents_check_format(ip, tip);
	if (error) {
		xfs_notice(mp,
		    "%s: inode 0x%llx format is incompatible for exchanging.",
				__func__, ip->i_ino);
		goto out_trans_cancel;
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
		error = -EBUSY;
		goto out_trans_cancel;
	}
	/*
	 * Count the number of extended attribute blocks so the nblocks
	 * fix-up below can exclude them from the swap.
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
		if (error)
			goto out_trans_cancel;
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			&taforkblks);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Before we've swapped the forks, lets set the owners of the forks
	 * appropriately. We have to do this as we are demand paging the btree
	 * buffers, and so the validation done on read will expect the owner
	 * field to be correctly set. Once we change the owners, we can swap the
	 * inode forks.
	 *
	 * Note the trickiness in setting the log flags - we set the owner log
	 * flag on the opposite inode (i.e. the inode we are setting the new
	 * owner to be) because once we swap the forks and log that, log
	 * recovery is going to see the fork as owned by the swapped inode,
	 * not the pre-swapped inodes.
	 */
	src_log_flags = XFS_ILOG_CORE;
	target_log_flags = XFS_ILOG_CORE;
	if (ip->i_d.di_version == 3 &&
	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		target_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
					      tip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	if (tip->i_d.di_version == 3 &&
	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		src_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
					      ip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
	*tempifp = *ifp;	/* struct copy */
	*ifp = *tifp;		/* struct copy */
	*tifp = *tempifp;	/* struct copy */

	/*
	 * Fix the on-disk inode values
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	/*
	 * The extents in the source inode could still contain speculative
	 * preallocation beyond EOF (e.g. the file is open but not modified
	 * while defrag is in progress). In that case, we need to copy over the
	 * number of delalloc blocks the data fork in the source inode is
	 * tracking beyond EOF so that when the fork is truncated away when the
	 * temporary inode is unlinked we don't underrun the i_delayed_blks
	 * counter on that inode.
	 */
	ASSERT(tip->i_delayed_blks == 0);
	tip->i_delayed_blks = ip->i_delayed_blks;
	ip->i_delayed_blks = 0;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		src_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ASSERT(ip->i_d.di_version < 3 ||
		       (src_log_flags & XFS_ILOG_DOWNER));
		src_log_flags |= XFS_ILOG_DBROOT;
		break;
	}

	switch (tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		target_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		target_log_flags |= XFS_ILOG_DBROOT;
		ASSERT(tip->i_d.di_version < 3 ||
		       (target_log_flags & XFS_ILOG_DOWNER));
		break;
	}

	xfs_trans_log_inode(tp, ip, src_log_flags);
	xfs_trans_log_inode(tp, tip, target_log_flags);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp);

	trace_xfs_swap_extent_after(ip, 0);
	trace_xfs_swap_extent_after(tip, 1);
out:
	kmem_free(tempifp);
	return error;

out_unlock:
	xfs_iunlock(ip, lock_flags);
	xfs_iunlock(tip, lock_flags);
	goto out;

out_trans_cancel:
	/* Cancelling the transaction also drops the joined inode locks. */
	xfs_trans_cancel(tp);
	goto out;
}