168988114SDave Chinner /* 268988114SDave Chinner * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3c24b5dfaSDave Chinner * Copyright (c) 2012 Red Hat, Inc. 468988114SDave Chinner * All Rights Reserved. 568988114SDave Chinner * 668988114SDave Chinner * This program is free software; you can redistribute it and/or 768988114SDave Chinner * modify it under the terms of the GNU General Public License as 868988114SDave Chinner * published by the Free Software Foundation. 968988114SDave Chinner * 1068988114SDave Chinner * This program is distributed in the hope that it would be useful, 1168988114SDave Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 1268988114SDave Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1368988114SDave Chinner * GNU General Public License for more details. 1468988114SDave Chinner * 1568988114SDave Chinner * You should have received a copy of the GNU General Public License 1668988114SDave Chinner * along with this program; if not, write the Free Software Foundation, 1768988114SDave Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 1868988114SDave Chinner */ 1968988114SDave Chinner #include "xfs.h" 2068988114SDave Chinner #include "xfs_fs.h" 2170a9883cSDave Chinner #include "xfs_shared.h" 22239880efSDave Chinner #include "xfs_format.h" 23239880efSDave Chinner #include "xfs_log_format.h" 24239880efSDave Chinner #include "xfs_trans_resv.h" 2568988114SDave Chinner #include "xfs_bit.h" 2668988114SDave Chinner #include "xfs_mount.h" 2757062787SDave Chinner #include "xfs_da_format.h" 283ab78df2SDarrick J. 
Wong #include "xfs_defer.h" 2968988114SDave Chinner #include "xfs_inode.h" 3068988114SDave Chinner #include "xfs_btree.h" 31239880efSDave Chinner #include "xfs_trans.h" 3268988114SDave Chinner #include "xfs_extfree_item.h" 3368988114SDave Chinner #include "xfs_alloc.h" 3468988114SDave Chinner #include "xfs_bmap.h" 3568988114SDave Chinner #include "xfs_bmap_util.h" 36a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 3768988114SDave Chinner #include "xfs_rtalloc.h" 3868988114SDave Chinner #include "xfs_error.h" 3968988114SDave Chinner #include "xfs_quota.h" 4068988114SDave Chinner #include "xfs_trans_space.h" 4168988114SDave Chinner #include "xfs_trace.h" 42c24b5dfaSDave Chinner #include "xfs_icache.h" 43239880efSDave Chinner #include "xfs_log.h" 44*9c194644SDarrick J. Wong #include "xfs_rmap_btree.h" 4568988114SDave Chinner 4668988114SDave Chinner /* Kernel only BMAP related definitions and functions */ 4768988114SDave Chinner 4868988114SDave Chinner /* 4968988114SDave Chinner * Convert the given file system block to a disk block. We have to treat it 5068988114SDave Chinner * differently based on whether the file is a real time file or not, because the 5168988114SDave Chinner * bmap code does. 5268988114SDave Chinner */ 5368988114SDave Chinner xfs_daddr_t 5468988114SDave Chinner xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) 5568988114SDave Chinner { 5668988114SDave Chinner return (XFS_IS_REALTIME_INODE(ip) ? \ 5768988114SDave Chinner (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ 5868988114SDave Chinner XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); 5968988114SDave Chinner } 6068988114SDave Chinner 6168988114SDave Chinner /* 623fbbbea3SDave Chinner * Routine to zero an extent on disk allocated to the specific inode. 633fbbbea3SDave Chinner * 643fbbbea3SDave Chinner * The VFS functions take a linearised filesystem block offset, so we have to 653fbbbea3SDave Chinner * convert the sparse xfs fsb to the right format first. 
663fbbbea3SDave Chinner * VFS types are real funky, too. 673fbbbea3SDave Chinner */ 683fbbbea3SDave Chinner int 693fbbbea3SDave Chinner xfs_zero_extent( 703fbbbea3SDave Chinner struct xfs_inode *ip, 713fbbbea3SDave Chinner xfs_fsblock_t start_fsb, 723fbbbea3SDave Chinner xfs_off_t count_fsb) 733fbbbea3SDave Chinner { 743fbbbea3SDave Chinner struct xfs_mount *mp = ip->i_mount; 753fbbbea3SDave Chinner xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); 763fbbbea3SDave Chinner sector_t block = XFS_BB_TO_FSBT(mp, sector); 773fbbbea3SDave Chinner 783dc29161SMatthew Wilcox return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), 793dc29161SMatthew Wilcox block << (mp->m_super->s_blocksize_bits - 9), 803dc29161SMatthew Wilcox count_fsb << (mp->m_super->s_blocksize_bits - 9), 813dc29161SMatthew Wilcox GFP_NOFS, true); 823fbbbea3SDave Chinner } 833fbbbea3SDave Chinner 8468988114SDave Chinner int 8568988114SDave Chinner xfs_bmap_rtalloc( 8668988114SDave Chinner struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 8768988114SDave Chinner { 8868988114SDave Chinner xfs_alloctype_t atype = 0; /* type for allocation routines */ 8968988114SDave Chinner int error; /* error return value */ 9068988114SDave Chinner xfs_mount_t *mp; /* mount point structure */ 9168988114SDave Chinner xfs_extlen_t prod = 0; /* product factor for allocators */ 9268988114SDave Chinner xfs_extlen_t ralen = 0; /* realtime allocation length */ 9368988114SDave Chinner xfs_extlen_t align; /* minimum allocation alignment */ 9468988114SDave Chinner xfs_rtblock_t rtb; 9568988114SDave Chinner 9668988114SDave Chinner mp = ap->ip->i_mount; 9768988114SDave Chinner align = xfs_get_extsz_hint(ap->ip); 9868988114SDave Chinner prod = align / mp->m_sb.sb_rextsize; 9968988114SDave Chinner error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 10068988114SDave Chinner align, 1, ap->eof, 0, 10168988114SDave Chinner ap->conv, &ap->offset, &ap->length); 10268988114SDave Chinner if (error) 10368988114SDave Chinner 
return error; 10468988114SDave Chinner ASSERT(ap->length); 10568988114SDave Chinner ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); 10668988114SDave Chinner 10768988114SDave Chinner /* 10868988114SDave Chinner * If the offset & length are not perfectly aligned 10968988114SDave Chinner * then kill prod, it will just get us in trouble. 11068988114SDave Chinner */ 11168988114SDave Chinner if (do_mod(ap->offset, align) || ap->length % align) 11268988114SDave Chinner prod = 1; 11368988114SDave Chinner /* 11468988114SDave Chinner * Set ralen to be the actual requested length in rtextents. 11568988114SDave Chinner */ 11668988114SDave Chinner ralen = ap->length / mp->m_sb.sb_rextsize; 11768988114SDave Chinner /* 11868988114SDave Chinner * If the old value was close enough to MAXEXTLEN that 11968988114SDave Chinner * we rounded up to it, cut it back so it's valid again. 12068988114SDave Chinner * Note that if it's a really large request (bigger than 12168988114SDave Chinner * MAXEXTLEN), we don't hear about that number, and can't 12268988114SDave Chinner * adjust the starting point to match it. 12368988114SDave Chinner */ 12468988114SDave Chinner if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 12568988114SDave Chinner ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 12668988114SDave Chinner 12768988114SDave Chinner /* 1284b680afbSDave Chinner * Lock out modifications to both the RT bitmap and summary inodes 12968988114SDave Chinner */ 130f4a0660dSDarrick J. Wong xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); 13168988114SDave Chinner xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 132f4a0660dSDarrick J. Wong xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); 1334b680afbSDave Chinner xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); 13468988114SDave Chinner 13568988114SDave Chinner /* 13668988114SDave Chinner * If it's an allocation to an empty file at offset 0, 13768988114SDave Chinner * pick an extent that will space things out in the rt area. 
13868988114SDave Chinner */ 13968988114SDave Chinner if (ap->eof && ap->offset == 0) { 14068988114SDave Chinner xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ 14168988114SDave Chinner 14268988114SDave Chinner error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); 14368988114SDave Chinner if (error) 14468988114SDave Chinner return error; 14568988114SDave Chinner ap->blkno = rtx * mp->m_sb.sb_rextsize; 14668988114SDave Chinner } else { 14768988114SDave Chinner ap->blkno = 0; 14868988114SDave Chinner } 14968988114SDave Chinner 15068988114SDave Chinner xfs_bmap_adjacent(ap); 15168988114SDave Chinner 15268988114SDave Chinner /* 15368988114SDave Chinner * Realtime allocation, done through xfs_rtallocate_extent. 15468988114SDave Chinner */ 15568988114SDave Chinner atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; 15668988114SDave Chinner do_div(ap->blkno, mp->m_sb.sb_rextsize); 15768988114SDave Chinner rtb = ap->blkno; 15868988114SDave Chinner ap->length = ralen; 15968988114SDave Chinner if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, 16068988114SDave Chinner &ralen, atype, ap->wasdel, prod, &rtb))) 16168988114SDave Chinner return error; 16268988114SDave Chinner if (rtb == NULLFSBLOCK && prod > 1 && 16368988114SDave Chinner (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, 16468988114SDave Chinner ap->length, &ralen, atype, 16568988114SDave Chinner ap->wasdel, 1, &rtb))) 16668988114SDave Chinner return error; 16768988114SDave Chinner ap->blkno = rtb; 16868988114SDave Chinner if (ap->blkno != NULLFSBLOCK) { 16968988114SDave Chinner ap->blkno *= mp->m_sb.sb_rextsize; 17068988114SDave Chinner ralen *= mp->m_sb.sb_rextsize; 17168988114SDave Chinner ap->length = ralen; 17268988114SDave Chinner ap->ip->i_d.di_nblocks += ralen; 17368988114SDave Chinner xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 17468988114SDave Chinner if (ap->wasdel) 17568988114SDave Chinner ap->ip->i_delayed_blks -= ralen; 17668988114SDave 
Chinner /* 17768988114SDave Chinner * Adjust the disk quota also. This was reserved 17868988114SDave Chinner * earlier. 17968988114SDave Chinner */ 18068988114SDave Chinner xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 18168988114SDave Chinner ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 18268988114SDave Chinner XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 1833fbbbea3SDave Chinner 1843fbbbea3SDave Chinner /* Zero the extent if we were asked to do so */ 1853fbbbea3SDave Chinner if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { 1863fbbbea3SDave Chinner error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); 1873fbbbea3SDave Chinner if (error) 1883fbbbea3SDave Chinner return error; 1893fbbbea3SDave Chinner } 19068988114SDave Chinner } else { 19168988114SDave Chinner ap->length = 0; 19268988114SDave Chinner } 19368988114SDave Chinner return 0; 19468988114SDave Chinner } 19568988114SDave Chinner 19668988114SDave Chinner /* 19768988114SDave Chinner * Check if the endoff is outside the last extent. If so the caller will grow 19868988114SDave Chinner * the allocation to a stripe unit boundary. All offsets are considered outside 19968988114SDave Chinner * the end of file for an empty fork, so 1 is returned in *eof in that case. 
20068988114SDave Chinner */ 20168988114SDave Chinner int 20268988114SDave Chinner xfs_bmap_eof( 20368988114SDave Chinner struct xfs_inode *ip, 20468988114SDave Chinner xfs_fileoff_t endoff, 20568988114SDave Chinner int whichfork, 20668988114SDave Chinner int *eof) 20768988114SDave Chinner { 20868988114SDave Chinner struct xfs_bmbt_irec rec; 20968988114SDave Chinner int error; 21068988114SDave Chinner 21168988114SDave Chinner error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); 21268988114SDave Chinner if (error || *eof) 21368988114SDave Chinner return error; 21468988114SDave Chinner 21568988114SDave Chinner *eof = endoff >= rec.br_startoff + rec.br_blockcount; 21668988114SDave Chinner return 0; 21768988114SDave Chinner } 21868988114SDave Chinner 21968988114SDave Chinner /* 22068988114SDave Chinner * Extent tree block counting routines. 22168988114SDave Chinner */ 22268988114SDave Chinner 22368988114SDave Chinner /* 22468988114SDave Chinner * Count leaf blocks given a range of extent records. 22568988114SDave Chinner */ 22668988114SDave Chinner STATIC void 22768988114SDave Chinner xfs_bmap_count_leaves( 22868988114SDave Chinner xfs_ifork_t *ifp, 22968988114SDave Chinner xfs_extnum_t idx, 23068988114SDave Chinner int numrecs, 23168988114SDave Chinner int *count) 23268988114SDave Chinner { 23368988114SDave Chinner int b; 23468988114SDave Chinner 23568988114SDave Chinner for (b = 0; b < numrecs; b++) { 23668988114SDave Chinner xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); 23768988114SDave Chinner *count += xfs_bmbt_get_blockcount(frp); 23868988114SDave Chinner } 23968988114SDave Chinner } 24068988114SDave Chinner 24168988114SDave Chinner /* 24268988114SDave Chinner * Count leaf blocks given a range of extent records originally 24368988114SDave Chinner * in btree format. 
24468988114SDave Chinner */ 24568988114SDave Chinner STATIC void 24668988114SDave Chinner xfs_bmap_disk_count_leaves( 24768988114SDave Chinner struct xfs_mount *mp, 24868988114SDave Chinner struct xfs_btree_block *block, 24968988114SDave Chinner int numrecs, 25068988114SDave Chinner int *count) 25168988114SDave Chinner { 25268988114SDave Chinner int b; 25368988114SDave Chinner xfs_bmbt_rec_t *frp; 25468988114SDave Chinner 25568988114SDave Chinner for (b = 1; b <= numrecs; b++) { 25668988114SDave Chinner frp = XFS_BMBT_REC_ADDR(mp, block, b); 25768988114SDave Chinner *count += xfs_bmbt_disk_get_blockcount(frp); 25868988114SDave Chinner } 25968988114SDave Chinner } 26068988114SDave Chinner 26168988114SDave Chinner /* 26268988114SDave Chinner * Recursively walks each level of a btree 2638be11e92SZhi Yong Wu * to count total fsblocks in use. 26468988114SDave Chinner */ 26568988114SDave Chinner STATIC int /* error */ 26668988114SDave Chinner xfs_bmap_count_tree( 26768988114SDave Chinner xfs_mount_t *mp, /* file system mount point */ 26868988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 26968988114SDave Chinner xfs_ifork_t *ifp, /* inode fork pointer */ 27068988114SDave Chinner xfs_fsblock_t blockno, /* file system block number */ 27168988114SDave Chinner int levelin, /* level in btree */ 27268988114SDave Chinner int *count) /* Count of blocks */ 27368988114SDave Chinner { 27468988114SDave Chinner int error; 27568988114SDave Chinner xfs_buf_t *bp, *nbp; 27668988114SDave Chinner int level = levelin; 27768988114SDave Chinner __be64 *pp; 27868988114SDave Chinner xfs_fsblock_t bno = blockno; 27968988114SDave Chinner xfs_fsblock_t nextbno; 28068988114SDave Chinner struct xfs_btree_block *block, *nextblock; 28168988114SDave Chinner int numrecs; 28268988114SDave Chinner 28368988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 28468988114SDave Chinner &xfs_bmbt_buf_ops); 28568988114SDave Chinner if (error) 28668988114SDave 
Chinner return error; 28768988114SDave Chinner *count += 1; 28868988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 28968988114SDave Chinner 29068988114SDave Chinner if (--level) { 29168988114SDave Chinner /* Not at node above leaves, count this level of nodes */ 29268988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 29368988114SDave Chinner while (nextbno != NULLFSBLOCK) { 29468988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 29568988114SDave Chinner XFS_BMAP_BTREE_REF, 29668988114SDave Chinner &xfs_bmbt_buf_ops); 29768988114SDave Chinner if (error) 29868988114SDave Chinner return error; 29968988114SDave Chinner *count += 1; 30068988114SDave Chinner nextblock = XFS_BUF_TO_BLOCK(nbp); 30168988114SDave Chinner nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); 30268988114SDave Chinner xfs_trans_brelse(tp, nbp); 30368988114SDave Chinner } 30468988114SDave Chinner 30568988114SDave Chinner /* Dive to the next level */ 30668988114SDave Chinner pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 30768988114SDave Chinner bno = be64_to_cpu(*pp); 30868988114SDave Chinner if (unlikely((error = 30968988114SDave Chinner xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 31068988114SDave Chinner xfs_trans_brelse(tp, bp); 31168988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", 31268988114SDave Chinner XFS_ERRLEVEL_LOW, mp); 3132451337dSDave Chinner return -EFSCORRUPTED; 31468988114SDave Chinner } 31568988114SDave Chinner xfs_trans_brelse(tp, bp); 31668988114SDave Chinner } else { 31768988114SDave Chinner /* count all level 1 nodes and their leaves */ 31868988114SDave Chinner for (;;) { 31968988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 32068988114SDave Chinner numrecs = be16_to_cpu(block->bb_numrecs); 32168988114SDave Chinner xfs_bmap_disk_count_leaves(mp, block, numrecs, count); 32268988114SDave Chinner xfs_trans_brelse(tp, bp); 32368988114SDave Chinner if (nextbno == NULLFSBLOCK) 
32468988114SDave Chinner break; 32568988114SDave Chinner bno = nextbno; 32668988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 32768988114SDave Chinner XFS_BMAP_BTREE_REF, 32868988114SDave Chinner &xfs_bmbt_buf_ops); 32968988114SDave Chinner if (error) 33068988114SDave Chinner return error; 33168988114SDave Chinner *count += 1; 33268988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 33368988114SDave Chinner } 33468988114SDave Chinner } 33568988114SDave Chinner return 0; 33668988114SDave Chinner } 33768988114SDave Chinner 33868988114SDave Chinner /* 33968988114SDave Chinner * Count fsblocks of the given fork. 34068988114SDave Chinner */ 3410d5a75e9SEric Sandeen static int /* error */ 34268988114SDave Chinner xfs_bmap_count_blocks( 34368988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 34468988114SDave Chinner xfs_inode_t *ip, /* incore inode */ 34568988114SDave Chinner int whichfork, /* data or attr fork */ 34668988114SDave Chinner int *count) /* out: count of blocks */ 34768988114SDave Chinner { 34868988114SDave Chinner struct xfs_btree_block *block; /* current btree block */ 34968988114SDave Chinner xfs_fsblock_t bno; /* block # of "block" */ 35068988114SDave Chinner xfs_ifork_t *ifp; /* fork structure */ 35168988114SDave Chinner int level; /* btree level, for checking */ 35268988114SDave Chinner xfs_mount_t *mp; /* file system mount structure */ 35368988114SDave Chinner __be64 *pp; /* pointer to block address */ 35468988114SDave Chinner 35568988114SDave Chinner bno = NULLFSBLOCK; 35668988114SDave Chinner mp = ip->i_mount; 35768988114SDave Chinner ifp = XFS_IFORK_PTR(ip, whichfork); 35868988114SDave Chinner if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 35968988114SDave Chinner xfs_bmap_count_leaves(ifp, 0, 36068988114SDave Chinner ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 36168988114SDave Chinner count); 36268988114SDave Chinner return 0; 36368988114SDave Chinner } 36468988114SDave Chinner 36568988114SDave 
Chinner /* 36668988114SDave Chinner * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 36768988114SDave Chinner */ 36868988114SDave Chinner block = ifp->if_broot; 36968988114SDave Chinner level = be16_to_cpu(block->bb_level); 37068988114SDave Chinner ASSERT(level > 0); 37168988114SDave Chinner pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 37268988114SDave Chinner bno = be64_to_cpu(*pp); 373d5cf09baSChristoph Hellwig ASSERT(bno != NULLFSBLOCK); 37468988114SDave Chinner ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 37568988114SDave Chinner ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 37668988114SDave Chinner 37768988114SDave Chinner if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { 37868988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, 37968988114SDave Chinner mp); 3802451337dSDave Chinner return -EFSCORRUPTED; 38168988114SDave Chinner } 38268988114SDave Chinner 38368988114SDave Chinner return 0; 38468988114SDave Chinner } 38568988114SDave Chinner 38668988114SDave Chinner /* 38768988114SDave Chinner * returns 1 for success, 0 if we failed to map the extent. 
38868988114SDave Chinner */ 38968988114SDave Chinner STATIC int 39068988114SDave Chinner xfs_getbmapx_fix_eof_hole( 39168988114SDave Chinner xfs_inode_t *ip, /* xfs incore inode pointer */ 39268988114SDave Chinner struct getbmapx *out, /* output structure */ 39368988114SDave Chinner int prealloced, /* this is a file with 39468988114SDave Chinner * preallocated data space */ 39568988114SDave Chinner __int64_t end, /* last block requested */ 39668988114SDave Chinner xfs_fsblock_t startblock) 39768988114SDave Chinner { 39868988114SDave Chinner __int64_t fixlen; 39968988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 40068988114SDave Chinner xfs_ifork_t *ifp; /* inode fork pointer */ 40168988114SDave Chinner xfs_extnum_t lastx; /* last extent pointer */ 40268988114SDave Chinner xfs_fileoff_t fileblock; 40368988114SDave Chinner 40468988114SDave Chinner if (startblock == HOLESTARTBLOCK) { 40568988114SDave Chinner mp = ip->i_mount; 40668988114SDave Chinner out->bmv_block = -1; 40768988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 40868988114SDave Chinner fixlen -= out->bmv_offset; 40968988114SDave Chinner if (prealloced && out->bmv_offset + out->bmv_length == end) { 41068988114SDave Chinner /* Came to hole at EOF. Trim it. 
*/ 41168988114SDave Chinner if (fixlen <= 0) 41268988114SDave Chinner return 0; 41368988114SDave Chinner out->bmv_length = fixlen; 41468988114SDave Chinner } 41568988114SDave Chinner } else { 41668988114SDave Chinner if (startblock == DELAYSTARTBLOCK) 41768988114SDave Chinner out->bmv_block = -2; 41868988114SDave Chinner else 41968988114SDave Chinner out->bmv_block = xfs_fsb_to_db(ip, startblock); 42068988114SDave Chinner fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); 42168988114SDave Chinner ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 42268988114SDave Chinner if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && 42368988114SDave Chinner (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) 42468988114SDave Chinner out->bmv_oflags |= BMV_OF_LAST; 42568988114SDave Chinner } 42668988114SDave Chinner 42768988114SDave Chinner return 1; 42868988114SDave Chinner } 42968988114SDave Chinner 43068988114SDave Chinner /* 43168988114SDave Chinner * Get inode's extents as described in bmv, and format for output. 43268988114SDave Chinner * Calls formatter to fill the user's buffer until all extents 43368988114SDave Chinner * are mapped, until the passed-in bmv->bmv_count slots have 43468988114SDave Chinner * been filled, or until the formatter short-circuits the loop, 43568988114SDave Chinner * if it is tracking filled-in extents on its own. 
43668988114SDave Chinner */ 43768988114SDave Chinner int /* error code */ 43868988114SDave Chinner xfs_getbmap( 43968988114SDave Chinner xfs_inode_t *ip, 44068988114SDave Chinner struct getbmapx *bmv, /* user bmap structure */ 44168988114SDave Chinner xfs_bmap_format_t formatter, /* format to user */ 44268988114SDave Chinner void *arg) /* formatter arg */ 44368988114SDave Chinner { 44468988114SDave Chinner __int64_t bmvend; /* last block requested */ 44568988114SDave Chinner int error = 0; /* return value */ 44668988114SDave Chinner __int64_t fixlen; /* length for -1 case */ 44768988114SDave Chinner int i; /* extent number */ 44868988114SDave Chinner int lock; /* lock state */ 44968988114SDave Chinner xfs_bmbt_irec_t *map; /* buffer for user's data */ 45068988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 45168988114SDave Chinner int nex; /* # of user extents can do */ 45268988114SDave Chinner int nexleft; /* # of user extents left */ 45368988114SDave Chinner int subnex; /* # of bmapi's can do */ 45468988114SDave Chinner int nmap; /* number of map entries */ 45568988114SDave Chinner struct getbmapx *out; /* output structure */ 45668988114SDave Chinner int whichfork; /* data or attr fork */ 45768988114SDave Chinner int prealloced; /* this is a file with 45868988114SDave Chinner * preallocated data space */ 45968988114SDave Chinner int iflags; /* interface flags */ 46068988114SDave Chinner int bmapi_flags; /* flags for xfs_bmapi */ 46168988114SDave Chinner int cur_ext = 0; 46268988114SDave Chinner 46368988114SDave Chinner mp = ip->i_mount; 46468988114SDave Chinner iflags = bmv->bmv_iflags; 46568988114SDave Chinner whichfork = iflags & BMV_IF_ATTRFORK ? 
XFS_ATTR_FORK : XFS_DATA_FORK; 46668988114SDave Chinner 46768988114SDave Chinner if (whichfork == XFS_ATTR_FORK) { 46868988114SDave Chinner if (XFS_IFORK_Q(ip)) { 46968988114SDave Chinner if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 47068988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && 47168988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 4722451337dSDave Chinner return -EINVAL; 47368988114SDave Chinner } else if (unlikely( 47468988114SDave Chinner ip->i_d.di_aformat != 0 && 47568988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { 47668988114SDave Chinner XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, 47768988114SDave Chinner ip->i_mount); 4782451337dSDave Chinner return -EFSCORRUPTED; 47968988114SDave Chinner } 48068988114SDave Chinner 48168988114SDave Chinner prealloced = 0; 48268988114SDave Chinner fixlen = 1LL << 32; 48368988114SDave Chinner } else { 48468988114SDave Chinner if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 48568988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 48668988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 4872451337dSDave Chinner return -EINVAL; 48868988114SDave Chinner 48968988114SDave Chinner if (xfs_get_extsz_hint(ip) || 49068988114SDave Chinner ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 49168988114SDave Chinner prealloced = 1; 49268988114SDave Chinner fixlen = mp->m_super->s_maxbytes; 49368988114SDave Chinner } else { 49468988114SDave Chinner prealloced = 0; 49568988114SDave Chinner fixlen = XFS_ISIZE(ip); 49668988114SDave Chinner } 49768988114SDave Chinner } 49868988114SDave Chinner 49968988114SDave Chinner if (bmv->bmv_length == -1) { 50068988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 50168988114SDave Chinner bmv->bmv_length = 50268988114SDave Chinner max_t(__int64_t, fixlen - bmv->bmv_offset, 0); 50368988114SDave Chinner } else if (bmv->bmv_length == 0) { 50468988114SDave Chinner bmv->bmv_entries 
= 0; 50568988114SDave Chinner return 0; 50668988114SDave Chinner } else if (bmv->bmv_length < 0) { 5072451337dSDave Chinner return -EINVAL; 50868988114SDave Chinner } 50968988114SDave Chinner 51068988114SDave Chinner nex = bmv->bmv_count - 1; 51168988114SDave Chinner if (nex <= 0) 5122451337dSDave Chinner return -EINVAL; 51368988114SDave Chinner bmvend = bmv->bmv_offset + bmv->bmv_length; 51468988114SDave Chinner 51568988114SDave Chinner 51668988114SDave Chinner if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 5172451337dSDave Chinner return -ENOMEM; 518fdd3cceeSDave Chinner out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); 51968988114SDave Chinner if (!out) 5202451337dSDave Chinner return -ENOMEM; 52168988114SDave Chinner 52268988114SDave Chinner xfs_ilock(ip, XFS_IOLOCK_SHARED); 523efa70be1SChristoph Hellwig if (whichfork == XFS_DATA_FORK) { 524efa70be1SChristoph Hellwig if (!(iflags & BMV_IF_DELALLOC) && 525efa70be1SChristoph Hellwig (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { 5262451337dSDave Chinner error = filemap_write_and_wait(VFS_I(ip)->i_mapping); 52768988114SDave Chinner if (error) 52868988114SDave Chinner goto out_unlock_iolock; 529efa70be1SChristoph Hellwig 53068988114SDave Chinner /* 531efa70be1SChristoph Hellwig * Even after flushing the inode, there can still be 532efa70be1SChristoph Hellwig * delalloc blocks on the inode beyond EOF due to 533efa70be1SChristoph Hellwig * speculative preallocation. These are not removed 534efa70be1SChristoph Hellwig * until the release function is called or the inode 535efa70be1SChristoph Hellwig * is inactivated. Hence we cannot assert here that 536efa70be1SChristoph Hellwig * ip->i_delayed_blks == 0. 
53768988114SDave Chinner */ 53868988114SDave Chinner } 53968988114SDave Chinner 540309ecac8SChristoph Hellwig lock = xfs_ilock_data_map_shared(ip); 541efa70be1SChristoph Hellwig } else { 542efa70be1SChristoph Hellwig lock = xfs_ilock_attr_map_shared(ip); 543efa70be1SChristoph Hellwig } 54468988114SDave Chinner 54568988114SDave Chinner /* 54668988114SDave Chinner * Don't let nex be bigger than the number of extents 54768988114SDave Chinner * we can have assuming alternating holes and real extents. 54868988114SDave Chinner */ 54968988114SDave Chinner if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 55068988114SDave Chinner nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 55168988114SDave Chinner 55268988114SDave Chinner bmapi_flags = xfs_bmapi_aflag(whichfork); 55368988114SDave Chinner if (!(iflags & BMV_IF_PREALLOC)) 55468988114SDave Chinner bmapi_flags |= XFS_BMAPI_IGSTATE; 55568988114SDave Chinner 55668988114SDave Chinner /* 55768988114SDave Chinner * Allocate enough space to handle "subnex" maps at a time. 55868988114SDave Chinner */ 5592451337dSDave Chinner error = -ENOMEM; 56068988114SDave Chinner subnex = 16; 56168988114SDave Chinner map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); 56268988114SDave Chinner if (!map) 56368988114SDave Chinner goto out_unlock_ilock; 56468988114SDave Chinner 56568988114SDave Chinner bmv->bmv_entries = 0; 56668988114SDave Chinner 56768988114SDave Chinner if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && 56868988114SDave Chinner (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { 56968988114SDave Chinner error = 0; 57068988114SDave Chinner goto out_free_map; 57168988114SDave Chinner } 57268988114SDave Chinner 57368988114SDave Chinner nexleft = nex; 57468988114SDave Chinner 57568988114SDave Chinner do { 57668988114SDave Chinner nmap = (nexleft > subnex) ? 
subnex : nexleft; 57768988114SDave Chinner error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 57868988114SDave Chinner XFS_BB_TO_FSB(mp, bmv->bmv_length), 57968988114SDave Chinner map, &nmap, bmapi_flags); 58068988114SDave Chinner if (error) 58168988114SDave Chinner goto out_free_map; 58268988114SDave Chinner ASSERT(nmap <= subnex); 58368988114SDave Chinner 58468988114SDave Chinner for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 58568988114SDave Chinner out[cur_ext].bmv_oflags = 0; 58668988114SDave Chinner if (map[i].br_state == XFS_EXT_UNWRITTEN) 58768988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; 58868988114SDave Chinner else if (map[i].br_startblock == DELAYSTARTBLOCK) 58968988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; 59068988114SDave Chinner out[cur_ext].bmv_offset = 59168988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_startoff); 59268988114SDave Chinner out[cur_ext].bmv_length = 59368988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_blockcount); 59468988114SDave Chinner out[cur_ext].bmv_unused1 = 0; 59568988114SDave Chinner out[cur_ext].bmv_unused2 = 0; 59668988114SDave Chinner 59768988114SDave Chinner /* 59868988114SDave Chinner * delayed allocation extents that start beyond EOF can 59968988114SDave Chinner * occur due to speculative EOF allocation when the 60068988114SDave Chinner * delalloc extent is larger than the largest freespace 60168988114SDave Chinner * extent at conversion time. These extents cannot be 60268988114SDave Chinner * converted by data writeback, so can exist here even 60368988114SDave Chinner * if we are not supposed to be finding delalloc 60468988114SDave Chinner * extents. 
60568988114SDave Chinner */ 60668988114SDave Chinner if (map[i].br_startblock == DELAYSTARTBLOCK && 60768988114SDave Chinner map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) 60868988114SDave Chinner ASSERT((iflags & BMV_IF_DELALLOC) != 0); 60968988114SDave Chinner 61068988114SDave Chinner if (map[i].br_startblock == HOLESTARTBLOCK && 61168988114SDave Chinner whichfork == XFS_ATTR_FORK) { 61268988114SDave Chinner /* came to the end of attribute fork */ 61368988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_LAST; 61468988114SDave Chinner goto out_free_map; 61568988114SDave Chinner } 61668988114SDave Chinner 61768988114SDave Chinner if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], 61868988114SDave Chinner prealloced, bmvend, 61968988114SDave Chinner map[i].br_startblock)) 62068988114SDave Chinner goto out_free_map; 62168988114SDave Chinner 62268988114SDave Chinner bmv->bmv_offset = 62368988114SDave Chinner out[cur_ext].bmv_offset + 62468988114SDave Chinner out[cur_ext].bmv_length; 62568988114SDave Chinner bmv->bmv_length = 62668988114SDave Chinner max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 62768988114SDave Chinner 62868988114SDave Chinner /* 62968988114SDave Chinner * In case we don't want to return the hole, 63068988114SDave Chinner * don't increase cur_ext so that we can reuse 63168988114SDave Chinner * it in the next loop. 
63268988114SDave Chinner */ 63368988114SDave Chinner if ((iflags & BMV_IF_NO_HOLES) && 63468988114SDave Chinner map[i].br_startblock == HOLESTARTBLOCK) { 63568988114SDave Chinner memset(&out[cur_ext], 0, sizeof(out[cur_ext])); 63668988114SDave Chinner continue; 63768988114SDave Chinner } 63868988114SDave Chinner 63968988114SDave Chinner nexleft--; 64068988114SDave Chinner bmv->bmv_entries++; 64168988114SDave Chinner cur_ext++; 64268988114SDave Chinner } 64368988114SDave Chinner } while (nmap && nexleft && bmv->bmv_length); 64468988114SDave Chinner 64568988114SDave Chinner out_free_map: 64668988114SDave Chinner kmem_free(map); 64768988114SDave Chinner out_unlock_ilock: 64801f4f327SChristoph Hellwig xfs_iunlock(ip, lock); 64968988114SDave Chinner out_unlock_iolock: 65068988114SDave Chinner xfs_iunlock(ip, XFS_IOLOCK_SHARED); 65168988114SDave Chinner 65268988114SDave Chinner for (i = 0; i < cur_ext; i++) { 65368988114SDave Chinner int full = 0; /* user array is full */ 65468988114SDave Chinner 65568988114SDave Chinner /* format results & advance arg */ 65668988114SDave Chinner error = formatter(&arg, &out[i], &full); 65768988114SDave Chinner if (error || full) 65868988114SDave Chinner break; 65968988114SDave Chinner } 66068988114SDave Chinner 66168988114SDave Chinner kmem_free(out); 66268988114SDave Chinner return error; 66368988114SDave Chinner } 66468988114SDave Chinner 66568988114SDave Chinner /* 66668988114SDave Chinner * dead simple method of punching delalyed allocation blocks from a range in 66768988114SDave Chinner * the inode. Walks a block at a time so will be slow, but is only executed in 668ad4809bfSZhi Yong Wu * rare error cases so the overhead is not critical. This will always punch out 66968988114SDave Chinner * both the start and end blocks, even if the ranges only partially overlap 67068988114SDave Chinner * them, so it is up to the caller to ensure that partial blocks are not 67168988114SDave Chinner * passed in. 
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	xfs_fileoff_t		remaining = length;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	do {
		int		done;
		xfs_bmbt_irec_t	imap;
		int		nimaps = 1;
		xfs_fsblock_t	firstblock;
		struct xfs_defer_ops dfops;

		/*
		 * Map the range first and check that it is a delalloc extent
		 * before trying to unmap the range. Otherwise we will be
		 * trying to remove a real extent (which requires a
		 * transaction) or a hole, which is probably a bad idea...
		 */
		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
				       XFS_BMAPI_ENTIRE);

		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_alert(ip->i_mount,
			"Failed delalloc mapping lookup ino %lld fsb %lld.",
						ip->i_ino, start_fsb);
			}
			break;
		}
		if (!nimaps) {
			/* nothing there */
			goto next_block;
		}
		if (imap.br_startblock != DELAYSTARTBLOCK) {
			/* been converted, ignore */
			goto next_block;
		}
		WARN_ON(imap.br_blockcount == 0);

		/*
		 * Note: while we initialise the firstblock/dfops pair, they
		 * should never be used because blocks should never be
		 * allocated or freed for a delalloc extent and hence we need
		 * don't cancel or finish them after the xfs_bunmapi() call.
		 */
		xfs_defer_init(&dfops, &firstblock);
		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
					&dfops, &done);
		if (error)
			break;

		ASSERT(!xfs_defer_has_unfinished_work(&dfops));
next_block:
		start_fsb++;
		remaining--;
	} while(remaining > 0);

	return error;
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	return true;
}

/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 *
 * Returns 0 on success (including the no-blocks-beyond-EOF case),
 * -EAGAIN if @need_iolock is set and the IOLOCK cannot be taken
 * without blocking, or a negative errno from the truncate path.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	bool		need_iolock)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * Only truncate when something is actually mapped past EOF, or
	 * when there are delalloc blocks (which a plain read mapping of
	 * a hole would not show).
	 */
	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
				return -EAGAIN;
		}

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
				&tp);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp);
		} else {
			error = xfs_trans_commit(tp);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}

/*
 * Allocate (fallocate-style) disk space for the byte range
 * @offset/@len of @ip.  @alloc_type is passed through to
 * xfs_bmapi_write() (e.g. XFS_BMAPI_PREALLOC).  Allocation proceeds
 * one transaction per loop iteration until the range is covered or an
 * error occurs.
 */
int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	struct xfs_defer_ops	dfops;
	uint			qblocks, resblks, resrtextents;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.  With an
		 * extent size hint, round [s, e) out to hint alignment so the
		 * reservation covers the aligned allocation.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			if ((temp = do_mod(startoffset_fsb, extsz)))
				e += temp;
			if ((temp = do_mod(e, extsz)))
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				resrtextents, 0, &tp);

		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		xfs_defer_init(&dfops, &firstfsb);
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, &firstfsb,
					resblks, imapp, &nimaps, &dfops);
		if (error)
			goto error0;

		/*
		 * Complete the transaction
		 */
		error = xfs_defer_finish(&tp, &dfops, NULL);
		if (error)
			goto error0;

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_defer_cancel(&dfops);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Unmap @len_fsb blocks starting at @startoffset_fsb from @ip inside a
 * single write transaction with quota reservation.  *@done is set by
 * xfs_bunmapi() when the whole range has been unmapped.
 */
static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error) {
		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	xfs_defer_init(&dfops, &firstfsb);
	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
			&dfops, done);
	if (error)
		goto out_bmap_cancel;

	error = xfs_defer_finish(&tp, &dfops, ip);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_bmap_cancel:
	xfs_defer_cancel(&dfops);
out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/*
 * Round an unmap range in to realtime extent boundaries: push the start
 * up and pull the end down so that no partial rt extent is unmapped.
 * Used for realtime inodes on filesystems without unwritten extent
 * support (see caller in xfs_free_file_space).
 */
static int
xfs_adjust_extent_unmap_boundaries(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*startoffset_fsb,
	xfs_fileoff_t		*endoffset_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	int			nimap, error;
	xfs_extlen_t		mod = 0;

	nimap = 1;
	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
	if (error)
		return error;

	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
		xfs_daddr_t	block;

		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		block = imap.br_startblock;
		mod = do_div(block, mp->m_sb.sb_rextsize);
		if (mod)
			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
	}

	nimap = 1;
	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
	if (error)
		return error;

	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		/*
		 * NOTE(review): 'mod' here still carries the remainder
		 * computed from the *start* extent above (plus one); it is
		 * never derived from the end extent's startblock.  Verify
		 * this is intentional — it looks like the end alignment
		 * should be computed from imap.br_startblock of this second
		 * mapping.
		 */
		mod++;
		if (mod && mod != mp->m_sb.sb_rextsize)
			*endoffset_fsb -= mod;
	}

	return 0;
}

/*
 * Wait for pending direct I/O, write back and wait on dirty pagecache
 * over [offset, offset+len), then toss the cached pages.  The range is
 * rounded out to the larger of filesystem block size and PAGE_SIZE so
 * whole pages/blocks are flushed and invalidated.
 */
static int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/* wait for the completion of any pending DIOs */
	inode_dio_wait(inode);

	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
	start = round_down(offset, rounding);
	end = round_up(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

/*
 * Free (hole-punch) the byte range @offset/@len of @ip: flush and
 * invalidate the pagecache, unmap all whole blocks in the range, then
 * zero any remaining partial blocks at the edges.
 */
int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	error = xfs_flush_unmap_range(ip, offset, len);
	if (error)
		return error;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/*
	 * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
	 * and we can't use unwritten extents then we actually need to ensure
	 * to zero the whole extent, otherwise we just need to take of block
	 * boundaries, and xfs_bunmapi will handle the rest.
	 */
	if (XFS_IS_REALTIME_INODE(ip) &&
	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
				&endoffset_fsb);
		if (error)
			return error;
	}

	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmap all full blocks we'll have to zero out any
	 * partial block at the beginning and/or end.  xfs_zero_range is
	 * smart enough to skip any holes, including those we just created.
	 */
	return xfs_zero_range(ip, offset, len, NULL);
}

/*
 * Preallocate and zero a range of a file. This mechanism has the allocation
 * semantics of fallocate and in addition converts data in the range to zeroes.
 */
int
xfs_zero_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			blksize;	/* fs block size in bytes */
	int			error;

	trace_xfs_zero_file_space(ip);

	blksize = 1 << mp->m_sb.sb_blocklog;

	/*
	 * Punch a hole and prealloc the range. We use hole punch rather than
	 * unwritten extent conversion for two reasons:
	 *
	 * 1.) Hole punch handles partial block zeroing for us.
	 *
	 * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
	 * by virtue of the hole punch.
	 */
	error = xfs_free_file_space(ip, offset, len);
	if (error)
		goto out;

	/* Reallocate the punched range, rounded out to block boundaries. */
	error = xfs_alloc_file_space(ip, round_down(offset, blksize),
				     round_up(offset + len, blksize) -
				     round_down(offset, blksize),
				     XFS_BMAPI_PREALLOC);
out:
	return error;

}

/*
 * @next_fsb will keep track of the extent currently undergoing shift.
 * @stop_fsb will keep track of the extent at which we have to stop.
 * If we are shifting left, we will start with block (offset + len) and
 * shift each extent till last extent.
 * If we are shifting right, we will start with last extent inside file space
 * and continue until we reach the block corresponding to offset.
 */
static int
xfs_shift_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	enum shift_direction	direction)
{
	int			done = 0;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		stop_fsb;
	xfs_fileoff_t		next_fsb;
	xfs_fileoff_t		shift_fsb;

	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);

	if (direction == SHIFT_LEFT) {
		next_fsb = XFS_B_TO_FSB(mp, offset + len);
		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
	} else {
		/*
		 * If right shift, delegate the work of initialization of
		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
		 */
		next_fsb = NULLFSBLOCK;
		stop_fsb = XFS_B_TO_FSB(mp, offset);
	}

	shift_fsb = XFS_B_TO_FSB(mp, len);

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip, true)) {
		error = xfs_free_eofblocks(mp, ip, false);
		if (error)
			return error;
	}

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
					     offset, -1);
	if (error)
		return error;
	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					offset >> PAGE_SHIFT, -1);
	if (error)
		return error;

	/*
	 * The extent shifting code works on extent granularity. So, if
	 * stop_fsb is not the starting block of extent, we need to split
	 * the extent at stop_fsb.
	 */
	if (direction == SHIFT_RIGHT) {
		error = xfs_bmap_split_extent(ip, stop_fsb);
		if (error)
			return error;
	}

	/* One transaction per batch of shifted extents until done. */
	while (!error && !done) {
		/*
		 * We would need to reserve permanent block for transaction.
		 * This will come into picture when after shifting extent into
		 * hole we found that adjacent extents can be merged which
		 * may lead to freeing of a block during record update.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
					XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
		if (error)
			break;

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
				ip->i_gdquot, ip->i_pdquot,
				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
				XFS_QMOPT_RES_REGBLKS);
		if (error)
			goto out_trans_cancel;

		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

		xfs_defer_init(&dfops, &first_block);

		/*
		 * We are using the write transaction in which max 2 bmbt
		 * updates are allowed
		 */
		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
				&done, stop_fsb, &first_block, &dfops,
				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
		if (error)
			goto out_bmap_cancel;

		error = xfs_defer_finish(&tp, &dfops, NULL);
		if (error)
			goto out_bmap_cancel;

		error = xfs_trans_commit(tp);
	}

	return error;

out_bmap_cancel:
	xfs_defer_cancel(&dfops);
out_trans_cancel:
	xfs_trans_cancel(tp);
	return error;
}

/*
 * xfs_collapse_file_space()
 * This routine frees disk space and shift extent for the given file.
 * The first thing we do is to free data blocks in the specified range
 * by calling xfs_free_file_space(). It would also sync dirty data
 * and invalidate page cache over the region on which collapse range
 * is working. And Shift extent records to the left to cover a hole.
1367a904b1caSNamjae Jeon * RETURNS: 1368a904b1caSNamjae Jeon * 0 on success 1369a904b1caSNamjae Jeon * errno on error 1370a904b1caSNamjae Jeon * 1371a904b1caSNamjae Jeon */ 1372a904b1caSNamjae Jeon int 1373a904b1caSNamjae Jeon xfs_collapse_file_space( 1374a904b1caSNamjae Jeon struct xfs_inode *ip, 1375a904b1caSNamjae Jeon xfs_off_t offset, 1376a904b1caSNamjae Jeon xfs_off_t len) 1377a904b1caSNamjae Jeon { 1378a904b1caSNamjae Jeon int error; 1379a904b1caSNamjae Jeon 1380a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1381a904b1caSNamjae Jeon trace_xfs_collapse_file_space(ip); 1382a904b1caSNamjae Jeon 1383a904b1caSNamjae Jeon error = xfs_free_file_space(ip, offset, len); 1384a904b1caSNamjae Jeon if (error) 1385a904b1caSNamjae Jeon return error; 1386a904b1caSNamjae Jeon 1387a904b1caSNamjae Jeon return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT); 1388a904b1caSNamjae Jeon } 1389a904b1caSNamjae Jeon 1390a904b1caSNamjae Jeon /* 1391a904b1caSNamjae Jeon * xfs_insert_file_space() 1392a904b1caSNamjae Jeon * This routine create hole space by shifting extents for the given file. 1393a904b1caSNamjae Jeon * The first thing we do is to sync dirty data and invalidate page cache 1394a904b1caSNamjae Jeon * over the region on which insert range is working. And split an extent 1395a904b1caSNamjae Jeon * to two extents at given offset by calling xfs_bmap_split_extent. 1396a904b1caSNamjae Jeon * And shift all extent records which are laying between [offset, 1397a904b1caSNamjae Jeon * last allocated extent] to the right to reserve hole range. 
1398a904b1caSNamjae Jeon * RETURNS: 1399a904b1caSNamjae Jeon * 0 on success 1400a904b1caSNamjae Jeon * errno on error 1401a904b1caSNamjae Jeon */ 1402a904b1caSNamjae Jeon int 1403a904b1caSNamjae Jeon xfs_insert_file_space( 1404a904b1caSNamjae Jeon struct xfs_inode *ip, 1405a904b1caSNamjae Jeon loff_t offset, 1406a904b1caSNamjae Jeon loff_t len) 1407a904b1caSNamjae Jeon { 1408a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1409a904b1caSNamjae Jeon trace_xfs_insert_file_space(ip); 1410a904b1caSNamjae Jeon 1411a904b1caSNamjae Jeon return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT); 1412a904b1caSNamjae Jeon } 1413a904b1caSNamjae Jeon 1414a904b1caSNamjae Jeon /* 1415a133d952SDave Chinner * We need to check that the format of the data fork in the temporary inode is 1416a133d952SDave Chinner * valid for the target inode before doing the swap. This is not a problem with 1417a133d952SDave Chinner * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1418a133d952SDave Chinner * data fork depending on the space the attribute fork is taking so we can get 1419a133d952SDave Chinner * invalid formats on the target inode. 1420a133d952SDave Chinner * 1421a133d952SDave Chinner * E.g. target has space for 7 extents in extent format, temp inode only has 1422a133d952SDave Chinner * space for 6. If we defragment down to 7 extents, then the tmp format is a 1423a133d952SDave Chinner * btree, but when swapped it needs to be in extent format. Hence we can't just 1424a133d952SDave Chinner * blindly swap data forks on attr2 filesystems. 1425a133d952SDave Chinner * 1426a133d952SDave Chinner * Note that we check the swap in both directions so that we don't end up with 1427a133d952SDave Chinner * a corrupt temporary inode, either. 
 *
 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
 * inode will prevent this situation from occurring, so all we do here is
 * reject and log the attempt. basically we are putting the responsibility on
 * userspace to get this right.
 */
static int
xfs_swap_extents_check_format(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip)	/* tmp inode */
{

	/* Should never get a local format */
	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	/*
	 * if the target inode has fewer extents than the temporary inode then
	 * why did userspace call us?
	 */
	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
		return -EINVAL;

	/*
	 * if the target inode is in extent form and the temp inode is in btree
	 * form then we will end up with the target inode in the wrong format
	 * as we already know there are less extents in the temp inode.
	 */
	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
		return -EINVAL;

	/* Check temp in extent form to max in target */
	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
		return -EINVAL;

	/* Check target in extent form to max in temp */
	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
		return -EINVAL;

	/*
	 * If we are in a btree format, check that the temp root block will fit
	 * in the target and that it has enough extents to be in btree format
	 * in the target.
	 *
	 * Note that we have to be careful to allow btree->extent conversions
	 * (a common defrag case) which will occur when the temp inode is in
	 * extent format...
	 */
	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		if (XFS_IFORK_BOFF(ip) &&
		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
			return -EINVAL;
		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			return -EINVAL;
	}

	/* Reciprocal target->temp btree format checks */
	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		if (XFS_IFORK_BOFF(tip) &&
		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
			return -EINVAL;
		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			return -EINVAL;
	}

	return 0;
}

/*
 * Flush and invalidate all dirty data for an inode prior to an extent swap,
 * and verify the page cache is actually empty afterwards.
 */
static int
xfs_swap_extent_flush(
	struct xfs_inode	*ip)
{
	int	error;

	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
	if (error)
		return error;
	truncate_pagecache_range(VFS_I(ip), 0, -1);

	/* Verify O_DIRECT for ftmp */
	if (VFS_I(ip)->i_mapping->nrpages)
		return -EINVAL;
	return 0;
}

/*
 * Swap the data forks (and associated on-disk metadata) of two inodes,
 * as used by the XFS_IOC_SWAPEXT defragmentation interface.
 */
int
xfs_swap_extents(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip,	/* tmp inode */
	xfs_swapext_t	*sxp)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_trans_t	*tp;
	xfs_bstat_t	*sbp = &sxp->sx_stat;
	xfs_ifork_t	*tempifp, *ifp, *tifp;
	int		src_log_flags, target_log_flags;
	int		error = 0;
	int		aforkblks = 0;
	int		taforkblks = 0;
	__uint64_t	tmp;
	int		lock_flags;

	/* Temporary fork used for the three-way struct swap below. */
	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
	if (!tempifp) {
		error = -ENOMEM;
		goto out;
	}

	/*
	 * Lock the inodes against other IO, page faults and truncate to
	 * begin with. Then we can ensure the inodes are flushed and have no
	 * page cache safely. Once we have done this we can take the ilocks and
	 * do the rest of the checks.
	 */
	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);

	/* Verify that both files have the same format */
	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
		error = -EINVAL;
		goto out_unlock;
	}

	/* Verify both files are either real-time or non-realtime */
	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
		error = -EINVAL;
		goto out_unlock;
	}

	/* Flush dirty data and empty the page cache on both inodes. */
	error = xfs_swap_extent_flush(ip);
	if (error)
		goto out_unlock;
	error = xfs_swap_extent_flush(tip);
	if (error)
		goto out_unlock;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		goto out_unlock;

	/*
	 * Lock and join the inodes to the transaction so that transaction commit
	 * or cancel will unlock the inodes from this point onwards.
	 */
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
	lock_flags |= XFS_ILOCK_EXCL;
	xfs_trans_ijoin(tp, ip, lock_flags);
	xfs_trans_ijoin(tp, tip, lock_flags);


	/* Verify all data are being swapped */
	if (sxp->sx_offset != 0 ||
	    sxp->sx_length != ip->i_d.di_size ||
	    sxp->sx_length != tip->i_d.di_size) {
		error = -EFAULT;
		goto out_trans_cancel;
	}

	trace_xfs_swap_extent_before(ip, 0);
	trace_xfs_swap_extent_before(tip, 1);

	/* check inode formats now that data is flushed */
	error = xfs_swap_extents_check_format(ip, tip);
	if (error) {
		xfs_notice(mp,
		    "%s: inode 0x%llx format is incompatible for exchanging.",
				__func__, ip->i_ino);
		goto out_trans_cancel;
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
		error = -EBUSY;
		goto out_trans_cancel;
	}
	/*
	 * Count the number of extended attribute blocks
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
		if (error)
			goto out_trans_cancel;
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			&taforkblks);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Before we've swapped the forks, lets set the owners of the forks
	 * appropriately. We have to do this as we are demand paging the btree
	 * buffers, and so the validation done on read will expect the owner
	 * field to be correctly set. Once we change the owners, we can swap the
	 * inode forks.
	 *
	 * Note the trickiness in setting the log flags - we set the owner log
	 * flag on the opposite inode (i.e. the inode we are setting the new
	 * owner to be) because once we swap the forks and log that, log
	 * recovery is going to see the fork as owned by the swapped inode,
	 * not the pre-swapped inodes.
	 */
	src_log_flags = XFS_ILOG_CORE;
	target_log_flags = XFS_ILOG_CORE;
	if (ip->i_d.di_version == 3 &&
	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		target_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
					      tip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	if (tip->i_d.di_version == 3 &&
	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		src_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
					      ip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
	*tempifp = *ifp;	/* struct copy */
	*ifp = *tifp;		/* struct copy */
	*tifp = *tempifp;	/* struct copy */

	/*
	 * Fix the on-disk inode values
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	/*
	 * The extents in the source inode could still contain speculative
	 * preallocation beyond EOF (e.g. the file is open but not modified
	 * while defrag is in progress). In that case, we need to copy over the
	 * number of delalloc blocks the data fork in the source inode is
	 * tracking beyond EOF so that when the fork is truncated away when the
	 * temporary inode is unlinked we don't underrun the i_delayed_blks
	 * counter on that inode.
	 */
	ASSERT(tip->i_delayed_blks == 0);
	tip->i_delayed_blks = ip->i_delayed_blks;
	ip->i_delayed_blks = 0;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		src_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ASSERT(ip->i_d.di_version < 3 ||
		       (src_log_flags & XFS_ILOG_DOWNER));
		src_log_flags |= XFS_ILOG_DBROOT;
		break;
	}

	switch (tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		target_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		target_log_flags |= XFS_ILOG_DBROOT;
		ASSERT(tip->i_d.di_version < 3 ||
		       (target_log_flags & XFS_ILOG_DOWNER));
		break;
	}

	xfs_trans_log_inode(tp, ip, src_log_flags);
	xfs_trans_log_inode(tp, tip, target_log_flags);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp);

	trace_xfs_swap_extent_after(ip, 0);
	trace_xfs_swap_extent_after(tip, 1);
out:
	kmem_free(tempifp);
	return error;

out_unlock:
	xfs_iunlock(ip, lock_flags);
	xfs_iunlock(tip, lock_flags);
	goto out;

out_trans_cancel:
	/* Cancel unlocks and unjoins the inodes; just free tempifp. */
	xfs_trans_cancel(tp);
	goto out;
}