/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rmap_btree.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_refcount.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because
 * the bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	return (XFS_IS_REALTIME_INODE(ip) ? \
		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
}
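/*
 * For example, with 4k filesystem blocks (sb_blocklog == 12) a realtime
 * fsb of 10 maps to daddr 10 << (12 - 9) == 80, because the realtime
 * device is addressed linearly in filesystem blocks.  A data device fsb
 * instead encodes an (AG number, AG block) pair, which is why
 * XFS_FSB_TO_DADDR() has to decode the AG number first.
 */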
/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

	return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, true);
}
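/*
 * To illustrate the unit conversions above: @sector is in 512-byte basic
 * blocks, XFS_BB_TO_FSBT() truncates it back to a filesystem block number,
 * and the shift by (s_blocksize_bits - 9) converts filesystem blocks into
 * the 512-byte sector units that blkdev_issue_zeroout() expects.  With 4k
 * blocks that shift is by 3, e.g. 16 fsbs -> 128 sectors.
 */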
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_alloctype_t		atype = 0;	/* type for allocation routines */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* mount point structure */
	xfs_extlen_t		prod = 0;	/* product factor for allocators */
	xfs_extlen_t		ralen = 0;	/* realtime allocation length */
	xfs_extlen_t		align;		/* minimum allocation alignment */
	xfs_rtblock_t		rtb;

	mp = ap->ip->i_mount;
	align = xfs_get_extsz_hint(ap->ip);
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	if (do_mod(ap->offset, align) || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to MAXEXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * MAXEXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t uninitialized_var(rtx);	/* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
	do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
				&ralen, atype, ap->wasdel, prod, &rtb)))
		return error;
	if (rtb == NULLFSBLOCK && prod > 1 &&
	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
					   ap->length, &ralen, atype,
					   ap->wasdel, 1, &rtb)))
		return error;
	ap->blkno = rtb;
	if (ap->blkno != NULLFSBLOCK) {
		ap->blkno *= mp->m_sb.sb_rextsize;
		ralen *= mp->m_sb.sb_rextsize;
		ap->length = ralen;
		ap->ip->i_d.di_nblocks += ralen;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ralen;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ?
				XFS_TRANS_DQ_DELRTBCOUNT :
				XFS_TRANS_DQ_RTBCOUNT, (long) ralen);

		/* Zero the extent if we were asked to do so */
		if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
			if (error)
				return error;
		}
	} else {
		ap->length = 0;
	}
	return 0;
}

/*
 * Check if the endoff is outside the last extent. If so the caller will grow
 * the allocation to a stripe unit boundary.  All offsets are considered
 * outside the end of file for an empty fork, so 1 is returned in *eof in that
 * case.
 */
int
xfs_bmap_eof(
	struct xfs_inode	*ip,
	xfs_fileoff_t		endoff,
	int			whichfork,
	int			*eof)
{
	struct xfs_bmbt_irec	rec;
	int			error;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
	if (error || *eof)
		return error;

	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.
 */
STATIC void
xfs_bmap_count_leaves(
	xfs_ifork_t		*ifp,
	xfs_extnum_t		idx,
	int			numrecs,
	int			*count)
{
	int			b;

	for (b = 0; b < numrecs; b++) {
		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
		*count += xfs_bmbt_get_blockcount(frp);
	}
}
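/*
 * For example, an in-core extent list holding the two records
 * (startoff 0, blockcount 5) and (startoff 10, blockcount 3) adds 8 to
 * *count: only br_blockcount is summed, so the hole between the records
 * contributes nothing.
 */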
/*
 * Count leaf blocks given a range of extent records originally
 * in btree format.
 */
STATIC void
xfs_bmap_disk_count_leaves(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*block,
	int			numrecs,
	int			*count)
{
	int			b;
	xfs_bmbt_rec_t		*frp;

	for (b = 1; b <= numrecs; b++) {
		frp = XFS_BMBT_REC_ADDR(mp, block, b);
		*count += xfs_bmbt_disk_get_blockcount(frp);
	}
}

/*
 * Recursively walks each level of a btree
 * to count total fsblocks in use.
 */
STATIC int				/* error */
xfs_bmap_count_tree(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fsblock_t	blockno,	/* file system block number */
	int		levelin,	/* level in btree */
	int		*count)		/* Count of blocks */
{
	int			error;
	xfs_buf_t		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t		bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
	if (error)
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (unlikely((error =
		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return -EFSCORRUPTED;
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}
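/*
 * For example, with the bmbt root in the inode fork above a single level
 * of three leaf blocks, the caller passes the first leaf as @blockno with
 * @levelin == 1.  The else branch above then walks all three leaves via
 * bb_rightsib, adding one to *count per leaf block plus the blockcount of
 * every record in each leaf; the incore root occupies no disk block and
 * is never counted.
 */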
/*
 * Count fsblocks of the given fork.
 */
static int					/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * returns 1 for success, 0 if we failed to map the extent.
 */
STATIC int
xfs_getbmapx_fix_eof_hole(
	xfs_inode_t		*ip,		/* xfs incore inode pointer */
	int			whichfork,
	struct getbmapx		*out,		/* output structure */
	int			prealloced,	/* this is a file with
						 * preallocated data space */
	__int64_t		end,		/* last block requested */
	xfs_fsblock_t		startblock,
	bool			moretocome)
{
	__int64_t		fixlen;
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_extnum_t		lastx;		/* last extent pointer */
	xfs_fileoff_t		fileblock;

	if (startblock == HOLESTARTBLOCK) {
		mp = ip->i_mount;
		out->bmv_block = -1;
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		fixlen -= out->bmv_offset;
		if (prealloced && out->bmv_offset + out->bmv_length == end) {
			/* Came to hole at EOF. Trim it. */
			if (fixlen <= 0)
				return 0;
			out->bmv_length = fixlen;
		}
	} else {
		if (startblock == DELAYSTARTBLOCK)
			out->bmv_block = -2;
		else
			out->bmv_block = xfs_fsb_to_db(ip, startblock);
		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
		ifp = XFS_IFORK_PTR(ip, whichfork);
		if (!moretocome &&
		    xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
			out->bmv_oflags |= BMV_OF_LAST;
	}

	return 1;
}

/* Adjust the reported bmap around shared/unshared extent transitions. */
STATIC int
xfs_getbmap_adjust_shared(
	struct xfs_inode		*ip,
	int				whichfork,
	struct xfs_bmbt_irec		*map,
	struct getbmapx			*out,
	struct xfs_bmbt_irec		*next_map)
{
	struct xfs_mount		*mp = ip->i_mount;
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;
	xfs_agblock_t			ebno;
	xfs_extlen_t			elen;
	xfs_extlen_t			nlen;
	int				error;

	next_map->br_startblock = NULLFSBLOCK;
	next_map->br_startoff = NULLFILEOFF;
	next_map->br_blockcount = 0;

	/* Only written data blocks can be shared. */
	if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK ||
	    map->br_startblock == DELAYSTARTBLOCK ||
	    map->br_startblock == HOLESTARTBLOCK ||
	    ISUNWRITTEN(map))
		return 0;

	agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, map->br_startblock);
	error = xfs_reflink_find_shared(mp, agno, agbno, map->br_blockcount,
			&ebno, &elen, true);
	if (error)
		return error;

	if (ebno == NULLAGBLOCK) {
		/* No shared blocks at all. */
		return 0;
	} else if (agbno == ebno) {
		/*
		 * Shared extent at (agbno, elen).  Shrink the reported
		 * extent length and prepare to move the start of map[i]
		 * to agbno+elen, with the aim of (re)formatting the new
		 * map[i] the next time through the inner loop.
		 */
		out->bmv_length = XFS_FSB_TO_BB(mp, elen);
		out->bmv_oflags |= BMV_OF_SHARED;
		if (elen != map->br_blockcount) {
			*next_map = *map;
			next_map->br_startblock += elen;
			next_map->br_startoff += elen;
			next_map->br_blockcount -= elen;
		}
		map->br_blockcount -= elen;
	} else {
		/*
		 * There's an unshared extent (agbno, ebno - agbno)
		 * followed by shared extent at (ebno, elen).  Shrink
		 * the reported extent length to cover only the unshared
		 * extent and prepare to move up the start of map[i] to
		 * ebno, with the aim of (re)formatting the new map[i]
		 * the next time through the inner loop.
		 */
		*next_map = *map;
		nlen = ebno - agbno;
		out->bmv_length = XFS_FSB_TO_BB(mp, nlen);
		next_map->br_startblock += nlen;
		next_map->br_startoff += nlen;
		next_map->br_blockcount -= nlen;
		map->br_blockcount -= nlen;
	}

	return 0;
}
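/*
 * For example, if map[i] starts at agbno 100 and runs for 50 blocks while
 * xfs_reflink_find_shared() reports a shared run at ebno == 120 for
 * elen == 30, the else branch above trims the reported record to the 20
 * unshared blocks and stashes the remainder in *next_map; the caller's
 * loop then reports that remainder, now starting on the shared run, with
 * BMV_OF_SHARED set on the next pass.
 */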
/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 */
int						/* error code */
xfs_getbmap(
	xfs_inode_t		*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	xfs_bmap_format_t	formatter,	/* format to user */
	void			*arg)		/* formatter arg */
{
	__int64_t		bmvend;		/* last block requested */
	int			error = 0;	/* return value */
	__int64_t		fixlen;		/* length for -1 case */
	int			i;		/* extent number */
	int			lock;		/* lock state */
	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
	xfs_mount_t		*mp;		/* file system mount point */
	int			nex;		/* # of user extents can do */
	int			nexleft;	/* # of user extents left */
	int			subnex;		/* # of bmapi's can do */
	int			nmap;		/* number of map entries */
	struct getbmapx		*out;		/* output structure */
	int			whichfork;	/* data or attr fork */
	int			prealloced;	/* this is a file with
						 * preallocated data space */
	int			iflags;		/* interface flags */
	int			bmapi_flags;	/* flags for xfs_bmapi */
	int			cur_ext = 0;
	struct xfs_bmbt_irec	inject_map;

	mp = ip->i_mount;
	iflags = bmv->bmv_iflags;

#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;

	switch (whichfork) {
	case XFS_ATTR_FORK:
		if (XFS_IFORK_Q(ip)) {
			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
				return -EINVAL;
		} else if (unlikely(
			   ip->i_d.di_aformat != 0 &&
			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return -EFSCORRUPTED;
		}

		prealloced = 0;
		fixlen = 1LL << 32;
		break;
	case XFS_COW_FORK:
		if (ip->i_cformat != XFS_DINODE_FMT_EXTENTS)
			return -EINVAL;

		if (xfs_get_cowextsz_hint(ip)) {
			prealloced = 1;
			fixlen = mp->m_super->s_maxbytes;
		} else {
			prealloced = 0;
			fixlen = XFS_ISIZE(ip);
		}
		break;
	default:
		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
			return -EINVAL;

		if (xfs_get_extsz_hint(ip) ||
		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
			prealloced = 1;
			fixlen = mp->m_super->s_maxbytes;
		} else {
			prealloced = 0;
			fixlen = XFS_ISIZE(ip);
		}
		break;
	}

	if (bmv->bmv_length == -1) {
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
		bmv->bmv_length =
			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
	} else if (bmv->bmv_length == 0) {
		bmv->bmv_entries = 0;
		return 0;
	} else if (bmv->bmv_length < 0) {
		return -EINVAL;
	}

	nex = bmv->bmv_count - 1;
	if (nex <= 0)
		return -EINVAL;
	bmvend = bmv->bmv_offset + bmv->bmv_length;

	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
		return -ENOMEM;
	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
	if (!out)
		return -ENOMEM;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		lock = xfs_ilock_data_map_shared(ip);
		break;
	case XFS_COW_FORK:
		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);
		break;
	case XFS_ATTR_FORK:
		lock = xfs_ilock_attr_map_shared(ip);
		break;
	}

	/*
	 * Don't let nex be bigger than the number of extents
	 * we can have assuming alternating holes and real extents.
	 */
	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;

	bmapi_flags = xfs_bmapi_aflag(whichfork);
	if (!(iflags & BMV_IF_PREALLOC))
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	/*
	 * Allocate enough space to handle "subnex" maps at a time.
	 */
	error = -ENOMEM;
	subnex = 16;
	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
	if (!map)
		goto out_unlock_ilock;

	bmv->bmv_entries = 0;

	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
		error = 0;
		goto out_free_map;
	}

	nexleft = nex;

	do {
		nmap = (nexleft > subnex) ? subnex : nexleft;
		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
				       map, &nmap, bmapi_flags);
		if (error)
			goto out_free_map;
		ASSERT(nmap <= subnex);

		for (i = 0; i < nmap && nexleft && bmv->bmv_length &&
				cur_ext < bmv->bmv_count; i++) {
			out[cur_ext].bmv_oflags = 0;
			if (map[i].br_state == XFS_EXT_UNWRITTEN)
				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
			else if (map[i].br_startblock == DELAYSTARTBLOCK)
				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
			out[cur_ext].bmv_offset =
				XFS_FSB_TO_BB(mp, map[i].br_startoff);
			out[cur_ext].bmv_length =
				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			out[cur_ext].bmv_unused1 = 0;
			out[cur_ext].bmv_unused2 = 0;

			/*
			 * delayed allocation extents that start beyond EOF can
			 * occur due to speculative EOF allocation when the
			 * delalloc extent is larger than the largest freespace
			 * extent at conversion time.  These extents cannot be
			 * converted by data writeback, so can exist here even
			 * if we are not supposed to be finding delalloc
			 * extents.
			 */
			if (map[i].br_startblock == DELAYSTARTBLOCK &&
			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
				ASSERT((iflags & BMV_IF_DELALLOC) != 0);

			if (map[i].br_startblock == HOLESTARTBLOCK &&
			    whichfork == XFS_ATTR_FORK) {
				/* came to the end of attribute fork */
				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
				goto out_free_map;
			}

			/* Is this a shared block? */
			error = xfs_getbmap_adjust_shared(ip, whichfork,
					&map[i], &out[cur_ext], &inject_map);
			if (error)
				goto out_free_map;

			if (!xfs_getbmapx_fix_eof_hole(ip, whichfork,
					&out[cur_ext], prealloced, bmvend,
					map[i].br_startblock,
					inject_map.br_startblock != NULLFSBLOCK))
				goto out_free_map;

			bmv->bmv_offset =
				out[cur_ext].bmv_offset +
				out[cur_ext].bmv_length;
			bmv->bmv_length =
				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);

			/*
			 * In case we don't want to return the hole,
			 * don't increase cur_ext so that we can reuse
			 * it in the next loop.
			 */
			if ((iflags & BMV_IF_NO_HOLES) &&
			    map[i].br_startblock == HOLESTARTBLOCK) {
				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
				continue;
			}

			if (inject_map.br_startblock != NULLFSBLOCK) {
				map[i] = inject_map;
				i--;
			} else
				nexleft--;
			bmv->bmv_entries++;
			cur_ext++;
		}
	} while (nmap && nexleft && bmv->bmv_length &&
		 cur_ext < bmv->bmv_count);

 out_free_map:
	kmem_free(map);
 out_unlock_ilock:
	xfs_iunlock(ip, lock);
 out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	for (i = 0; i < cur_ext; i++) {
		int full = 0;	/* user array is full */

		/* format results & advance arg */
		error = formatter(&arg, &out[i], &full);
		if (error || full)
			break;
	}

	kmem_free(out);
	return error;
}
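/*
 * The formatter callback above keeps this routine independent of the
 * copy-out mechanism: the getbmap ioctl paths, for instance, supply a
 * formatter that copies each struct getbmapx to the user buffer, and the
 * loop stops as soon as the formatter returns an error or sets *full.
 */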
/*
 * dead simple method of punching delayed allocation blocks from a range in
 * the inode. Walks a block at a time so will be slow, but is only executed in
 * rare error cases so the overhead is not critical. This will always punch out
 * both the start and end blocks, even if the ranges only partially overlap
 * them, so it is up to the caller to ensure that partial blocks are not
 * passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	xfs_fileoff_t		remaining = length;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	do {
		int		done;
		xfs_bmbt_irec_t	imap;
		int		nimaps = 1;
		xfs_fsblock_t	firstblock;
		struct xfs_defer_ops dfops;

		/*
		 * Map the range first and check that it is a delalloc extent
		 * before trying to unmap the range. Otherwise we will be
		 * trying to remove a real extent (which requires a
		 * transaction) or a hole, which is probably a bad idea...
		 */
		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
				       XFS_BMAPI_ENTIRE);

		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_alert(ip->i_mount,
			"Failed delalloc mapping lookup ino %lld fsb %lld.",
						ip->i_ino, start_fsb);
			}
			break;
		}
		if (!nimaps) {
			/* nothing there */
			goto next_block;
		}
		if (imap.br_startblock != DELAYSTARTBLOCK) {
			/* been converted, ignore */
			goto next_block;
		}
		WARN_ON(imap.br_blockcount == 0);

		/*
		 * Note: while we initialise the firstblock/dfops pair, they
		 * should never be used because blocks should never be
		 * allocated or freed for a delalloc extent and hence we don't
		 * need to cancel or finish them after the xfs_bunmapi() call.
		 */
		xfs_defer_init(&dfops, &firstblock);
		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
					&dfops, &done);
		if (error)
			break;

		ASSERT(!xfs_defer_has_unfinished_work(&dfops));
next_block:
		start_fsb++;
		remaining--;
	} while (remaining > 0);

	return error;
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	return true;
}
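/*
 * For example, a file flagged XFS_DIFLAG_PREALLOC is normally left alone,
 * but when force == true and the inode still carries delalloc blocks
 * (ip->i_delayed_blks != 0) this returns true so that the speculative
 * preallocation can be trimmed anyway.
 */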
/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	bool		need_iolock)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
				return -EAGAIN;
		}

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
				&tp);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp);
		} else {
			error = xfs_trans_commit(tp);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	struct xfs_defer_ops	dfops;
	uint			qblocks, resblks, resrtextents;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			if ((temp = do_mod(startoffset_fsb, extsz)))
				e += temp;
			if ((temp = do_mod(e, extsz)))
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				resrtextents, 0, &tp);

		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		xfs_defer_init(&dfops, &firstfsb);
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, &firstfsb,
					resblks, imapp, &nimaps, &dfops);
		if (error)
			goto error0;

		/*
		 * Complete the transaction
		 */
		error = xfs_defer_finish(&tp, &dfops, NULL);
		if (error)
			goto error0;

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_defer_cancel(&dfops);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
Wong struct xfs_defer_ops dfops; 1162c24b5dfaSDave Chinner xfs_fsblock_t firstfsb; 1163bdb0d04fSChristoph Hellwig uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1164bdb0d04fSChristoph Hellwig int error; 1165c24b5dfaSDave Chinner 1166bdb0d04fSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 1167bdb0d04fSChristoph Hellwig if (error) { 1168bdb0d04fSChristoph Hellwig ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1169bdb0d04fSChristoph Hellwig return error; 1170bdb0d04fSChristoph Hellwig } 1171c24b5dfaSDave Chinner 1172bdb0d04fSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 1173bdb0d04fSChristoph Hellwig error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, 1174bdb0d04fSChristoph Hellwig ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); 1175bdb0d04fSChristoph Hellwig if (error) 1176bdb0d04fSChristoph Hellwig goto out_trans_cancel; 1177c24b5dfaSDave Chinner 1178bdb0d04fSChristoph Hellwig xfs_trans_ijoin(tp, ip, 0); 1179c24b5dfaSDave Chinner 11802c3234d1SDarrick J. Wong xfs_defer_init(&dfops, &firstfsb); 1181bdb0d04fSChristoph Hellwig error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb, 11822c3234d1SDarrick J. Wong &dfops, done); 1183bdb0d04fSChristoph Hellwig if (error) 1184bdb0d04fSChristoph Hellwig goto out_bmap_cancel; 1185bdb0d04fSChristoph Hellwig 11862c3234d1SDarrick J. Wong error = xfs_defer_finish(&tp, &dfops, ip); 1187bdb0d04fSChristoph Hellwig if (error) 1188bdb0d04fSChristoph Hellwig goto out_bmap_cancel; 1189bdb0d04fSChristoph Hellwig 1190bdb0d04fSChristoph Hellwig error = xfs_trans_commit(tp); 1191bdb0d04fSChristoph Hellwig out_unlock: 1192bdb0d04fSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 1193bdb0d04fSChristoph Hellwig return error; 1194bdb0d04fSChristoph Hellwig 1195bdb0d04fSChristoph Hellwig out_bmap_cancel: 11962c3234d1SDarrick J. 
Wong xfs_defer_cancel(&dfops); 1197bdb0d04fSChristoph Hellwig out_trans_cancel: 1198bdb0d04fSChristoph Hellwig xfs_trans_cancel(tp); 1199bdb0d04fSChristoph Hellwig goto out_unlock; 1200bdb0d04fSChristoph Hellwig } 1201bdb0d04fSChristoph Hellwig 1202bdb0d04fSChristoph Hellwig static int 1203bdb0d04fSChristoph Hellwig xfs_adjust_extent_unmap_boundaries( 1204bdb0d04fSChristoph Hellwig struct xfs_inode *ip, 1205bdb0d04fSChristoph Hellwig xfs_fileoff_t *startoffset_fsb, 1206bdb0d04fSChristoph Hellwig xfs_fileoff_t *endoffset_fsb) 1207bdb0d04fSChristoph Hellwig { 1208bdb0d04fSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1209bdb0d04fSChristoph Hellwig struct xfs_bmbt_irec imap; 1210bdb0d04fSChristoph Hellwig int nimap, error; 1211c24b5dfaSDave Chinner xfs_extlen_t mod = 0; 1212c24b5dfaSDave Chinner 1213c24b5dfaSDave Chinner nimap = 1; 1214bdb0d04fSChristoph Hellwig error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0); 1215c24b5dfaSDave Chinner if (error) 1216bdb0d04fSChristoph Hellwig return error; 1217c24b5dfaSDave Chinner 1218c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1219c24b5dfaSDave Chinner xfs_daddr_t block; 1220c24b5dfaSDave Chinner 1221c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1222c24b5dfaSDave Chinner block = imap.br_startblock; 1223c24b5dfaSDave Chinner mod = do_div(block, mp->m_sb.sb_rextsize); 1224c24b5dfaSDave Chinner if (mod) 1225bdb0d04fSChristoph Hellwig *startoffset_fsb += mp->m_sb.sb_rextsize - mod; 1226c24b5dfaSDave Chinner } 1227c24b5dfaSDave Chinner 1228c24b5dfaSDave Chinner nimap = 1; 1229bdb0d04fSChristoph Hellwig error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0); 1230c24b5dfaSDave Chinner if (error) 1231c24b5dfaSDave Chinner return error; 1232c24b5dfaSDave Chinner 1233c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1234c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1235c24b5dfaSDave Chinner mod++; 1236bdb0d04fSChristoph Hellwig if (mod && mod != mp->m_sb.sb_rextsize) 1237bdb0d04fSChristoph Hellwig *endoffset_fsb -= mod; 1238c24b5dfaSDave Chinner } 1239c24b5dfaSDave Chinner 1240bdb0d04fSChristoph Hellwig return 0; 1241c24b5dfaSDave Chinner } 1242bdb0d04fSChristoph Hellwig 1243bdb0d04fSChristoph Hellwig static int 1244bdb0d04fSChristoph Hellwig xfs_flush_unmap_range( 1245bdb0d04fSChristoph Hellwig struct xfs_inode *ip, 1246bdb0d04fSChristoph Hellwig xfs_off_t offset, 1247bdb0d04fSChristoph Hellwig xfs_off_t len) 1248bdb0d04fSChristoph Hellwig { 1249bdb0d04fSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1250bdb0d04fSChristoph Hellwig struct inode *inode = VFS_I(ip); 1251bdb0d04fSChristoph Hellwig xfs_off_t rounding, start, end; 1252bdb0d04fSChristoph Hellwig int error; 1253bdb0d04fSChristoph Hellwig 1254bdb0d04fSChristoph Hellwig /* wait for the completion of any pending DIOs */ 1255bdb0d04fSChristoph Hellwig inode_dio_wait(inode); 1256bdb0d04fSChristoph Hellwig 1257bdb0d04fSChristoph Hellwig rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE); 1258bdb0d04fSChristoph Hellwig start = round_down(offset, rounding); 1259bdb0d04fSChristoph Hellwig end = round_up(offset + len, rounding) - 1; 1260bdb0d04fSChristoph Hellwig 1261bdb0d04fSChristoph Hellwig error = filemap_write_and_wait_range(inode->i_mapping, start, end); 1262c24b5dfaSDave Chinner if (error) 1263c24b5dfaSDave Chinner return error; 1264bdb0d04fSChristoph Hellwig truncate_pagecache_range(inode, start, end); 1265bdb0d04fSChristoph Hellwig return 0; 
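	/*
	 * Example (illustrative numbers only): with 1k filesystem blocks on
	 * a machine with 4k pages, rounding is PAGE_SIZE, so a punch of
	 * bytes [5000, 7000) writes back and truncates the page cache over
	 * the page-aligned range [4096, 8191].
	 */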
1266c24b5dfaSDave Chinner }
1267c24b5dfaSDave Chinner 
1268c24b5dfaSDave Chinner int
1269c24b5dfaSDave Chinner xfs_free_file_space(
1270c24b5dfaSDave Chinner 	struct xfs_inode	*ip,
1271c24b5dfaSDave Chinner 	xfs_off_t		offset,
1272c24b5dfaSDave Chinner 	xfs_off_t		len)
1273c24b5dfaSDave Chinner {
1274bdb0d04fSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1275c24b5dfaSDave Chinner 	xfs_fileoff_t		startoffset_fsb;
1276bdb0d04fSChristoph Hellwig 	xfs_fileoff_t		endoffset_fsb;
12773c2bdc91SChristoph Hellwig 	int			done = 0, error;
1278c24b5dfaSDave Chinner 
1279c24b5dfaSDave Chinner 	trace_xfs_free_file_space(ip);
1280c24b5dfaSDave Chinner 
1281c24b5dfaSDave Chinner 	error = xfs_qm_dqattach(ip, 0);
1282c24b5dfaSDave Chinner 	if (error)
1283c24b5dfaSDave Chinner 		return error;
1284c24b5dfaSDave Chinner 
1285c24b5dfaSDave Chinner 	if (len <= 0)	/* if nothing is being freed */
1286bdb0d04fSChristoph Hellwig 		return 0;
1287bdb0d04fSChristoph Hellwig 
1288bdb0d04fSChristoph Hellwig 	error = xfs_flush_unmap_range(ip, offset, len);
1289bdb0d04fSChristoph Hellwig 	if (error)
1290c24b5dfaSDave Chinner 		return error;
1291bdb0d04fSChristoph Hellwig 
1292c24b5dfaSDave Chinner 	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
1293c24b5dfaSDave Chinner 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
1294c24b5dfaSDave Chinner 
1295bdb0d04fSChristoph Hellwig 	/*
1296bdb0d04fSChristoph Hellwig 	 * We need to zero, on disk, the parts we're not freeing. If it's a RT
1297bdb0d04fSChristoph Hellwig 	 * file and we can't use unwritten extents then we actually need to
1298bdb0d04fSChristoph Hellwig 	 * zero the whole extent, otherwise we just need to take care of the
1299bdb0d04fSChristoph Hellwig 	 * block boundaries, and xfs_bunmapi will handle the rest.
1300bdb0d04fSChristoph Hellwig 	 */
1301bdb0d04fSChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip) &&
1302bdb0d04fSChristoph Hellwig 	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
1303bdb0d04fSChristoph Hellwig 		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
1304bdb0d04fSChristoph Hellwig 				&endoffset_fsb);
1305c24b5dfaSDave Chinner 		if (error)
1306c24b5dfaSDave Chinner 			return error;
1307bdb0d04fSChristoph Hellwig 	}
1308c24b5dfaSDave Chinner 
13093c2bdc91SChristoph Hellwig 	if (endoffset_fsb > startoffset_fsb) {
13103c2bdc91SChristoph Hellwig 		while (!done) {
1311bdb0d04fSChristoph Hellwig 			error = xfs_unmap_extent(ip, startoffset_fsb,
1312bdb0d04fSChristoph Hellwig 					endoffset_fsb - startoffset_fsb, &done);
13133c2bdc91SChristoph Hellwig 			if (error)
13143c2bdc91SChristoph Hellwig 				return error;
13153c2bdc91SChristoph Hellwig 		}
1316c24b5dfaSDave Chinner 	}
1317c24b5dfaSDave Chinner 
13183c2bdc91SChristoph Hellwig 	/*
13193c2bdc91SChristoph Hellwig 	 * Now that we've unmapped all full blocks we'll have to zero out any
13203c2bdc91SChristoph Hellwig 	 * partial block at the beginning and/or end.  xfs_zero_range is
13213c2bdc91SChristoph Hellwig 	 * smart enough to skip any holes, including those we just created.
13223c2bdc91SChristoph Hellwig 	 */
13233c2bdc91SChristoph Hellwig 	return xfs_zero_range(ip, offset, len, NULL);
1324c24b5dfaSDave Chinner }
1325c24b5dfaSDave Chinner 
13265d11fb4bSBrian Foster /*
13275d11fb4bSBrian Foster  * Preallocate and zero a range of a file. This mechanism has the allocation
13285d11fb4bSBrian Foster  * semantics of fallocate and in addition converts data in the range to zeroes.
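 *
 * For example (illustrative numbers only): zeroing bytes [1000, 9000) of a
 * file on a 4k-block filesystem first punches that byte range and then
 * preallocates the block-aligned range [0, 12288) as unwritten extents, so
 * the requested bytes read back as zeroes.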
13295d11fb4bSBrian Foster  */
1330865e9446SChristoph Hellwig int
1331c24b5dfaSDave Chinner xfs_zero_file_space(
1332c24b5dfaSDave Chinner 	struct xfs_inode	*ip,
1333c24b5dfaSDave Chinner 	xfs_off_t		offset,
13345f8aca8bSChristoph Hellwig 	xfs_off_t		len)
1335c24b5dfaSDave Chinner {
1336c24b5dfaSDave Chinner 	struct xfs_mount	*mp = ip->i_mount;
13375d11fb4bSBrian Foster 	uint			blksize;
1338c24b5dfaSDave Chinner 	int			error;
1339c24b5dfaSDave Chinner 
1340897b73b6SDave Chinner 	trace_xfs_zero_file_space(ip);
1341897b73b6SDave Chinner 
13425d11fb4bSBrian Foster 	blksize = 1 << mp->m_sb.sb_blocklog;
1343c24b5dfaSDave Chinner 
1344c24b5dfaSDave Chinner 	/*
13455d11fb4bSBrian Foster 	 * Punch a hole and prealloc the range. We use hole punch rather than
13465d11fb4bSBrian Foster 	 * unwritten extent conversion for two reasons:
13475d11fb4bSBrian Foster 	 *
13485d11fb4bSBrian Foster 	 * 1.) Hole punch handles partial block zeroing for us.
13495d11fb4bSBrian Foster 	 *
13505d11fb4bSBrian Foster 	 * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
13515d11fb4bSBrian Foster 	 * by virtue of the hole punch.
1352c24b5dfaSDave Chinner 	 */
13535d11fb4bSBrian Foster 	error = xfs_free_file_space(ip, offset, len);
1354c24b5dfaSDave Chinner 	if (error)
13555f8aca8bSChristoph Hellwig 		goto out;
1356c24b5dfaSDave Chinner 
13575d11fb4bSBrian Foster 	error = xfs_alloc_file_space(ip, round_down(offset, blksize),
13585d11fb4bSBrian Foster 				     round_up(offset + len, blksize) -
13595d11fb4bSBrian Foster 				     round_down(offset, blksize),
13605d11fb4bSBrian Foster 				     XFS_BMAPI_PREALLOC);
13615f8aca8bSChristoph Hellwig out:
1362c24b5dfaSDave Chinner 	return error;
1363c24b5dfaSDave Chinner 
1364c24b5dfaSDave Chinner }
1365c24b5dfaSDave Chinner 
1366c24b5dfaSDave Chinner /*
1367a904b1caSNamjae Jeon  * @next_fsb will keep track of the extent currently undergoing shift.
1368a904b1caSNamjae Jeon  * @stop_fsb will keep track of the extent at which we have to stop.
1369a904b1caSNamjae Jeon  * If we are shifting left, we will start with block (offset + len) and
1370a904b1caSNamjae Jeon  * shift each extent up to the last extent.
1371a904b1caSNamjae Jeon  * If we are shifting right, we will start with the last extent inside file
1372a904b1caSNamjae Jeon  * space and continue until we reach the block corresponding to offset.
1373e1d8fb88SNamjae Jeon  */
137472c1a739Skbuild test robot static int
1375a904b1caSNamjae Jeon xfs_shift_file_space(
1376e1d8fb88SNamjae Jeon 	struct xfs_inode	*ip,
1377e1d8fb88SNamjae Jeon 	xfs_off_t		offset,
1378a904b1caSNamjae Jeon 	xfs_off_t		len,
1379a904b1caSNamjae Jeon 	enum shift_direction	direction)
1380e1d8fb88SNamjae Jeon {
1381e1d8fb88SNamjae Jeon 	int			done = 0;
1382e1d8fb88SNamjae Jeon 	struct xfs_mount	*mp = ip->i_mount;
1383e1d8fb88SNamjae Jeon 	struct xfs_trans	*tp;
1384e1d8fb88SNamjae Jeon 	int			error;
13852c3234d1SDarrick J. Wong 	struct xfs_defer_ops	dfops;
1386e1d8fb88SNamjae Jeon 	xfs_fsblock_t		first_block;
1387a904b1caSNamjae Jeon 	xfs_fileoff_t		stop_fsb;
13882c845f5aSBrian Foster 	xfs_fileoff_t		next_fsb;
1389e1d8fb88SNamjae Jeon 	xfs_fileoff_t		shift_fsb;
1390e1d8fb88SNamjae Jeon 
1391a904b1caSNamjae Jeon 	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1392e1d8fb88SNamjae Jeon 
1393a904b1caSNamjae Jeon 	if (direction == SHIFT_LEFT) {
13942c845f5aSBrian Foster 		next_fsb = XFS_B_TO_FSB(mp, offset + len);
1395a904b1caSNamjae Jeon 		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1396a904b1caSNamjae Jeon 	} else {
1397a904b1caSNamjae Jeon 		/*
1398a904b1caSNamjae Jeon 		 * If right shift, delegate the work of initialization of
1399a904b1caSNamjae Jeon 		 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
1400a904b1caSNamjae Jeon 		 */
1401a904b1caSNamjae Jeon 		next_fsb = NULLFSBLOCK;
1402a904b1caSNamjae Jeon 		stop_fsb = XFS_B_TO_FSB(mp, offset);
1403a904b1caSNamjae Jeon 	}
1404e1d8fb88SNamjae Jeon 
1405a904b1caSNamjae Jeon 	shift_fsb = XFS_B_TO_FSB(mp, len);
1406f71721d0SBrian Foster 
1407f71721d0SBrian Foster 	/*
1408f71721d0SBrian Foster 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1409f71721d0SBrian Foster 	 * into the accessible region of the file.
1410f71721d0SBrian Foster 	 */
141141b9d726SBrian Foster 	if (xfs_can_free_eofblocks(ip, true)) {
141241b9d726SBrian Foster 		error = xfs_free_eofblocks(mp, ip, false);
141341b9d726SBrian Foster 		if (error)
141441b9d726SBrian Foster 			return error;
141541b9d726SBrian Foster 	}
14161669a8caSDave Chinner 
1417f71721d0SBrian Foster 	/*
1418f71721d0SBrian Foster 	 * Writeback and invalidate cache for the remainder of the file as we're
1419a904b1caSNamjae Jeon 	 * about to shift down every extent from offset to EOF.
1420f71721d0SBrian Foster 	 */
1421f71721d0SBrian Foster 	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1422a904b1caSNamjae Jeon 					     offset, -1);
1423f71721d0SBrian Foster 	if (error)
1424f71721d0SBrian Foster 		return error;
1425f71721d0SBrian Foster 	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
142609cbfeafSKirill A. Shutemov 					offset >> PAGE_SHIFT, -1);
1427e1d8fb88SNamjae Jeon 	if (error)
1428e1d8fb88SNamjae Jeon 		return error;
1429e1d8fb88SNamjae Jeon 
1430a904b1caSNamjae Jeon 	/*
1431a904b1caSNamjae Jeon 	 * The extent shifting code works at extent granularity, so if
1432a904b1caSNamjae Jeon 	 * stop_fsb is not the starting block of an extent, we need to split
1433a904b1caSNamjae Jeon 	 * the extent at stop_fsb.
1434a904b1caSNamjae Jeon 	 */
1435a904b1caSNamjae Jeon 	if (direction == SHIFT_RIGHT) {
1436a904b1caSNamjae Jeon 		error = xfs_bmap_split_extent(ip, stop_fsb);
1437a904b1caSNamjae Jeon 		if (error)
1438a904b1caSNamjae Jeon 			return error;
1439a904b1caSNamjae Jeon 	}
1440a904b1caSNamjae Jeon 
1441e1d8fb88SNamjae Jeon 	while (!error && !done) {
1442e1d8fb88SNamjae Jeon 		/*
1443e1d8fb88SNamjae Jeon 		 * We need a permanent block reservation for the transaction.
1444e1d8fb88SNamjae Jeon 		 * This comes into play when, after shifting an extent into a
1445e1d8fb88SNamjae Jeon 		 * hole, we find that adjacent extents can be merged, which
1446e1d8fb88SNamjae Jeon 		 * may lead to freeing of a block during the record update.
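		 * (Hence the tr_write transaction with an
		 * XFS_DIOSTRAT_SPACE_RES(mp, 0) block reservation below.)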
1447e1d8fb88SNamjae Jeon 		 */
1448253f4911SChristoph Hellwig 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1449253f4911SChristoph Hellwig 					XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1450253f4911SChristoph Hellwig 		if (error)
1451e1d8fb88SNamjae Jeon 			break;
1452e1d8fb88SNamjae Jeon 
1453e1d8fb88SNamjae Jeon 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1454e1d8fb88SNamjae Jeon 		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1455e1d8fb88SNamjae Jeon 				ip->i_gdquot, ip->i_pdquot,
1456e1d8fb88SNamjae Jeon 				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1457e1d8fb88SNamjae Jeon 				XFS_QMOPT_RES_REGBLKS);
1458e1d8fb88SNamjae Jeon 		if (error)
1459d4a97a04SBrian Foster 			goto out_trans_cancel;
1460e1d8fb88SNamjae Jeon 
1461a904b1caSNamjae Jeon 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1462e1d8fb88SNamjae Jeon 
14632c3234d1SDarrick J. Wong 		xfs_defer_init(&dfops, &first_block);
1464e1d8fb88SNamjae Jeon 
1465e1d8fb88SNamjae Jeon 		/*
1466e1d8fb88SNamjae Jeon 		 * We are using the write transaction, which allows a maximum
1467e1d8fb88SNamjae Jeon 		 * of 2 bmbt updates.
1468e1d8fb88SNamjae Jeon 		 */
1469a904b1caSNamjae Jeon 		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
14702c3234d1SDarrick J. Wong 				&done, stop_fsb, &first_block, &dfops,
1471a904b1caSNamjae Jeon 				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1472e1d8fb88SNamjae Jeon 		if (error)
1473d4a97a04SBrian Foster 			goto out_bmap_cancel;
1474e1d8fb88SNamjae Jeon 
14752c3234d1SDarrick J. Wong 		error = xfs_defer_finish(&tp, &dfops, NULL);
1476e1d8fb88SNamjae Jeon 		if (error)
1477d4a97a04SBrian Foster 			goto out_bmap_cancel;
1478e1d8fb88SNamjae Jeon 
147970393313SChristoph Hellwig 		error = xfs_trans_commit(tp);
1480e1d8fb88SNamjae Jeon 	}
1481e1d8fb88SNamjae Jeon 
1482e1d8fb88SNamjae Jeon 	return error;
1483e1d8fb88SNamjae Jeon 
1484d4a97a04SBrian Foster out_bmap_cancel:
14852c3234d1SDarrick J. Wong 	xfs_defer_cancel(&dfops);
1486d4a97a04SBrian Foster out_trans_cancel:
14874906e215SChristoph Hellwig 	xfs_trans_cancel(tp);
1488e1d8fb88SNamjae Jeon 	return error;
1489e1d8fb88SNamjae Jeon }
1490e1d8fb88SNamjae Jeon 
1491e1d8fb88SNamjae Jeon /*
1492a904b1caSNamjae Jeon  * xfs_collapse_file_space()
1493a904b1caSNamjae Jeon  * This routine frees disk space and shifts extents for the given file.
1494a904b1caSNamjae Jeon  * The first thing we do is free data blocks in the specified range by
1495a904b1caSNamjae Jeon  * calling xfs_free_file_space(), which also syncs dirty data and
1496a904b1caSNamjae Jeon  * invalidates the page cache over the region on which the collapse range
1497a904b1caSNamjae Jeon  * is working. Extent records are then shifted to the left to cover the hole.
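 *
 * For example (illustrative numbers only): on a 4k-block filesystem,
 * collapsing offset=4096 len=4096 frees file block 1 and shifts every
 * extent from block 2 onwards one block to the left; the caller then
 * trims i_size so the file ends up 4k shorter.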
1498a904b1caSNamjae Jeon  * RETURNS:
1499a904b1caSNamjae Jeon  * 0 on success
1500a904b1caSNamjae Jeon  * errno on error
1501a904b1caSNamjae Jeon  *
1502a904b1caSNamjae Jeon  */
1503a904b1caSNamjae Jeon int
1504a904b1caSNamjae Jeon xfs_collapse_file_space(
1505a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1506a904b1caSNamjae Jeon 	xfs_off_t		offset,
1507a904b1caSNamjae Jeon 	xfs_off_t		len)
1508a904b1caSNamjae Jeon {
1509a904b1caSNamjae Jeon 	int error;
1510a904b1caSNamjae Jeon 
1511a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1512a904b1caSNamjae Jeon 	trace_xfs_collapse_file_space(ip);
1513a904b1caSNamjae Jeon 
1514a904b1caSNamjae Jeon 	error = xfs_free_file_space(ip, offset, len);
1515a904b1caSNamjae Jeon 	if (error)
1516a904b1caSNamjae Jeon 		return error;
1517a904b1caSNamjae Jeon 
1518a904b1caSNamjae Jeon 	return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1519a904b1caSNamjae Jeon }
1520a904b1caSNamjae Jeon 
1521a904b1caSNamjae Jeon /*
1522a904b1caSNamjae Jeon  * xfs_insert_file_space()
1523a904b1caSNamjae Jeon  * This routine creates hole space by shifting extents for the given file.
1524a904b1caSNamjae Jeon  * The first thing we do is sync dirty data and invalidate the page cache
1525a904b1caSNamjae Jeon  * over the region on which the insert range is working. We then split an
1526a904b1caSNamjae Jeon  * extent into two extents at the given offset by calling
1527a904b1caSNamjae Jeon  * xfs_bmap_split_extent, and shift all extent records lying between
1528a904b1caSNamjae Jeon  * [offset, last allocated extent] to the right to make room for the hole.
1529a904b1caSNamjae Jeon  * RETURNS:
1530a904b1caSNamjae Jeon  * 0 on success
1531a904b1caSNamjae Jeon  * errno on error
1532a904b1caSNamjae Jeon  */
1533a904b1caSNamjae Jeon int
1534a904b1caSNamjae Jeon xfs_insert_file_space(
1535a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1536a904b1caSNamjae Jeon 	loff_t			offset,
1537a904b1caSNamjae Jeon 	loff_t			len)
1538a904b1caSNamjae Jeon {
1539a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1540a904b1caSNamjae Jeon 	trace_xfs_insert_file_space(ip);
1541a904b1caSNamjae Jeon 
1542a904b1caSNamjae Jeon 	return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1543a904b1caSNamjae Jeon }
1544a904b1caSNamjae Jeon 
1545a904b1caSNamjae Jeon /*
1546a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1547a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1548a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1549a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking so we can get
1550a133d952SDave Chinner  * invalid formats on the target inode.
1551a133d952SDave Chinner  *
1552a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1553a133d952SDave Chinner  * space for 6. If we defragment down to 7 extents, then the tmp format is a
1554a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1555a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1556a133d952SDave Chinner  *
1557a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1558a133d952SDave Chinner  * a corrupt temporary inode, either.
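 *
 * (With the rmap feature enabled most of these checks are moot: the
 * rmap-based swap remaps extents one at a time and can handle any
 * number of extents in any format, so we return success early below.)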
1559a133d952SDave Chinner  *
1560a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1561a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1562a133d952SDave Chinner  * reject and log the attempt. Basically we are putting the responsibility on
1563a133d952SDave Chinner  * userspace to get this right.
1564a133d952SDave Chinner  */
1565a133d952SDave Chinner static int
1566a133d952SDave Chinner xfs_swap_extents_check_format(
1567e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1568e06259aaSDarrick J. Wong 	struct xfs_inode	*tip)	/* tmp inode */
1569a133d952SDave Chinner {
1570a133d952SDave Chinner 
1571a133d952SDave Chinner 	/* Should never get a local format */
1572a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
1573a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
15742451337dSDave Chinner 		return -EINVAL;
1575a133d952SDave Chinner 
1576a133d952SDave Chinner 	/*
1577a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode,
1578a133d952SDave Chinner 	 * why did userspace call us?
1579a133d952SDave Chinner 	 */
1580a133d952SDave Chinner 	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
15812451337dSDave Chinner 		return -EINVAL;
1582a133d952SDave Chinner 
1583a133d952SDave Chinner 	/*
1584*1f08af52SDarrick J. Wong 	 * If we have to use the (expensive) rmap swap method, we can
1585*1f08af52SDarrick J. Wong 	 * handle any number of extents and any format.
1586*1f08af52SDarrick J. Wong 	 */
1587*1f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
1588*1f08af52SDarrick J. Wong 		return 0;
1589*1f08af52SDarrick J. Wong 
1590*1f08af52SDarrick J. Wong 	/*
1591a133d952SDave Chinner 	 * If the target inode is in extent form and the temp inode is in btree
1592a133d952SDave Chinner 	 * form then we will end up with the target inode in the wrong format
1593a133d952SDave Chinner 	 * as we already know there are fewer extents in the temp inode.
1594a133d952SDave Chinner 	 */
1595a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1596a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
15972451337dSDave Chinner 		return -EINVAL;
1598a133d952SDave Chinner 
1599a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1600a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1601a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
1602a133d952SDave Chinner 	    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
16032451337dSDave Chinner 		return -EINVAL;
1604a133d952SDave Chinner 
1605a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1606a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1607a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
1608a133d952SDave Chinner 	    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
16092451337dSDave Chinner 		return -EINVAL;
1610a133d952SDave Chinner 
1611a133d952SDave Chinner 	/*
1612a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1613a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1614a133d952SDave Chinner 	 * in the target.
1615a133d952SDave Chinner 	 *
1616a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1617a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1618a133d952SDave Chinner 	 * extent format...
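 *
 * For example (illustrative): a btree-format temp inode is rejected if its
 * root block would not fit in the target's data fork space
 * (XFS_IFORK_BOFF), or if it would not retain enough extents to stay in
 * btree format once swapped into the target.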
1619a133d952SDave Chinner 	 */
1620a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1621a133d952SDave Chinner 		if (XFS_IFORK_BOFF(ip) &&
1622a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
16232451337dSDave Chinner 			return -EINVAL;
1624a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
1625a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
16262451337dSDave Chinner 			return -EINVAL;
1627a133d952SDave Chinner 	}
1628a133d952SDave Chinner 
1629a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1630a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1631a133d952SDave Chinner 		if (XFS_IFORK_BOFF(tip) &&
1632a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
16332451337dSDave Chinner 			return -EINVAL;
1634a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
1635a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
16362451337dSDave Chinner 			return -EINVAL;
1637a133d952SDave Chinner 	}
1638a133d952SDave Chinner 
1639a133d952SDave Chinner 	return 0;
1640a133d952SDave Chinner }
1641a133d952SDave Chinner 
16427abbb8f9SDave Chinner static int
16434ef897a2SDave Chinner xfs_swap_extent_flush(
16444ef897a2SDave Chinner 	struct xfs_inode	*ip)
16454ef897a2SDave Chinner {
16464ef897a2SDave Chinner 	int	error;
16474ef897a2SDave Chinner 
16484ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
16494ef897a2SDave Chinner 	if (error)
16504ef897a2SDave Chinner 		return error;
16514ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
16524ef897a2SDave Chinner 
16534ef897a2SDave Chinner 	/* Verify O_DIRECT for ftmp */
16544ef897a2SDave Chinner 	if (VFS_I(ip)->i_mapping->nrpages)
16554ef897a2SDave Chinner 		return -EINVAL;
16564ef897a2SDave Chinner 	return 0;
16574ef897a2SDave Chinner }
16584ef897a2SDave Chinner 
1659*1f08af52SDarrick J. Wong /*
1660*1f08af52SDarrick J. Wong  * Move extents from one file to another, when rmap is enabled.
1661*1f08af52SDarrick J. Wong  */
1662*1f08af52SDarrick J. Wong STATIC int
1663*1f08af52SDarrick J. Wong xfs_swap_extent_rmap(
1664*1f08af52SDarrick J. Wong 	struct xfs_trans		**tpp,
1665*1f08af52SDarrick J. Wong 	struct xfs_inode		*ip,
1666*1f08af52SDarrick J. Wong 	struct xfs_inode		*tip)
1667*1f08af52SDarrick J. Wong {
1668*1f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		irec;
1669*1f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		uirec;
1670*1f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		tirec;
1671*1f08af52SDarrick J. Wong 	xfs_fileoff_t			offset_fsb;
1672*1f08af52SDarrick J. Wong 	xfs_fileoff_t			end_fsb;
1673*1f08af52SDarrick J. Wong 	xfs_filblks_t			count_fsb;
1674*1f08af52SDarrick J. Wong 	xfs_fsblock_t			firstfsb;
1675*1f08af52SDarrick J. Wong 	struct xfs_defer_ops		dfops;
1676*1f08af52SDarrick J. Wong 	int				error;
1677*1f08af52SDarrick J. Wong 	xfs_filblks_t			ilen;
1678*1f08af52SDarrick J. Wong 	xfs_filblks_t			rlen;
1679*1f08af52SDarrick J. Wong 	int				nimaps;
1680*1f08af52SDarrick J. Wong 	__uint64_t			tip_flags2;
1681*1f08af52SDarrick J. Wong 
1682*1f08af52SDarrick J. Wong 	/*
1683*1f08af52SDarrick J. Wong 	 * If the source file has shared blocks, we must flag the donor
1684*1f08af52SDarrick J. Wong 	 * file as having shared blocks so that we get the shared-block
1685*1f08af52SDarrick J. Wong 	 * rmap functions when we go to fix up the rmaps. The flags
1686*1f08af52SDarrick J. Wong 	 * will be switched for real later.
1687*1f08af52SDarrick J. Wong 	 */
1688*1f08af52SDarrick J. Wong 	tip_flags2 = tip->i_d.di_flags2;
1689*1f08af52SDarrick J.
Wong if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) 1690*1f08af52SDarrick J. Wong tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; 1691*1f08af52SDarrick J. Wong 1692*1f08af52SDarrick J. Wong offset_fsb = 0; 1693*1f08af52SDarrick J. Wong end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); 1694*1f08af52SDarrick J. Wong count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); 1695*1f08af52SDarrick J. Wong 1696*1f08af52SDarrick J. Wong while (count_fsb) { 1697*1f08af52SDarrick J. Wong /* Read extent from the donor file */ 1698*1f08af52SDarrick J. Wong nimaps = 1; 1699*1f08af52SDarrick J. Wong error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec, 1700*1f08af52SDarrick J. Wong &nimaps, 0); 1701*1f08af52SDarrick J. Wong if (error) 1702*1f08af52SDarrick J. Wong goto out; 1703*1f08af52SDarrick J. Wong ASSERT(nimaps == 1); 1704*1f08af52SDarrick J. Wong ASSERT(tirec.br_startblock != DELAYSTARTBLOCK); 1705*1f08af52SDarrick J. Wong 1706*1f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap(tip, &tirec); 1707*1f08af52SDarrick J. Wong ilen = tirec.br_blockcount; 1708*1f08af52SDarrick J. Wong 1709*1f08af52SDarrick J. Wong /* Unmap the old blocks in the source file. */ 1710*1f08af52SDarrick J. Wong while (tirec.br_blockcount) { 1711*1f08af52SDarrick J. Wong xfs_defer_init(&dfops, &firstfsb); 1712*1f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); 1713*1f08af52SDarrick J. Wong 1714*1f08af52SDarrick J. Wong /* Read extent from the source file */ 1715*1f08af52SDarrick J. Wong nimaps = 1; 1716*1f08af52SDarrick J. Wong error = xfs_bmapi_read(ip, tirec.br_startoff, 1717*1f08af52SDarrick J. Wong tirec.br_blockcount, &irec, 1718*1f08af52SDarrick J. Wong &nimaps, 0); 1719*1f08af52SDarrick J. Wong if (error) 1720*1f08af52SDarrick J. Wong goto out_defer; 1721*1f08af52SDarrick J. Wong ASSERT(nimaps == 1); 1722*1f08af52SDarrick J. Wong ASSERT(tirec.br_startoff == irec.br_startoff); 1723*1f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); 1724*1f08af52SDarrick J. Wong 1725*1f08af52SDarrick J. Wong /* Trim the extent. */ 1726*1f08af52SDarrick J. Wong uirec = tirec; 1727*1f08af52SDarrick J. Wong uirec.br_blockcount = rlen = min_t(xfs_filblks_t, 1728*1f08af52SDarrick J. Wong tirec.br_blockcount, 1729*1f08af52SDarrick J. Wong irec.br_blockcount); 1730*1f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); 1731*1f08af52SDarrick J. Wong 1732*1f08af52SDarrick J. Wong /* Remove the mapping from the donor file. */ 1733*1f08af52SDarrick J. Wong error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, 1734*1f08af52SDarrick J. Wong tip, &uirec); 1735*1f08af52SDarrick J. Wong if (error) 1736*1f08af52SDarrick J. Wong goto out_defer; 1737*1f08af52SDarrick J. Wong 1738*1f08af52SDarrick J. Wong /* Remove the mapping from the source file. */ 1739*1f08af52SDarrick J. Wong error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, 1740*1f08af52SDarrick J. Wong ip, &irec); 1741*1f08af52SDarrick J. Wong if (error) 1742*1f08af52SDarrick J. Wong goto out_defer; 1743*1f08af52SDarrick J. Wong 1744*1f08af52SDarrick J. Wong /* Map the donor file's blocks into the source file. */ 1745*1f08af52SDarrick J. Wong error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, 1746*1f08af52SDarrick J. Wong ip, &uirec); 1747*1f08af52SDarrick J. Wong if (error) 1748*1f08af52SDarrick J. Wong goto out_defer; 1749*1f08af52SDarrick J. Wong 1750*1f08af52SDarrick J. Wong /* Map the source file's blocks into the donor file. */ 1751*1f08af52SDarrick J. 
Wong 			error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops,
1752*1f08af52SDarrick J. Wong 					tip, &irec);
1753*1f08af52SDarrick J. Wong 			if (error)
1754*1f08af52SDarrick J. Wong 				goto out_defer;
1755*1f08af52SDarrick J. Wong 
1756*1f08af52SDarrick J. Wong 			error = xfs_defer_finish(tpp, &dfops, ip);
1757*1f08af52SDarrick J. Wong 			if (error)
1758*1f08af52SDarrick J. Wong 				goto out_defer;
1759*1f08af52SDarrick J. Wong 
1760*1f08af52SDarrick J. Wong 			tirec.br_startoff += rlen;
1761*1f08af52SDarrick J. Wong 			if (tirec.br_startblock != HOLESTARTBLOCK &&
1762*1f08af52SDarrick J. Wong 			    tirec.br_startblock != DELAYSTARTBLOCK)
1763*1f08af52SDarrick J. Wong 				tirec.br_startblock += rlen;
1764*1f08af52SDarrick J. Wong 			tirec.br_blockcount -= rlen;
1765*1f08af52SDarrick J. Wong 		}
1766*1f08af52SDarrick J. Wong 
1767*1f08af52SDarrick J. Wong 		/* Roll on... */
1768*1f08af52SDarrick J. Wong 		count_fsb -= ilen;
1769*1f08af52SDarrick J. Wong 		offset_fsb += ilen;
1770*1f08af52SDarrick J. Wong 	}
1771*1f08af52SDarrick J. Wong 
1772*1f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
1773*1f08af52SDarrick J. Wong 	return 0;
1774*1f08af52SDarrick J. Wong 
1775*1f08af52SDarrick J. Wong out_defer:
1776*1f08af52SDarrick J. Wong 	xfs_defer_cancel(&dfops);
1777*1f08af52SDarrick J. Wong out:
1778*1f08af52SDarrick J. Wong 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
1779*1f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
1780*1f08af52SDarrick J. Wong 	return error;
1781*1f08af52SDarrick J. Wong }
1782*1f08af52SDarrick J. Wong 
178339aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
178439aff5fdSDarrick J. Wong STATIC int
178539aff5fdSDarrick J. Wong xfs_swap_extent_forks(
178639aff5fdSDarrick J. Wong 	struct xfs_trans	*tp,
178739aff5fdSDarrick J. Wong 	struct xfs_inode	*ip,
178839aff5fdSDarrick J. Wong 	struct xfs_inode	*tip,
178939aff5fdSDarrick J. Wong 	int			*src_log_flags,
179039aff5fdSDarrick J. Wong 	int			*target_log_flags)
179139aff5fdSDarrick J. Wong {
179239aff5fdSDarrick J. Wong 	struct xfs_ifork	tempifp, *ifp, *tifp;
179339aff5fdSDarrick J. Wong 	int			aforkblks = 0;
179439aff5fdSDarrick J. Wong 	int			taforkblks = 0;
179539aff5fdSDarrick J. Wong 	__uint64_t		tmp;
179639aff5fdSDarrick J. Wong 	int			error;
179739aff5fdSDarrick J. Wong 
179839aff5fdSDarrick J. Wong 	/*
179939aff5fdSDarrick J. Wong 	 * Count the number of extended attribute blocks
180039aff5fdSDarrick J. Wong 	 */
180139aff5fdSDarrick J. Wong 	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
180239aff5fdSDarrick J. Wong 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
180339aff5fdSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
180439aff5fdSDarrick J. Wong 				&aforkblks);
180539aff5fdSDarrick J. Wong 		if (error)
180639aff5fdSDarrick J. Wong 			return error;
180739aff5fdSDarrick J. Wong 	}
180839aff5fdSDarrick J. Wong 	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
180939aff5fdSDarrick J. Wong 	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
181039aff5fdSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
181139aff5fdSDarrick J. Wong 				&taforkblks);
181239aff5fdSDarrick J. Wong 		if (error)
181339aff5fdSDarrick J. Wong 			return error;
181439aff5fdSDarrick J. Wong 	}
181539aff5fdSDarrick J. Wong 
181639aff5fdSDarrick J. Wong 	/*
181739aff5fdSDarrick J. Wong 	 * Before we've swapped the forks, let's set the owners of the forks
181839aff5fdSDarrick J. Wong 	 * appropriately. We have to do this as we are demand paging the btree
181939aff5fdSDarrick J.
Wong * buffers, and so the validation done on read will expect the owner 182039aff5fdSDarrick J. Wong * field to be correctly set. Once we change the owners, we can swap the 182139aff5fdSDarrick J. Wong * inode forks. 182239aff5fdSDarrick J. Wong */ 182339aff5fdSDarrick J. Wong if (ip->i_d.di_version == 3 && 182439aff5fdSDarrick J. Wong ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 182539aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DOWNER; 182639aff5fdSDarrick J. Wong error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, 182739aff5fdSDarrick J. Wong tip->i_ino, NULL); 182839aff5fdSDarrick J. Wong if (error) 182939aff5fdSDarrick J. Wong return error; 183039aff5fdSDarrick J. Wong } 183139aff5fdSDarrick J. Wong 183239aff5fdSDarrick J. Wong if (tip->i_d.di_version == 3 && 183339aff5fdSDarrick J. Wong tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 183439aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DOWNER; 183539aff5fdSDarrick J. Wong error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, 183639aff5fdSDarrick J. Wong ip->i_ino, NULL); 183739aff5fdSDarrick J. Wong if (error) 183839aff5fdSDarrick J. Wong return error; 183939aff5fdSDarrick J. Wong } 184039aff5fdSDarrick J. Wong 184139aff5fdSDarrick J. Wong /* 184239aff5fdSDarrick J. Wong * Swap the data forks of the inodes 184339aff5fdSDarrick J. Wong */ 184439aff5fdSDarrick J. Wong ifp = &ip->i_df; 184539aff5fdSDarrick J. Wong tifp = &tip->i_df; 184639aff5fdSDarrick J. Wong tempifp = *ifp; /* struct copy */ 184739aff5fdSDarrick J. Wong *ifp = *tifp; /* struct copy */ 184839aff5fdSDarrick J. Wong *tifp = tempifp; /* struct copy */ 184939aff5fdSDarrick J. Wong 185039aff5fdSDarrick J. Wong /* 185139aff5fdSDarrick J. Wong * Fix the on-disk inode values 185239aff5fdSDarrick J. Wong */ 185339aff5fdSDarrick J. Wong tmp = (__uint64_t)ip->i_d.di_nblocks; 185439aff5fdSDarrick J. Wong ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 185539aff5fdSDarrick J. Wong tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 185639aff5fdSDarrick J. Wong 185739aff5fdSDarrick J. Wong tmp = (__uint64_t) ip->i_d.di_nextents; 185839aff5fdSDarrick J. Wong ip->i_d.di_nextents = tip->i_d.di_nextents; 185939aff5fdSDarrick J. Wong tip->i_d.di_nextents = tmp; 186039aff5fdSDarrick J. Wong 186139aff5fdSDarrick J. Wong tmp = (__uint64_t) ip->i_d.di_format; 186239aff5fdSDarrick J. Wong ip->i_d.di_format = tip->i_d.di_format; 186339aff5fdSDarrick J. Wong tip->i_d.di_format = tmp; 186439aff5fdSDarrick J. Wong 186539aff5fdSDarrick J. Wong /* 186639aff5fdSDarrick J. Wong * The extents in the source inode could still contain speculative 186739aff5fdSDarrick J. Wong * preallocation beyond EOF (e.g. the file is open but not modified 186839aff5fdSDarrick J. Wong * while defrag is in progress). In that case, we need to copy over the 186939aff5fdSDarrick J. Wong * number of delalloc blocks the data fork in the source inode is 187039aff5fdSDarrick J. Wong * tracking beyond EOF so that when the fork is truncated away when the 187139aff5fdSDarrick J. Wong * temporary inode is unlinked we don't underrun the i_delayed_blks 187239aff5fdSDarrick J. Wong * counter on that inode. 187339aff5fdSDarrick J. Wong */ 187439aff5fdSDarrick J. Wong ASSERT(tip->i_delayed_blks == 0); 187539aff5fdSDarrick J. Wong tip->i_delayed_blks = ip->i_delayed_blks; 187639aff5fdSDarrick J. Wong ip->i_delayed_blks = 0; 187739aff5fdSDarrick J. Wong 187839aff5fdSDarrick J. Wong switch (ip->i_d.di_format) { 187939aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 188039aff5fdSDarrick J. 
Wong 		/* If the extents fit in the inode, fix the
188139aff5fdSDarrick J. Wong 		 * pointer.  Otherwise it's already NULL or
188239aff5fdSDarrick J. Wong 		 * pointing to the extent.
188339aff5fdSDarrick J. Wong 		 */
188439aff5fdSDarrick J. Wong 		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
188539aff5fdSDarrick J. Wong 			ifp->if_u1.if_extents =
188639aff5fdSDarrick J. Wong 				ifp->if_u2.if_inline_ext;
188739aff5fdSDarrick J. Wong 		}
188839aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DEXT;
188939aff5fdSDarrick J. Wong 		break;
189039aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
189139aff5fdSDarrick J. Wong 		ASSERT(ip->i_d.di_version < 3 ||
189239aff5fdSDarrick J. Wong 		       (*src_log_flags & XFS_ILOG_DOWNER));
189339aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DBROOT;
189439aff5fdSDarrick J. Wong 		break;
189539aff5fdSDarrick J. Wong 	}
189639aff5fdSDarrick J. Wong 
189739aff5fdSDarrick J. Wong 	switch (tip->i_d.di_format) {
189839aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
189939aff5fdSDarrick J. Wong 		/* If the extents fit in the inode, fix the
190039aff5fdSDarrick J. Wong 		 * pointer.  Otherwise it's already NULL or
190139aff5fdSDarrick J. Wong 		 * pointing to the extent.
190239aff5fdSDarrick J. Wong 		 */
190339aff5fdSDarrick J. Wong 		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
190439aff5fdSDarrick J. Wong 			tifp->if_u1.if_extents =
190539aff5fdSDarrick J. Wong 				tifp->if_u2.if_inline_ext;
190639aff5fdSDarrick J. Wong 		}
190739aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DEXT;
190839aff5fdSDarrick J. Wong 		break;
190939aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
191039aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DBROOT;
191139aff5fdSDarrick J. Wong 		ASSERT(tip->i_d.di_version < 3 ||
191239aff5fdSDarrick J. Wong 		       (*target_log_flags & XFS_ILOG_DOWNER));
191339aff5fdSDarrick J. Wong 		break;
191439aff5fdSDarrick J. Wong 	}
191539aff5fdSDarrick J. Wong 
191639aff5fdSDarrick J. Wong 	return 0;
191739aff5fdSDarrick J. Wong }
191839aff5fdSDarrick J. Wong 
19194ef897a2SDave Chinner int
1920a133d952SDave Chinner xfs_swap_extents(
1921e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1922e06259aaSDarrick J. Wong 	struct xfs_inode	*tip,	/* tmp inode */
1923e06259aaSDarrick J. Wong 	struct xfs_swapext	*sxp)
1924a133d952SDave Chinner {
1925e06259aaSDarrick J. Wong 	struct xfs_mount	*mp = ip->i_mount;
1926e06259aaSDarrick J. Wong 	struct xfs_trans	*tp;
1927e06259aaSDarrick J. Wong 	struct xfs_bstat	*sbp = &sxp->sx_stat;
1928a133d952SDave Chinner 	int			src_log_flags, target_log_flags;
1929a133d952SDave Chinner 	int			error = 0;
193081217683SDave Chinner 	int			lock_flags;
1931f0bc4d13SDarrick J. Wong 	struct xfs_ifork	*cowfp;
1932f0bc4d13SDarrick J. Wong 	__uint64_t		f;
1933*1f08af52SDarrick J. Wong 	int			resblks;
1934a133d952SDave Chinner 
1935a133d952SDave Chinner 	/*
1936723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1937723cac48SDave Chinner 	 * begin with. Then we can safely ensure the inodes are flushed and
1938723cac48SDave Chinner 	 * have no page cache. Once we have done this we can take the ilocks
1939723cac48SDave Chinner 	 * and do the rest of the checks.
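 *
 * (The resulting lock order is IOLOCK -> MMAPLOCK -> ILOCK, with each
 * lock class taken on both inodes via xfs_lock_two_inodes() to keep the
 * inter-inode ordering consistent.)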
1940a133d952SDave Chinner 	 */
1941723cac48SDave Chinner 	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1942a133d952SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1943723cac48SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1944a133d952SDave Chinner 
1945a133d952SDave Chinner 	/* Verify that both files have the same format */
1946c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
19472451337dSDave Chinner 		error = -EINVAL;
1948a133d952SDave Chinner 		goto out_unlock;
1949a133d952SDave Chinner 	}
1950a133d952SDave Chinner 
1951a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1952a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
19532451337dSDave Chinner 		error = -EINVAL;
1954a133d952SDave Chinner 		goto out_unlock;
1955a133d952SDave Chinner 	}
1956a133d952SDave Chinner 
19574ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1958a133d952SDave Chinner 	if (error)
1959a133d952SDave Chinner 		goto out_unlock;
19604ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
19614ef897a2SDave Chinner 	if (error)
19624ef897a2SDave Chinner 		goto out_unlock;
1963a133d952SDave Chinner 
1964*1f08af52SDarrick J. Wong 	/*
1965*1f08af52SDarrick J. Wong 	 * Extent "swapping" with rmap requires a permanent reservation and
1966*1f08af52SDarrick J. Wong 	 * a block reservation because it's really just a remap operation
1967*1f08af52SDarrick J. Wong 	 * performed with log redo items!
1968*1f08af52SDarrick J. Wong 	 */
1969*1f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1970*1f08af52SDarrick J. Wong 		/*
1971*1f08af52SDarrick J. Wong 		 * Conceptually this shouldn't affect the shape of either
1972*1f08af52SDarrick J. Wong 		 * bmbt, but since we atomically move extents one by one,
1973*1f08af52SDarrick J. Wong 		 * we reserve enough space to rebuild both trees.
1974*1f08af52SDarrick J. Wong 		 */
1975*1f08af52SDarrick J. Wong 		resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
1976*1f08af52SDarrick J. Wong 				XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
1977*1f08af52SDarrick J. Wong 				XFS_DATA_FORK) +
1978*1f08af52SDarrick J. Wong 			  XFS_SWAP_RMAP_SPACE_RES(mp,
1979*1f08af52SDarrick J. Wong 				XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
1980*1f08af52SDarrick J. Wong 				XFS_DATA_FORK);
1981*1f08af52SDarrick J. Wong 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
1982*1f08af52SDarrick J. Wong 				0, 0, &tp);
1983*1f08af52SDarrick J. Wong 	} else
1984*1f08af52SDarrick J. Wong 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0,
1985*1f08af52SDarrick J. Wong 				0, 0, &tp);
1986253f4911SChristoph Hellwig 	if (error)
1987a133d952SDave Chinner 		goto out_unlock;
1988723cac48SDave Chinner 
1989723cac48SDave Chinner 	/*
1990723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction commit
1991723cac48SDave Chinner 	 * or cancel will unlock the inodes from this point onwards.
1992723cac48SDave Chinner 	 */
19934ef897a2SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
19944ef897a2SDave Chinner 	lock_flags |= XFS_ILOCK_EXCL;
199539aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, ip, 0);
199639aff5fdSDarrick J.
Wong 	xfs_trans_ijoin(tp, tip, 0);
1997723cac48SDave Chinner 
1998a133d952SDave Chinner 
1999a133d952SDave Chinner 	/* Verify all data are being swapped */
2000a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
2001a133d952SDave Chinner 	    sxp->sx_length != ip->i_d.di_size ||
2002a133d952SDave Chinner 	    sxp->sx_length != tip->i_d.di_size) {
20032451337dSDave Chinner 		error = -EFAULT;
20044ef897a2SDave Chinner 		goto out_trans_cancel;
2005a133d952SDave Chinner 	}
2006a133d952SDave Chinner 
2007a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
2008a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
2009a133d952SDave Chinner 
2010a133d952SDave Chinner 	/* check inode formats now that data is flushed */
2011a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
2012a133d952SDave Chinner 	if (error) {
2013a133d952SDave Chinner 		xfs_notice(mp,
2014a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
2015a133d952SDave Chinner 				__func__, ip->i_ino);
20164ef897a2SDave Chinner 		goto out_trans_cancel;
2017a133d952SDave Chinner 	}
2018a133d952SDave Chinner 
2019a133d952SDave Chinner 	/*
2020a133d952SDave Chinner 	 * Compare the current change & modify times with that
2021a133d952SDave Chinner 	 * passed in. If they differ, we abort this swap.
2022a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
2023a133d952SDave Chinner 	 * process that the file was not changed out from
2024a133d952SDave Chinner 	 * under it.
2025a133d952SDave Chinner 	 */
2026a133d952SDave Chinner 	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
2027a133d952SDave Chinner 	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
2028a133d952SDave Chinner 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
2029a133d952SDave Chinner 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
20302451337dSDave Chinner 		error = -EBUSY;
203181217683SDave Chinner 		goto out_trans_cancel;
2032a133d952SDave Chinner 	}
2033a133d952SDave Chinner 
203421b5c978SDave Chinner 	/*
203521b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
203621b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
203721b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
203821b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
203921b5c978SDave Chinner 	 * not the pre-swapped inodes.
204021b5c978SDave Chinner 	 */
204121b5c978SDave Chinner 	src_log_flags = XFS_ILOG_CORE;
204221b5c978SDave Chinner 	target_log_flags = XFS_ILOG_CORE;
204339aff5fdSDarrick J. Wong 
2044*1f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
2045*1f08af52SDarrick J. Wong 		error = xfs_swap_extent_rmap(&tp, ip, tip);
2046*1f08af52SDarrick J. Wong 	else
2047*1f08af52SDarrick J. Wong 		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
2048*1f08af52SDarrick J. Wong 				&target_log_flags);
204921b5c978SDave Chinner 	if (error)
205021b5c978SDave Chinner 		goto out_trans_cancel;
2051a133d952SDave Chinner 
2052f0bc4d13SDarrick J. Wong 	/* Do we have to swap reflink flags? */
2053f0bc4d13SDarrick J. Wong 	if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^
2054f0bc4d13SDarrick J. Wong 	    (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) {
2055f0bc4d13SDarrick J. Wong 		f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
2056f0bc4d13SDarrick J. Wong 		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
2057f0bc4d13SDarrick J. Wong 		ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
2058f0bc4d13SDarrick J.
Wong tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 2059f0bc4d13SDarrick J. Wong tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; 2060f0bc4d13SDarrick J. Wong cowfp = ip->i_cowfp; 2061f0bc4d13SDarrick J. Wong ip->i_cowfp = tip->i_cowfp; 2062f0bc4d13SDarrick J. Wong tip->i_cowfp = cowfp; 206383104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(ip); 206483104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(tip); 2065f0bc4d13SDarrick J. Wong } 2066f0bc4d13SDarrick J. Wong 2067a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags); 2068a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags); 2069a133d952SDave Chinner 2070a133d952SDave Chinner /* 2071a133d952SDave Chinner * If this is a synchronous mount, make sure that the 2072a133d952SDave Chinner * transaction goes to disk before returning to the user. 2073a133d952SDave Chinner */ 2074a133d952SDave Chinner if (mp->m_flags & XFS_MOUNT_WSYNC) 2075a133d952SDave Chinner xfs_trans_set_sync(tp); 2076a133d952SDave Chinner 207770393313SChristoph Hellwig error = xfs_trans_commit(tp); 2078a133d952SDave Chinner 2079a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0); 2080a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1); 208139aff5fdSDarrick J. Wong 208239aff5fdSDarrick J. Wong xfs_iunlock(ip, lock_flags); 208339aff5fdSDarrick J. Wong xfs_iunlock(tip, lock_flags); 2084a133d952SDave Chinner return error; 2085a133d952SDave Chinner 208639aff5fdSDarrick J. Wong out_trans_cancel: 208739aff5fdSDarrick J. Wong xfs_trans_cancel(tp); 208839aff5fdSDarrick J. Wong 2089a133d952SDave Chinner out_unlock: 209081217683SDave Chinner xfs_iunlock(ip, lock_flags); 209181217683SDave Chinner xfs_iunlock(tip, lock_flags); 209239aff5fdSDarrick J. Wong return error; 2093a133d952SDave Chinner } 2094