// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rmap_btree.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_refcount.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block. We have to treat it
 * differently based on whether the file is a real time file or not, because the
 * bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	return (XFS_IS_REALTIME_INODE(ip) ? \
		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
}

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
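 *
 * As a worked example (added for illustration, not in the original
 * comment): with 4k filesystem blocks, s_blocksize_bits is 12, so the
 * shift below is (12 - 9) = 3, i.e. each filesystem block offset is
 * multiplied by 8 to get the 512-byte basic-block units that
 * blkdev_issue_zeroout() expects.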
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

	return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, 0);
}

#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	int		error;		/* error return value */
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_extlen_t	prod = 0;	/* product factor for allocators */
	xfs_extlen_t	mod = 0;	/* alignment remainder */
	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_rtblock_t	rtb;

	mp = ap->ip->i_mount;
	align = xfs_get_extsz_hint(ap->ip);
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	div_u64_rem(ap->offset, align, &mod);
	if (mod || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to MAXEXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * MAXEXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
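	/*
	 * (Clarifying note, not in the original: the bitmap inode is locked
	 * and joined before the summary inode, and passing XFS_ILOCK_EXCL to
	 * xfs_trans_ijoin() hands the lock over to the transaction, so both
	 * inodes stay locked until commit.)
	 */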
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t uninitialized_var(rtx);	/* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
	error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
			&ralen, ap->wasdel, prod, &rtb);
	if (error)
		return error;

	ap->blkno = rtb;
	if (ap->blkno != NULLFSBLOCK) {
		ap->blkno *= mp->m_sb.sb_rextsize;
		ralen *= mp->m_sb.sb_rextsize;
		ap->length = ralen;
		ap->ip->i_d.di_nblocks += ralen;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ralen;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);

		/* Zero the extent if we were asked to do so */
		if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
			if (error)
				return error;
		}
	} else {
		ap->length = 0;
	}
	return 0;
}
#endif /* CONFIG_XFS_RT */

/*
 * Check if the endoff is outside the last extent. If so the caller will grow
 * the allocation to a stripe unit boundary. All offsets are considered outside
 * the end of file for an empty fork, so 1 is returned in *eof in that case.
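 *
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *	int eof = 0;
 *
 *	error = xfs_bmap_eof(ip, endoff, XFS_DATA_FORK, &eof);
 *	if (!error && eof)
 *		endoff = roundup_64(endoff, align_fsb);
 *
 * where align_fsb stands in for whatever stripe alignment the caller wants.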
 */
int
xfs_bmap_eof(
	struct xfs_inode	*ip,
	xfs_fileoff_t		endoff,
	int			whichfork,
	int			*eof)
{
	struct xfs_bmbt_irec	rec;
	int			error;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
	if (error || *eof)
		return error;

	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count leaf blocks given a range of extent records originally
 * in btree format.
 */
STATIC void
xfs_bmap_disk_count_leaves(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*block,
	int			numrecs,
	xfs_filblks_t		*count)
{
	int		b;
	xfs_bmbt_rec_t	*frp;

	for (b = 1; b <= numrecs; b++) {
		frp = XFS_BMBT_REC_ADDR(mp, block, b);
		*count += xfs_bmbt_disk_get_blockcount(frp);
	}
}

/*
 * Recursively walks each level of a btree
 * to count total fsblocks in use.
 */
STATIC int
xfs_bmap_count_tree(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_ifork	*ifp,
	xfs_fsblock_t		blockno,
	int			levelin,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	int			error;
	struct xfs_buf		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t		bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
	if (error)
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
				count);
		if (error) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return -EFSCORRUPTED;
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			(*nextents) += numrecs;
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	struct xfs_btree_block	*block;	/* current btree block */
	struct xfs_ifork	*ifp;	/* fork structure */
	xfs_fsblock_t		bno;	/* block # of "block" */
	int			level;	/* btree level, for checking */
	int			error;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	*nextents = 0;
	*count = 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!ifp)
		return 0;

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		return 0;
	case XFS_DINODE_FMT_BTREE:
		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
			error = xfs_iread_extents(tp, ip, whichfork);
			if (error)
				return error;
		}

		/*
		 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
		 */
		block = ifp->if_broot;
		level = be16_to_cpu(block->bb_level);
		ASSERT(level > 0);
		pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
		bno = be64_to_cpu(*pp);
		ASSERT(bno != NULLFSBLOCK);
		ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
		ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

		error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
				nextents, count);
		if (error) {
			XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
					XFS_ERRLEVEL_LOW, mp);
			return -EFSCORRUPTED;
		}
		return 0;
	}

	return 0;
}

static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false, trimmed = false;
	int			error;

	error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Delalloc extents that start beyond EOF can occur due to
		 * speculative EOF allocation when the delalloc extent is larger
		 * than the largest freespace extent at conversion time. These
		 * extents cannot be converted by data writeback, so can exist
		 * here even if we are not supposed to be finding delalloc
		 * extents.
		 */
		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}
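
/*
 * Worked example for the conversion above (illustrative, not from the
 * original source): on a 4k-block filesystem XFS_FSB_TO_BB() multiplies by
 * 8, so a record starting at file offset block 8 with a length of 4 blocks
 * is reported as bmv_offset = 64 and bmv_length = 32 in 512-byte units,
 * and the caller's remaining window shrinks accordingly.
 */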

static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}

/*
 * Get inode's extents as described in bmv, and format for output.
 * Fills entries in the supplied out array until all extents are mapped
 * or until the passed-in bmv->bmv_count slots have been filled.
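 *
 * A rough usage sketch (illustrative, not the real ioctl plumbing):
 *
 *	struct kgetbmap	*out;
 *
 *	out = kmem_zalloc(bmv->bmv_count * sizeof(*out), KM_SLEEP);
 *	error = xfs_getbmap(ip, bmv, out);
 *	// on success, out[0 .. bmv->bmv_entries - 1] hold the mappings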
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;
	ifp = XFS_IFORK_PTR(ip, whichfork);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		if (!XFS_IFORK_Q(ip))
			goto out_unlock_iolock;

		max_len = 1LL << 32;
		lock = xfs_ilock_attr_map_shared(ip);
		break;
	case XFS_COW_FORK:
		/* No CoW fork? Just return */
		if (!ifp)
			goto out_unlock_iolock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_d.di_flags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

	if (bmv->bmv_length == -1) {
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, whichfork);
		if (error)
			goto out_unlock_ilock;
	}

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;

			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
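 *
 * The caller must hold XFS_ILOCK_EXCL (see the ASSERT below).  A typical
 * conversion from a byte range to the block arguments, sketched here for
 * illustration only, would be:
 *
 *	start_fsb = XFS_B_TO_FSBT(mp, offset);
 *	length = XFS_B_TO_FSB(mp, offset + len) - start_fsb;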
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		end_fsb = start_fsb + length;
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		return 0;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, length);

		/*
		 * A delete can push the cursor forward. Step back to the
		 * previous extent on non-delalloc or extents outside the
		 * target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
						  &got, &del);
		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

	return error;
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
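	 * (For example, a freshly created file that has never been written
	 * has i_size 0, no cached pages and no delalloc reservations, so
	 * there is nothing post-EOF to trim.)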
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	return true;
}

/*
 * This is called to free any blocks beyond eof. The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	xfs_filblks_t		map_len;
	int			nimaps;
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If there are blocks after the end of file, truncate the file to its
	 * current size to free them up.
	 */
	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip);
		if (error)
			return error;

		/* wait on dio to ensure i_size has settled */
		inode_dio_wait(VFS_I(ip));

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
				&tp);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
					XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp);
		} else {
			error = xfs_trans_commit(tp);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	struct xfs_defer_ops	dfops;
	uint			qblocks, resblks, resrtextents;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			div_u64_rem(startoffset_fsb, extsz, &temp);
			if (temp)
				e += temp;
			div_u64_rem(e, extsz, &temp);
			if (temp)
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				resrtextents, 0, &tp);

		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		xfs_defer_init(&dfops, &firstfsb);
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, &firstfsb,
					resblks, imapp, &nimaps, &dfops);
		if (error)
			goto error0;

		/*
		 * Complete the transaction
		 */
		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto error0;

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_defer_cancel(&dfops);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error) {
		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	xfs_defer_init(&dfops, &firstfsb);
	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
			&dfops, done);
	if (error)
		goto out_bmap_cancel;

	xfs_defer_ijoin(&dfops, ip);
	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_bmap_cancel:
	xfs_defer_cancel(&dfops);
out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

static int
xfs_adjust_extent_unmap_boundaries(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*startoffset_fsb,
	xfs_fileoff_t		*endoffset_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	int			nimap, error;
	xfs_extlen_t		mod = 0;

	nimap = 1;
	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
	if (error)
		return error;

	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		div_u64_rem(imap.br_startblock, mp->m_sb.sb_rextsize, &mod);
		if (mod)
			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
	}

	nimap = 1;
	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
	if (error)
		return error;

	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		mod++;
		if (mod && mod != mp->m_sb.sb_rextsize)
			*endoffset_fsb -= mod;
	}

	return 0;
}

static int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/* wait for the completion of any pending DIOs */
	inode_dio_wait(inode);

	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
	start = round_down(offset, rounding);
	end = round_up(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

int
1130c24b5dfaSDave Chinner int 1131c24b5dfaSDave Chinner xfs_free_file_space( 1132c24b5dfaSDave Chinner struct xfs_inode *ip, 1133c24b5dfaSDave Chinner xfs_off_t offset, 1134c24b5dfaSDave Chinner xfs_off_t len) 1135c24b5dfaSDave Chinner { 1136bdb0d04fSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1137c24b5dfaSDave Chinner xfs_fileoff_t startoffset_fsb; 1138bdb0d04fSChristoph Hellwig xfs_fileoff_t endoffset_fsb; 11393c2bdc91SChristoph Hellwig int done = 0, error; 1140c24b5dfaSDave Chinner 1141c24b5dfaSDave Chinner trace_xfs_free_file_space(ip); 1142c24b5dfaSDave Chinner 1143c14cfccaSDarrick J. Wong error = xfs_qm_dqattach(ip); 1144c24b5dfaSDave Chinner if (error) 1145c24b5dfaSDave Chinner return error; 1146c24b5dfaSDave Chinner 1147c24b5dfaSDave Chinner if (len <= 0) /* if nothing being freed */ 1148bdb0d04fSChristoph Hellwig return 0; 1149bdb0d04fSChristoph Hellwig 1150bdb0d04fSChristoph Hellwig error = xfs_flush_unmap_range(ip, offset, len); 1151bdb0d04fSChristoph Hellwig if (error) 1152c24b5dfaSDave Chinner return error; 1153bdb0d04fSChristoph Hellwig 1154c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSB(mp, offset); 1155c24b5dfaSDave Chinner endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); 1156c24b5dfaSDave Chinner 1157bdb0d04fSChristoph Hellwig /* 1158bdb0d04fSChristoph Hellwig * Need to zero the stuff we're not freeing, on disk. If it's a realtime 1159bdb0d04fSChristoph Hellwig * file and we can't use unwritten extents then we actually need to zero 1160bdb0d04fSChristoph Hellwig * the whole extent, otherwise we just need to take care of the block 1161bdb0d04fSChristoph Hellwig * boundaries, and xfs_bunmapi will handle the rest. 1162bdb0d04fSChristoph Hellwig */ 1163bdb0d04fSChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip) && 1164bdb0d04fSChristoph Hellwig !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 1165bdb0d04fSChristoph Hellwig error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb, 1166bdb0d04fSChristoph Hellwig &endoffset_fsb); 1167c24b5dfaSDave Chinner if (error) 1168c24b5dfaSDave Chinner return error; 1169bdb0d04fSChristoph Hellwig } 1170c24b5dfaSDave Chinner 11713c2bdc91SChristoph Hellwig if (endoffset_fsb > startoffset_fsb) { 11723c2bdc91SChristoph Hellwig while (!done) { 1173bdb0d04fSChristoph Hellwig error = xfs_unmap_extent(ip, startoffset_fsb, 1174bdb0d04fSChristoph Hellwig endoffset_fsb - startoffset_fsb, &done); 11753c2bdc91SChristoph Hellwig if (error) 11763c2bdc91SChristoph Hellwig return error; 11773c2bdc91SChristoph Hellwig } 1178c24b5dfaSDave Chinner } 1179c24b5dfaSDave Chinner 11803c2bdc91SChristoph Hellwig /* 11813c2bdc91SChristoph Hellwig * Now that we've unmapped all full blocks we'll have to zero out any 1182f5c54717SChristoph Hellwig * partial block at the beginning and/or end. iomap_zero_range is smart 1183f5c54717SChristoph Hellwig * enough to skip any holes, including those we just created, but we 1184f5c54717SChristoph Hellwig * must take care not to zero beyond EOF and enlarge i_size. 11853c2bdc91SChristoph Hellwig */ 11863dd09d5aSCalvin Owens if (offset >= XFS_ISIZE(ip)) 11873dd09d5aSCalvin Owens return 0; 11883dd09d5aSCalvin Owens if (offset + len > XFS_ISIZE(ip)) 11893dd09d5aSCalvin Owens len = XFS_ISIZE(ip) - offset; 1190f5c54717SChristoph Hellwig return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); 1191c24b5dfaSDave Chinner } 1192c24b5dfaSDave Chinner
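/*
 * For reference, a minimal userspace sketch (the path and byte range are
 * made up) of one path that ends in xfs_free_file_space(): punching a
 * hole with fallocate(2). FALLOC_FL_PUNCH_HOLE must be combined with
 * FALLOC_FL_KEEP_SIZE, so i_size is unchanged.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <linux/falloc.h>
 *
 *	int punch_hole(const char *path)
 *	{
 *		int fd = open(path, O_RDWR);
 *
 *		if (fd < 0)
 *			return -1;
 *		// Free the blocks backing bytes [65536, 65536 + 131072).
 *		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *			      65536, 131072) < 0) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 *
 * Any sub-block head or tail of the range stays allocated and is zeroed
 * by the iomap_zero_range() call above.
 */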
11935d11fb4bSBrian Foster /* 11945d11fb4bSBrian Foster * Preallocate and zero a range of a file. This mechanism has the allocation 11955d11fb4bSBrian Foster * semantics of fallocate and in addition converts data in the range to zeroes. 11965d11fb4bSBrian Foster */ 1197865e9446SChristoph Hellwig int 1198c24b5dfaSDave Chinner xfs_zero_file_space( 1199c24b5dfaSDave Chinner struct xfs_inode *ip, 1200c24b5dfaSDave Chinner xfs_off_t offset, 12015f8aca8bSChristoph Hellwig xfs_off_t len) 1202c24b5dfaSDave Chinner { 1203c24b5dfaSDave Chinner struct xfs_mount *mp = ip->i_mount; 12045d11fb4bSBrian Foster uint blksize; 1205c24b5dfaSDave Chinner int error; 1206c24b5dfaSDave Chinner 1207897b73b6SDave Chinner trace_xfs_zero_file_space(ip); 1208897b73b6SDave Chinner 12095d11fb4bSBrian Foster blksize = 1 << mp->m_sb.sb_blocklog; 1210c24b5dfaSDave Chinner 1211c24b5dfaSDave Chinner /* 12125d11fb4bSBrian Foster * Punch a hole and prealloc the range. We use hole punch rather than 12135d11fb4bSBrian Foster * unwritten extent conversion for two reasons: 12145d11fb4bSBrian Foster * 12155d11fb4bSBrian Foster * 1.) Hole punch handles partial block zeroing for us. 12165d11fb4bSBrian Foster * 12175d11fb4bSBrian Foster * 2.) If prealloc returns ENOSPC, the file range is still zero-valued 12185d11fb4bSBrian Foster * by virtue of the hole punch. 1219c24b5dfaSDave Chinner */ 12205d11fb4bSBrian Foster error = xfs_free_file_space(ip, offset, len); 1221c24b5dfaSDave Chinner if (error) 12225f8aca8bSChristoph Hellwig goto out; 1223c24b5dfaSDave Chinner 12245d11fb4bSBrian Foster error = xfs_alloc_file_space(ip, round_down(offset, blksize), 12255d11fb4bSBrian Foster round_up(offset + len, blksize) - 12265d11fb4bSBrian Foster round_down(offset, blksize), 12275d11fb4bSBrian Foster XFS_BMAPI_PREALLOC); 12285f8aca8bSChristoph Hellwig out: 1229c24b5dfaSDave Chinner return error; 1230c24b5dfaSDave Chinner 1231c24b5dfaSDave Chinner } 1232c24b5dfaSDave Chinner
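/*
 * A worked example of the rounding in xfs_zero_file_space() above,
 * assuming 4096-byte blocks: for offset = 3000 and len = 3000, the hole
 * punch zeroes bytes [3000, 6000), including the sub-block head and
 * tail, and the preallocation is then widened to whole blocks:
 *
 *	round_down(3000, 4096)          = 0
 *	round_up(3000 + 3000, 4096) - 0 = 8192
 *
 * so xfs_alloc_file_space() preallocates bytes [0, 8192), covering both
 * blocks the range touches, and the whole range ends up allocated and
 * zero-valued.
 */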
123372c1a739Skbuild test robot static int 12344ed36c6bSChristoph Hellwig xfs_prepare_shift( 1235e1d8fb88SNamjae Jeon struct xfs_inode *ip, 12364ed36c6bSChristoph Hellwig loff_t offset) 1237e1d8fb88SNamjae Jeon { 1238e1d8fb88SNamjae Jeon int error; 1239f71721d0SBrian Foster 1240f71721d0SBrian Foster /* 1241f71721d0SBrian Foster * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1242f71721d0SBrian Foster * into the accessible region of the file. 1243f71721d0SBrian Foster */ 124441b9d726SBrian Foster if (xfs_can_free_eofblocks(ip, true)) { 1245a36b9261SBrian Foster error = xfs_free_eofblocks(ip); 124641b9d726SBrian Foster if (error) 124741b9d726SBrian Foster return error; 124841b9d726SBrian Foster } 12491669a8caSDave Chinner 1250f71721d0SBrian Foster /* 1251f71721d0SBrian Foster * Write back and invalidate the page cache for the remainder of the 1252a904b1caSNamjae Jeon * file as we're about to shift down every extent from offset to EOF. 1253f71721d0SBrian Foster */ 12544ed36c6bSChristoph Hellwig error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1); 1255f71721d0SBrian Foster if (error) 1256f71721d0SBrian Foster return error; 1257f71721d0SBrian Foster error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 125809cbfeafSKirill A. Shutemov offset >> PAGE_SHIFT, -1); 1259e1d8fb88SNamjae Jeon if (error) 1260e1d8fb88SNamjae Jeon return error; 1261e1d8fb88SNamjae Jeon 1262a904b1caSNamjae Jeon /* 12633af423b0SDarrick J. Wong * Clean out anything hanging around in the cow fork now that 12643af423b0SDarrick J. Wong * we've flushed all the dirty data out to disk to avoid having 12653af423b0SDarrick J. Wong * CoW extents at the wrong offsets. 12663af423b0SDarrick J. Wong */ 12673af423b0SDarrick J. Wong if (xfs_is_reflink_inode(ip)) { 12683af423b0SDarrick J. Wong error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, 12693af423b0SDarrick J. Wong true); 12703af423b0SDarrick J. Wong if (error) 12713af423b0SDarrick J. Wong return error; 12723af423b0SDarrick J. Wong } 12733af423b0SDarrick J. Wong 12744ed36c6bSChristoph Hellwig return 0; 1275e1d8fb88SNamjae Jeon } 1276e1d8fb88SNamjae Jeon 1277e1d8fb88SNamjae Jeon /* 1278a904b1caSNamjae Jeon * xfs_collapse_file_space() 1279a904b1caSNamjae Jeon * This routine frees disk space and shifts extents for the given file. 1280a904b1caSNamjae Jeon * The first thing we do is free the data blocks in the specified range 1281a904b1caSNamjae Jeon * by calling xfs_free_file_space(), which also syncs dirty data and 1282a904b1caSNamjae Jeon * invalidates the page cache over the region the collapse range is 1283a904b1caSNamjae Jeon * working on. Then we shift the extent records left to cover the hole. 1284a904b1caSNamjae Jeon * RETURNS: 1285a904b1caSNamjae Jeon * 0 on success 1286a904b1caSNamjae Jeon * errno on error 1287a904b1caSNamjae Jeon * 1288a904b1caSNamjae Jeon */ 1289a904b1caSNamjae Jeon int 1290a904b1caSNamjae Jeon xfs_collapse_file_space( 1291a904b1caSNamjae Jeon struct xfs_inode *ip, 1292a904b1caSNamjae Jeon xfs_off_t offset, 1293a904b1caSNamjae Jeon xfs_off_t len) 1294a904b1caSNamjae Jeon { 12954ed36c6bSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 12964ed36c6bSChristoph Hellwig struct xfs_trans *tp; 1297a904b1caSNamjae Jeon int error; 12984ed36c6bSChristoph Hellwig struct xfs_defer_ops dfops; 12994ed36c6bSChristoph Hellwig xfs_fsblock_t first_block; 13004ed36c6bSChristoph Hellwig xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); 13014ed36c6bSChristoph Hellwig xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); 13024ed36c6bSChristoph Hellwig uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1303ecfea3f0SChristoph Hellwig bool done = false; 1304a904b1caSNamjae Jeon 1305a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 13069ad1a23aSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); 13079ad1a23aSChristoph Hellwig 1308a904b1caSNamjae Jeon trace_xfs_collapse_file_space(ip); 1309a904b1caSNamjae Jeon 1310a904b1caSNamjae Jeon error = xfs_free_file_space(ip, offset, len); 1311a904b1caSNamjae Jeon if (error) 1312a904b1caSNamjae Jeon return error; 1313a904b1caSNamjae Jeon 13144ed36c6bSChristoph Hellwig error = xfs_prepare_shift(ip, offset); 13154ed36c6bSChristoph Hellwig if (error) 13164ed36c6bSChristoph Hellwig return error; 13174ed36c6bSChristoph Hellwig 13184ed36c6bSChristoph Hellwig while (!error && !done) { 13194ed36c6bSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, 13204ed36c6bSChristoph Hellwig &tp); 13214ed36c6bSChristoph Hellwig if (error) 13224ed36c6bSChristoph Hellwig break; 13234ed36c6bSChristoph Hellwig 13244ed36c6bSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 13254ed36c6bSChristoph Hellwig error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, 13264ed36c6bSChristoph Hellwig ip->i_gdquot, ip->i_pdquot, resblks, 0, 13274ed36c6bSChristoph Hellwig XFS_QMOPT_RES_REGBLKS); 13284ed36c6bSChristoph Hellwig if (error) 13294ed36c6bSChristoph Hellwig goto out_trans_cancel; 13304ed36c6bSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 13314ed36c6bSChristoph Hellwig 13324ed36c6bSChristoph Hellwig xfs_defer_init(&dfops, &first_block); 1333ecfea3f0SChristoph Hellwig error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
shift_fsb, 1334a1f69417SEric Sandeen &done, &first_block, &dfops); 13354ed36c6bSChristoph Hellwig if (error) 13364ed36c6bSChristoph Hellwig goto out_bmap_cancel; 13374ed36c6bSChristoph Hellwig 13384ed36c6bSChristoph Hellwig error = xfs_defer_finish(&tp, &dfops); 13394ed36c6bSChristoph Hellwig if (error) 13404ed36c6bSChristoph Hellwig goto out_bmap_cancel; 13414ed36c6bSChristoph Hellwig error = xfs_trans_commit(tp); 13424ed36c6bSChristoph Hellwig } 13434ed36c6bSChristoph Hellwig 13444ed36c6bSChristoph Hellwig return error; 13454ed36c6bSChristoph Hellwig 13464ed36c6bSChristoph Hellwig out_bmap_cancel: 13474ed36c6bSChristoph Hellwig xfs_defer_cancel(&dfops); 13484ed36c6bSChristoph Hellwig out_trans_cancel: 13494ed36c6bSChristoph Hellwig xfs_trans_cancel(tp); 13504ed36c6bSChristoph Hellwig return error; 1351a904b1caSNamjae Jeon } 1352a904b1caSNamjae Jeon
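/*
 * A minimal userspace sketch of the caller side of the collapse
 * operation (the fd and block-size handling are illustrative).
 * FALLOC_FL_COLLAPSE_RANGE may not be combined with other fallocate
 * flags, and both offset and len must be multiples of the filesystem
 * block size.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	int collapse_range(int fd, off_t blksz)
 *	{
 *		// Remove bytes [4 * blksz, 8 * blksz); all data beyond
 *		// the range shifts down and i_size shrinks by 4 * blksz.
 *		return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE,
 *				 4 * blksz, 4 * blksz);
 *	}
 */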
1353a904b1caSNamjae Jeon /* 1354a904b1caSNamjae Jeon * xfs_insert_file_space() 1355a904b1caSNamjae Jeon * This routine creates hole space by shifting extents for the given file. 1356a904b1caSNamjae Jeon * The first thing we do is sync dirty data and invalidate the page cache 1357a904b1caSNamjae Jeon * over the region the insert range is working on. Then we split the 1358a904b1caSNamjae Jeon * extent at the given offset into two by calling xfs_bmap_split_extent, 1359a904b1caSNamjae Jeon * and shift all extent records lying between [offset, 1360a904b1caSNamjae Jeon * last allocated extent] to the right to make room for the hole. 1361a904b1caSNamjae Jeon * RETURNS: 1362a904b1caSNamjae Jeon * 0 on success 1363a904b1caSNamjae Jeon * errno on error 1364a904b1caSNamjae Jeon */ 1365a904b1caSNamjae Jeon int 1366a904b1caSNamjae Jeon xfs_insert_file_space( 1367a904b1caSNamjae Jeon struct xfs_inode *ip, 1368a904b1caSNamjae Jeon loff_t offset, 1369a904b1caSNamjae Jeon loff_t len) 1370a904b1caSNamjae Jeon { 13714ed36c6bSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 13724ed36c6bSChristoph Hellwig struct xfs_trans *tp; 13734ed36c6bSChristoph Hellwig int error; 13744ed36c6bSChristoph Hellwig struct xfs_defer_ops dfops; 13754ed36c6bSChristoph Hellwig xfs_fsblock_t first_block; 13764ed36c6bSChristoph Hellwig xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, offset); 13774ed36c6bSChristoph Hellwig xfs_fileoff_t next_fsb = NULLFSBLOCK; 13784ed36c6bSChristoph Hellwig xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); 1379ecfea3f0SChristoph Hellwig bool done = false; 13804ed36c6bSChristoph Hellwig 1381a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 13829ad1a23aSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); 13839ad1a23aSChristoph Hellwig 1384a904b1caSNamjae Jeon trace_xfs_insert_file_space(ip); 1385a904b1caSNamjae Jeon 13864ed36c6bSChristoph Hellwig error = xfs_prepare_shift(ip, offset); 13874ed36c6bSChristoph Hellwig if (error) 13884ed36c6bSChristoph Hellwig return error; 13894ed36c6bSChristoph Hellwig 13904ed36c6bSChristoph Hellwig /* 13914ed36c6bSChristoph Hellwig * The extent shifting code works on extent granularity. So, if stop_fsb 13924ed36c6bSChristoph Hellwig * is not the starting block of an extent, we need to split the extent 13934ed36c6bSChristoph Hellwig * at stop_fsb. 13944ed36c6bSChristoph Hellwig */ 13954ed36c6bSChristoph Hellwig error = xfs_bmap_split_extent(ip, stop_fsb); 13964ed36c6bSChristoph Hellwig if (error) 13974ed36c6bSChristoph Hellwig return error; 13984ed36c6bSChristoph Hellwig 13994ed36c6bSChristoph Hellwig while (!error && !done) { 14004ed36c6bSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, 14014ed36c6bSChristoph Hellwig &tp); 14024ed36c6bSChristoph Hellwig if (error) 14034ed36c6bSChristoph Hellwig break; 14044ed36c6bSChristoph Hellwig 14054ed36c6bSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 14064ed36c6bSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 14074ed36c6bSChristoph Hellwig xfs_defer_init(&dfops, &first_block); 1408ecfea3f0SChristoph Hellwig error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, 1409ecfea3f0SChristoph Hellwig &done, stop_fsb, &first_block, &dfops); 14104ed36c6bSChristoph Hellwig if (error) 14114ed36c6bSChristoph Hellwig goto out_bmap_cancel; 14124ed36c6bSChristoph Hellwig 14134ed36c6bSChristoph Hellwig error = xfs_defer_finish(&tp, &dfops); 14144ed36c6bSChristoph Hellwig if (error) 14154ed36c6bSChristoph Hellwig goto out_bmap_cancel; 14164ed36c6bSChristoph Hellwig error = xfs_trans_commit(tp); 14174ed36c6bSChristoph Hellwig } 14184ed36c6bSChristoph Hellwig 14194ed36c6bSChristoph Hellwig return error; 14204ed36c6bSChristoph Hellwig 14214ed36c6bSChristoph Hellwig out_bmap_cancel: 14224ed36c6bSChristoph Hellwig xfs_defer_cancel(&dfops); 14234ed36c6bSChristoph Hellwig xfs_trans_cancel(tp); 14244ed36c6bSChristoph Hellwig return error; 1425a904b1caSNamjae Jeon } 1426a904b1caSNamjae Jeon
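/*
 * The mirror image from userspace (again a sketch with illustrative
 * values): FALLOC_FL_INSERT_RANGE opens a block-aligned hole at offset
 * and grows i_size by len, shifting the existing extents right.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	int insert_range(int fd, off_t blksz)
 *	{
 *		// Make room for four new blocks at block 4; the data that
 *		// started at byte 4 * blksz now starts at byte 8 * blksz.
 *		return fallocate(fd, FALLOC_FL_INSERT_RANGE,
 *				 4 * blksz, 4 * blksz);
 *	}
 */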
1427a904b1caSNamjae Jeon /* 1428a133d952SDave Chinner * We need to check that the format of the data fork in the temporary inode is 1429a133d952SDave Chinner * valid for the target inode before doing the swap. This is not a problem with 1430a133d952SDave Chinner * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1431a133d952SDave Chinner * data fork depending on the space the attribute fork is taking so we can get 1432a133d952SDave Chinner * invalid formats on the target inode. 1433a133d952SDave Chinner * 1434a133d952SDave Chinner * E.g. target has space for 7 extents in extent format, temp inode only has 1435a133d952SDave Chinner * space for 6. If we defragment down to 7 extents, then the tmp format is a 1436a133d952SDave Chinner * btree, but when swapped it needs to be in extent format. Hence we can't just 1437a133d952SDave Chinner * blindly swap data forks on attr2 filesystems. 1438a133d952SDave Chinner * 1439a133d952SDave Chinner * Note that we check the swap in both directions so that we don't end up with 1440a133d952SDave Chinner * a corrupt temporary inode, either. 1441a133d952SDave Chinner * 1442a133d952SDave Chinner * Note that fixing the way xfs_fsr sets up the attribute fork in the source 1443a133d952SDave Chinner * inode will prevent this situation from occurring, so all we do here is 1444a133d952SDave Chinner * reject and log the attempt. Basically we are putting the responsibility on 1445a133d952SDave Chinner * userspace to get this right. 1446a133d952SDave Chinner */ 1447a133d952SDave Chinner static int 1448a133d952SDave Chinner xfs_swap_extents_check_format( 1449e06259aaSDarrick J. Wong struct xfs_inode *ip, /* target inode */ 1450e06259aaSDarrick J. Wong struct xfs_inode *tip) /* tmp inode */ 1451a133d952SDave Chinner { 1452a133d952SDave Chinner 1453a133d952SDave Chinner /* Should never get a local format */ 1454a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 1455a133d952SDave Chinner tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) 14562451337dSDave Chinner return -EINVAL; 1457a133d952SDave Chinner 1458a133d952SDave Chinner /* 1459a133d952SDave Chinner * If the target inode has fewer extents than the temporary inode, 1460a133d952SDave Chinner * why did userspace call us? 1461a133d952SDave Chinner */ 1462a133d952SDave Chinner if (ip->i_d.di_nextents < tip->i_d.di_nextents) 14632451337dSDave Chinner return -EINVAL; 1464a133d952SDave Chinner 1465a133d952SDave Chinner /* 14661f08af52SDarrick J. Wong * If we have to use the (expensive) rmap swap method, we can 14671f08af52SDarrick J. Wong * handle any number of extents and any format. 14681f08af52SDarrick J. Wong */ 14691f08af52SDarrick J. Wong if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb)) 14701f08af52SDarrick J. Wong return 0; 14711f08af52SDarrick J. Wong 14721f08af52SDarrick J. Wong /* 1473a133d952SDave Chinner * If the target inode is in extent form and the temp inode is in btree 1474a133d952SDave Chinner * form then we will end up with the target inode in the wrong format 1475a133d952SDave Chinner * as we already know there are fewer extents in the temp inode. 1476a133d952SDave Chinner */ 1477a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1478a133d952SDave Chinner tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 14792451337dSDave Chinner return -EINVAL; 1480a133d952SDave Chinner 1481a133d952SDave Chinner /* Check temp in extent form to max in target */ 1482a133d952SDave Chinner if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1483a133d952SDave Chinner XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > 1484a133d952SDave Chinner XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 14852451337dSDave Chinner return -EINVAL; 1486a133d952SDave Chinner 1487a133d952SDave Chinner /* Check target in extent form to max in temp */ 1488a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1489a133d952SDave Chinner XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > 1490a133d952SDave Chinner XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 14912451337dSDave Chinner return -EINVAL; 1492a133d952SDave Chinner 1493a133d952SDave Chinner /* 1494a133d952SDave Chinner * If we are in a btree format, check that the temp root block will fit 1495a133d952SDave Chinner * in the target and that it has enough extents to be in btree format 1496a133d952SDave Chinner * in the target. 1497a133d952SDave Chinner * 1498a133d952SDave Chinner * Note that we have to be careful to allow btree->extent conversions 1499a133d952SDave Chinner * (a common defrag case) which will occur when the temp inode is in 1500a133d952SDave Chinner * extent format...
15011a133d952SDave Chinner */ 1502a133d952SDave Chinner if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 15030cbe48ccSArnd Bergmann if (XFS_IFORK_Q(ip) && 1504a133d952SDave Chinner XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) 15052451337dSDave Chinner return -EINVAL; 1506a133d952SDave Chinner if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= 1507a133d952SDave Chinner XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 15082451337dSDave Chinner return -EINVAL; 1509a133d952SDave Chinner } 1510a133d952SDave Chinner 1511a133d952SDave Chinner /* Reciprocal target->temp btree format checks */ 1512a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 15130cbe48ccSArnd Bergmann if (XFS_IFORK_Q(tip) && 1514a133d952SDave Chinner XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) 15152451337dSDave Chinner return -EINVAL; 1516a133d952SDave Chinner if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= 1517a133d952SDave Chinner XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 15182451337dSDave Chinner return -EINVAL; 1519a133d952SDave Chinner } 1520a133d952SDave Chinner 1521a133d952SDave Chinner return 0; 1522a133d952SDave Chinner } 1523a133d952SDave Chinner 15247abbb8f9SDave Chinner static int 15254ef897a2SDave Chinner xfs_swap_extent_flush( 15264ef897a2SDave Chinner struct xfs_inode *ip) 15274ef897a2SDave Chinner { 15284ef897a2SDave Chinner int error; 15294ef897a2SDave Chinner 15304ef897a2SDave Chinner error = filemap_write_and_wait(VFS_I(ip)->i_mapping); 15314ef897a2SDave Chinner if (error) 15324ef897a2SDave Chinner return error; 15334ef897a2SDave Chinner truncate_pagecache_range(VFS_I(ip), 0, -1); 15344ef897a2SDave Chinner 15354ef897a2SDave Chinner /* Verify O_DIRECT for ftmp */ 15364ef897a2SDave Chinner if (VFS_I(ip)->i_mapping->nrpages) 15374ef897a2SDave Chinner return -EINVAL; 15384ef897a2SDave Chinner return 0; 15394ef897a2SDave Chinner } 15404ef897a2SDave Chinner
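/*
 * A worked example of the extent trimming in xfs_swap_extent_rmap()
 * below, with assumed extent sizes: suppose the donor extent (tirec)
 * covers 10 blocks at some file offset, but the source file's mapping
 * (irec) at that offset is only 4 blocks long. Then
 *
 *	rlen = min(10, 4) = 4
 *
 * so one pass of the inner loop unmaps those 4 blocks from both files,
 * cross-maps them, and advances tirec (br_startoff, br_startblock,
 * br_blockcount) by 4 blocks, leaving the remaining 6 blocks of the
 * donor extent for the next pass.
 */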
15411f08af52SDarrick J. Wong /* 15421f08af52SDarrick J. Wong * Move extents from one file to another, when rmap is enabled. 15431f08af52SDarrick J. Wong */ 15441f08af52SDarrick J. Wong STATIC int 15451f08af52SDarrick J. Wong xfs_swap_extent_rmap( 15461f08af52SDarrick J. Wong struct xfs_trans **tpp, 15471f08af52SDarrick J. Wong struct xfs_inode *ip, 15481f08af52SDarrick J. Wong struct xfs_inode *tip) 15491f08af52SDarrick J. Wong { 15501f08af52SDarrick J. Wong struct xfs_bmbt_irec irec; 15511f08af52SDarrick J. Wong struct xfs_bmbt_irec uirec; 15521f08af52SDarrick J. Wong struct xfs_bmbt_irec tirec; 15531f08af52SDarrick J. Wong xfs_fileoff_t offset_fsb; 15541f08af52SDarrick J. Wong xfs_fileoff_t end_fsb; 15551f08af52SDarrick J. Wong xfs_filblks_t count_fsb; 15561f08af52SDarrick J. Wong xfs_fsblock_t firstfsb; 15571f08af52SDarrick J. Wong struct xfs_defer_ops dfops; 15581f08af52SDarrick J. Wong int error; 15591f08af52SDarrick J. Wong xfs_filblks_t ilen; 15601f08af52SDarrick J. Wong xfs_filblks_t rlen; 15611f08af52SDarrick J. Wong int nimaps; 1562c8ce540dSDarrick J. Wong uint64_t tip_flags2; 15631f08af52SDarrick J. Wong 15641f08af52SDarrick J. Wong /* 15651f08af52SDarrick J. Wong * If the source file has shared blocks, we must flag the donor 15661f08af52SDarrick J. Wong * file as having shared blocks so that we get the shared-block 15671f08af52SDarrick J. Wong * rmap functions when we go to fix up the rmaps. The flags 15681f08af52SDarrick J. Wong * will be switched for real later. 15691f08af52SDarrick J. Wong */ 15701f08af52SDarrick J. Wong tip_flags2 = tip->i_d.di_flags2; 15711f08af52SDarrick J. Wong if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) 15721f08af52SDarrick J. Wong tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; 15731f08af52SDarrick J. Wong 15741f08af52SDarrick J. Wong offset_fsb = 0; 15751f08af52SDarrick J. Wong end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); 15761f08af52SDarrick J. Wong count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); 15771f08af52SDarrick J. Wong 15781f08af52SDarrick J. Wong while (count_fsb) { 15791f08af52SDarrick J. Wong /* Read extent from the donor file */ 15801f08af52SDarrick J. Wong nimaps = 1; 15811f08af52SDarrick J. Wong error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec, 15821f08af52SDarrick J. Wong &nimaps, 0); 15831f08af52SDarrick J. Wong if (error) 15841f08af52SDarrick J. Wong goto out; 15851f08af52SDarrick J. Wong ASSERT(nimaps == 1); 15861f08af52SDarrick J. Wong ASSERT(tirec.br_startblock != DELAYSTARTBLOCK); 15871f08af52SDarrick J. Wong 15881f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap(tip, &tirec); 15891f08af52SDarrick J. Wong ilen = tirec.br_blockcount; 15901f08af52SDarrick J. Wong 15911f08af52SDarrick J. Wong /* Unmap the old blocks in the source file. */ 15921f08af52SDarrick J. Wong while (tirec.br_blockcount) { 15931f08af52SDarrick J. Wong xfs_defer_init(&dfops, &firstfsb); 15941f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); 15951f08af52SDarrick J. Wong 15961f08af52SDarrick J. Wong /* Read extent from the source file */ 15971f08af52SDarrick J. Wong nimaps = 1; 15981f08af52SDarrick J. Wong error = xfs_bmapi_read(ip, tirec.br_startoff, 15991f08af52SDarrick J. Wong tirec.br_blockcount, &irec, 16001f08af52SDarrick J. Wong &nimaps, 0); 16011f08af52SDarrick J. Wong if (error) 16021f08af52SDarrick J. Wong goto out_defer; 16031f08af52SDarrick J. Wong ASSERT(nimaps == 1); 16041f08af52SDarrick J. Wong ASSERT(tirec.br_startoff == irec.br_startoff); 16051f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); 16061f08af52SDarrick J. Wong 16071f08af52SDarrick J. Wong /* Trim the extent. */ 16081f08af52SDarrick J. Wong uirec = tirec; 16091f08af52SDarrick J. Wong uirec.br_blockcount = rlen = min_t(xfs_filblks_t, 16101f08af52SDarrick J. Wong tirec.br_blockcount, 16111f08af52SDarrick J. Wong irec.br_blockcount); 16121f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); 16131f08af52SDarrick J. Wong 16141f08af52SDarrick J. Wong /* Remove the mapping from the donor file. */ 16151f08af52SDarrick J. Wong error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, 16161f08af52SDarrick J. Wong tip, &uirec); 16171f08af52SDarrick J. Wong if (error) 16181f08af52SDarrick J. Wong goto out_defer; 16191f08af52SDarrick J. Wong 16201f08af52SDarrick J. Wong /* Remove the mapping from the source file. */ 16211f08af52SDarrick J. Wong error = xfs_bmap_unmap_extent((*tpp)->t_mountp, &dfops, 16221f08af52SDarrick J. Wong ip, &irec); 16231f08af52SDarrick J. Wong if (error) 16241f08af52SDarrick J. Wong goto out_defer; 16251f08af52SDarrick J. Wong 16261f08af52SDarrick J. Wong /* Map the donor file's blocks into the source file. */ 16271f08af52SDarrick J. Wong error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, 16281f08af52SDarrick J. Wong ip, &uirec); 16291f08af52SDarrick J. Wong if (error) 16301f08af52SDarrick J. Wong goto out_defer; 16311f08af52SDarrick J. Wong 16321f08af52SDarrick J. Wong /* Map the source file's blocks into the donor file. */ 16331f08af52SDarrick J. Wong error = xfs_bmap_map_extent((*tpp)->t_mountp, &dfops, 16341f08af52SDarrick J.
Wong tip, &irec); 16351f08af52SDarrick J. Wong if (error) 16361f08af52SDarrick J. Wong goto out_defer; 16371f08af52SDarrick J. Wong 16388ad7c629SChristoph Hellwig xfs_defer_ijoin(&dfops, ip); 16398ad7c629SChristoph Hellwig error = xfs_defer_finish(tpp, &dfops); 16401f08af52SDarrick J. Wong if (error) 16411f08af52SDarrick J. Wong goto out_defer; 16421f08af52SDarrick J. Wong 16431f08af52SDarrick J. Wong tirec.br_startoff += rlen; 16441f08af52SDarrick J. Wong if (tirec.br_startblock != HOLESTARTBLOCK && 16451f08af52SDarrick J. Wong tirec.br_startblock != DELAYSTARTBLOCK) 16461f08af52SDarrick J. Wong tirec.br_startblock += rlen; 16471f08af52SDarrick J. Wong tirec.br_blockcount -= rlen; 16481f08af52SDarrick J. Wong } 16491f08af52SDarrick J. Wong 16501f08af52SDarrick J. Wong /* Roll on... */ 16511f08af52SDarrick J. Wong count_fsb -= ilen; 16521f08af52SDarrick J. Wong offset_fsb += ilen; 16531f08af52SDarrick J. Wong } 16541f08af52SDarrick J. Wong 16551f08af52SDarrick J. Wong tip->i_d.di_flags2 = tip_flags2; 16561f08af52SDarrick J. Wong return 0; 16571f08af52SDarrick J. Wong 16581f08af52SDarrick J. Wong out_defer: 16591f08af52SDarrick J. Wong xfs_defer_cancel(&dfops); 16601f08af52SDarrick J. Wong out: 16611f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); 16621f08af52SDarrick J. Wong tip->i_d.di_flags2 = tip_flags2; 16631f08af52SDarrick J. Wong return error; 16641f08af52SDarrick J. Wong } 16651f08af52SDarrick J. Wong 166639aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */ 166739aff5fdSDarrick J. Wong STATIC int 166839aff5fdSDarrick J. Wong xfs_swap_extent_forks( 166939aff5fdSDarrick J. Wong struct xfs_trans *tp, 167039aff5fdSDarrick J. Wong struct xfs_inode *ip, 167139aff5fdSDarrick J. Wong struct xfs_inode *tip, 167239aff5fdSDarrick J. Wong int *src_log_flags, 167339aff5fdSDarrick J. Wong int *target_log_flags) 167439aff5fdSDarrick J. Wong { 167539aff5fdSDarrick J. Wong struct xfs_ifork tempifp, *ifp, *tifp; 1676e7f5d5caSDarrick J. Wong xfs_filblks_t aforkblks = 0; 1677e7f5d5caSDarrick J. Wong xfs_filblks_t taforkblks = 0; 1678e7f5d5caSDarrick J. Wong xfs_extnum_t junk; 1679c8ce540dSDarrick J. Wong uint64_t tmp; 168039aff5fdSDarrick J. Wong int error; 168139aff5fdSDarrick J. Wong 168239aff5fdSDarrick J. Wong /* 168339aff5fdSDarrick J. Wong * Count the number of extended attribute blocks 168439aff5fdSDarrick J. Wong */ 168539aff5fdSDarrick J. Wong if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 168639aff5fdSDarrick J. Wong (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1687e7f5d5caSDarrick J. Wong error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk, 168839aff5fdSDarrick J. Wong &aforkblks); 168939aff5fdSDarrick J. Wong if (error) 169039aff5fdSDarrick J. Wong return error; 169139aff5fdSDarrick J. Wong } 169239aff5fdSDarrick J. Wong if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 169339aff5fdSDarrick J. Wong (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1694e7f5d5caSDarrick J. Wong error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, 169539aff5fdSDarrick J. Wong &taforkblks); 169639aff5fdSDarrick J. Wong if (error) 169739aff5fdSDarrick J. Wong return error; 169839aff5fdSDarrick J. Wong } 169939aff5fdSDarrick J. Wong 170039aff5fdSDarrick J. Wong /* 17016fb10d6dSBrian Foster * Btree format (v3) inodes have the inode number stamped in the bmbt 17026fb10d6dSBrian Foster * block headers. 
We can't start changing the bmbt blocks until the 17036fb10d6dSBrian Foster * inode owner change is logged so recovery does the right thing in the 17046fb10d6dSBrian Foster * event of a crash. Set the owner change log flags now and leave the 17056fb10d6dSBrian Foster * bmbt scan as the last step. 170639aff5fdSDarrick J. Wong */ 170739aff5fdSDarrick J. Wong if (ip->i_d.di_version == 3 && 17086fb10d6dSBrian Foster ip->i_d.di_format == XFS_DINODE_FMT_BTREE) 170939aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DOWNER; 171039aff5fdSDarrick J. Wong if (tip->i_d.di_version == 3 && 17116fb10d6dSBrian Foster tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 171239aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DOWNER; 171339aff5fdSDarrick J. Wong 171439aff5fdSDarrick J. Wong /* 171539aff5fdSDarrick J. Wong * Swap the data forks of the inodes 171639aff5fdSDarrick J. Wong */ 171739aff5fdSDarrick J. Wong ifp = &ip->i_df; 171839aff5fdSDarrick J. Wong tifp = &tip->i_df; 171939aff5fdSDarrick J. Wong tempifp = *ifp; /* struct copy */ 172039aff5fdSDarrick J. Wong *ifp = *tifp; /* struct copy */ 172139aff5fdSDarrick J. Wong *tifp = tempifp; /* struct copy */ 172239aff5fdSDarrick J. Wong 172339aff5fdSDarrick J. Wong /* 172439aff5fdSDarrick J. Wong * Fix the on-disk inode values 172539aff5fdSDarrick J. Wong */ 1726c8ce540dSDarrick J. Wong tmp = (uint64_t)ip->i_d.di_nblocks; 172739aff5fdSDarrick J. Wong ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 172839aff5fdSDarrick J. Wong tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 172939aff5fdSDarrick J. Wong 1730c8ce540dSDarrick J. Wong tmp = (uint64_t) ip->i_d.di_nextents; 173139aff5fdSDarrick J. Wong ip->i_d.di_nextents = tip->i_d.di_nextents; 173239aff5fdSDarrick J. Wong tip->i_d.di_nextents = tmp; 173339aff5fdSDarrick J. Wong 1734c8ce540dSDarrick J. Wong tmp = (uint64_t) ip->i_d.di_format; 173539aff5fdSDarrick J. Wong ip->i_d.di_format = tip->i_d.di_format; 173639aff5fdSDarrick J. Wong tip->i_d.di_format = tmp; 173739aff5fdSDarrick J. Wong 173839aff5fdSDarrick J. Wong /* 173939aff5fdSDarrick J. Wong * The extents in the source inode could still contain speculative 174039aff5fdSDarrick J. Wong * preallocation beyond EOF (e.g. the file is open but not modified 174139aff5fdSDarrick J. Wong * while defrag is in progress). In that case, we need to copy over the 174239aff5fdSDarrick J. Wong * number of delalloc blocks the data fork in the source inode is 174339aff5fdSDarrick J. Wong * tracking beyond EOF so that when the fork is truncated away when the 174439aff5fdSDarrick J. Wong * temporary inode is unlinked we don't underrun the i_delayed_blks 174539aff5fdSDarrick J. Wong * counter on that inode. 174639aff5fdSDarrick J. Wong */ 174739aff5fdSDarrick J. Wong ASSERT(tip->i_delayed_blks == 0); 174839aff5fdSDarrick J. Wong tip->i_delayed_blks = ip->i_delayed_blks; 174939aff5fdSDarrick J. Wong ip->i_delayed_blks = 0; 175039aff5fdSDarrick J. Wong 175139aff5fdSDarrick J. Wong switch (ip->i_d.di_format) { 175239aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 175339aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DEXT; 175439aff5fdSDarrick J. Wong break; 175539aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 175639aff5fdSDarrick J. Wong ASSERT(ip->i_d.di_version < 3 || 175739aff5fdSDarrick J. Wong (*src_log_flags & XFS_ILOG_DOWNER)); 175839aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DBROOT; 175939aff5fdSDarrick J. Wong break; 176039aff5fdSDarrick J. Wong } 176139aff5fdSDarrick J. Wong 176239aff5fdSDarrick J. 
Wong switch (tip->i_d.di_format) { 176339aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 176439aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DEXT; 176539aff5fdSDarrick J. Wong break; 176639aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 176739aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DBROOT; 176839aff5fdSDarrick J. Wong ASSERT(tip->i_d.di_version < 3 || 176939aff5fdSDarrick J. Wong (*target_log_flags & XFS_ILOG_DOWNER)); 177039aff5fdSDarrick J. Wong break; 177139aff5fdSDarrick J. Wong } 177239aff5fdSDarrick J. Wong 177339aff5fdSDarrick J. Wong return 0; 177439aff5fdSDarrick J. Wong } 177539aff5fdSDarrick J. Wong 17762dd3d709SBrian Foster /* 17772dd3d709SBrian Foster * Fix up the owners of the bmbt blocks to refer to the current inode. The 17782dd3d709SBrian Foster * change owner scan attempts to order all modified buffers in the current 17792dd3d709SBrian Foster * transaction. In the event of ordered buffer failure, the offending buffer is 17802dd3d709SBrian Foster * physically logged as a fallback and the scan returns -EAGAIN. We must roll 17812dd3d709SBrian Foster * the transaction in this case to replenish the fallback log reservation and 17822dd3d709SBrian Foster * restart the scan. This process repeats until the scan completes. 17832dd3d709SBrian Foster */ 17842dd3d709SBrian Foster static int 17852dd3d709SBrian Foster xfs_swap_change_owner( 17862dd3d709SBrian Foster struct xfs_trans **tpp, 17872dd3d709SBrian Foster struct xfs_inode *ip, 17882dd3d709SBrian Foster struct xfs_inode *tmpip) 17892dd3d709SBrian Foster { 17902dd3d709SBrian Foster int error; 17912dd3d709SBrian Foster struct xfs_trans *tp = *tpp; 17922dd3d709SBrian Foster 17932dd3d709SBrian Foster do { 17942dd3d709SBrian Foster error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, 17952dd3d709SBrian Foster NULL); 17962dd3d709SBrian Foster /* success or fatal error */ 17972dd3d709SBrian Foster if (error != -EAGAIN) 17982dd3d709SBrian Foster break; 17992dd3d709SBrian Foster 18002dd3d709SBrian Foster error = xfs_trans_roll(tpp); 18012dd3d709SBrian Foster if (error) 18022dd3d709SBrian Foster break; 18032dd3d709SBrian Foster tp = *tpp; 18042dd3d709SBrian Foster 18052dd3d709SBrian Foster /* 18062dd3d709SBrian Foster * Redirty both inodes so they can relog and keep the log tail 18072dd3d709SBrian Foster * moving forward. 18082dd3d709SBrian Foster */ 18092dd3d709SBrian Foster xfs_trans_ijoin(tp, ip, 0); 18102dd3d709SBrian Foster xfs_trans_ijoin(tp, tmpip, 0); 18112dd3d709SBrian Foster xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 18122dd3d709SBrian Foster xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); 18132dd3d709SBrian Foster } while (true); 18142dd3d709SBrian Foster 18152dd3d709SBrian Foster return error; 18162dd3d709SBrian Foster } 18172dd3d709SBrian Foster
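/*
 * For context, xfs_swap_extents() below is driven by the
 * XFS_IOC_SWAPEXT ioctl, which is how xfs_fsr swaps a defragmented
 * temporary file back into the original inode. A rough sketch of the
 * caller side (error handling and the bulkstat call that fills sx_stat
 * are omitted; the fd and size variables are hypothetical):
 *
 *	#include <sys/ioctl.h>
 *	#include <xfs/xfs.h>
 *
 *	struct xfs_swapext sx = {
 *		.sx_version  = XFS_SX_VERSION,
 *		.sx_fdtarget = target_fd,   // file being defragmented
 *		.sx_fdtmp    = tmp_fd,      // donor file built by xfs_fsr
 *		.sx_offset   = 0,           // whole-file swaps only
 *		.sx_length   = target_size, // must match both i_sizes
 *	};
 *	// sx.sx_stat must hold the target's bulkstat data; bs_ctime and
 *	// bs_mtime are compared below to reject files that changed in
 *	// the meantime.
 *	ioctl(target_fd, XFS_IOC_SWAPEXT, &sx);
 */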
18184ef897a2SDave Chinner int 1819a133d952SDave Chinner xfs_swap_extents( 1820e06259aaSDarrick J. Wong struct xfs_inode *ip, /* target inode */ 1821e06259aaSDarrick J. Wong struct xfs_inode *tip, /* tmp inode */ 1822e06259aaSDarrick J. Wong struct xfs_swapext *sxp) 1823a133d952SDave Chinner { 1824e06259aaSDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 1825e06259aaSDarrick J. Wong struct xfs_trans *tp; 1826e06259aaSDarrick J. Wong struct xfs_bstat *sbp = &sxp->sx_stat; 1827a133d952SDave Chinner int src_log_flags, target_log_flags; 1828a133d952SDave Chinner int error = 0; 182981217683SDave Chinner int lock_flags; 1830f0bc4d13SDarrick J. Wong struct xfs_ifork *cowfp; 1831c8ce540dSDarrick J. Wong uint64_t f; 18322dd3d709SBrian Foster int resblks = 0; 1833a133d952SDave Chinner 1834a133d952SDave Chinner /* 1835723cac48SDave Chinner * Lock the inodes against other IO, page faults and truncate to 1836723cac48SDave Chinner * begin with. Then we can safely ensure the inodes are flushed and 1837723cac48SDave Chinner * have no page cache. Once we have done this we can take the ilocks 1838723cac48SDave Chinner * and do the rest of the checks. 1839a133d952SDave Chinner */ 184065523218SChristoph Hellwig lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 184165523218SChristoph Hellwig lock_flags = XFS_MMAPLOCK_EXCL; 18427c2d238aSDarrick J. Wong xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL); 1843a133d952SDave Chinner 1844a133d952SDave Chinner /* Verify that both files have the same format */ 1845c19b3b05SDave Chinner if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { 18462451337dSDave Chinner error = -EINVAL; 1847a133d952SDave Chinner goto out_unlock; 1848a133d952SDave Chinner } 1849a133d952SDave Chinner 1850a133d952SDave Chinner /* Verify both files are either real-time or non-realtime */ 1851a133d952SDave Chinner if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { 18522451337dSDave Chinner error = -EINVAL; 1853a133d952SDave Chinner goto out_unlock; 1854a133d952SDave Chinner } 1855a133d952SDave Chinner 18564ef897a2SDave Chinner error = xfs_swap_extent_flush(ip); 1857a133d952SDave Chinner if (error) 1858a133d952SDave Chinner goto out_unlock; 18594ef897a2SDave Chinner error = xfs_swap_extent_flush(tip); 18604ef897a2SDave Chinner if (error) 18614ef897a2SDave Chinner goto out_unlock; 1862a133d952SDave Chinner 18631f08af52SDarrick J. Wong /* 18641f08af52SDarrick J. Wong * Extent "swapping" with rmap requires a permanent reservation and 18651f08af52SDarrick J. Wong * a block reservation because it's really just a remap operation 18661f08af52SDarrick J. Wong * performed with log redo items! 18671f08af52SDarrick J. Wong */ 18681f08af52SDarrick J. Wong if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 1869b3fed434SBrian Foster int w = XFS_DATA_FORK; 1870b3fed434SBrian Foster uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w); 1871b3fed434SBrian Foster uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w); 1872b3fed434SBrian Foster 18731f08af52SDarrick J. Wong /* 1874b3fed434SBrian Foster * Conceptually this shouldn't affect the shape of either bmbt, 1875b3fed434SBrian Foster * but since we atomically move extents one by one, we reserve 1876b3fed434SBrian Foster * enough space to rebuild both trees. 18771f08af52SDarrick J. Wong */ 1878b3fed434SBrian Foster resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w); 1879b3fed434SBrian Foster resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); 1880b3fed434SBrian Foster 1881b3fed434SBrian Foster /* 1882b3fed434SBrian Foster * Handle the corner case where either inode might straddle the 1883b3fed434SBrian Foster * btree format boundary. If so, the inode could bounce between 1884b3fed434SBrian Foster * btree <-> extent format on unmap -> remap cycles, freeing and 1885b3fed434SBrian Foster * allocating a bmapbt block each time.
1886b3fed434SBrian Foster */ 1887b3fed434SBrian Foster if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1)) 1888b3fed434SBrian Foster resblks += XFS_IFORK_MAXEXT(ip, w); 1889b3fed434SBrian Foster if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1)) 1890b3fed434SBrian Foster resblks += XFS_IFORK_MAXEXT(tip, w); 18912dd3d709SBrian Foster } 18922dd3d709SBrian Foster error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 1893253f4911SChristoph Hellwig if (error) 1894a133d952SDave Chinner goto out_unlock; 1895723cac48SDave Chinner 1896723cac48SDave Chinner /* 1897723cac48SDave Chinner * Lock and join the inodes to the transaction so that transaction commit 1898723cac48SDave Chinner * or cancel will unlock the inodes from this point onwards. 1899723cac48SDave Chinner */ 19007c2d238aSDarrick J. Wong xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); 19014ef897a2SDave Chinner lock_flags |= XFS_ILOCK_EXCL; 190239aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, ip, 0); 190339aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, tip, 0); 1904723cac48SDave Chinner 1905a133d952SDave Chinner 1906a133d952SDave Chinner /* Verify all data are being swapped */ 1907a133d952SDave Chinner if (sxp->sx_offset != 0 || 1908a133d952SDave Chinner sxp->sx_length != ip->i_d.di_size || 1909a133d952SDave Chinner sxp->sx_length != tip->i_d.di_size) { 19102451337dSDave Chinner error = -EFAULT; 19114ef897a2SDave Chinner goto out_trans_cancel; 1912a133d952SDave Chinner } 1913a133d952SDave Chinner 1914a133d952SDave Chinner trace_xfs_swap_extent_before(ip, 0); 1915a133d952SDave Chinner trace_xfs_swap_extent_before(tip, 1); 1916a133d952SDave Chinner 1917a133d952SDave Chinner /* check inode formats now that data is flushed */ 1918a133d952SDave Chinner error = xfs_swap_extents_check_format(ip, tip); 1919a133d952SDave Chinner if (error) { 1920a133d952SDave Chinner xfs_notice(mp, 1921a133d952SDave Chinner "%s: inode 0x%llx format is incompatible for exchanging.", 1922a133d952SDave Chinner __func__, ip->i_ino); 19234ef897a2SDave Chinner goto out_trans_cancel; 1924a133d952SDave Chinner } 1925a133d952SDave Chinner 1926a133d952SDave Chinner /* 1927a133d952SDave Chinner * Compare the current change & modify times with those passed in. 1928a133d952SDave Chinner * If they differ, we abort this swap. 1929a133d952SDave Chinner * This is the mechanism used to assure the calling 1930a133d952SDave Chinner * process that the file was not changed out from 1931a133d952SDave Chinner * under it. 1932a133d952SDave Chinner */ 1933a133d952SDave Chinner if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || 1934a133d952SDave Chinner (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || 1935a133d952SDave Chinner (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || 1936a133d952SDave Chinner (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { 19372451337dSDave Chinner error = -EBUSY; 193881217683SDave Chinner goto out_trans_cancel; 1939a133d952SDave Chinner } 1940a133d952SDave Chinner 194121b5c978SDave Chinner /* 194221b5c978SDave Chinner * Note the trickiness in setting the log flags - we set the owner log 194321b5c978SDave Chinner * flag on the opposite inode (i.e. the inode we are setting the new 194421b5c978SDave Chinner * owner to be) because once we swap the forks and log that, log 194521b5c978SDave Chinner * recovery is going to see the fork as owned by the swapped inode, 194621b5c978SDave Chinner * not the pre-swapped inodes.
194721b5c978SDave Chinner */ 194821b5c978SDave Chinner src_log_flags = XFS_ILOG_CORE; 194921b5c978SDave Chinner target_log_flags = XFS_ILOG_CORE; 195039aff5fdSDarrick J. Wong 19511f08af52SDarrick J. Wong if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 19521f08af52SDarrick J. Wong error = xfs_swap_extent_rmap(&tp, ip, tip); 19531f08af52SDarrick J. Wong else 195439aff5fdSDarrick J. Wong error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, 195539aff5fdSDarrick J. Wong &target_log_flags); 195621b5c978SDave Chinner if (error) 195721b5c978SDave Chinner goto out_trans_cancel; 1958a133d952SDave Chinner 1959f0bc4d13SDarrick J. Wong /* Do we have to swap reflink flags? */ 1960f0bc4d13SDarrick J. Wong if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^ 1961f0bc4d13SDarrick J. Wong (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) { 1962f0bc4d13SDarrick J. Wong f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; 1963f0bc4d13SDarrick J. Wong ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1964f0bc4d13SDarrick J. Wong ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; 1965f0bc4d13SDarrick J. Wong tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1966f0bc4d13SDarrick J. Wong tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; 196752bfcdd7SDarrick J. Wong } 196852bfcdd7SDarrick J. Wong 196952bfcdd7SDarrick J. Wong /* Swap the cow forks. */ 197052bfcdd7SDarrick J. Wong if (xfs_sb_version_hasreflink(&mp->m_sb)) { 197152bfcdd7SDarrick J. Wong xfs_extnum_t extnum; 197252bfcdd7SDarrick J. Wong 197352bfcdd7SDarrick J. Wong ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); 197452bfcdd7SDarrick J. Wong ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); 197552bfcdd7SDarrick J. Wong 197652bfcdd7SDarrick J. Wong extnum = ip->i_cnextents; 197752bfcdd7SDarrick J. Wong ip->i_cnextents = tip->i_cnextents; 197852bfcdd7SDarrick J. Wong tip->i_cnextents = extnum; 197952bfcdd7SDarrick J. Wong 1980f0bc4d13SDarrick J. Wong cowfp = ip->i_cowfp; 1981f0bc4d13SDarrick J. Wong ip->i_cowfp = tip->i_cowfp; 1982f0bc4d13SDarrick J. Wong tip->i_cowfp = cowfp; 198352bfcdd7SDarrick J. Wong 19845bcffe30SChristoph Hellwig if (ip->i_cowfp && ip->i_cowfp->if_bytes) 198583104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(ip); 198652bfcdd7SDarrick J. Wong else 198752bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(ip); 19885bcffe30SChristoph Hellwig if (tip->i_cowfp && tip->i_cowfp->if_bytes) 198983104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(tip); 199052bfcdd7SDarrick J. Wong else 199152bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(tip); 1992f0bc4d13SDarrick J. Wong } 1993f0bc4d13SDarrick J. Wong 1994a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags); 1995a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags); 1996a133d952SDave Chinner 1997a133d952SDave Chinner /* 19986fb10d6dSBrian Foster * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems 19996fb10d6dSBrian Foster * have inode number owner values in the bmbt blocks that still refer to 20006fb10d6dSBrian Foster * the old inode. Scan each bmbt to fix up the owner values with the 20016fb10d6dSBrian Foster * inode number of the current inode. 
20026fb10d6dSBrian Foster */ 20036fb10d6dSBrian Foster if (src_log_flags & XFS_ILOG_DOWNER) { 20042dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, ip, tip); 20056fb10d6dSBrian Foster if (error) 20066fb10d6dSBrian Foster goto out_trans_cancel; 20076fb10d6dSBrian Foster } 20086fb10d6dSBrian Foster if (target_log_flags & XFS_ILOG_DOWNER) { 20092dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, tip, ip); 20106fb10d6dSBrian Foster if (error) 20116fb10d6dSBrian Foster goto out_trans_cancel; 20126fb10d6dSBrian Foster } 20136fb10d6dSBrian Foster 20146fb10d6dSBrian Foster /* 2015a133d952SDave Chinner * If this is a synchronous mount, make sure that the 2016a133d952SDave Chinner * transaction goes to disk before returning to the user. 2017a133d952SDave Chinner */ 2018a133d952SDave Chinner if (mp->m_flags & XFS_MOUNT_WSYNC) 2019a133d952SDave Chinner xfs_trans_set_sync(tp); 2020a133d952SDave Chinner 202170393313SChristoph Hellwig error = xfs_trans_commit(tp); 2022a133d952SDave Chinner 2023a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0); 2024a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1); 202539aff5fdSDarrick J. Wong 202665523218SChristoph Hellwig out_unlock: 202739aff5fdSDarrick J. Wong xfs_iunlock(ip, lock_flags); 202839aff5fdSDarrick J. Wong xfs_iunlock(tip, lock_flags); 202965523218SChristoph Hellwig unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 2030a133d952SDave Chinner return error; 2031a133d952SDave Chinner 203239aff5fdSDarrick J. Wong out_trans_cancel: 203339aff5fdSDarrick J. Wong xfs_trans_cancel(tp); 203465523218SChristoph Hellwig goto out_unlock; 2035a133d952SDave Chinner } 2036