168988114SDave Chinner /* 268988114SDave Chinner * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3c24b5dfaSDave Chinner * Copyright (c) 2012 Red Hat, Inc. 468988114SDave Chinner * All Rights Reserved. 568988114SDave Chinner * 668988114SDave Chinner * This program is free software; you can redistribute it and/or 768988114SDave Chinner * modify it under the terms of the GNU General Public License as 868988114SDave Chinner * published by the Free Software Foundation. 968988114SDave Chinner * 1068988114SDave Chinner * This program is distributed in the hope that it would be useful, 1168988114SDave Chinner * but WITHOUT ANY WARRANTY; without even the implied warranty of 1268988114SDave Chinner * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1368988114SDave Chinner * GNU General Public License for more details. 1468988114SDave Chinner * 1568988114SDave Chinner * You should have received a copy of the GNU General Public License 1668988114SDave Chinner * along with this program; if not, write the Free Software Foundation, 1768988114SDave Chinner * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 1868988114SDave Chinner */ 1968988114SDave Chinner #include "xfs.h" 2068988114SDave Chinner #include "xfs_fs.h" 2168988114SDave Chinner #include "xfs_format.h" 2268988114SDave Chinner #include "xfs_bit.h" 2368988114SDave Chinner #include "xfs_log.h" 2468988114SDave Chinner #include "xfs_inum.h" 2568988114SDave Chinner #include "xfs_trans.h" 2668988114SDave Chinner #include "xfs_sb.h" 2768988114SDave Chinner #include "xfs_ag.h" 2868988114SDave Chinner #include "xfs_mount.h" 2968988114SDave Chinner #include "xfs_da_btree.h" 3068988114SDave Chinner #include "xfs_bmap_btree.h" 3168988114SDave Chinner #include "xfs_alloc_btree.h" 3268988114SDave Chinner #include "xfs_ialloc_btree.h" 3368988114SDave Chinner #include "xfs_dinode.h" 3468988114SDave Chinner #include "xfs_inode.h" 3568988114SDave Chinner #include "xfs_btree.h" 3668988114SDave Chinner #include "xfs_extfree_item.h" 3768988114SDave Chinner #include "xfs_alloc.h" 3868988114SDave Chinner #include "xfs_bmap.h" 3968988114SDave Chinner #include "xfs_bmap_util.h" 4068988114SDave Chinner #include "xfs_rtalloc.h" 4168988114SDave Chinner #include "xfs_error.h" 4268988114SDave Chinner #include "xfs_quota.h" 4368988114SDave Chinner #include "xfs_trans_space.h" 4468988114SDave Chinner #include "xfs_trace.h" 45c24b5dfaSDave Chinner #include "xfs_icache.h" 4668988114SDave Chinner 4768988114SDave Chinner /* Kernel only BMAP related definitions and functions */ 4868988114SDave Chinner 4968988114SDave Chinner /* 5068988114SDave Chinner * Convert the given file system block to a disk block. We have to treat it 5168988114SDave Chinner * differently based on whether the file is a real time file or not, because the 5268988114SDave Chinner * bmap code does. 5368988114SDave Chinner */ 5468988114SDave Chinner xfs_daddr_t 5568988114SDave Chinner xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) 5668988114SDave Chinner { 5768988114SDave Chinner return (XFS_IS_REALTIME_INODE(ip) ? \ 5868988114SDave Chinner (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \ 5968988114SDave Chinner XFS_FSB_TO_DADDR((ip)->i_mount, (fsb))); 6068988114SDave Chinner } 6168988114SDave Chinner 6268988114SDave Chinner /* 6368988114SDave Chinner * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 6468988114SDave Chinner * caller. Frees all the extents that need freeing, which must be done 6568988114SDave Chinner * last due to locking considerations. We never free any extents in 6668988114SDave Chinner * the first transaction. 6768988114SDave Chinner * 6868988114SDave Chinner * Return 1 if the given transaction was committed and a new one 6968988114SDave Chinner * started, and 0 otherwise in the committed parameter. 7068988114SDave Chinner */ 7168988114SDave Chinner int /* error */ 7268988114SDave Chinner xfs_bmap_finish( 7368988114SDave Chinner xfs_trans_t **tp, /* transaction pointer addr */ 7468988114SDave Chinner xfs_bmap_free_t *flist, /* i/o: list extents to free */ 7568988114SDave Chinner int *committed) /* xact committed or not */ 7668988114SDave Chinner { 7768988114SDave Chinner xfs_efd_log_item_t *efd; /* extent free data */ 7868988114SDave Chinner xfs_efi_log_item_t *efi; /* extent free intention */ 7968988114SDave Chinner int error; /* error return value */ 8068988114SDave Chinner xfs_bmap_free_item_t *free; /* free extent item */ 813d3c8b52SJie Liu struct xfs_trans_res tres; /* new log reservation */ 8268988114SDave Chinner xfs_mount_t *mp; /* filesystem mount structure */ 8368988114SDave Chinner xfs_bmap_free_item_t *next; /* next item on free list */ 8468988114SDave Chinner xfs_trans_t *ntp; /* new transaction pointer */ 8568988114SDave Chinner 8668988114SDave Chinner ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 8768988114SDave Chinner if (flist->xbf_count == 0) { 8868988114SDave Chinner *committed = 0; 8968988114SDave Chinner return 0; 9068988114SDave Chinner } 9168988114SDave Chinner ntp = *tp; 9268988114SDave Chinner efi = xfs_trans_get_efi(ntp, flist->xbf_count); 9368988114SDave Chinner for (free = flist->xbf_first; free; free = free->xbfi_next) 9468988114SDave Chinner xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock, 9568988114SDave Chinner free->xbfi_blockcount); 963d3c8b52SJie Liu 973d3c8b52SJie Liu tres.tr_logres = ntp->t_log_res; 983d3c8b52SJie Liu tres.tr_logcount = ntp->t_log_count; 993d3c8b52SJie Liu tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 10068988114SDave Chinner ntp = xfs_trans_dup(*tp); 10168988114SDave Chinner error = xfs_trans_commit(*tp, 0); 10268988114SDave Chinner *tp = ntp; 10368988114SDave Chinner *committed = 1; 10468988114SDave Chinner /* 10568988114SDave Chinner * We have a new transaction, so we should return committed=1, 10668988114SDave Chinner * even though we're returning an error. 10768988114SDave Chinner */ 10868988114SDave Chinner if (error) 10968988114SDave Chinner return error; 11068988114SDave Chinner 11168988114SDave Chinner /* 11268988114SDave Chinner * transaction commit worked ok so we can drop the extra ticket 11368988114SDave Chinner * reference that we gained in xfs_trans_dup() 11468988114SDave Chinner */ 11568988114SDave Chinner xfs_log_ticket_put(ntp->t_ticket); 11668988114SDave Chinner 1173d3c8b52SJie Liu error = xfs_trans_reserve(ntp, &tres, 0, 0); 1183d3c8b52SJie Liu if (error) 11968988114SDave Chinner return error; 12068988114SDave Chinner efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count); 12168988114SDave Chinner for (free = flist->xbf_first; free != NULL; free = next) { 12268988114SDave Chinner next = free->xbfi_next; 12368988114SDave Chinner if ((error = xfs_free_extent(ntp, free->xbfi_startblock, 12468988114SDave Chinner free->xbfi_blockcount))) { 12568988114SDave Chinner /* 12668988114SDave Chinner * The bmap free list will be cleaned up at a 12768988114SDave Chinner * higher level. The EFI will be canceled when 12868988114SDave Chinner * this transaction is aborted. 12968988114SDave Chinner * Need to force shutdown here to make sure it 13068988114SDave Chinner * happens, since this transaction may not be 13168988114SDave Chinner * dirty yet. 13268988114SDave Chinner */ 13368988114SDave Chinner mp = ntp->t_mountp; 13468988114SDave Chinner if (!XFS_FORCED_SHUTDOWN(mp)) 13568988114SDave Chinner xfs_force_shutdown(mp, 13668988114SDave Chinner (error == EFSCORRUPTED) ? 13768988114SDave Chinner SHUTDOWN_CORRUPT_INCORE : 13868988114SDave Chinner SHUTDOWN_META_IO_ERROR); 13968988114SDave Chinner return error; 14068988114SDave Chinner } 14168988114SDave Chinner xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, 14268988114SDave Chinner free->xbfi_blockcount); 14368988114SDave Chinner xfs_bmap_del_free(flist, NULL, free); 14468988114SDave Chinner } 14568988114SDave Chinner return 0; 14668988114SDave Chinner } 14768988114SDave Chinner 14868988114SDave Chinner int 14968988114SDave Chinner xfs_bmap_rtalloc( 15068988114SDave Chinner struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 15168988114SDave Chinner { 15268988114SDave Chinner xfs_alloctype_t atype = 0; /* type for allocation routines */ 15368988114SDave Chinner int error; /* error return value */ 15468988114SDave Chinner xfs_mount_t *mp; /* mount point structure */ 15568988114SDave Chinner xfs_extlen_t prod = 0; /* product factor for allocators */ 15668988114SDave Chinner xfs_extlen_t ralen = 0; /* realtime allocation length */ 15768988114SDave Chinner xfs_extlen_t align; /* minimum allocation alignment */ 15868988114SDave Chinner xfs_rtblock_t rtb; 15968988114SDave Chinner 16068988114SDave Chinner mp = ap->ip->i_mount; 16168988114SDave Chinner align = xfs_get_extsz_hint(ap->ip); 16268988114SDave Chinner prod = align / mp->m_sb.sb_rextsize; 16368988114SDave Chinner error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 16468988114SDave Chinner align, 1, ap->eof, 0, 16568988114SDave Chinner ap->conv, &ap->offset, &ap->length); 16668988114SDave Chinner if (error) 16768988114SDave Chinner return error; 16868988114SDave Chinner ASSERT(ap->length); 16968988114SDave Chinner ASSERT(ap->length % mp->m_sb.sb_rextsize == 0); 17068988114SDave Chinner 17168988114SDave Chinner /* 17268988114SDave Chinner * If the offset & length are not perfectly aligned 17368988114SDave Chinner * then kill prod, it will just get us in trouble. 17468988114SDave Chinner */ 17568988114SDave Chinner if (do_mod(ap->offset, align) || ap->length % align) 17668988114SDave Chinner prod = 1; 17768988114SDave Chinner /* 17868988114SDave Chinner * Set ralen to be the actual requested length in rtextents. 17968988114SDave Chinner */ 18068988114SDave Chinner ralen = ap->length / mp->m_sb.sb_rextsize; 18168988114SDave Chinner /* 18268988114SDave Chinner * If the old value was close enough to MAXEXTLEN that 18368988114SDave Chinner * we rounded up to it, cut it back so it's valid again. 18468988114SDave Chinner * Note that if it's a really large request (bigger than 18568988114SDave Chinner * MAXEXTLEN), we don't hear about that number, and can't 18668988114SDave Chinner * adjust the starting point to match it. 18768988114SDave Chinner */ 18868988114SDave Chinner if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 18968988114SDave Chinner ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 19068988114SDave Chinner 19168988114SDave Chinner /* 19268988114SDave Chinner * Lock out other modifications to the RT bitmap inode. 19368988114SDave Chinner */ 19468988114SDave Chinner xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); 19568988114SDave Chinner xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); 19668988114SDave Chinner 19768988114SDave Chinner /* 19868988114SDave Chinner * If it's an allocation to an empty file at offset 0, 19968988114SDave Chinner * pick an extent that will space things out in the rt area. 20068988114SDave Chinner */ 20168988114SDave Chinner if (ap->eof && ap->offset == 0) { 20268988114SDave Chinner xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */ 20368988114SDave Chinner 20468988114SDave Chinner error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx); 20568988114SDave Chinner if (error) 20668988114SDave Chinner return error; 20768988114SDave Chinner ap->blkno = rtx * mp->m_sb.sb_rextsize; 20868988114SDave Chinner } else { 20968988114SDave Chinner ap->blkno = 0; 21068988114SDave Chinner } 21168988114SDave Chinner 21268988114SDave Chinner xfs_bmap_adjacent(ap); 21368988114SDave Chinner 21468988114SDave Chinner /* 21568988114SDave Chinner * Realtime allocation, done through xfs_rtallocate_extent. 21668988114SDave Chinner */ 21768988114SDave Chinner atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO; 21868988114SDave Chinner do_div(ap->blkno, mp->m_sb.sb_rextsize); 21968988114SDave Chinner rtb = ap->blkno; 22068988114SDave Chinner ap->length = ralen; 22168988114SDave Chinner if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length, 22268988114SDave Chinner &ralen, atype, ap->wasdel, prod, &rtb))) 22368988114SDave Chinner return error; 22468988114SDave Chinner if (rtb == NULLFSBLOCK && prod > 1 && 22568988114SDave Chinner (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, 22668988114SDave Chinner ap->length, &ralen, atype, 22768988114SDave Chinner ap->wasdel, 1, &rtb))) 22868988114SDave Chinner return error; 22968988114SDave Chinner ap->blkno = rtb; 23068988114SDave Chinner if (ap->blkno != NULLFSBLOCK) { 23168988114SDave Chinner ap->blkno *= mp->m_sb.sb_rextsize; 23268988114SDave Chinner ralen *= mp->m_sb.sb_rextsize; 23368988114SDave Chinner ap->length = ralen; 23468988114SDave Chinner ap->ip->i_d.di_nblocks += ralen; 23568988114SDave Chinner xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 23668988114SDave Chinner if (ap->wasdel) 23768988114SDave Chinner ap->ip->i_delayed_blks -= ralen; 23868988114SDave Chinner /* 23968988114SDave Chinner * Adjust the disk quota also. This was reserved 24068988114SDave Chinner * earlier. 24168988114SDave Chinner */ 24268988114SDave Chinner xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 24368988114SDave Chinner ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 24468988114SDave Chinner XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 24568988114SDave Chinner } else { 24668988114SDave Chinner ap->length = 0; 24768988114SDave Chinner } 24868988114SDave Chinner return 0; 24968988114SDave Chinner } 25068988114SDave Chinner 25168988114SDave Chinner /* 25268988114SDave Chinner * Stack switching interfaces for allocation 25368988114SDave Chinner */ 25468988114SDave Chinner static void 25568988114SDave Chinner xfs_bmapi_allocate_worker( 25668988114SDave Chinner struct work_struct *work) 25768988114SDave Chinner { 25868988114SDave Chinner struct xfs_bmalloca *args = container_of(work, 25968988114SDave Chinner struct xfs_bmalloca, work); 26068988114SDave Chinner unsigned long pflags; 26168988114SDave Chinner 26268988114SDave Chinner /* we are in a transaction context here */ 26368988114SDave Chinner current_set_flags_nested(&pflags, PF_FSTRANS); 26468988114SDave Chinner 26568988114SDave Chinner args->result = __xfs_bmapi_allocate(args); 26668988114SDave Chinner complete(args->done); 26768988114SDave Chinner 26868988114SDave Chinner current_restore_flags_nested(&pflags, PF_FSTRANS); 26968988114SDave Chinner } 27068988114SDave Chinner 27168988114SDave Chinner /* 27268988114SDave Chinner * Some allocation requests often come in with little stack to work on. Push 27368988114SDave Chinner * them off to a worker thread so there is lots of stack to use. Otherwise just 27468988114SDave Chinner * call directly to avoid the context switch overhead here. 27568988114SDave Chinner */ 27668988114SDave Chinner int 27768988114SDave Chinner xfs_bmapi_allocate( 27868988114SDave Chinner struct xfs_bmalloca *args) 27968988114SDave Chinner { 28068988114SDave Chinner DECLARE_COMPLETION_ONSTACK(done); 28168988114SDave Chinner 28268988114SDave Chinner if (!args->stack_switch) 28368988114SDave Chinner return __xfs_bmapi_allocate(args); 28468988114SDave Chinner 28568988114SDave Chinner 28668988114SDave Chinner args->done = &done; 28768988114SDave Chinner INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); 28868988114SDave Chinner queue_work(xfs_alloc_wq, &args->work); 28968988114SDave Chinner wait_for_completion(&done); 29068988114SDave Chinner return args->result; 29168988114SDave Chinner } 29268988114SDave Chinner 29368988114SDave Chinner /* 29468988114SDave Chinner * Check if the endoff is outside the last extent. If so the caller will grow 29568988114SDave Chinner * the allocation to a stripe unit boundary. All offsets are considered outside 29668988114SDave Chinner * the end of file for an empty fork, so 1 is returned in *eof in that case. 29768988114SDave Chinner */ 29868988114SDave Chinner int 29968988114SDave Chinner xfs_bmap_eof( 30068988114SDave Chinner struct xfs_inode *ip, 30168988114SDave Chinner xfs_fileoff_t endoff, 30268988114SDave Chinner int whichfork, 30368988114SDave Chinner int *eof) 30468988114SDave Chinner { 30568988114SDave Chinner struct xfs_bmbt_irec rec; 30668988114SDave Chinner int error; 30768988114SDave Chinner 30868988114SDave Chinner error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof); 30968988114SDave Chinner if (error || *eof) 31068988114SDave Chinner return error; 31168988114SDave Chinner 31268988114SDave Chinner *eof = endoff >= rec.br_startoff + rec.br_blockcount; 31368988114SDave Chinner return 0; 31468988114SDave Chinner } 31568988114SDave Chinner 31668988114SDave Chinner /* 31768988114SDave Chinner * Extent tree block counting routines. 31868988114SDave Chinner */ 31968988114SDave Chinner 32068988114SDave Chinner /* 32168988114SDave Chinner * Count leaf blocks given a range of extent records. 32268988114SDave Chinner */ 32368988114SDave Chinner STATIC void 32468988114SDave Chinner xfs_bmap_count_leaves( 32568988114SDave Chinner xfs_ifork_t *ifp, 32668988114SDave Chinner xfs_extnum_t idx, 32768988114SDave Chinner int numrecs, 32868988114SDave Chinner int *count) 32968988114SDave Chinner { 33068988114SDave Chinner int b; 33168988114SDave Chinner 33268988114SDave Chinner for (b = 0; b < numrecs; b++) { 33368988114SDave Chinner xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); 33468988114SDave Chinner *count += xfs_bmbt_get_blockcount(frp); 33568988114SDave Chinner } 33668988114SDave Chinner } 33768988114SDave Chinner 33868988114SDave Chinner /* 33968988114SDave Chinner * Count leaf blocks given a range of extent records originally 34068988114SDave Chinner * in btree format. 34168988114SDave Chinner */ 34268988114SDave Chinner STATIC void 34368988114SDave Chinner xfs_bmap_disk_count_leaves( 34468988114SDave Chinner struct xfs_mount *mp, 34568988114SDave Chinner struct xfs_btree_block *block, 34668988114SDave Chinner int numrecs, 34768988114SDave Chinner int *count) 34868988114SDave Chinner { 34968988114SDave Chinner int b; 35068988114SDave Chinner xfs_bmbt_rec_t *frp; 35168988114SDave Chinner 35268988114SDave Chinner for (b = 1; b <= numrecs; b++) { 35368988114SDave Chinner frp = XFS_BMBT_REC_ADDR(mp, block, b); 35468988114SDave Chinner *count += xfs_bmbt_disk_get_blockcount(frp); 35568988114SDave Chinner } 35668988114SDave Chinner } 35768988114SDave Chinner 35868988114SDave Chinner /* 35968988114SDave Chinner * Recursively walks each level of a btree 3608be11e92SZhi Yong Wu * to count total fsblocks in use. 36168988114SDave Chinner */ 36268988114SDave Chinner STATIC int /* error */ 36368988114SDave Chinner xfs_bmap_count_tree( 36468988114SDave Chinner xfs_mount_t *mp, /* file system mount point */ 36568988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 36668988114SDave Chinner xfs_ifork_t *ifp, /* inode fork pointer */ 36768988114SDave Chinner xfs_fsblock_t blockno, /* file system block number */ 36868988114SDave Chinner int levelin, /* level in btree */ 36968988114SDave Chinner int *count) /* Count of blocks */ 37068988114SDave Chinner { 37168988114SDave Chinner int error; 37268988114SDave Chinner xfs_buf_t *bp, *nbp; 37368988114SDave Chinner int level = levelin; 37468988114SDave Chinner __be64 *pp; 37568988114SDave Chinner xfs_fsblock_t bno = blockno; 37668988114SDave Chinner xfs_fsblock_t nextbno; 37768988114SDave Chinner struct xfs_btree_block *block, *nextblock; 37868988114SDave Chinner int numrecs; 37968988114SDave Chinner 38068988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 38168988114SDave Chinner &xfs_bmbt_buf_ops); 38268988114SDave Chinner if (error) 38368988114SDave Chinner return error; 38468988114SDave Chinner *count += 1; 38568988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 38668988114SDave Chinner 38768988114SDave Chinner if (--level) { 38868988114SDave Chinner /* Not at node above leaves, count this level of nodes */ 38968988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 39068988114SDave Chinner while (nextbno != NULLFSBLOCK) { 39168988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 39268988114SDave Chinner XFS_BMAP_BTREE_REF, 39368988114SDave Chinner &xfs_bmbt_buf_ops); 39468988114SDave Chinner if (error) 39568988114SDave Chinner return error; 39668988114SDave Chinner *count += 1; 39768988114SDave Chinner nextblock = XFS_BUF_TO_BLOCK(nbp); 39868988114SDave Chinner nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); 39968988114SDave Chinner xfs_trans_brelse(tp, nbp); 40068988114SDave Chinner } 40168988114SDave Chinner 40268988114SDave Chinner /* Dive to the next level */ 40368988114SDave Chinner pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 40468988114SDave Chinner bno = be64_to_cpu(*pp); 40568988114SDave Chinner if (unlikely((error = 40668988114SDave Chinner xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { 40768988114SDave Chinner xfs_trans_brelse(tp, bp); 40868988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", 40968988114SDave Chinner XFS_ERRLEVEL_LOW, mp); 41068988114SDave Chinner return XFS_ERROR(EFSCORRUPTED); 41168988114SDave Chinner } 41268988114SDave Chinner xfs_trans_brelse(tp, bp); 41368988114SDave Chinner } else { 41468988114SDave Chinner /* count all level 1 nodes and their leaves */ 41568988114SDave Chinner for (;;) { 41668988114SDave Chinner nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 41768988114SDave Chinner numrecs = be16_to_cpu(block->bb_numrecs); 41868988114SDave Chinner xfs_bmap_disk_count_leaves(mp, block, numrecs, count); 41968988114SDave Chinner xfs_trans_brelse(tp, bp); 42068988114SDave Chinner if (nextbno == NULLFSBLOCK) 42168988114SDave Chinner break; 42268988114SDave Chinner bno = nextbno; 42368988114SDave Chinner error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 42468988114SDave Chinner XFS_BMAP_BTREE_REF, 42568988114SDave Chinner &xfs_bmbt_buf_ops); 42668988114SDave Chinner if (error) 42768988114SDave Chinner return error; 42868988114SDave Chinner *count += 1; 42968988114SDave Chinner block = XFS_BUF_TO_BLOCK(bp); 43068988114SDave Chinner } 43168988114SDave Chinner } 43268988114SDave Chinner return 0; 43368988114SDave Chinner } 43468988114SDave Chinner 43568988114SDave Chinner /* 43668988114SDave Chinner * Count fsblocks of the given fork. 43768988114SDave Chinner */ 43868988114SDave Chinner int /* error */ 43968988114SDave Chinner xfs_bmap_count_blocks( 44068988114SDave Chinner xfs_trans_t *tp, /* transaction pointer */ 44168988114SDave Chinner xfs_inode_t *ip, /* incore inode */ 44268988114SDave Chinner int whichfork, /* data or attr fork */ 44368988114SDave Chinner int *count) /* out: count of blocks */ 44468988114SDave Chinner { 44568988114SDave Chinner struct xfs_btree_block *block; /* current btree block */ 44668988114SDave Chinner xfs_fsblock_t bno; /* block # of "block" */ 44768988114SDave Chinner xfs_ifork_t *ifp; /* fork structure */ 44868988114SDave Chinner int level; /* btree level, for checking */ 44968988114SDave Chinner xfs_mount_t *mp; /* file system mount structure */ 45068988114SDave Chinner __be64 *pp; /* pointer to block address */ 45168988114SDave Chinner 45268988114SDave Chinner bno = NULLFSBLOCK; 45368988114SDave Chinner mp = ip->i_mount; 45468988114SDave Chinner ifp = XFS_IFORK_PTR(ip, whichfork); 45568988114SDave Chinner if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { 45668988114SDave Chinner xfs_bmap_count_leaves(ifp, 0, 45768988114SDave Chinner ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), 45868988114SDave Chinner count); 45968988114SDave Chinner return 0; 46068988114SDave Chinner } 46168988114SDave Chinner 46268988114SDave Chinner /* 46368988114SDave Chinner * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 46468988114SDave Chinner */ 46568988114SDave Chinner block = ifp->if_broot; 46668988114SDave Chinner level = be16_to_cpu(block->bb_level); 46768988114SDave Chinner ASSERT(level > 0); 46868988114SDave Chinner pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 46968988114SDave Chinner bno = be64_to_cpu(*pp); 47068988114SDave Chinner ASSERT(bno != NULLDFSBNO); 47168988114SDave Chinner ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 47268988114SDave Chinner ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 47368988114SDave Chinner 47468988114SDave Chinner if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) { 47568988114SDave Chinner XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW, 47668988114SDave Chinner mp); 47768988114SDave Chinner return XFS_ERROR(EFSCORRUPTED); 47868988114SDave Chinner } 47968988114SDave Chinner 48068988114SDave Chinner return 0; 48168988114SDave Chinner } 48268988114SDave Chinner 48368988114SDave Chinner /* 48468988114SDave Chinner * returns 1 for success, 0 if we failed to map the extent. 48568988114SDave Chinner */ 48668988114SDave Chinner STATIC int 48768988114SDave Chinner xfs_getbmapx_fix_eof_hole( 48868988114SDave Chinner xfs_inode_t *ip, /* xfs incore inode pointer */ 48968988114SDave Chinner struct getbmapx *out, /* output structure */ 49068988114SDave Chinner int prealloced, /* this is a file with 49168988114SDave Chinner * preallocated data space */ 49268988114SDave Chinner __int64_t end, /* last block requested */ 49368988114SDave Chinner xfs_fsblock_t startblock) 49468988114SDave Chinner { 49568988114SDave Chinner __int64_t fixlen; 49668988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 49768988114SDave Chinner xfs_ifork_t *ifp; /* inode fork pointer */ 49868988114SDave Chinner xfs_extnum_t lastx; /* last extent pointer */ 49968988114SDave Chinner xfs_fileoff_t fileblock; 50068988114SDave Chinner 50168988114SDave Chinner if (startblock == HOLESTARTBLOCK) { 50268988114SDave Chinner mp = ip->i_mount; 50368988114SDave Chinner out->bmv_block = -1; 50468988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip))); 50568988114SDave Chinner fixlen -= out->bmv_offset; 50668988114SDave Chinner if (prealloced && out->bmv_offset + out->bmv_length == end) { 50768988114SDave Chinner /* Came to hole at EOF. Trim it. */ 50868988114SDave Chinner if (fixlen <= 0) 50968988114SDave Chinner return 0; 51068988114SDave Chinner out->bmv_length = fixlen; 51168988114SDave Chinner } 51268988114SDave Chinner } else { 51368988114SDave Chinner if (startblock == DELAYSTARTBLOCK) 51468988114SDave Chinner out->bmv_block = -2; 51568988114SDave Chinner else 51668988114SDave Chinner out->bmv_block = xfs_fsb_to_db(ip, startblock); 51768988114SDave Chinner fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); 51868988114SDave Chinner ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 51968988114SDave Chinner if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && 52068988114SDave Chinner (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) 52168988114SDave Chinner out->bmv_oflags |= BMV_OF_LAST; 52268988114SDave Chinner } 52368988114SDave Chinner 52468988114SDave Chinner return 1; 52568988114SDave Chinner } 52668988114SDave Chinner 52768988114SDave Chinner /* 52868988114SDave Chinner * Get inode's extents as described in bmv, and format for output. 52968988114SDave Chinner * Calls formatter to fill the user's buffer until all extents 53068988114SDave Chinner * are mapped, until the passed-in bmv->bmv_count slots have 53168988114SDave Chinner * been filled, or until the formatter short-circuits the loop, 53268988114SDave Chinner * if it is tracking filled-in extents on its own. 53368988114SDave Chinner */ 53468988114SDave Chinner int /* error code */ 53568988114SDave Chinner xfs_getbmap( 53668988114SDave Chinner xfs_inode_t *ip, 53768988114SDave Chinner struct getbmapx *bmv, /* user bmap structure */ 53868988114SDave Chinner xfs_bmap_format_t formatter, /* format to user */ 53968988114SDave Chinner void *arg) /* formatter arg */ 54068988114SDave Chinner { 54168988114SDave Chinner __int64_t bmvend; /* last block requested */ 54268988114SDave Chinner int error = 0; /* return value */ 54368988114SDave Chinner __int64_t fixlen; /* length for -1 case */ 54468988114SDave Chinner int i; /* extent number */ 54568988114SDave Chinner int lock; /* lock state */ 54668988114SDave Chinner xfs_bmbt_irec_t *map; /* buffer for user's data */ 54768988114SDave Chinner xfs_mount_t *mp; /* file system mount point */ 54868988114SDave Chinner int nex; /* # of user extents can do */ 54968988114SDave Chinner int nexleft; /* # of user extents left */ 55068988114SDave Chinner int subnex; /* # of bmapi's can do */ 55168988114SDave Chinner int nmap; /* number of map entries */ 55268988114SDave Chinner struct getbmapx *out; /* output structure */ 55368988114SDave Chinner int whichfork; /* data or attr fork */ 55468988114SDave Chinner int prealloced; /* this is a file with 55568988114SDave Chinner * preallocated data space */ 55668988114SDave Chinner int iflags; /* interface flags */ 55768988114SDave Chinner int bmapi_flags; /* flags for xfs_bmapi */ 55868988114SDave Chinner int cur_ext = 0; 55968988114SDave Chinner 56068988114SDave Chinner mp = ip->i_mount; 56168988114SDave Chinner iflags = bmv->bmv_iflags; 56268988114SDave Chinner whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; 56368988114SDave Chinner 56468988114SDave Chinner if (whichfork == XFS_ATTR_FORK) { 56568988114SDave Chinner if (XFS_IFORK_Q(ip)) { 56668988114SDave Chinner if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 56768988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && 56868988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) 56968988114SDave Chinner return XFS_ERROR(EINVAL); 57068988114SDave Chinner } else if (unlikely( 57168988114SDave Chinner ip->i_d.di_aformat != 0 && 57268988114SDave Chinner ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { 57368988114SDave Chinner XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, 57468988114SDave Chinner ip->i_mount); 57568988114SDave Chinner return XFS_ERROR(EFSCORRUPTED); 57668988114SDave Chinner } 57768988114SDave Chinner 57868988114SDave Chinner prealloced = 0; 57968988114SDave Chinner fixlen = 1LL << 32; 58068988114SDave Chinner } else { 58168988114SDave Chinner if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 58268988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 58368988114SDave Chinner ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 58468988114SDave Chinner return XFS_ERROR(EINVAL); 58568988114SDave Chinner 58668988114SDave Chinner if (xfs_get_extsz_hint(ip) || 58768988114SDave Chinner ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 58868988114SDave Chinner prealloced = 1; 58968988114SDave Chinner fixlen = mp->m_super->s_maxbytes; 59068988114SDave Chinner } else { 59168988114SDave Chinner prealloced = 0; 59268988114SDave Chinner fixlen = XFS_ISIZE(ip); 59368988114SDave Chinner } 59468988114SDave Chinner } 59568988114SDave Chinner 59668988114SDave Chinner if (bmv->bmv_length == -1) { 59768988114SDave Chinner fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 59868988114SDave Chinner bmv->bmv_length = 59968988114SDave Chinner max_t(__int64_t, fixlen - bmv->bmv_offset, 0); 60068988114SDave Chinner } else if (bmv->bmv_length == 0) { 60168988114SDave Chinner bmv->bmv_entries = 0; 60268988114SDave Chinner return 0; 60368988114SDave Chinner } else if (bmv->bmv_length < 0) { 60468988114SDave Chinner return XFS_ERROR(EINVAL); 60568988114SDave Chinner } 60668988114SDave Chinner 60768988114SDave Chinner nex = bmv->bmv_count - 1; 60868988114SDave Chinner if (nex <= 0) 60968988114SDave Chinner return XFS_ERROR(EINVAL); 61068988114SDave Chinner bmvend = bmv->bmv_offset + bmv->bmv_length; 61168988114SDave Chinner 61268988114SDave Chinner 61368988114SDave Chinner if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) 61468988114SDave Chinner return XFS_ERROR(ENOMEM); 61568988114SDave Chinner out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); 61668988114SDave Chinner if (!out) { 61768988114SDave Chinner out = kmem_zalloc_large(bmv->bmv_count * 61868988114SDave Chinner sizeof(struct getbmapx)); 61968988114SDave Chinner if (!out) 62068988114SDave Chinner return XFS_ERROR(ENOMEM); 62168988114SDave Chinner } 62268988114SDave Chinner 62368988114SDave Chinner xfs_ilock(ip, XFS_IOLOCK_SHARED); 62468988114SDave Chinner if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { 62568988114SDave Chinner if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { 62668988114SDave Chinner error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 62768988114SDave Chinner if (error) 62868988114SDave Chinner goto out_unlock_iolock; 62968988114SDave Chinner } 63068988114SDave Chinner /* 63168988114SDave Chinner * even after flushing the inode, there can still be delalloc 63268988114SDave Chinner * blocks on the inode beyond EOF due to speculative 63368988114SDave Chinner * preallocation. These are not removed until the release 63468988114SDave Chinner * function is called or the inode is inactivated. Hence we 63568988114SDave Chinner * cannot assert here that ip->i_delayed_blks == 0. 63668988114SDave Chinner */ 63768988114SDave Chinner } 63868988114SDave Chinner 63968988114SDave Chinner lock = xfs_ilock_map_shared(ip); 64068988114SDave Chinner 64168988114SDave Chinner /* 64268988114SDave Chinner * Don't let nex be bigger than the number of extents 64368988114SDave Chinner * we can have assuming alternating holes and real extents. 64468988114SDave Chinner */ 64568988114SDave Chinner if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 64668988114SDave Chinner nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 64768988114SDave Chinner 64868988114SDave Chinner bmapi_flags = xfs_bmapi_aflag(whichfork); 64968988114SDave Chinner if (!(iflags & BMV_IF_PREALLOC)) 65068988114SDave Chinner bmapi_flags |= XFS_BMAPI_IGSTATE; 65168988114SDave Chinner 65268988114SDave Chinner /* 65368988114SDave Chinner * Allocate enough space to handle "subnex" maps at a time. 65468988114SDave Chinner */ 65568988114SDave Chinner error = ENOMEM; 65668988114SDave Chinner subnex = 16; 65768988114SDave Chinner map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); 65868988114SDave Chinner if (!map) 65968988114SDave Chinner goto out_unlock_ilock; 66068988114SDave Chinner 66168988114SDave Chinner bmv->bmv_entries = 0; 66268988114SDave Chinner 66368988114SDave Chinner if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && 66468988114SDave Chinner (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { 66568988114SDave Chinner error = 0; 66668988114SDave Chinner goto out_free_map; 66768988114SDave Chinner } 66868988114SDave Chinner 66968988114SDave Chinner nexleft = nex; 67068988114SDave Chinner 67168988114SDave Chinner do { 67268988114SDave Chinner nmap = (nexleft > subnex) ? subnex : nexleft; 67368988114SDave Chinner error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 67468988114SDave Chinner XFS_BB_TO_FSB(mp, bmv->bmv_length), 67568988114SDave Chinner map, &nmap, bmapi_flags); 67668988114SDave Chinner if (error) 67768988114SDave Chinner goto out_free_map; 67868988114SDave Chinner ASSERT(nmap <= subnex); 67968988114SDave Chinner 68068988114SDave Chinner for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 68168988114SDave Chinner out[cur_ext].bmv_oflags = 0; 68268988114SDave Chinner if (map[i].br_state == XFS_EXT_UNWRITTEN) 68368988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; 68468988114SDave Chinner else if (map[i].br_startblock == DELAYSTARTBLOCK) 68568988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; 68668988114SDave Chinner out[cur_ext].bmv_offset = 68768988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_startoff); 68868988114SDave Chinner out[cur_ext].bmv_length = 68968988114SDave Chinner XFS_FSB_TO_BB(mp, map[i].br_blockcount); 69068988114SDave Chinner out[cur_ext].bmv_unused1 = 0; 69168988114SDave Chinner out[cur_ext].bmv_unused2 = 0; 69268988114SDave Chinner 69368988114SDave Chinner /* 69468988114SDave Chinner * delayed allocation extents that start beyond EOF can 69568988114SDave Chinner * occur due to speculative EOF allocation when the 69668988114SDave Chinner * delalloc extent is larger than the largest freespace 69768988114SDave Chinner * extent at conversion time. These extents cannot be 69868988114SDave Chinner * converted by data writeback, so can exist here even 69968988114SDave Chinner * if we are not supposed to be finding delalloc 70068988114SDave Chinner * extents. 70168988114SDave Chinner */ 70268988114SDave Chinner if (map[i].br_startblock == DELAYSTARTBLOCK && 70368988114SDave Chinner map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) 70468988114SDave Chinner ASSERT((iflags & BMV_IF_DELALLOC) != 0); 70568988114SDave Chinner 70668988114SDave Chinner if (map[i].br_startblock == HOLESTARTBLOCK && 70768988114SDave Chinner whichfork == XFS_ATTR_FORK) { 70868988114SDave Chinner /* came to the end of attribute fork */ 70968988114SDave Chinner out[cur_ext].bmv_oflags |= BMV_OF_LAST; 71068988114SDave Chinner goto out_free_map; 71168988114SDave Chinner } 71268988114SDave Chinner 71368988114SDave Chinner if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], 71468988114SDave Chinner prealloced, bmvend, 71568988114SDave Chinner map[i].br_startblock)) 71668988114SDave Chinner goto out_free_map; 71768988114SDave Chinner 71868988114SDave Chinner bmv->bmv_offset = 71968988114SDave Chinner out[cur_ext].bmv_offset + 72068988114SDave Chinner out[cur_ext].bmv_length; 72168988114SDave Chinner bmv->bmv_length = 72268988114SDave Chinner max_t(__int64_t, 0, bmvend - bmv->bmv_offset); 72368988114SDave Chinner 72468988114SDave Chinner /* 72568988114SDave Chinner * In case we don't want to return the hole, 72668988114SDave Chinner * don't increase cur_ext so that we can reuse 72768988114SDave Chinner * it in the next loop. 72868988114SDave Chinner */ 72968988114SDave Chinner if ((iflags & BMV_IF_NO_HOLES) && 73068988114SDave Chinner map[i].br_startblock == HOLESTARTBLOCK) { 73168988114SDave Chinner memset(&out[cur_ext], 0, sizeof(out[cur_ext])); 73268988114SDave Chinner continue; 73368988114SDave Chinner } 73468988114SDave Chinner 73568988114SDave Chinner nexleft--; 73668988114SDave Chinner bmv->bmv_entries++; 73768988114SDave Chinner cur_ext++; 73868988114SDave Chinner } 73968988114SDave Chinner } while (nmap && nexleft && bmv->bmv_length); 74068988114SDave Chinner 74168988114SDave Chinner out_free_map: 74268988114SDave Chinner kmem_free(map); 74368988114SDave Chinner out_unlock_ilock: 74468988114SDave Chinner xfs_iunlock_map_shared(ip, lock); 74568988114SDave Chinner out_unlock_iolock: 74668988114SDave Chinner xfs_iunlock(ip, XFS_IOLOCK_SHARED); 74768988114SDave Chinner 74868988114SDave Chinner for (i = 0; i < cur_ext; i++) { 74968988114SDave Chinner int full = 0; /* user array is full */ 75068988114SDave Chinner 75168988114SDave Chinner /* format results & advance arg */ 75268988114SDave Chinner error = formatter(&arg, &out[i], &full); 75368988114SDave Chinner if (error || full) 75468988114SDave Chinner break; 75568988114SDave Chinner } 75668988114SDave Chinner 75768988114SDave Chinner if (is_vmalloc_addr(out)) 75868988114SDave Chinner kmem_free_large(out); 75968988114SDave Chinner else 76068988114SDave Chinner kmem_free(out); 76168988114SDave Chinner return error; 76268988114SDave Chinner } 76368988114SDave Chinner 76468988114SDave Chinner /* 76568988114SDave Chinner * dead simple method of punching delalyed allocation blocks from a range in 76668988114SDave Chinner * the inode. Walks a block at a time so will be slow, but is only executed in 767*ad4809bfSZhi Yong Wu * rare error cases so the overhead is not critical. This will always punch out 76868988114SDave Chinner * both the start and end blocks, even if the ranges only partially overlap 76968988114SDave Chinner * them, so it is up to the caller to ensure that partial blocks are not 77068988114SDave Chinner * passed in. 77168988114SDave Chinner */ 77268988114SDave Chinner int 77368988114SDave Chinner xfs_bmap_punch_delalloc_range( 77468988114SDave Chinner struct xfs_inode *ip, 77568988114SDave Chinner xfs_fileoff_t start_fsb, 77668988114SDave Chinner xfs_fileoff_t length) 77768988114SDave Chinner { 77868988114SDave Chinner xfs_fileoff_t remaining = length; 77968988114SDave Chinner int error = 0; 78068988114SDave Chinner 78168988114SDave Chinner ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 78268988114SDave Chinner 78368988114SDave Chinner do { 78468988114SDave Chinner int done; 78568988114SDave Chinner xfs_bmbt_irec_t imap; 78668988114SDave Chinner int nimaps = 1; 78768988114SDave Chinner xfs_fsblock_t firstblock; 78868988114SDave Chinner xfs_bmap_free_t flist; 78968988114SDave Chinner 79068988114SDave Chinner /* 79168988114SDave Chinner * Map the range first and check that it is a delalloc extent 79268988114SDave Chinner * before trying to unmap the range. Otherwise we will be 79368988114SDave Chinner * trying to remove a real extent (which requires a 79468988114SDave Chinner * transaction) or a hole, which is probably a bad idea... 79568988114SDave Chinner */ 79668988114SDave Chinner error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps, 79768988114SDave Chinner XFS_BMAPI_ENTIRE); 79868988114SDave Chinner 79968988114SDave Chinner if (error) { 80068988114SDave Chinner /* something screwed, just bail */ 80168988114SDave Chinner if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 80268988114SDave Chinner xfs_alert(ip->i_mount, 80368988114SDave Chinner "Failed delalloc mapping lookup ino %lld fsb %lld.", 80468988114SDave Chinner ip->i_ino, start_fsb); 80568988114SDave Chinner } 80668988114SDave Chinner break; 80768988114SDave Chinner } 80868988114SDave Chinner if (!nimaps) { 80968988114SDave Chinner /* nothing there */ 81068988114SDave Chinner goto next_block; 81168988114SDave Chinner } 81268988114SDave Chinner if (imap.br_startblock != DELAYSTARTBLOCK) { 81368988114SDave Chinner /* been converted, ignore */ 81468988114SDave Chinner goto next_block; 81568988114SDave Chinner } 81668988114SDave Chinner WARN_ON(imap.br_blockcount == 0); 81768988114SDave Chinner 81868988114SDave Chinner /* 81968988114SDave Chinner * Note: while we initialise the firstblock/flist pair, they 82068988114SDave Chinner * should never be used because blocks should never be 82168988114SDave Chinner * allocated or freed for a delalloc extent and hence we need 82268988114SDave Chinner * don't cancel or finish them after the xfs_bunmapi() call. 82368988114SDave Chinner */ 82468988114SDave Chinner xfs_bmap_init(&flist, &firstblock); 82568988114SDave Chinner error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, 82668988114SDave Chinner &flist, &done); 82768988114SDave Chinner if (error) 82868988114SDave Chinner break; 82968988114SDave Chinner 83068988114SDave Chinner ASSERT(!flist.xbf_count && !flist.xbf_first); 83168988114SDave Chinner next_block: 83268988114SDave Chinner start_fsb++; 83368988114SDave Chinner remaining--; 83468988114SDave Chinner } while(remaining > 0); 83568988114SDave Chinner 83668988114SDave Chinner return error; 83768988114SDave Chinner } 838c24b5dfaSDave Chinner 839c24b5dfaSDave Chinner /* 840c24b5dfaSDave Chinner * Test whether it is appropriate to check an inode for and free post EOF 841c24b5dfaSDave Chinner * blocks. The 'force' parameter determines whether we should also consider 842c24b5dfaSDave Chinner * regular files that are marked preallocated or append-only. 843c24b5dfaSDave Chinner */ 844c24b5dfaSDave Chinner bool 845c24b5dfaSDave Chinner xfs_can_free_eofblocks(struct xfs_inode *ip, bool force) 846c24b5dfaSDave Chinner { 847c24b5dfaSDave Chinner /* prealloc/delalloc exists only on regular files */ 848c24b5dfaSDave Chinner if (!S_ISREG(ip->i_d.di_mode)) 849c24b5dfaSDave Chinner return false; 850c24b5dfaSDave Chinner 851c24b5dfaSDave Chinner /* 852c24b5dfaSDave Chinner * Zero sized files with no cached pages and delalloc blocks will not 853c24b5dfaSDave Chinner * have speculative prealloc/delalloc blocks to remove. 854c24b5dfaSDave Chinner */ 855c24b5dfaSDave Chinner if (VFS_I(ip)->i_size == 0 && 856c24b5dfaSDave Chinner VN_CACHED(VFS_I(ip)) == 0 && 857c24b5dfaSDave Chinner ip->i_delayed_blks == 0) 858c24b5dfaSDave Chinner return false; 859c24b5dfaSDave Chinner 860c24b5dfaSDave Chinner /* If we haven't read in the extent list, then don't do it now. */ 861c24b5dfaSDave Chinner if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) 862c24b5dfaSDave Chinner return false; 863c24b5dfaSDave Chinner 864c24b5dfaSDave Chinner /* 865c24b5dfaSDave Chinner * Do not free real preallocated or append-only files unless the file 866c24b5dfaSDave Chinner * has delalloc blocks and we are forced to remove them. 867c24b5dfaSDave Chinner */ 868c24b5dfaSDave Chinner if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) 869c24b5dfaSDave Chinner if (!force || ip->i_delayed_blks == 0) 870c24b5dfaSDave Chinner return false; 871c24b5dfaSDave Chinner 872c24b5dfaSDave Chinner return true; 873c24b5dfaSDave Chinner } 874c24b5dfaSDave Chinner 875c24b5dfaSDave Chinner /* 876c24b5dfaSDave Chinner * This is called by xfs_inactive to free any blocks beyond eof 877c24b5dfaSDave Chinner * when the link count isn't zero and by xfs_dm_punch_hole() when 878c24b5dfaSDave Chinner * punching a hole to EOF. 879c24b5dfaSDave Chinner */ 880c24b5dfaSDave Chinner int 881c24b5dfaSDave Chinner xfs_free_eofblocks( 882c24b5dfaSDave Chinner xfs_mount_t *mp, 883c24b5dfaSDave Chinner xfs_inode_t *ip, 884c24b5dfaSDave Chinner bool need_iolock) 885c24b5dfaSDave Chinner { 886c24b5dfaSDave Chinner xfs_trans_t *tp; 887c24b5dfaSDave Chinner int error; 888c24b5dfaSDave Chinner xfs_fileoff_t end_fsb; 889c24b5dfaSDave Chinner xfs_fileoff_t last_fsb; 890c24b5dfaSDave Chinner xfs_filblks_t map_len; 891c24b5dfaSDave Chinner int nimaps; 892c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 893c24b5dfaSDave Chinner 894c24b5dfaSDave Chinner /* 895c24b5dfaSDave Chinner * Figure out if there are any blocks beyond the end 896c24b5dfaSDave Chinner * of the file. If not, then there is nothing to do. 897c24b5dfaSDave Chinner */ 898c24b5dfaSDave Chinner end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip)); 899c24b5dfaSDave Chinner last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 900c24b5dfaSDave Chinner if (last_fsb <= end_fsb) 901c24b5dfaSDave Chinner return 0; 902c24b5dfaSDave Chinner map_len = last_fsb - end_fsb; 903c24b5dfaSDave Chinner 904c24b5dfaSDave Chinner nimaps = 1; 905c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_SHARED); 906c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0); 907c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_SHARED); 908c24b5dfaSDave Chinner 909c24b5dfaSDave Chinner if (!error && (nimaps != 0) && 910c24b5dfaSDave Chinner (imap.br_startblock != HOLESTARTBLOCK || 911c24b5dfaSDave Chinner ip->i_delayed_blks)) { 912c24b5dfaSDave Chinner /* 913c24b5dfaSDave Chinner * Attach the dquots to the inode up front. 914c24b5dfaSDave Chinner */ 915c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 916c24b5dfaSDave Chinner if (error) 917c24b5dfaSDave Chinner return error; 918c24b5dfaSDave Chinner 919c24b5dfaSDave Chinner /* 920c24b5dfaSDave Chinner * There are blocks after the end of file. 921c24b5dfaSDave Chinner * Free them up now by truncating the file to 922c24b5dfaSDave Chinner * its current size. 923c24b5dfaSDave Chinner */ 924c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 925c24b5dfaSDave Chinner 926c24b5dfaSDave Chinner if (need_iolock) { 927c24b5dfaSDave Chinner if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 928c24b5dfaSDave Chinner xfs_trans_cancel(tp, 0); 929c24b5dfaSDave Chinner return EAGAIN; 930c24b5dfaSDave Chinner } 931c24b5dfaSDave Chinner } 932c24b5dfaSDave Chinner 9333d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 934c24b5dfaSDave Chinner if (error) { 935c24b5dfaSDave Chinner ASSERT(XFS_FORCED_SHUTDOWN(mp)); 936c24b5dfaSDave Chinner xfs_trans_cancel(tp, 0); 937c24b5dfaSDave Chinner if (need_iolock) 938c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 939c24b5dfaSDave Chinner return error; 940c24b5dfaSDave Chinner } 941c24b5dfaSDave Chinner 942c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 943c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 944c24b5dfaSDave Chinner 945c24b5dfaSDave Chinner /* 946c24b5dfaSDave Chinner * Do not update the on-disk file size. If we update the 947c24b5dfaSDave Chinner * on-disk file size and then the system crashes before the 948c24b5dfaSDave Chinner * contents of the file are flushed to disk then the files 949c24b5dfaSDave Chinner * may be full of holes (ie NULL files bug). 950c24b5dfaSDave Chinner */ 951c24b5dfaSDave Chinner error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 952c24b5dfaSDave Chinner XFS_ISIZE(ip)); 953c24b5dfaSDave Chinner if (error) { 954c24b5dfaSDave Chinner /* 955c24b5dfaSDave Chinner * If we get an error at this point we simply don't 956c24b5dfaSDave Chinner * bother truncating the file. 957c24b5dfaSDave Chinner */ 958c24b5dfaSDave Chinner xfs_trans_cancel(tp, 959c24b5dfaSDave Chinner (XFS_TRANS_RELEASE_LOG_RES | 960c24b5dfaSDave Chinner XFS_TRANS_ABORT)); 961c24b5dfaSDave Chinner } else { 962c24b5dfaSDave Chinner error = xfs_trans_commit(tp, 963c24b5dfaSDave Chinner XFS_TRANS_RELEASE_LOG_RES); 964c24b5dfaSDave Chinner if (!error) 965c24b5dfaSDave Chinner xfs_inode_clear_eofblocks_tag(ip); 966c24b5dfaSDave Chinner } 967c24b5dfaSDave Chinner 968c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 969c24b5dfaSDave Chinner if (need_iolock) 970c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 971c24b5dfaSDave Chinner } 972c24b5dfaSDave Chinner return error; 973c24b5dfaSDave Chinner } 974c24b5dfaSDave Chinner 975c24b5dfaSDave Chinner /* 976c24b5dfaSDave Chinner * xfs_alloc_file_space() 977c24b5dfaSDave Chinner * This routine allocates disk space for the given file. 978c24b5dfaSDave Chinner * 979c24b5dfaSDave Chinner * If alloc_type == 0, this request is for an ALLOCSP type 980c24b5dfaSDave Chinner * request which will change the file size. In this case, no 981c24b5dfaSDave Chinner * DMAPI event will be generated by the call. A TRUNCATE event 982c24b5dfaSDave Chinner * will be generated later by xfs_setattr. 983c24b5dfaSDave Chinner * 984c24b5dfaSDave Chinner * If alloc_type != 0, this request is for a RESVSP type 985c24b5dfaSDave Chinner * request, and a DMAPI DM_EVENT_WRITE will be generated if the 986c24b5dfaSDave Chinner * lower block boundary byte address is less than the file's 987c24b5dfaSDave Chinner * length. 988c24b5dfaSDave Chinner * 989c24b5dfaSDave Chinner * RETURNS: 990c24b5dfaSDave Chinner * 0 on success 991c24b5dfaSDave Chinner * errno on error 992c24b5dfaSDave Chinner * 993c24b5dfaSDave Chinner */ 994c24b5dfaSDave Chinner STATIC int 995c24b5dfaSDave Chinner xfs_alloc_file_space( 996c24b5dfaSDave Chinner xfs_inode_t *ip, 997c24b5dfaSDave Chinner xfs_off_t offset, 998c24b5dfaSDave Chinner xfs_off_t len, 999c24b5dfaSDave Chinner int alloc_type, 1000c24b5dfaSDave Chinner int attr_flags) 1001c24b5dfaSDave Chinner { 1002c24b5dfaSDave Chinner xfs_mount_t *mp = ip->i_mount; 1003c24b5dfaSDave Chinner xfs_off_t count; 1004c24b5dfaSDave Chinner xfs_filblks_t allocated_fsb; 1005c24b5dfaSDave Chinner xfs_filblks_t allocatesize_fsb; 1006c24b5dfaSDave Chinner xfs_extlen_t extsz, temp; 1007c24b5dfaSDave Chinner xfs_fileoff_t startoffset_fsb; 1008c24b5dfaSDave Chinner xfs_fsblock_t firstfsb; 1009c24b5dfaSDave Chinner int nimaps; 1010c24b5dfaSDave Chinner int quota_flag; 1011c24b5dfaSDave Chinner int rt; 1012c24b5dfaSDave Chinner xfs_trans_t *tp; 1013c24b5dfaSDave Chinner xfs_bmbt_irec_t imaps[1], *imapp; 1014c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 1015c24b5dfaSDave Chinner uint qblocks, resblks, resrtextents; 1016c24b5dfaSDave Chinner int committed; 1017c24b5dfaSDave Chinner int error; 1018c24b5dfaSDave Chinner 1019c24b5dfaSDave Chinner trace_xfs_alloc_file_space(ip); 1020c24b5dfaSDave Chinner 1021c24b5dfaSDave Chinner if (XFS_FORCED_SHUTDOWN(mp)) 1022c24b5dfaSDave Chinner return XFS_ERROR(EIO); 1023c24b5dfaSDave Chinner 1024c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 1025c24b5dfaSDave Chinner if (error) 1026c24b5dfaSDave Chinner return error; 1027c24b5dfaSDave Chinner 1028c24b5dfaSDave Chinner if (len <= 0) 1029c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1030c24b5dfaSDave Chinner 1031c24b5dfaSDave Chinner rt = XFS_IS_REALTIME_INODE(ip); 1032c24b5dfaSDave Chinner extsz = xfs_get_extsz_hint(ip); 1033c24b5dfaSDave Chinner 1034c24b5dfaSDave Chinner count = len; 1035c24b5dfaSDave Chinner imapp = &imaps[0]; 1036c24b5dfaSDave Chinner nimaps = 1; 1037c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 1038c24b5dfaSDave Chinner allocatesize_fsb = XFS_B_TO_FSB(mp, count); 1039c24b5dfaSDave Chinner 1040c24b5dfaSDave Chinner /* 1041c24b5dfaSDave Chinner * Allocate file space until done or until there is an error 1042c24b5dfaSDave Chinner */ 1043c24b5dfaSDave Chinner while (allocatesize_fsb && !error) { 1044c24b5dfaSDave Chinner xfs_fileoff_t s, e; 1045c24b5dfaSDave Chinner 1046c24b5dfaSDave Chinner /* 1047c24b5dfaSDave Chinner * Determine space reservations for data/realtime. 1048c24b5dfaSDave Chinner */ 1049c24b5dfaSDave Chinner if (unlikely(extsz)) { 1050c24b5dfaSDave Chinner s = startoffset_fsb; 1051c24b5dfaSDave Chinner do_div(s, extsz); 1052c24b5dfaSDave Chinner s *= extsz; 1053c24b5dfaSDave Chinner e = startoffset_fsb + allocatesize_fsb; 1054c24b5dfaSDave Chinner if ((temp = do_mod(startoffset_fsb, extsz))) 1055c24b5dfaSDave Chinner e += temp; 1056c24b5dfaSDave Chinner if ((temp = do_mod(e, extsz))) 1057c24b5dfaSDave Chinner e += extsz - temp; 1058c24b5dfaSDave Chinner } else { 1059c24b5dfaSDave Chinner s = 0; 1060c24b5dfaSDave Chinner e = allocatesize_fsb; 1061c24b5dfaSDave Chinner } 1062c24b5dfaSDave Chinner 1063c24b5dfaSDave Chinner /* 1064c24b5dfaSDave Chinner * The transaction reservation is limited to a 32-bit block 1065c24b5dfaSDave Chinner * count, hence we need to limit the number of blocks we are 1066c24b5dfaSDave Chinner * trying to reserve to avoid an overflow. We can't allocate 1067c24b5dfaSDave Chinner * more than @nimaps extents, and an extent is limited on disk 1068c24b5dfaSDave Chinner * to MAXEXTLEN (21 bits), so use that to enforce the limit. 1069c24b5dfaSDave Chinner */ 1070c24b5dfaSDave Chinner resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); 1071c24b5dfaSDave Chinner if (unlikely(rt)) { 1072c24b5dfaSDave Chinner resrtextents = qblocks = resblks; 1073c24b5dfaSDave Chinner resrtextents /= mp->m_sb.sb_rextsize; 1074c24b5dfaSDave Chinner resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1075c24b5dfaSDave Chinner quota_flag = XFS_QMOPT_RES_RTBLKS; 1076c24b5dfaSDave Chinner } else { 1077c24b5dfaSDave Chinner resrtextents = 0; 1078c24b5dfaSDave Chinner resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); 1079c24b5dfaSDave Chinner quota_flag = XFS_QMOPT_RES_REGBLKS; 1080c24b5dfaSDave Chinner } 1081c24b5dfaSDave Chinner 1082c24b5dfaSDave Chinner /* 1083c24b5dfaSDave Chinner * Allocate and setup the transaction. 1084c24b5dfaSDave Chinner */ 1085c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 10863d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 10873d3c8b52SJie Liu resblks, resrtextents); 1088c24b5dfaSDave Chinner /* 1089c24b5dfaSDave Chinner * Check for running out of space 1090c24b5dfaSDave Chinner */ 1091c24b5dfaSDave Chinner if (error) { 1092c24b5dfaSDave Chinner /* 1093c24b5dfaSDave Chinner * Free the transaction structure. 1094c24b5dfaSDave Chinner */ 1095c24b5dfaSDave Chinner ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1096c24b5dfaSDave Chinner xfs_trans_cancel(tp, 0); 1097c24b5dfaSDave Chinner break; 1098c24b5dfaSDave Chinner } 1099c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 1100c24b5dfaSDave Chinner error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 1101c24b5dfaSDave Chinner 0, quota_flag); 1102c24b5dfaSDave Chinner if (error) 1103c24b5dfaSDave Chinner goto error1; 1104c24b5dfaSDave Chinner 1105c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 1106c24b5dfaSDave Chinner 1107c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &firstfsb); 1108c24b5dfaSDave Chinner error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1109c24b5dfaSDave Chinner allocatesize_fsb, alloc_type, &firstfsb, 1110c24b5dfaSDave Chinner 0, imapp, &nimaps, &free_list); 1111c24b5dfaSDave Chinner if (error) { 1112c24b5dfaSDave Chinner goto error0; 1113c24b5dfaSDave Chinner } 1114c24b5dfaSDave Chinner 1115c24b5dfaSDave Chinner /* 1116c24b5dfaSDave Chinner * Complete the transaction 1117c24b5dfaSDave Chinner */ 1118c24b5dfaSDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 1119c24b5dfaSDave Chinner if (error) { 1120c24b5dfaSDave Chinner goto error0; 1121c24b5dfaSDave Chinner } 1122c24b5dfaSDave Chinner 1123c24b5dfaSDave Chinner error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1124c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1125c24b5dfaSDave Chinner if (error) { 1126c24b5dfaSDave Chinner break; 1127c24b5dfaSDave Chinner } 1128c24b5dfaSDave Chinner 1129c24b5dfaSDave Chinner allocated_fsb = imapp->br_blockcount; 1130c24b5dfaSDave Chinner 1131c24b5dfaSDave Chinner if (nimaps == 0) { 1132c24b5dfaSDave Chinner error = XFS_ERROR(ENOSPC); 1133c24b5dfaSDave Chinner break; 1134c24b5dfaSDave Chinner } 1135c24b5dfaSDave Chinner 1136c24b5dfaSDave Chinner startoffset_fsb += allocated_fsb; 1137c24b5dfaSDave Chinner allocatesize_fsb -= allocated_fsb; 1138c24b5dfaSDave Chinner } 1139c24b5dfaSDave Chinner 1140c24b5dfaSDave Chinner return error; 1141c24b5dfaSDave Chinner 1142c24b5dfaSDave Chinner error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ 1143c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1144c24b5dfaSDave Chinner xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1145c24b5dfaSDave Chinner 1146c24b5dfaSDave Chinner error1: /* Just cancel transaction */ 1147c24b5dfaSDave Chinner xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1148c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1149c24b5dfaSDave Chinner return error; 1150c24b5dfaSDave Chinner } 1151c24b5dfaSDave Chinner 1152c24b5dfaSDave Chinner /* 1153c24b5dfaSDave Chinner * Zero file bytes between startoff and endoff inclusive. 1154c24b5dfaSDave Chinner * The iolock is held exclusive and no blocks are buffered. 1155c24b5dfaSDave Chinner * 1156c24b5dfaSDave Chinner * This function is used by xfs_free_file_space() to zero 1157c24b5dfaSDave Chinner * partial blocks when the range to free is not block aligned. 1158c24b5dfaSDave Chinner * When unreserving space with boundaries that are not block 1159c24b5dfaSDave Chinner * aligned we round up the start and round down the end 1160c24b5dfaSDave Chinner * boundaries and then use this function to zero the parts of 1161c24b5dfaSDave Chinner * the blocks that got dropped during the rounding. 1162c24b5dfaSDave Chinner */ 1163c24b5dfaSDave Chinner STATIC int 1164c24b5dfaSDave Chinner xfs_zero_remaining_bytes( 1165c24b5dfaSDave Chinner xfs_inode_t *ip, 1166c24b5dfaSDave Chinner xfs_off_t startoff, 1167c24b5dfaSDave Chinner xfs_off_t endoff) 1168c24b5dfaSDave Chinner { 1169c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 1170c24b5dfaSDave Chinner xfs_fileoff_t offset_fsb; 1171c24b5dfaSDave Chinner xfs_off_t lastoffset; 1172c24b5dfaSDave Chinner xfs_off_t offset; 1173c24b5dfaSDave Chinner xfs_buf_t *bp; 1174c24b5dfaSDave Chinner xfs_mount_t *mp = ip->i_mount; 1175c24b5dfaSDave Chinner int nimap; 1176c24b5dfaSDave Chinner int error = 0; 1177c24b5dfaSDave Chinner 1178c24b5dfaSDave Chinner /* 1179c24b5dfaSDave Chinner * Avoid doing I/O beyond eof - it's not necessary 1180c24b5dfaSDave Chinner * since nothing can read beyond eof. The space will 1181c24b5dfaSDave Chinner * be zeroed when the file is extended anyway. 1182c24b5dfaSDave Chinner */ 1183c24b5dfaSDave Chinner if (startoff >= XFS_ISIZE(ip)) 1184c24b5dfaSDave Chinner return 0; 1185c24b5dfaSDave Chinner 1186c24b5dfaSDave Chinner if (endoff > XFS_ISIZE(ip)) 1187c24b5dfaSDave Chinner endoff = XFS_ISIZE(ip); 1188c24b5dfaSDave Chinner 1189c24b5dfaSDave Chinner bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? 1190c24b5dfaSDave Chinner mp->m_rtdev_targp : mp->m_ddev_targp, 1191c24b5dfaSDave Chinner BTOBB(mp->m_sb.sb_blocksize), 0); 1192c24b5dfaSDave Chinner if (!bp) 1193c24b5dfaSDave Chinner return XFS_ERROR(ENOMEM); 1194c24b5dfaSDave Chinner 1195c24b5dfaSDave Chinner xfs_buf_unlock(bp); 1196c24b5dfaSDave Chinner 1197c24b5dfaSDave Chinner for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 1198c24b5dfaSDave Chinner offset_fsb = XFS_B_TO_FSBT(mp, offset); 1199c24b5dfaSDave Chinner nimap = 1; 1200c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); 1201c24b5dfaSDave Chinner if (error || nimap < 1) 1202c24b5dfaSDave Chinner break; 1203c24b5dfaSDave Chinner ASSERT(imap.br_blockcount >= 1); 1204c24b5dfaSDave Chinner ASSERT(imap.br_startoff == offset_fsb); 1205c24b5dfaSDave Chinner lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 1206c24b5dfaSDave Chinner if (lastoffset > endoff) 1207c24b5dfaSDave Chinner lastoffset = endoff; 1208c24b5dfaSDave Chinner if (imap.br_startblock == HOLESTARTBLOCK) 1209c24b5dfaSDave Chinner continue; 1210c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1211c24b5dfaSDave Chinner if (imap.br_state == XFS_EXT_UNWRITTEN) 1212c24b5dfaSDave Chinner continue; 1213c24b5dfaSDave Chinner XFS_BUF_UNDONE(bp); 1214c24b5dfaSDave Chinner XFS_BUF_UNWRITE(bp); 1215c24b5dfaSDave Chinner XFS_BUF_READ(bp); 1216c24b5dfaSDave Chinner XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); 1217c24b5dfaSDave Chinner xfsbdstrat(mp, bp); 1218c24b5dfaSDave Chinner error = xfs_buf_iowait(bp); 1219c24b5dfaSDave Chinner if (error) { 1220c24b5dfaSDave Chinner xfs_buf_ioerror_alert(bp, 1221c24b5dfaSDave Chinner "xfs_zero_remaining_bytes(read)"); 1222c24b5dfaSDave Chinner break; 1223c24b5dfaSDave Chinner } 1224c24b5dfaSDave Chinner memset(bp->b_addr + 1225c24b5dfaSDave Chinner (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 1226c24b5dfaSDave Chinner 0, lastoffset - offset + 1); 1227c24b5dfaSDave Chinner XFS_BUF_UNDONE(bp); 1228c24b5dfaSDave Chinner XFS_BUF_UNREAD(bp); 1229c24b5dfaSDave Chinner XFS_BUF_WRITE(bp); 1230c24b5dfaSDave Chinner xfsbdstrat(mp, bp); 1231c24b5dfaSDave Chinner error = xfs_buf_iowait(bp); 1232c24b5dfaSDave Chinner if (error) { 1233c24b5dfaSDave Chinner xfs_buf_ioerror_alert(bp, 1234c24b5dfaSDave Chinner "xfs_zero_remaining_bytes(write)"); 1235c24b5dfaSDave Chinner break; 1236c24b5dfaSDave Chinner } 1237c24b5dfaSDave Chinner } 1238c24b5dfaSDave Chinner xfs_buf_free(bp); 1239c24b5dfaSDave Chinner return error; 1240c24b5dfaSDave Chinner } 1241c24b5dfaSDave Chinner 1242c24b5dfaSDave Chinner /* 1243c24b5dfaSDave Chinner * xfs_free_file_space() 1244c24b5dfaSDave Chinner * This routine frees disk space for the given file. 1245c24b5dfaSDave Chinner * 1246c24b5dfaSDave Chinner * This routine is only called by xfs_change_file_space 1247c24b5dfaSDave Chinner * for an UNRESVSP type call. 1248c24b5dfaSDave Chinner * 1249c24b5dfaSDave Chinner * RETURNS: 1250c24b5dfaSDave Chinner * 0 on success 1251c24b5dfaSDave Chinner * errno on error 1252c24b5dfaSDave Chinner * 1253c24b5dfaSDave Chinner */ 1254c24b5dfaSDave Chinner STATIC int 1255c24b5dfaSDave Chinner xfs_free_file_space( 1256c24b5dfaSDave Chinner xfs_inode_t *ip, 1257c24b5dfaSDave Chinner xfs_off_t offset, 1258c24b5dfaSDave Chinner xfs_off_t len, 1259c24b5dfaSDave Chinner int attr_flags) 1260c24b5dfaSDave Chinner { 1261c24b5dfaSDave Chinner int committed; 1262c24b5dfaSDave Chinner int done; 1263c24b5dfaSDave Chinner xfs_fileoff_t endoffset_fsb; 1264c24b5dfaSDave Chinner int error; 1265c24b5dfaSDave Chinner xfs_fsblock_t firstfsb; 1266c24b5dfaSDave Chinner xfs_bmap_free_t free_list; 1267c24b5dfaSDave Chinner xfs_bmbt_irec_t imap; 1268c24b5dfaSDave Chinner xfs_off_t ioffset; 1269c24b5dfaSDave Chinner xfs_extlen_t mod=0; 1270c24b5dfaSDave Chinner xfs_mount_t *mp; 1271c24b5dfaSDave Chinner int nimap; 1272c24b5dfaSDave Chinner uint resblks; 1273c24b5dfaSDave Chinner xfs_off_t rounding; 1274c24b5dfaSDave Chinner int rt; 1275c24b5dfaSDave Chinner xfs_fileoff_t startoffset_fsb; 1276c24b5dfaSDave Chinner xfs_trans_t *tp; 1277c24b5dfaSDave Chinner int need_iolock = 1; 1278c24b5dfaSDave Chinner 1279c24b5dfaSDave Chinner mp = ip->i_mount; 1280c24b5dfaSDave Chinner 1281c24b5dfaSDave Chinner trace_xfs_free_file_space(ip); 1282c24b5dfaSDave Chinner 1283c24b5dfaSDave Chinner error = xfs_qm_dqattach(ip, 0); 1284c24b5dfaSDave Chinner if (error) 1285c24b5dfaSDave Chinner return error; 1286c24b5dfaSDave Chinner 1287c24b5dfaSDave Chinner error = 0; 1288c24b5dfaSDave Chinner if (len <= 0) /* if nothing being freed */ 1289c24b5dfaSDave Chinner return error; 1290c24b5dfaSDave Chinner rt = XFS_IS_REALTIME_INODE(ip); 1291c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSB(mp, offset); 1292c24b5dfaSDave Chinner endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); 1293c24b5dfaSDave Chinner 1294c24b5dfaSDave Chinner if (attr_flags & XFS_ATTR_NOLOCK) 1295c24b5dfaSDave Chinner need_iolock = 0; 1296c24b5dfaSDave Chinner if (need_iolock) { 1297c24b5dfaSDave Chinner xfs_ilock(ip, XFS_IOLOCK_EXCL); 1298c24b5dfaSDave Chinner /* wait for the completion of any pending DIOs */ 1299c24b5dfaSDave Chinner inode_dio_wait(VFS_I(ip)); 1300c24b5dfaSDave Chinner } 1301c24b5dfaSDave Chinner 1302c24b5dfaSDave Chinner rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1303c24b5dfaSDave Chinner ioffset = offset & ~(rounding - 1); 1304c24b5dfaSDave Chinner error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1305c24b5dfaSDave Chinner ioffset, -1); 1306c24b5dfaSDave Chinner if (error) 1307c24b5dfaSDave Chinner goto out_unlock_iolock; 1308c24b5dfaSDave Chinner truncate_pagecache_range(VFS_I(ip), ioffset, -1); 1309c24b5dfaSDave Chinner 1310c24b5dfaSDave Chinner /* 1311c24b5dfaSDave Chinner * Need to zero the stuff we're not freeing, on disk. 1312c24b5dfaSDave Chinner * If it's a realtime file & can't use unwritten extents then we 1313c24b5dfaSDave Chinner * actually need to zero the extent edges. Otherwise xfs_bunmapi 1314c24b5dfaSDave Chinner * will take care of it for us. 1315c24b5dfaSDave Chinner */ 1316c24b5dfaSDave Chinner if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 1317c24b5dfaSDave Chinner nimap = 1; 1318c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, startoffset_fsb, 1, 1319c24b5dfaSDave Chinner &imap, &nimap, 0); 1320c24b5dfaSDave Chinner if (error) 1321c24b5dfaSDave Chinner goto out_unlock_iolock; 1322c24b5dfaSDave Chinner ASSERT(nimap == 0 || nimap == 1); 1323c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1324c24b5dfaSDave Chinner xfs_daddr_t block; 1325c24b5dfaSDave Chinner 1326c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1327c24b5dfaSDave Chinner block = imap.br_startblock; 1328c24b5dfaSDave Chinner mod = do_div(block, mp->m_sb.sb_rextsize); 1329c24b5dfaSDave Chinner if (mod) 1330c24b5dfaSDave Chinner startoffset_fsb += mp->m_sb.sb_rextsize - mod; 1331c24b5dfaSDave Chinner } 1332c24b5dfaSDave Chinner nimap = 1; 1333c24b5dfaSDave Chinner error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, 1334c24b5dfaSDave Chinner &imap, &nimap, 0); 1335c24b5dfaSDave Chinner if (error) 1336c24b5dfaSDave Chinner goto out_unlock_iolock; 1337c24b5dfaSDave Chinner ASSERT(nimap == 0 || nimap == 1); 1338c24b5dfaSDave Chinner if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1339c24b5dfaSDave Chinner ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1340c24b5dfaSDave Chinner mod++; 1341c24b5dfaSDave Chinner if (mod && (mod != mp->m_sb.sb_rextsize)) 1342c24b5dfaSDave Chinner endoffset_fsb -= mod; 1343c24b5dfaSDave Chinner } 1344c24b5dfaSDave Chinner } 1345c24b5dfaSDave Chinner if ((done = (endoffset_fsb <= startoffset_fsb))) 1346c24b5dfaSDave Chinner /* 1347c24b5dfaSDave Chinner * One contiguous piece to clear 1348c24b5dfaSDave Chinner */ 1349c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); 1350c24b5dfaSDave Chinner else { 1351c24b5dfaSDave Chinner /* 1352c24b5dfaSDave Chinner * Some full blocks, possibly two pieces to clear 1353c24b5dfaSDave Chinner */ 1354c24b5dfaSDave Chinner if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) 1355c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, offset, 1356c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, startoffset_fsb) - 1); 1357c24b5dfaSDave Chinner if (!error && 1358c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) 1359c24b5dfaSDave Chinner error = xfs_zero_remaining_bytes(ip, 1360c24b5dfaSDave Chinner XFS_FSB_TO_B(mp, endoffset_fsb), 1361c24b5dfaSDave Chinner offset + len - 1); 1362c24b5dfaSDave Chinner } 1363c24b5dfaSDave Chinner 1364c24b5dfaSDave Chinner /* 1365c24b5dfaSDave Chinner * free file space until done or until there is an error 1366c24b5dfaSDave Chinner */ 1367c24b5dfaSDave Chinner resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 1368c24b5dfaSDave Chinner while (!error && !done) { 1369c24b5dfaSDave Chinner 1370c24b5dfaSDave Chinner /* 1371c24b5dfaSDave Chinner * allocate and setup the transaction. Allow this 1372c24b5dfaSDave Chinner * transaction to dip into the reserve blocks to ensure 1373c24b5dfaSDave Chinner * the freeing of the space succeeds at ENOSPC. 1374c24b5dfaSDave Chinner */ 1375c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1376c24b5dfaSDave Chinner tp->t_flags |= XFS_TRANS_RESERVE; 13773d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1378c24b5dfaSDave Chinner 1379c24b5dfaSDave Chinner /* 1380c24b5dfaSDave Chinner * check for running out of space 1381c24b5dfaSDave Chinner */ 1382c24b5dfaSDave Chinner if (error) { 1383c24b5dfaSDave Chinner /* 1384c24b5dfaSDave Chinner * Free the transaction structure. 1385c24b5dfaSDave Chinner */ 1386c24b5dfaSDave Chinner ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1387c24b5dfaSDave Chinner xfs_trans_cancel(tp, 0); 1388c24b5dfaSDave Chinner break; 1389c24b5dfaSDave Chinner } 1390c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 1391c24b5dfaSDave Chinner error = xfs_trans_reserve_quota(tp, mp, 1392c24b5dfaSDave Chinner ip->i_udquot, ip->i_gdquot, ip->i_pdquot, 1393c24b5dfaSDave Chinner resblks, 0, XFS_QMOPT_RES_REGBLKS); 1394c24b5dfaSDave Chinner if (error) 1395c24b5dfaSDave Chinner goto error1; 1396c24b5dfaSDave Chinner 1397c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, 0); 1398c24b5dfaSDave Chinner 1399c24b5dfaSDave Chinner /* 1400c24b5dfaSDave Chinner * issue the bunmapi() call to free the blocks 1401c24b5dfaSDave Chinner */ 1402c24b5dfaSDave Chinner xfs_bmap_init(&free_list, &firstfsb); 1403c24b5dfaSDave Chinner error = xfs_bunmapi(tp, ip, startoffset_fsb, 1404c24b5dfaSDave Chinner endoffset_fsb - startoffset_fsb, 1405c24b5dfaSDave Chinner 0, 2, &firstfsb, &free_list, &done); 1406c24b5dfaSDave Chinner if (error) { 1407c24b5dfaSDave Chinner goto error0; 1408c24b5dfaSDave Chinner } 1409c24b5dfaSDave Chinner 1410c24b5dfaSDave Chinner /* 1411c24b5dfaSDave Chinner * complete the transaction 1412c24b5dfaSDave Chinner */ 1413c24b5dfaSDave Chinner error = xfs_bmap_finish(&tp, &free_list, &committed); 1414c24b5dfaSDave Chinner if (error) { 1415c24b5dfaSDave Chinner goto error0; 1416c24b5dfaSDave Chinner } 1417c24b5dfaSDave Chinner 1418c24b5dfaSDave Chinner error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1419c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1420c24b5dfaSDave Chinner } 1421c24b5dfaSDave Chinner 1422c24b5dfaSDave Chinner out_unlock_iolock: 1423c24b5dfaSDave Chinner if (need_iolock) 1424c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1425c24b5dfaSDave Chinner return error; 1426c24b5dfaSDave Chinner 1427c24b5dfaSDave Chinner error0: 1428c24b5dfaSDave Chinner xfs_bmap_cancel(&free_list); 1429c24b5dfaSDave Chinner error1: 1430c24b5dfaSDave Chinner xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1431c24b5dfaSDave Chinner xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : 1432c24b5dfaSDave Chinner XFS_ILOCK_EXCL); 1433c24b5dfaSDave Chinner return error; 1434c24b5dfaSDave Chinner } 1435c24b5dfaSDave Chinner 1436c24b5dfaSDave Chinner 1437c24b5dfaSDave Chinner STATIC int 1438c24b5dfaSDave Chinner xfs_zero_file_space( 1439c24b5dfaSDave Chinner struct xfs_inode *ip, 1440c24b5dfaSDave Chinner xfs_off_t offset, 1441c24b5dfaSDave Chinner xfs_off_t len, 1442c24b5dfaSDave Chinner int attr_flags) 1443c24b5dfaSDave Chinner { 1444c24b5dfaSDave Chinner struct xfs_mount *mp = ip->i_mount; 1445c24b5dfaSDave Chinner uint granularity; 1446c24b5dfaSDave Chinner xfs_off_t start_boundary; 1447c24b5dfaSDave Chinner xfs_off_t end_boundary; 1448c24b5dfaSDave Chinner int error; 1449c24b5dfaSDave Chinner 1450c24b5dfaSDave Chinner granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1451c24b5dfaSDave Chinner 1452c24b5dfaSDave Chinner /* 1453c24b5dfaSDave Chinner * Round the range of extents we are going to convert inwards. If the 1454c24b5dfaSDave Chinner * offset is aligned, then it doesn't get changed so we zero from the 1455c24b5dfaSDave Chinner * start of the block offset points to. 1456c24b5dfaSDave Chinner */ 1457c24b5dfaSDave Chinner start_boundary = round_up(offset, granularity); 1458c24b5dfaSDave Chinner end_boundary = round_down(offset + len, granularity); 1459c24b5dfaSDave Chinner 1460c24b5dfaSDave Chinner ASSERT(start_boundary >= offset); 1461c24b5dfaSDave Chinner ASSERT(end_boundary <= offset + len); 1462c24b5dfaSDave Chinner 1463c24b5dfaSDave Chinner if (!(attr_flags & XFS_ATTR_NOLOCK)) 1464c24b5dfaSDave Chinner xfs_ilock(ip, XFS_IOLOCK_EXCL); 1465c24b5dfaSDave Chinner 1466c24b5dfaSDave Chinner if (start_boundary < end_boundary - 1) { 1467c24b5dfaSDave Chinner /* punch out the page cache over the conversion range */ 1468c24b5dfaSDave Chinner truncate_pagecache_range(VFS_I(ip), start_boundary, 1469c24b5dfaSDave Chinner end_boundary - 1); 1470c24b5dfaSDave Chinner /* convert the blocks */ 1471c24b5dfaSDave Chinner error = xfs_alloc_file_space(ip, start_boundary, 1472c24b5dfaSDave Chinner end_boundary - start_boundary - 1, 1473c24b5dfaSDave Chinner XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT, 1474c24b5dfaSDave Chinner attr_flags); 1475c24b5dfaSDave Chinner if (error) 1476c24b5dfaSDave Chinner goto out_unlock; 1477c24b5dfaSDave Chinner 1478c24b5dfaSDave Chinner /* We've handled the interior of the range, now for the edges */ 1479c24b5dfaSDave Chinner if (start_boundary != offset) 1480c24b5dfaSDave Chinner error = xfs_iozero(ip, offset, start_boundary - offset); 1481c24b5dfaSDave Chinner if (error) 1482c24b5dfaSDave Chinner goto out_unlock; 1483c24b5dfaSDave Chinner 1484c24b5dfaSDave Chinner if (end_boundary != offset + len) 1485c24b5dfaSDave Chinner error = xfs_iozero(ip, end_boundary, 1486c24b5dfaSDave Chinner offset + len - end_boundary); 1487c24b5dfaSDave Chinner 1488c24b5dfaSDave Chinner } else { 1489c24b5dfaSDave Chinner /* 1490c24b5dfaSDave Chinner * It's either a sub-granularity range or the range spanned lies 1491c24b5dfaSDave Chinner * partially across two adjacent blocks. 1492c24b5dfaSDave Chinner */ 1493c24b5dfaSDave Chinner error = xfs_iozero(ip, offset, len); 1494c24b5dfaSDave Chinner } 1495c24b5dfaSDave Chinner 1496c24b5dfaSDave Chinner out_unlock: 1497c24b5dfaSDave Chinner if (!(attr_flags & XFS_ATTR_NOLOCK)) 1498c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1499c24b5dfaSDave Chinner return error; 1500c24b5dfaSDave Chinner 1501c24b5dfaSDave Chinner } 1502c24b5dfaSDave Chinner 1503c24b5dfaSDave Chinner /* 1504c24b5dfaSDave Chinner * xfs_change_file_space() 1505c24b5dfaSDave Chinner * This routine allocates or frees disk space for the given file. 1506c24b5dfaSDave Chinner * The user specified parameters are checked for alignment and size 1507c24b5dfaSDave Chinner * limitations. 1508c24b5dfaSDave Chinner * 1509c24b5dfaSDave Chinner * RETURNS: 1510c24b5dfaSDave Chinner * 0 on success 1511c24b5dfaSDave Chinner * errno on error 1512c24b5dfaSDave Chinner * 1513c24b5dfaSDave Chinner */ 1514c24b5dfaSDave Chinner int 1515c24b5dfaSDave Chinner xfs_change_file_space( 1516c24b5dfaSDave Chinner xfs_inode_t *ip, 1517c24b5dfaSDave Chinner int cmd, 1518c24b5dfaSDave Chinner xfs_flock64_t *bf, 1519c24b5dfaSDave Chinner xfs_off_t offset, 1520c24b5dfaSDave Chinner int attr_flags) 1521c24b5dfaSDave Chinner { 1522c24b5dfaSDave Chinner xfs_mount_t *mp = ip->i_mount; 1523c24b5dfaSDave Chinner int clrprealloc; 1524c24b5dfaSDave Chinner int error; 1525c24b5dfaSDave Chinner xfs_fsize_t fsize; 1526c24b5dfaSDave Chinner int setprealloc; 1527c24b5dfaSDave Chinner xfs_off_t startoffset; 1528c24b5dfaSDave Chinner xfs_trans_t *tp; 1529c24b5dfaSDave Chinner struct iattr iattr; 1530c24b5dfaSDave Chinner 1531c24b5dfaSDave Chinner if (!S_ISREG(ip->i_d.di_mode)) 1532c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1533c24b5dfaSDave Chinner 1534c24b5dfaSDave Chinner switch (bf->l_whence) { 1535c24b5dfaSDave Chinner case 0: /*SEEK_SET*/ 1536c24b5dfaSDave Chinner break; 1537c24b5dfaSDave Chinner case 1: /*SEEK_CUR*/ 1538c24b5dfaSDave Chinner bf->l_start += offset; 1539c24b5dfaSDave Chinner break; 1540c24b5dfaSDave Chinner case 2: /*SEEK_END*/ 1541c24b5dfaSDave Chinner bf->l_start += XFS_ISIZE(ip); 1542c24b5dfaSDave Chinner break; 1543c24b5dfaSDave Chinner default: 1544c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1545c24b5dfaSDave Chinner } 1546c24b5dfaSDave Chinner 1547c24b5dfaSDave Chinner /* 1548c24b5dfaSDave Chinner * length of <= 0 for resv/unresv/zero is invalid. length for 1549c24b5dfaSDave Chinner * alloc/free is ignored completely and we have no idea what userspace 1550c24b5dfaSDave Chinner * might have set it to, so set it to zero to allow range 1551c24b5dfaSDave Chinner * checks to pass. 1552c24b5dfaSDave Chinner */ 1553c24b5dfaSDave Chinner switch (cmd) { 1554c24b5dfaSDave Chinner case XFS_IOC_ZERO_RANGE: 1555c24b5dfaSDave Chinner case XFS_IOC_RESVSP: 1556c24b5dfaSDave Chinner case XFS_IOC_RESVSP64: 1557c24b5dfaSDave Chinner case XFS_IOC_UNRESVSP: 1558c24b5dfaSDave Chinner case XFS_IOC_UNRESVSP64: 1559c24b5dfaSDave Chinner if (bf->l_len <= 0) 1560c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1561c24b5dfaSDave Chinner break; 1562c24b5dfaSDave Chinner default: 1563c24b5dfaSDave Chinner bf->l_len = 0; 1564c24b5dfaSDave Chinner break; 1565c24b5dfaSDave Chinner } 1566c24b5dfaSDave Chinner 1567c24b5dfaSDave Chinner if (bf->l_start < 0 || 1568c24b5dfaSDave Chinner bf->l_start > mp->m_super->s_maxbytes || 1569c24b5dfaSDave Chinner bf->l_start + bf->l_len < 0 || 1570c24b5dfaSDave Chinner bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) 1571c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1572c24b5dfaSDave Chinner 1573c24b5dfaSDave Chinner bf->l_whence = 0; 1574c24b5dfaSDave Chinner 1575c24b5dfaSDave Chinner startoffset = bf->l_start; 1576c24b5dfaSDave Chinner fsize = XFS_ISIZE(ip); 1577c24b5dfaSDave Chinner 1578c24b5dfaSDave Chinner setprealloc = clrprealloc = 0; 1579c24b5dfaSDave Chinner switch (cmd) { 1580c24b5dfaSDave Chinner case XFS_IOC_ZERO_RANGE: 1581c24b5dfaSDave Chinner error = xfs_zero_file_space(ip, startoffset, bf->l_len, 1582c24b5dfaSDave Chinner attr_flags); 1583c24b5dfaSDave Chinner if (error) 1584c24b5dfaSDave Chinner return error; 1585c24b5dfaSDave Chinner setprealloc = 1; 1586c24b5dfaSDave Chinner break; 1587c24b5dfaSDave Chinner 1588c24b5dfaSDave Chinner case XFS_IOC_RESVSP: 1589c24b5dfaSDave Chinner case XFS_IOC_RESVSP64: 1590c24b5dfaSDave Chinner error = xfs_alloc_file_space(ip, startoffset, bf->l_len, 1591c24b5dfaSDave Chinner XFS_BMAPI_PREALLOC, attr_flags); 1592c24b5dfaSDave Chinner if (error) 1593c24b5dfaSDave Chinner return error; 1594c24b5dfaSDave Chinner setprealloc = 1; 1595c24b5dfaSDave Chinner break; 1596c24b5dfaSDave Chinner 1597c24b5dfaSDave Chinner case XFS_IOC_UNRESVSP: 1598c24b5dfaSDave Chinner case XFS_IOC_UNRESVSP64: 1599c24b5dfaSDave Chinner if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, 1600c24b5dfaSDave Chinner attr_flags))) 1601c24b5dfaSDave Chinner return error; 1602c24b5dfaSDave Chinner break; 1603c24b5dfaSDave Chinner 1604c24b5dfaSDave Chinner case XFS_IOC_ALLOCSP: 1605c24b5dfaSDave Chinner case XFS_IOC_ALLOCSP64: 1606c24b5dfaSDave Chinner case XFS_IOC_FREESP: 1607c24b5dfaSDave Chinner case XFS_IOC_FREESP64: 1608c24b5dfaSDave Chinner /* 1609c24b5dfaSDave Chinner * These operations actually do IO when extending the file, but 1610c24b5dfaSDave Chinner * the allocation is done seperately to the zeroing that is 1611c24b5dfaSDave Chinner * done. This set of operations need to be serialised against 1612c24b5dfaSDave Chinner * other IO operations, such as truncate and buffered IO. We 1613c24b5dfaSDave Chinner * need to take the IOLOCK here to serialise the allocation and 1614c24b5dfaSDave Chinner * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, 1615c24b5dfaSDave Chinner * truncate, direct IO) from racing against the transient 1616c24b5dfaSDave Chinner * allocated but not written state we can have here. 1617c24b5dfaSDave Chinner */ 1618c24b5dfaSDave Chinner xfs_ilock(ip, XFS_IOLOCK_EXCL); 1619c24b5dfaSDave Chinner if (startoffset > fsize) { 1620c24b5dfaSDave Chinner error = xfs_alloc_file_space(ip, fsize, 1621c24b5dfaSDave Chinner startoffset - fsize, 0, 1622c24b5dfaSDave Chinner attr_flags | XFS_ATTR_NOLOCK); 1623c24b5dfaSDave Chinner if (error) { 1624c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1625c24b5dfaSDave Chinner break; 1626c24b5dfaSDave Chinner } 1627c24b5dfaSDave Chinner } 1628c24b5dfaSDave Chinner 1629c24b5dfaSDave Chinner iattr.ia_valid = ATTR_SIZE; 1630c24b5dfaSDave Chinner iattr.ia_size = startoffset; 1631c24b5dfaSDave Chinner 1632c24b5dfaSDave Chinner error = xfs_setattr_size(ip, &iattr, 1633c24b5dfaSDave Chinner attr_flags | XFS_ATTR_NOLOCK); 1634c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1635c24b5dfaSDave Chinner 1636c24b5dfaSDave Chinner if (error) 1637c24b5dfaSDave Chinner return error; 1638c24b5dfaSDave Chinner 1639c24b5dfaSDave Chinner clrprealloc = 1; 1640c24b5dfaSDave Chinner break; 1641c24b5dfaSDave Chinner 1642c24b5dfaSDave Chinner default: 1643c24b5dfaSDave Chinner ASSERT(0); 1644c24b5dfaSDave Chinner return XFS_ERROR(EINVAL); 1645c24b5dfaSDave Chinner } 1646c24b5dfaSDave Chinner 1647c24b5dfaSDave Chinner /* 1648c24b5dfaSDave Chinner * update the inode timestamp, mode, and prealloc flag bits 1649c24b5dfaSDave Chinner */ 1650c24b5dfaSDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); 16513d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); 16523d3c8b52SJie Liu if (error) { 1653c24b5dfaSDave Chinner xfs_trans_cancel(tp, 0); 1654c24b5dfaSDave Chinner return error; 1655c24b5dfaSDave Chinner } 1656c24b5dfaSDave Chinner 1657c24b5dfaSDave Chinner xfs_ilock(ip, XFS_ILOCK_EXCL); 1658c24b5dfaSDave Chinner xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1659c24b5dfaSDave Chinner 1660c24b5dfaSDave Chinner if ((attr_flags & XFS_ATTR_DMI) == 0) { 1661c24b5dfaSDave Chinner ip->i_d.di_mode &= ~S_ISUID; 1662c24b5dfaSDave Chinner 1663c24b5dfaSDave Chinner /* 1664c24b5dfaSDave Chinner * Note that we don't have to worry about mandatory 1665c24b5dfaSDave Chinner * file locking being disabled here because we only 1666c24b5dfaSDave Chinner * clear the S_ISGID bit if the Group execute bit is 1667c24b5dfaSDave Chinner * on, but if it was on then mandatory locking wouldn't 1668c24b5dfaSDave Chinner * have been enabled. 1669c24b5dfaSDave Chinner */ 1670c24b5dfaSDave Chinner if (ip->i_d.di_mode & S_IXGRP) 1671c24b5dfaSDave Chinner ip->i_d.di_mode &= ~S_ISGID; 1672c24b5dfaSDave Chinner 1673c24b5dfaSDave Chinner xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1674c24b5dfaSDave Chinner } 1675c24b5dfaSDave Chinner if (setprealloc) 1676c24b5dfaSDave Chinner ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 1677c24b5dfaSDave Chinner else if (clrprealloc) 1678c24b5dfaSDave Chinner ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 1679c24b5dfaSDave Chinner 1680c24b5dfaSDave Chinner xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1681c24b5dfaSDave Chinner if (attr_flags & XFS_ATTR_SYNC) 1682c24b5dfaSDave Chinner xfs_trans_set_sync(tp); 1683c24b5dfaSDave Chinner return xfs_trans_commit(tp, 0); 1684c24b5dfaSDave Chinner } 1685a133d952SDave Chinner 1686a133d952SDave Chinner /* 1687a133d952SDave Chinner * We need to check that the format of the data fork in the temporary inode is 1688a133d952SDave Chinner * valid for the target inode before doing the swap. This is not a problem with 1689a133d952SDave Chinner * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1690a133d952SDave Chinner * data fork depending on the space the attribute fork is taking so we can get 1691a133d952SDave Chinner * invalid formats on the target inode. 1692a133d952SDave Chinner * 1693a133d952SDave Chinner * E.g. target has space for 7 extents in extent format, temp inode only has 1694a133d952SDave Chinner * space for 6. If we defragment down to 7 extents, then the tmp format is a 1695a133d952SDave Chinner * btree, but when swapped it needs to be in extent format. Hence we can't just 1696a133d952SDave Chinner * blindly swap data forks on attr2 filesystems. 1697a133d952SDave Chinner * 1698a133d952SDave Chinner * Note that we check the swap in both directions so that we don't end up with 1699a133d952SDave Chinner * a corrupt temporary inode, either. 1700a133d952SDave Chinner * 1701a133d952SDave Chinner * Note that fixing the way xfs_fsr sets up the attribute fork in the source 1702a133d952SDave Chinner * inode will prevent this situation from occurring, so all we do here is 1703a133d952SDave Chinner * reject and log the attempt. basically we are putting the responsibility on 1704a133d952SDave Chinner * userspace to get this right. 1705a133d952SDave Chinner */ 1706a133d952SDave Chinner static int 1707a133d952SDave Chinner xfs_swap_extents_check_format( 1708a133d952SDave Chinner xfs_inode_t *ip, /* target inode */ 1709a133d952SDave Chinner xfs_inode_t *tip) /* tmp inode */ 1710a133d952SDave Chinner { 1711a133d952SDave Chinner 1712a133d952SDave Chinner /* Should never get a local format */ 1713a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || 1714a133d952SDave Chinner tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) 1715a133d952SDave Chinner return EINVAL; 1716a133d952SDave Chinner 1717a133d952SDave Chinner /* 1718a133d952SDave Chinner * if the target inode has less extents that then temporary inode then 1719a133d952SDave Chinner * why did userspace call us? 1720a133d952SDave Chinner */ 1721a133d952SDave Chinner if (ip->i_d.di_nextents < tip->i_d.di_nextents) 1722a133d952SDave Chinner return EINVAL; 1723a133d952SDave Chinner 1724a133d952SDave Chinner /* 1725a133d952SDave Chinner * if the target inode is in extent form and the temp inode is in btree 1726a133d952SDave Chinner * form then we will end up with the target inode in the wrong format 1727a133d952SDave Chinner * as we already know there are less extents in the temp inode. 1728a133d952SDave Chinner */ 1729a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1730a133d952SDave Chinner tip->i_d.di_format == XFS_DINODE_FMT_BTREE) 1731a133d952SDave Chinner return EINVAL; 1732a133d952SDave Chinner 1733a133d952SDave Chinner /* Check temp in extent form to max in target */ 1734a133d952SDave Chinner if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1735a133d952SDave Chinner XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > 1736a133d952SDave Chinner XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1737a133d952SDave Chinner return EINVAL; 1738a133d952SDave Chinner 1739a133d952SDave Chinner /* Check target in extent form to max in temp */ 1740a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1741a133d952SDave Chinner XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > 1742a133d952SDave Chinner XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1743a133d952SDave Chinner return EINVAL; 1744a133d952SDave Chinner 1745a133d952SDave Chinner /* 1746a133d952SDave Chinner * If we are in a btree format, check that the temp root block will fit 1747a133d952SDave Chinner * in the target and that it has enough extents to be in btree format 1748a133d952SDave Chinner * in the target. 1749a133d952SDave Chinner * 1750a133d952SDave Chinner * Note that we have to be careful to allow btree->extent conversions 1751a133d952SDave Chinner * (a common defrag case) which will occur when the temp inode is in 1752a133d952SDave Chinner * extent format... 1753a133d952SDave Chinner */ 1754a133d952SDave Chinner if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1755a133d952SDave Chinner if (XFS_IFORK_BOFF(ip) && 1756a133d952SDave Chinner XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip)) 1757a133d952SDave Chinner return EINVAL; 1758a133d952SDave Chinner if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= 1759a133d952SDave Chinner XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) 1760a133d952SDave Chinner return EINVAL; 1761a133d952SDave Chinner } 1762a133d952SDave Chinner 1763a133d952SDave Chinner /* Reciprocal target->temp btree format checks */ 1764a133d952SDave Chinner if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { 1765a133d952SDave Chinner if (XFS_IFORK_BOFF(tip) && 1766a133d952SDave Chinner XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip)) 1767a133d952SDave Chinner return EINVAL; 1768a133d952SDave Chinner if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= 1769a133d952SDave Chinner XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK)) 1770a133d952SDave Chinner return EINVAL; 1771a133d952SDave Chinner } 1772a133d952SDave Chinner 1773a133d952SDave Chinner return 0; 1774a133d952SDave Chinner } 1775a133d952SDave Chinner 1776a133d952SDave Chinner int 1777a133d952SDave Chinner xfs_swap_extents( 1778a133d952SDave Chinner xfs_inode_t *ip, /* target inode */ 1779a133d952SDave Chinner xfs_inode_t *tip, /* tmp inode */ 1780a133d952SDave Chinner xfs_swapext_t *sxp) 1781a133d952SDave Chinner { 1782a133d952SDave Chinner xfs_mount_t *mp = ip->i_mount; 1783a133d952SDave Chinner xfs_trans_t *tp; 1784a133d952SDave Chinner xfs_bstat_t *sbp = &sxp->sx_stat; 1785a133d952SDave Chinner xfs_ifork_t *tempifp, *ifp, *tifp; 1786a133d952SDave Chinner int src_log_flags, target_log_flags; 1787a133d952SDave Chinner int error = 0; 1788a133d952SDave Chinner int aforkblks = 0; 1789a133d952SDave Chinner int taforkblks = 0; 1790a133d952SDave Chinner __uint64_t tmp; 1791a133d952SDave Chinner 1792a133d952SDave Chinner /* 1793a133d952SDave Chinner * We have no way of updating owner information in the BMBT blocks for 1794a133d952SDave Chinner * each inode on CRC enabled filesystems, so to avoid corrupting the 1795a133d952SDave Chinner * this metadata we simply don't allow extent swaps to occur. 1796a133d952SDave Chinner */ 1797a133d952SDave Chinner if (xfs_sb_version_hascrc(&mp->m_sb)) 1798a133d952SDave Chinner return XFS_ERROR(EINVAL); 1799a133d952SDave Chinner 1800a133d952SDave Chinner tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 1801a133d952SDave Chinner if (!tempifp) { 1802a133d952SDave Chinner error = XFS_ERROR(ENOMEM); 1803a133d952SDave Chinner goto out; 1804a133d952SDave Chinner } 1805a133d952SDave Chinner 1806a133d952SDave Chinner /* 1807a133d952SDave Chinner * we have to do two separate lock calls here to keep lockdep 1808a133d952SDave Chinner * happy. If we try to get all the locks in one call, lock will 1809a133d952SDave Chinner * report false positives when we drop the ILOCK and regain them 1810a133d952SDave Chinner * below. 1811a133d952SDave Chinner */ 1812a133d952SDave Chinner xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1813a133d952SDave Chinner xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1814a133d952SDave Chinner 1815a133d952SDave Chinner /* Verify that both files have the same format */ 1816a133d952SDave Chinner if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1817a133d952SDave Chinner error = XFS_ERROR(EINVAL); 1818a133d952SDave Chinner goto out_unlock; 1819a133d952SDave Chinner } 1820a133d952SDave Chinner 1821a133d952SDave Chinner /* Verify both files are either real-time or non-realtime */ 1822a133d952SDave Chinner if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { 1823a133d952SDave Chinner error = XFS_ERROR(EINVAL); 1824a133d952SDave Chinner goto out_unlock; 1825a133d952SDave Chinner } 1826a133d952SDave Chinner 1827a133d952SDave Chinner error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); 1828a133d952SDave Chinner if (error) 1829a133d952SDave Chinner goto out_unlock; 1830a133d952SDave Chinner truncate_pagecache_range(VFS_I(tip), 0, -1); 1831a133d952SDave Chinner 1832a133d952SDave Chinner /* Verify O_DIRECT for ftmp */ 1833a133d952SDave Chinner if (VN_CACHED(VFS_I(tip)) != 0) { 1834a133d952SDave Chinner error = XFS_ERROR(EINVAL); 1835a133d952SDave Chinner goto out_unlock; 1836a133d952SDave Chinner } 1837a133d952SDave Chinner 1838a133d952SDave Chinner /* Verify all data are being swapped */ 1839a133d952SDave Chinner if (sxp->sx_offset != 0 || 1840a133d952SDave Chinner sxp->sx_length != ip->i_d.di_size || 1841a133d952SDave Chinner sxp->sx_length != tip->i_d.di_size) { 1842a133d952SDave Chinner error = XFS_ERROR(EFAULT); 1843a133d952SDave Chinner goto out_unlock; 1844a133d952SDave Chinner } 1845a133d952SDave Chinner 1846a133d952SDave Chinner trace_xfs_swap_extent_before(ip, 0); 1847a133d952SDave Chinner trace_xfs_swap_extent_before(tip, 1); 1848a133d952SDave Chinner 1849a133d952SDave Chinner /* check inode formats now that data is flushed */ 1850a133d952SDave Chinner error = xfs_swap_extents_check_format(ip, tip); 1851a133d952SDave Chinner if (error) { 1852a133d952SDave Chinner xfs_notice(mp, 1853a133d952SDave Chinner "%s: inode 0x%llx format is incompatible for exchanging.", 1854a133d952SDave Chinner __func__, ip->i_ino); 1855a133d952SDave Chinner goto out_unlock; 1856a133d952SDave Chinner } 1857a133d952SDave Chinner 1858a133d952SDave Chinner /* 1859a133d952SDave Chinner * Compare the current change & modify times with that 1860a133d952SDave Chinner * passed in. If they differ, we abort this swap. 1861a133d952SDave Chinner * This is the mechanism used to ensure the calling 1862a133d952SDave Chinner * process that the file was not changed out from 1863a133d952SDave Chinner * under it. 1864a133d952SDave Chinner */ 1865a133d952SDave Chinner if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || 1866a133d952SDave Chinner (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || 1867a133d952SDave Chinner (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || 1868a133d952SDave Chinner (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { 1869a133d952SDave Chinner error = XFS_ERROR(EBUSY); 1870a133d952SDave Chinner goto out_unlock; 1871a133d952SDave Chinner } 1872a133d952SDave Chinner 1873a133d952SDave Chinner /* We need to fail if the file is memory mapped. Once we have tossed 1874a133d952SDave Chinner * all existing pages, the page fault will have no option 1875a133d952SDave Chinner * but to go to the filesystem for pages. By making the page fault call 1876a133d952SDave Chinner * vop_read (or write in the case of autogrow) they block on the iolock 1877a133d952SDave Chinner * until we have switched the extents. 1878a133d952SDave Chinner */ 1879a133d952SDave Chinner if (VN_MAPPED(VFS_I(ip))) { 1880a133d952SDave Chinner error = XFS_ERROR(EBUSY); 1881a133d952SDave Chinner goto out_unlock; 1882a133d952SDave Chinner } 1883a133d952SDave Chinner 1884a133d952SDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 1885a133d952SDave Chinner xfs_iunlock(tip, XFS_ILOCK_EXCL); 1886a133d952SDave Chinner 1887a133d952SDave Chinner /* 1888a133d952SDave Chinner * There is a race condition here since we gave up the 1889a133d952SDave Chinner * ilock. However, the data fork will not change since 1890a133d952SDave Chinner * we have the iolock (locked for truncation too) so we 1891a133d952SDave Chinner * are safe. We don't really care if non-io related 1892a133d952SDave Chinner * fields change. 1893a133d952SDave Chinner */ 1894a133d952SDave Chinner truncate_pagecache_range(VFS_I(ip), 0, -1); 1895a133d952SDave Chinner 1896a133d952SDave Chinner tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 18973d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 18983d3c8b52SJie Liu if (error) { 1899a133d952SDave Chinner xfs_iunlock(ip, XFS_IOLOCK_EXCL); 1900a133d952SDave Chinner xfs_iunlock(tip, XFS_IOLOCK_EXCL); 1901a133d952SDave Chinner xfs_trans_cancel(tp, 0); 1902a133d952SDave Chinner goto out; 1903a133d952SDave Chinner } 1904a133d952SDave Chinner xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1905a133d952SDave Chinner 1906a133d952SDave Chinner /* 1907a133d952SDave Chinner * Count the number of extended attribute blocks 1908a133d952SDave Chinner */ 1909a133d952SDave Chinner if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && 1910a133d952SDave Chinner (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1911a133d952SDave Chinner error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); 1912a133d952SDave Chinner if (error) 1913a133d952SDave Chinner goto out_trans_cancel; 1914a133d952SDave Chinner } 1915a133d952SDave Chinner if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && 1916a133d952SDave Chinner (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { 1917a133d952SDave Chinner error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, 1918a133d952SDave Chinner &taforkblks); 1919a133d952SDave Chinner if (error) 1920a133d952SDave Chinner goto out_trans_cancel; 1921a133d952SDave Chinner } 1922a133d952SDave Chinner 1923a133d952SDave Chinner /* 1924a133d952SDave Chinner * Swap the data forks of the inodes 1925a133d952SDave Chinner */ 1926a133d952SDave Chinner ifp = &ip->i_df; 1927a133d952SDave Chinner tifp = &tip->i_df; 1928a133d952SDave Chinner *tempifp = *ifp; /* struct copy */ 1929a133d952SDave Chinner *ifp = *tifp; /* struct copy */ 1930a133d952SDave Chinner *tifp = *tempifp; /* struct copy */ 1931a133d952SDave Chinner 1932a133d952SDave Chinner /* 1933a133d952SDave Chinner * Fix the on-disk inode values 1934a133d952SDave Chinner */ 1935a133d952SDave Chinner tmp = (__uint64_t)ip->i_d.di_nblocks; 1936a133d952SDave Chinner ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 1937a133d952SDave Chinner tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 1938a133d952SDave Chinner 1939a133d952SDave Chinner tmp = (__uint64_t) ip->i_d.di_nextents; 1940a133d952SDave Chinner ip->i_d.di_nextents = tip->i_d.di_nextents; 1941a133d952SDave Chinner tip->i_d.di_nextents = tmp; 1942a133d952SDave Chinner 1943a133d952SDave Chinner tmp = (__uint64_t) ip->i_d.di_format; 1944a133d952SDave Chinner ip->i_d.di_format = tip->i_d.di_format; 1945a133d952SDave Chinner tip->i_d.di_format = tmp; 1946a133d952SDave Chinner 1947a133d952SDave Chinner /* 1948a133d952SDave Chinner * The extents in the source inode could still contain speculative 1949a133d952SDave Chinner * preallocation beyond EOF (e.g. the file is open but not modified 1950a133d952SDave Chinner * while defrag is in progress). In that case, we need to copy over the 1951a133d952SDave Chinner * number of delalloc blocks the data fork in the source inode is 1952a133d952SDave Chinner * tracking beyond EOF so that when the fork is truncated away when the 1953a133d952SDave Chinner * temporary inode is unlinked we don't underrun the i_delayed_blks 1954a133d952SDave Chinner * counter on that inode. 1955a133d952SDave Chinner */ 1956a133d952SDave Chinner ASSERT(tip->i_delayed_blks == 0); 1957a133d952SDave Chinner tip->i_delayed_blks = ip->i_delayed_blks; 1958a133d952SDave Chinner ip->i_delayed_blks = 0; 1959a133d952SDave Chinner 1960a133d952SDave Chinner src_log_flags = XFS_ILOG_CORE; 1961a133d952SDave Chinner switch (ip->i_d.di_format) { 1962a133d952SDave Chinner case XFS_DINODE_FMT_EXTENTS: 1963a133d952SDave Chinner /* If the extents fit in the inode, fix the 1964a133d952SDave Chinner * pointer. Otherwise it's already NULL or 1965a133d952SDave Chinner * pointing to the extent. 1966a133d952SDave Chinner */ 1967a133d952SDave Chinner if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1968a133d952SDave Chinner ifp->if_u1.if_extents = 1969a133d952SDave Chinner ifp->if_u2.if_inline_ext; 1970a133d952SDave Chinner } 1971a133d952SDave Chinner src_log_flags |= XFS_ILOG_DEXT; 1972a133d952SDave Chinner break; 1973a133d952SDave Chinner case XFS_DINODE_FMT_BTREE: 1974a133d952SDave Chinner src_log_flags |= XFS_ILOG_DBROOT; 1975a133d952SDave Chinner break; 1976a133d952SDave Chinner } 1977a133d952SDave Chinner 1978a133d952SDave Chinner target_log_flags = XFS_ILOG_CORE; 1979a133d952SDave Chinner switch (tip->i_d.di_format) { 1980a133d952SDave Chinner case XFS_DINODE_FMT_EXTENTS: 1981a133d952SDave Chinner /* If the extents fit in the inode, fix the 1982a133d952SDave Chinner * pointer. Otherwise it's already NULL or 1983a133d952SDave Chinner * pointing to the extent. 1984a133d952SDave Chinner */ 1985a133d952SDave Chinner if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { 1986a133d952SDave Chinner tifp->if_u1.if_extents = 1987a133d952SDave Chinner tifp->if_u2.if_inline_ext; 1988a133d952SDave Chinner } 1989a133d952SDave Chinner target_log_flags |= XFS_ILOG_DEXT; 1990a133d952SDave Chinner break; 1991a133d952SDave Chinner case XFS_DINODE_FMT_BTREE: 1992a133d952SDave Chinner target_log_flags |= XFS_ILOG_DBROOT; 1993a133d952SDave Chinner break; 1994a133d952SDave Chinner } 1995a133d952SDave Chinner 1996a133d952SDave Chinner 1997a133d952SDave Chinner xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1998a133d952SDave Chinner xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 1999a133d952SDave Chinner 2000a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags); 2001a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags); 2002a133d952SDave Chinner 2003a133d952SDave Chinner /* 2004a133d952SDave Chinner * If this is a synchronous mount, make sure that the 2005a133d952SDave Chinner * transaction goes to disk before returning to the user. 2006a133d952SDave Chinner */ 2007a133d952SDave Chinner if (mp->m_flags & XFS_MOUNT_WSYNC) 2008a133d952SDave Chinner xfs_trans_set_sync(tp); 2009a133d952SDave Chinner 2010a133d952SDave Chinner error = xfs_trans_commit(tp, 0); 2011a133d952SDave Chinner 2012a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0); 2013a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1); 2014a133d952SDave Chinner out: 2015a133d952SDave Chinner kmem_free(tempifp); 2016a133d952SDave Chinner return error; 2017a133d952SDave Chinner 2018a133d952SDave Chinner out_unlock: 2019a133d952SDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 2020a133d952SDave Chinner xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 2021a133d952SDave Chinner goto out; 2022a133d952SDave Chinner 2023a133d952SDave Chinner out_trans_cancel: 2024a133d952SDave Chinner xfs_trans_cancel(tp, 0); 2025a133d952SDave Chinner goto out_unlock; 2026a133d952SDave Chinner } 2027