// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because
 * the bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
        if (XFS_IS_REALTIME_INODE(ip))
                return XFS_FSB_TO_BB(ip->i_mount, fsb);
        return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
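/*
 * A note on units: blkdev_issue_zeroout() works in 512-byte sector counts,
 * hence the shifts by (s_blocksize_bits - 9) below.  With 4k filesystem
 * blocks that is a shift of 3, so fsblock 10 maps to sector 80.
 */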
int
xfs_zero_extent(
        struct xfs_inode        *ip,
        xfs_fsblock_t           start_fsb,
        xfs_off_t               count_fsb)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
        xfs_daddr_t             sector = xfs_fsb_to_db(ip, start_fsb);
        sector_t                block = XFS_BB_TO_FSBT(mp, sector);

        return blkdev_issue_zeroout(target->bt_bdev,
                block << (mp->m_super->s_blocksize_bits - 9),
                count_fsb << (mp->m_super->s_blocksize_bits - 9),
                GFP_NOFS, 0);
}

#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
{
        int                     error;  /* error return value */
        xfs_mount_t             *mp;    /* mount point structure */
        xfs_extlen_t            prod = 0; /* product factor for allocators */
        xfs_extlen_t            mod = 0;  /* offset alignment remainder */
        xfs_extlen_t            ralen = 0; /* realtime allocation length */
        xfs_extlen_t            align;  /* minimum allocation alignment */
        xfs_rtblock_t           rtb;

        mp = ap->ip->i_mount;
        align = xfs_get_extsz_hint(ap->ip);
        prod = align / mp->m_sb.sb_rextsize;
        error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
                                        align, 1, ap->eof, 0,
                                        ap->conv, &ap->offset, &ap->length);
        if (error)
                return error;
        ASSERT(ap->length);
        ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

        /*
         * If the offset & length are not perfectly aligned
         * then kill prod, it will just get us in trouble.
         */
        div_u64_rem(ap->offset, align, &mod);
        if (mod || ap->length % align)
                prod = 1;
        /*
         * Set ralen to be the actual requested length in rtextents.
         */
        ralen = ap->length / mp->m_sb.sb_rextsize;
        /*
         * If the old value was close enough to MAXEXTLEN that
         * we rounded up to it, cut it back so it's valid again.
         * Note that if it's a really large request (bigger than
         * MAXEXTLEN), we don't hear about that number, and can't
         * adjust the starting point to match it.
         */
        if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
                ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
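
        /*
         * Note that MAXEXTLEN above is measured in filesystem blocks, so the
         * clamp keeps ralen (which is in rtextents) small enough that the
         * final allocation of ralen * sb_rextsize blocks still fits in the
         * on-disk extent length field.
         */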

        /*
         * Lock out modifications to both the RT bitmap and summary inodes
         */
        xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
        xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
        xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
        xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);

        /*
         * If it's an allocation to an empty file at offset 0,
         * pick an extent that will space things out in the rt area.
         */
        if (ap->eof && ap->offset == 0) {
                xfs_rtblock_t uninitialized_var(rtx);   /* realtime extent no */

                error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
                if (error)
                        return error;
                ap->blkno = rtx * mp->m_sb.sb_rextsize;
        } else {
                ap->blkno = 0;
        }

        xfs_bmap_adjacent(ap);

        /*
         * Realtime allocation, done through xfs_rtallocate_extent.
         */
        do_div(ap->blkno, mp->m_sb.sb_rextsize);
        rtb = ap->blkno;
        ap->length = ralen;
        error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
                        &ralen, ap->wasdel, prod, &rtb);
        if (error)
                return error;

        ap->blkno = rtb;
        if (ap->blkno != NULLFSBLOCK) {
                ap->blkno *= mp->m_sb.sb_rextsize;
                ralen *= mp->m_sb.sb_rextsize;
                ap->length = ralen;
                ap->ip->i_d.di_nblocks += ralen;
                xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
                if (ap->wasdel)
                        ap->ip->i_delayed_blks -= ralen;
                /*
                 * Adjust the disk quota also.  This was reserved
                 * earlier.
                 */
                xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
                        ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
                                        XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
        } else {
                ap->length = 0;
        }
        return 0;
}
#endif /* CONFIG_XFS_RT */

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
        struct xfs_ifork        *ifp,
        xfs_filblks_t           *count)
{
        struct xfs_iext_cursor  icur;
        struct xfs_bmbt_irec    got;
        xfs_extnum_t            numrecs = 0;

        for_each_xfs_iext(ifp, &icur, &got) {
                if (!isnullstartblock(got.br_startblock)) {
                        *count += got.br_blockcount;
                        numrecs++;
                }
        }

        return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        int                     whichfork,
        xfs_extnum_t            *nextents,
        xfs_filblks_t           *count)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        struct xfs_btree_cur    *cur;
        xfs_extlen_t            btblocks = 0;
        int                     error;

        *nextents = 0;
        *count = 0;

        if (!ifp)
                return 0;

        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_BTREE:
                if (!(ifp->if_flags & XFS_IFEXTENTS)) {
                        error = xfs_iread_extents(tp, ip, whichfork);
                        if (error)
                                return error;
                }

                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                error = xfs_btree_count_blocks(cur, &btblocks);
                xfs_btree_del_cursor(cur, error);
                if (error)
                        return error;

                /*
                 * xfs_btree_count_blocks includes the root block contained in
                 * the inode fork in @btblocks, so subtract one because we're
                 * only interested in allocated disk blocks.
                 */
                *count += btblocks - 1;

                /* fall through */
        case XFS_DINODE_FMT_EXTENTS:
                *nextents = xfs_bmap_count_leaves(ifp, count);
                break;
        }

        return 0;
}
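
/*
 * The getbmapx/kgetbmap structures used below express bmv_offset,
 * bmv_length and bmv_block in 512-byte basic blocks (BBs), not filesystem
 * blocks, which is why the reporting helpers convert with XFS_FSB_TO_BB()
 * and xfs_fsb_to_db().
 */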

static int
xfs_getbmap_report_one(
        struct xfs_inode        *ip,
        struct getbmapx         *bmv,
        struct kgetbmap         *out,
        int64_t                 bmv_end,
        struct xfs_bmbt_irec    *got)
{
        struct kgetbmap         *p = out + bmv->bmv_entries;
        bool                    shared = false;
        int                     error;

        error = xfs_reflink_trim_around_shared(ip, got, &shared);
        if (error)
                return error;

        if (isnullstartblock(got->br_startblock) ||
            got->br_startblock == DELAYSTARTBLOCK) {
                /*
                 * Delalloc extents that start beyond EOF can occur due to
                 * speculative EOF allocation when the delalloc extent is larger
                 * than the largest freespace extent at conversion time.  These
                 * extents cannot be converted by data writeback, so can exist
                 * here even if we are not supposed to be finding delalloc
                 * extents.
                 */
                if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
                        ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);

                p->bmv_oflags |= BMV_OF_DELALLOC;
                p->bmv_block = -2;
        } else {
                p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
        }

        if (got->br_state == XFS_EXT_UNWRITTEN &&
            (bmv->bmv_iflags & BMV_IF_PREALLOC))
                p->bmv_oflags |= BMV_OF_PREALLOC;

        if (shared)
                p->bmv_oflags |= BMV_OF_SHARED;

        p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
        p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

        bmv->bmv_offset = p->bmv_offset + p->bmv_length;
        bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
        bmv->bmv_entries++;
        return 0;
}
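
/*
 * In the user-visible getbmap format, bmv_block uses two sentinel values:
 * -1 for a hole (see xfs_getbmap_report_hole() below) and -2 for a delayed
 * allocation extent that has no disk address yet.
 */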

static void
xfs_getbmap_report_hole(
        struct xfs_inode        *ip,
        struct getbmapx         *bmv,
        struct kgetbmap         *out,
        int64_t                 bmv_end,
        xfs_fileoff_t           bno,
        xfs_fileoff_t           end)
{
        struct kgetbmap         *p = out + bmv->bmv_entries;

        if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
                return;

        p->bmv_block = -1;
        p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
        p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

        bmv->bmv_offset = p->bmv_offset + p->bmv_length;
        bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
        bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
        struct getbmapx         *bmv)
{
        return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
        struct xfs_bmbt_irec    *rec,
        xfs_fileoff_t           total_end)
{
        xfs_fileoff_t           end = rec->br_startoff + rec->br_blockcount;

        if (end == total_end)
                return false;

        rec->br_startoff += rec->br_blockcount;
        if (!isnullstartblock(rec->br_startblock) &&
            rec->br_startblock != DELAYSTARTBLOCK)
                rec->br_startblock += rec->br_blockcount;
        rec->br_blockcount = total_end - end;
        return true;
}

/*
 * Get the inode's extents as described in bmv, and format for output into
 * the out array.  Entries are filled in until all extents are mapped, until
 * the passed-in bmv->bmv_count slots have been filled, or until we run off
 * the end of the requested range.
 */
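/*
 * Note that bmv_count counts the first getbmapx structure (the header
 * written back to userspace) as well as the output array entries, which is
 * why xfs_getbmap_full() stops one entry short of bmv->bmv_count.
 */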
int                                             /* error code */
xfs_getbmap(
        struct xfs_inode        *ip,
        struct getbmapx         *bmv,           /* user bmap structure */
        struct kgetbmap         *out)
{
        struct xfs_mount        *mp = ip->i_mount;
        int                     iflags = bmv->bmv_iflags;
        int                     whichfork, lock, error = 0;
        int64_t                 bmv_end, max_len;
        xfs_fileoff_t           bno, first_bno;
        struct xfs_ifork        *ifp;
        struct xfs_bmbt_irec    got, rec;
        xfs_filblks_t           len;
        struct xfs_iext_cursor  icur;

        if (bmv->bmv_iflags & ~BMV_IF_VALID)
                return -EINVAL;
#ifndef DEBUG
        /* Only allow CoW fork queries if we're debugging. */
        if (iflags & BMV_IF_COWFORK)
                return -EINVAL;
#endif
        if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
                return -EINVAL;

        if (bmv->bmv_length < -1)
                return -EINVAL;
        bmv->bmv_entries = 0;
        if (bmv->bmv_length == 0)
                return 0;

        if (iflags & BMV_IF_ATTRFORK)
                whichfork = XFS_ATTR_FORK;
        else if (iflags & BMV_IF_COWFORK)
                whichfork = XFS_COW_FORK;
        else
                whichfork = XFS_DATA_FORK;
        ifp = XFS_IFORK_PTR(ip, whichfork);

        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        switch (whichfork) {
        case XFS_ATTR_FORK:
                if (!XFS_IFORK_Q(ip))
                        goto out_unlock_iolock;

                max_len = 1LL << 32;
                lock = xfs_ilock_attr_map_shared(ip);
                break;
        case XFS_COW_FORK:
                /* No CoW fork?  Just return */
                if (!ifp)
                        goto out_unlock_iolock;

                if (xfs_get_cowextsz_hint(ip))
                        max_len = mp->m_super->s_maxbytes;
                else
                        max_len = XFS_ISIZE(ip);

                lock = XFS_ILOCK_SHARED;
                xfs_ilock(ip, lock);
                break;
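        /*
         * max_len caps how far a bmv_length of -1 ("map to the end")
         * reaches.  For the data and CoW forks it extends past EOF only
         * when extent size hints or preallocation flags mean post-EOF
         * mappings can exist.
         */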
        case XFS_DATA_FORK:
                if (!(iflags & BMV_IF_DELALLOC) &&
                    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
                        error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
                        if (error)
                                goto out_unlock_iolock;

                        /*
                         * Even after flushing the inode, there can still be
                         * delalloc blocks on the inode beyond EOF due to
                         * speculative preallocation.  These are not removed
                         * until the release function is called or the inode
                         * is inactivated.  Hence we cannot assert here that
                         * ip->i_delayed_blks == 0.
                         */
                }

                if (xfs_get_extsz_hint(ip) ||
                    (ip->i_d.di_flags &
                     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
                        max_len = mp->m_super->s_maxbytes;
                else
                        max_len = XFS_ISIZE(ip);

                lock = xfs_ilock_data_map_shared(ip);
                break;
        }

        switch (XFS_IFORK_FORMAT(ip, whichfork)) {
        case XFS_DINODE_FMT_EXTENTS:
        case XFS_DINODE_FMT_BTREE:
                break;
        case XFS_DINODE_FMT_LOCAL:
                /* Local format inode forks report no extents. */
                goto out_unlock_ilock;
        default:
                error = -EINVAL;
                goto out_unlock_ilock;
        }

        if (bmv->bmv_length == -1) {
                max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
                bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
        }

        bmv_end = bmv->bmv_offset + bmv->bmv_length;

        first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
        len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
                error = xfs_iread_extents(NULL, ip, whichfork);
                if (error)
                        goto out_unlock_ilock;
        }
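
        /*
         * The lookup finds the first extent at or after bno; if there is
         * none, everything from bno to the end of the file is a hole.
         */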
        if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
                /*
                 * Report a whole-file hole if the delalloc flag is set to
                 * stay compatible with the old implementation.
                 */
                if (iflags & BMV_IF_DELALLOC)
                        xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
                                        XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
                goto out_unlock_ilock;
        }

        while (!xfs_getbmap_full(bmv)) {
                xfs_trim_extent(&got, first_bno, len);

                /*
                 * Report an entry for a hole if this extent doesn't directly
                 * follow the previous one.
                 */
                if (got.br_startoff > bno) {
                        xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
                                        got.br_startoff);
                        if (xfs_getbmap_full(bmv))
                                break;
                }

                /*
                 * In order to report shared extents accurately, we report each
                 * distinct shared / unshared part of a single bmbt record with
                 * an individual getbmapx record.
                 */
                bno = got.br_startoff + got.br_blockcount;
                rec = got;
                do {
                        error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
                                        &rec);
                        if (error || xfs_getbmap_full(bmv))
                                goto out_unlock_ilock;
                } while (xfs_getbmap_next_rec(&rec, bno));

                if (!xfs_iext_next_extent(ifp, &icur, &got)) {
                        xfs_fileoff_t   end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

                        out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;

                        if (whichfork != XFS_ATTR_FORK && bno < end &&
                            !xfs_getbmap_full(bmv)) {
                                xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
                                                bno, end);
                        }
                        break;
                }

                if (bno >= first_bno + len)
                        break;
        }

out_unlock_ilock:
        xfs_iunlock(ip, lock);
out_unlock_iolock:
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
        return error;
}
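
/*
 * Note: the callers of xfs_bmap_punch_delalloc_range() are error paths --
 * delalloc reservations that can no longer be written back get thrown away
 * here rather than being converted to real extents.
 */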
/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
int
xfs_bmap_punch_delalloc_range(
        struct xfs_inode        *ip,
        xfs_fileoff_t           start_fsb,
        xfs_fileoff_t           length)
{
        struct xfs_ifork        *ifp = &ip->i_df;
        xfs_fileoff_t           end_fsb = start_fsb + length;
        struct xfs_bmbt_irec    got, del;
        struct xfs_iext_cursor  icur;
        int                     error = 0;

        ASSERT(ifp->if_flags & XFS_IFEXTENTS);

        xfs_ilock(ip, XFS_ILOCK_EXCL);
        if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
                goto out_unlock;

        while (got.br_startoff + got.br_blockcount > start_fsb) {
                del = got;
                xfs_trim_extent(&del, start_fsb, length);

                /*
                 * A delete can push the cursor forward.  Step back to the
                 * previous extent on non-delalloc or extents outside the
                 * target range.
                 */
                if (!del.br_blockcount ||
                    !isnullstartblock(del.br_startblock)) {
                        if (!xfs_iext_prev_extent(ifp, &icur, &got))
                                break;
                        continue;
                }

                error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
                                                  &got, &del);
                if (error || !xfs_iext_get_extent(ifp, &icur, &got))
                        break;
        }

out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks.  The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
        /* prealloc/delalloc exists only on regular files */
        if (!S_ISREG(VFS_I(ip)->i_mode))
                return false;

        /*
         * Zero sized files with no cached pages and delalloc blocks will not
         * have speculative prealloc/delalloc blocks to remove.
         */
        if (VFS_I(ip)->i_size == 0 &&
            VFS_I(ip)->i_mapping->nrpages == 0 &&
            ip->i_delayed_blks == 0)
                return false;

        /* If we haven't read in the extent list, then don't do it now. */
        if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
                return false;
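
        /*
         * Reading the extent list in here would require I/O, which is not
         * worthwhile just to decide whether post-EOF blocks can be trimmed;
         * callers on the reclaim paths especially want to avoid that.
         */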

        /*
         * Do not free real preallocated or append-only files unless the file
         * has delalloc blocks and we are forced to remove them.
         */
        if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
                if (!force || ip->i_delayed_blks == 0)
                        return false;

        return true;
}

/*
 * This is called to free any blocks beyond eof.  The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
        struct xfs_inode        *ip)
{
        struct xfs_trans        *tp;
        int                     error;
        xfs_fileoff_t           end_fsb;
        xfs_fileoff_t           last_fsb;
        xfs_filblks_t           map_len;
        int                     nimaps;
        struct xfs_bmbt_irec    imap;
        struct xfs_mount        *mp = ip->i_mount;

        /*
         * Figure out if there are any blocks beyond the end
         * of the file.  If not, then there is nothing to do.
         */
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
        last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
        if (last_fsb <= end_fsb)
                return 0;
        map_len = last_fsb - end_fsb;

        nimaps = 1;
        xfs_ilock(ip, XFS_ILOCK_SHARED);
        error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
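
        /*
         * A single xfs_bmapi_read() probe is enough here: any mapping other
         * than a hole past EOF, or any leftover delalloc reservation, means
         * there is something worth truncating away.
         */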

        /*
         * If there are blocks after the end of file, truncate the file to its
         * current size to free them up.
         */
        if (!error && (nimaps != 0) &&
            (imap.br_startblock != HOLESTARTBLOCK ||
             ip->i_delayed_blks)) {
                /*
                 * Attach the dquots to the inode up front.
                 */
                error = xfs_qm_dqattach(ip);
                if (error)
                        return error;

                /* wait on dio to ensure i_size has settled */
                inode_dio_wait(VFS_I(ip));

                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
                                &tp);
                if (error) {
                        ASSERT(XFS_FORCED_SHUTDOWN(mp));
                        return error;
                }

                xfs_ilock(ip, XFS_ILOCK_EXCL);
                xfs_trans_ijoin(tp, ip, 0);

                /*
                 * Do not update the on-disk file size.  If we update the
                 * on-disk file size and then the system crashes before the
                 * contents of the file are flushed to disk then the files
                 * may be full of holes (ie NULL files bug).
                 */
                error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
                                        XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
                if (error) {
                        /*
                         * If we get an error at this point we simply don't
                         * bother truncating the file.
                         */
                        xfs_trans_cancel(tp);
                } else {
                        error = xfs_trans_commit(tp);
                        if (!error)
                                xfs_inode_clear_eofblocks_tag(ip);
                }

                xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
        return error;
}

int
xfs_alloc_file_space(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
        xfs_off_t               len,
        int                     alloc_type)
{
        xfs_mount_t             *mp = ip->i_mount;
        xfs_off_t               count;
        xfs_filblks_t           allocated_fsb;
        xfs_filblks_t           allocatesize_fsb;
        xfs_extlen_t            extsz, temp;
        xfs_fileoff_t           startoffset_fsb;
        xfs_fileoff_t           endoffset_fsb;
        int                     nimaps;
        int                     quota_flag;
        int                     rt;
        xfs_trans_t             *tp;
        xfs_bmbt_irec_t         imaps[1], *imapp;
        uint                    qblocks, resblks, resrtextents;
        int                     error;

        trace_xfs_alloc_file_space(ip);

        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
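
        /*
         * alloc_type is passed straight through to xfs_bmapi_write(); the
         * fallocate path passes XFS_BMAPI_PREALLOC here so that new blocks
         * come back as unwritten extents rather than exposing stale data.
         */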

        error = xfs_qm_dqattach(ip);
        if (error)
                return error;

        if (len <= 0)
                return -EINVAL;

        rt = XFS_IS_REALTIME_INODE(ip);
        extsz = xfs_get_extsz_hint(ip);

        count = len;
        imapp = &imaps[0];
        nimaps = 1;
        startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
        endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
        allocatesize_fsb = endoffset_fsb - startoffset_fsb;

        /*
         * Allocate file space until done or until there is an error
         */
        while (allocatesize_fsb && !error) {
                xfs_fileoff_t   s, e;

                /*
                 * Determine space reservations for data/realtime.
                 */
                if (unlikely(extsz)) {
                        s = startoffset_fsb;
                        do_div(s, extsz);
                        s *= extsz;
                        e = startoffset_fsb + allocatesize_fsb;
                        div_u64_rem(startoffset_fsb, extsz, &temp);
                        if (temp)
                                e += temp;
                        div_u64_rem(e, extsz, &temp);
                        if (temp)
                                e += extsz - temp;
                } else {
                        s = 0;
                        e = allocatesize_fsb;
                }

                /*
                 * The transaction reservation is limited to a 32-bit block
                 * count, hence we need to limit the number of blocks we are
                 * trying to reserve to avoid an overflow.  We can't allocate
                 * more than @nimaps extents, and an extent is limited on disk
                 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
                 */
                resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
                if (unlikely(rt)) {
                        resrtextents = qblocks = resblks;
                        resrtextents /= mp->m_sb.sb_rextsize;
                        resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
                        quota_flag = XFS_QMOPT_RES_RTBLKS;
                } else {
                        resrtextents = 0;
                        resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
                        quota_flag = XFS_QMOPT_RES_REGBLKS;
                }

                /*
                 * Allocate and setup the transaction.
                 */
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
                                        resrtextents, 0, &tp);

                /*
                 * Check for running out of space; if the transaction
                 * allocation itself fails there is nothing to clean up.
                 */
                if (error) {
                        ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
                        break;
                }
                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
                                                      0, quota_flag);
                if (error)
                        goto error1;

                xfs_trans_ijoin(tp, ip, 0);

                error = xfs_bmapi_write(tp, ip, startoffset_fsb,
                                        allocatesize_fsb, alloc_type, 0, imapp,
                                        &nimaps);
                if (error)
                        goto error0;

                /*
                 * Complete the transaction
                 */
                error = xfs_trans_commit(tp);
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                if (error)
                        break;

                allocated_fsb = imapp->br_blockcount;

                if (nimaps == 0) {
                        error = -ENOSPC;
                        break;
                }

                startoffset_fsb += allocated_fsb;
                allocatesize_fsb -= allocated_fsb;
        }

        return error;

error0: /* unlock inode, unreserve quota blocks, cancel trans */
        xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1: /* Just cancel transaction */
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
}

static int
xfs_unmap_extent(
        struct xfs_inode        *ip,
        xfs_fileoff_t           startoffset_fsb,
        xfs_filblks_t           len_fsb,
        int                     *done)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        uint                    resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
        int                     error;

        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
        if (error) {
                ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
                return error;
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);
        error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
                        ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
        if (error)
                goto out_trans_cancel;

        xfs_trans_ijoin(tp, ip, 0);

        error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
        if (error)
                goto out_trans_cancel;

        error = xfs_trans_commit(tp);
out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;

out_trans_cancel:
        xfs_trans_cancel(tp);
        goto out_unlock;
}

/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
        xfs_off_t               len)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct inode            *inode = VFS_I(ip);
        xfs_off_t               rounding, start, end;
        int                     error;

        rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
        start = round_down(offset, rounding);
        end = round_up(offset + len, rounding) - 1;

        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return error;
        truncate_pagecache_range(inode, start, end);
        return 0;
}

int
xfs_free_file_space(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
        xfs_off_t               len)
{
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           startoffset_fsb;
        xfs_fileoff_t           endoffset_fsb;
        int                     done = 0, error;

        trace_xfs_free_file_space(ip);

        error = xfs_qm_dqattach(ip);
        if (error)
                return error;

        if (len <= 0)   /* if nothing being freed */
                return 0;

        startoffset_fsb = XFS_B_TO_FSB(mp, offset);
        endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
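
        /*
         * XFS_B_TO_FSB rounds the start up and XFS_B_TO_FSBT rounds the end
         * down, so only blocks wholly contained in the range are unmapped;
         * partial blocks at either edge are zeroed below instead.
         */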

        /*
         * Need to zero the stuff we're not freeing, on disk.
         */
        if (endoffset_fsb > startoffset_fsb) {
                while (!done) {
                        error = xfs_unmap_extent(ip, startoffset_fsb,
                                        endoffset_fsb - startoffset_fsb, &done);
                        if (error)
                                return error;
                }
        }

        /*
         * Now that we've unmapped all full blocks we'll have to zero out any
         * partial block at the beginning and/or end.  iomap_zero_range is
         * smart enough to skip any holes, including those we just created,
         * but we must take care not to zero beyond EOF and enlarge i_size.
         */
        if (offset >= XFS_ISIZE(ip))
                return 0;
        if (offset + len > XFS_ISIZE(ip))
                len = XFS_ISIZE(ip) - offset;
        error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
                        &xfs_buffered_write_iomap_ops);
        if (error)
                return error;

        /*
         * If we zeroed right up to EOF and EOF straddles a page boundary we
         * must make sure that the post-EOF area is also zeroed because the
         * page could be mmap'd and iomap_zero_range doesn't do that for us.
         * Writeback of the eof page will do this, albeit clumsily.
         */
        if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
                error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                round_down(offset + len, PAGE_SIZE), LLONG_MAX);
        }

        return error;
}

static int
xfs_prepare_shift(
        struct xfs_inode        *ip,
        loff_t                  offset)
{
        int                     error;

        /*
         * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
         * into the accessible region of the file.
         */
        if (xfs_can_free_eofblocks(ip, true)) {
                error = xfs_free_eofblocks(ip);
                if (error)
                        return error;
        }

        /*
         * Writeback and invalidate cache for the remainder of the file as
         * we're about to shift down every extent from offset to EOF.
         */
        error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
        if (error)
                return error;

        /*
         * Clean out anything hanging around in the cow fork now that
         * we've flushed all the dirty data out to disk to avoid having
         * CoW extents at the wrong offsets.
         */
        if (xfs_inode_has_cow_data(ip)) {
                error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
                                true);
                if (error)
                        return error;
        }

        return 0;
}

/*
 * xfs_collapse_file_space()
 * This routine frees disk space and shifts extents for the given file.
 * The first thing we do is free the data blocks in the specified range
 * by calling xfs_free_file_space(), which also syncs dirty data and
 * invalidates the page cache over the region on which the collapse is
 * working.  Then we shift the extent records to the left to cover the
 * resulting hole.
 * RETURNS:
 * 0 on success
 * errno on error
 */
int
xfs_collapse_file_space(
        struct xfs_inode        *ip,
        xfs_off_t               offset,
        xfs_off_t               len)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        int                     error;
        xfs_fileoff_t           next_fsb = XFS_B_TO_FSB(mp, offset + len);
        xfs_fileoff_t           shift_fsb = XFS_B_TO_FSB(mp, len);
        uint                    resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
        bool                    done = false;

        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));

        trace_xfs_collapse_file_space(ip);

        error = xfs_free_file_space(ip, offset, len);
        if (error)
                return error;

        error = xfs_prepare_shift(ip, offset);
        if (error)
                return error;
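
        /*
         * The shift proceeds in pieces, one transaction at a time: next_fsb
         * tracks where the next call to xfs_bmap_collapse_extents() should
         * resume, and done is set once every extent between the punched hole
         * and EOF has been moved left.
         */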
        while (!error && !done) {
                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
                                        &tp);
                if (error)
                        break;

                xfs_ilock(ip, XFS_ILOCK_EXCL);
                error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
                                ip->i_gdquot, ip->i_pdquot, resblks, 0,
                                XFS_QMOPT_RES_REGBLKS);
                if (error)
                        goto out_trans_cancel;
                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

                error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
                                &done);
                if (error)
                        goto out_trans_cancel;

                error = xfs_trans_commit(tp);
        }

        return error;

out_trans_cancel:
        xfs_trans_cancel(tp);
        return error;
}

/*
 * xfs_insert_file_space()
 * This routine creates hole space by shifting extents for the given file.
 * The first thing we do is sync dirty data and invalidate the page cache
 * over the region on which the insert range is working.  Then we split an
 * extent into two extents at the given offset by calling
 * xfs_bmap_split_extent, and shift all the extent records lying between
 * [offset, last allocated extent] to the right to make room for the hole.
 * RETURNS:
 * 0 on success
 * errno on error
 */
int
xfs_insert_file_space(
        struct xfs_inode        *ip,
        loff_t                  offset,
        loff_t                  len)
{
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        int                     error;
        xfs_fileoff_t           stop_fsb = XFS_B_TO_FSB(mp, offset);
        xfs_fileoff_t           next_fsb = NULLFSBLOCK;
        xfs_fileoff_t           shift_fsb = XFS_B_TO_FSB(mp, len);
        bool                    done = false;

        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));

        trace_xfs_insert_file_space(ip);

        error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
        if (error)
                return error;

        error = xfs_prepare_shift(ip, offset);
        if (error)
                return error;

        /*
         * The extent shifting code works on extent granularity.  So, if
         * stop_fsb is not the starting block of an extent, we need to split
         * the extent at stop_fsb.
         */
        error = xfs_bmap_split_extent(ip, stop_fsb);
        if (error)
                return error;
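
        /*
         * For an insert we shift from the tail of the file backwards:
         * next_fsb starts out as NULLFSBLOCK, which tells
         * xfs_bmap_insert_extents() to begin at the last allocated extent
         * and work right-to-left down to stop_fsb.
         */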
11414ed36c6bSChristoph Hellwig 	 */
11424ed36c6bSChristoph Hellwig 	error = xfs_bmap_split_extent(ip, stop_fsb);
11434ed36c6bSChristoph Hellwig 	if (error)
11444ed36c6bSChristoph Hellwig 		return error;
11454ed36c6bSChristoph Hellwig 
11464ed36c6bSChristoph Hellwig 	while (!error && !done) {
11474ed36c6bSChristoph Hellwig 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
11484ed36c6bSChristoph Hellwig 					&tp);
11494ed36c6bSChristoph Hellwig 		if (error)
11504ed36c6bSChristoph Hellwig 			break;
11514ed36c6bSChristoph Hellwig 
11524ed36c6bSChristoph Hellwig 		xfs_ilock(ip, XFS_ILOCK_EXCL);
11534ed36c6bSChristoph Hellwig 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1154ecfea3f0SChristoph Hellwig 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
1155333f950cSBrian Foster 				&done, stop_fsb);
11564ed36c6bSChristoph Hellwig 		if (error)
1157c8eac49eSBrian Foster 			goto out_trans_cancel;
11584ed36c6bSChristoph Hellwig 
11594ed36c6bSChristoph Hellwig 		error = xfs_trans_commit(tp);
11604ed36c6bSChristoph Hellwig 	}
11614ed36c6bSChristoph Hellwig 
11624ed36c6bSChristoph Hellwig 	return error;
11634ed36c6bSChristoph Hellwig 
1164c8eac49eSBrian Foster out_trans_cancel:
11654ed36c6bSChristoph Hellwig 	xfs_trans_cancel(tp);
11664ed36c6bSChristoph Hellwig 	return error;
1167a904b1caSNamjae Jeon }
1168a904b1caSNamjae Jeon 
1169a904b1caSNamjae Jeon /*
1170a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1171a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1172a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1173a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking, so we can get
1174a133d952SDave Chinner  * invalid formats on the target inode.
1175a133d952SDave Chinner  *
1176a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1177a133d952SDave Chinner  * space for 6. If we defragment down to 7 extents, then the tmp format is a
1178a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1179a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1180a133d952SDave Chinner  *
1181a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1182a133d952SDave Chinner  * a corrupt temporary inode, either.
1183a133d952SDave Chinner  *
1184a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1185a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1186a133d952SDave Chinner  * reject and log the attempt. Basically, we are putting the responsibility on
1187a133d952SDave Chinner  * userspace to get this right.
1188a133d952SDave Chinner  */
1189a133d952SDave Chinner static int
1190a133d952SDave Chinner xfs_swap_extents_check_format(
1191e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1192e06259aaSDarrick J. Wong 	struct xfs_inode	*tip)	/* tmp inode */
1193a133d952SDave Chinner {
1194a133d952SDave Chinner 
1195a133d952SDave Chinner 	/* Should never get a local format */
1196a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
1197a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
11982451337dSDave Chinner 		return -EINVAL;
1199a133d952SDave Chinner 
1200a133d952SDave Chinner 	/*
1201a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode,
1202a133d952SDave Chinner 	 * why did userspace call us?
1203a133d952SDave Chinner 	 */
1204a133d952SDave Chinner 	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
12052451337dSDave Chinner 		return -EINVAL;
1206a133d952SDave Chinner 
1207a133d952SDave Chinner 	/*
12081f08af52SDarrick J. Wong 	 * If we have to use the (expensive) rmap swap method, we can
12091f08af52SDarrick J. Wong 	 * handle any number of extents and any format.
12101f08af52SDarrick J. Wong 	 */
12111f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
12121f08af52SDarrick J. Wong 		return 0;
12131f08af52SDarrick J. Wong 
12141f08af52SDarrick J. Wong 	/*
1215a133d952SDave Chinner 	 * If the target inode is in extent form and the temp inode is in btree
1216a133d952SDave Chinner 	 * form then we will end up with the target inode in the wrong format,
1217a133d952SDave Chinner 	 * as we already know there are fewer extents in the temp inode.
1218a133d952SDave Chinner 	 */
1219a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1220a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
12212451337dSDave Chinner 		return -EINVAL;
1222a133d952SDave Chinner 
1223a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1224a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1225a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
1226a133d952SDave Chinner 	    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
12272451337dSDave Chinner 		return -EINVAL;
1228a133d952SDave Chinner 
1229a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1230a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1231a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
1232a133d952SDave Chinner 	    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
12332451337dSDave Chinner 		return -EINVAL;
1234a133d952SDave Chinner 
1235a133d952SDave Chinner 	/*
1236a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1237a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1238a133d952SDave Chinner 	 * in the target.
1239a133d952SDave Chinner 	 *
1240a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1241a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1242a133d952SDave Chinner 	 * extent format...
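 * The "<=" comparisons in the btree checks below are therefore deliberate:
 * a btree fork whose extent count would fit in the other inode's inline
 * extent area is rejected, since after the swap that inode would claim
 * btree format where extent format is required.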
1243a133d952SDave Chinner 	 */
1244a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
12450cbe48ccSArnd Bergmann 		if (XFS_IFORK_Q(ip) &&
1246a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
12472451337dSDave Chinner 			return -EINVAL;
1248a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
1249a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
12502451337dSDave Chinner 			return -EINVAL;
1251a133d952SDave Chinner 	}
1252a133d952SDave Chinner 
1253a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1254a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
12550cbe48ccSArnd Bergmann 		if (XFS_IFORK_Q(tip) &&
1256a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
12572451337dSDave Chinner 			return -EINVAL;
1258a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
1259a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
12602451337dSDave Chinner 			return -EINVAL;
1261a133d952SDave Chinner 	}
1262a133d952SDave Chinner 
1263a133d952SDave Chinner 	return 0;
1264a133d952SDave Chinner }
1265a133d952SDave Chinner 
12667abbb8f9SDave Chinner static int
12674ef897a2SDave Chinner xfs_swap_extent_flush(
12684ef897a2SDave Chinner 	struct xfs_inode	*ip)
12694ef897a2SDave Chinner {
12704ef897a2SDave Chinner 	int	error;
12714ef897a2SDave Chinner 
12724ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
12734ef897a2SDave Chinner 	if (error)
12744ef897a2SDave Chinner 		return error;
12754ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
12764ef897a2SDave Chinner 
12774ef897a2SDave Chinner 	/* Verify O_DIRECT for ftmp: no cached pages may remain */
12784ef897a2SDave Chinner 	if (VFS_I(ip)->i_mapping->nrpages)
12794ef897a2SDave Chinner 		return -EINVAL;
12804ef897a2SDave Chinner 	return 0;
12814ef897a2SDave Chinner }
12824ef897a2SDave Chinner 
12831f08af52SDarrick J. Wong /*
12841f08af52SDarrick J. Wong  * Move extents from one file to another, when rmap is enabled.
12851f08af52SDarrick J. Wong  */
12861f08af52SDarrick J. Wong STATIC int
12871f08af52SDarrick J. Wong xfs_swap_extent_rmap(
12881f08af52SDarrick J. Wong 	struct xfs_trans		**tpp,
12891f08af52SDarrick J. Wong 	struct xfs_inode		*ip,
12901f08af52SDarrick J. Wong 	struct xfs_inode		*tip)
12911f08af52SDarrick J. Wong {
12927a7943c7SBrian Foster 	struct xfs_trans		*tp = *tpp;
12931f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		irec;
12941f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		uirec;
12951f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		tirec;
12961f08af52SDarrick J. Wong 	xfs_fileoff_t			offset_fsb;
12971f08af52SDarrick J. Wong 	xfs_fileoff_t			end_fsb;
12981f08af52SDarrick J. Wong 	xfs_filblks_t			count_fsb;
12991f08af52SDarrick J. Wong 	int				error;
13001f08af52SDarrick J. Wong 	xfs_filblks_t			ilen;
13011f08af52SDarrick J. Wong 	xfs_filblks_t			rlen;
13021f08af52SDarrick J. Wong 	int				nimaps;
1303c8ce540dSDarrick J. Wong 	uint64_t			tip_flags2;
13041f08af52SDarrick J. Wong 
13051f08af52SDarrick J. Wong 	/*
13061f08af52SDarrick J. Wong 	 * If the source file has shared blocks, we must flag the donor
13071f08af52SDarrick J. Wong 	 * file as having shared blocks so that we get the shared-block
13081f08af52SDarrick J. Wong 	 * rmap functions when we go to fix up the rmaps. The flags
13091f08af52SDarrick J. Wong 	 * will be switched for real later.
13101f08af52SDarrick J. Wong 	 */
13111f08af52SDarrick J. Wong 	tip_flags2 = tip->i_d.di_flags2;
13121f08af52SDarrick J. Wong 	if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)
13131f08af52SDarrick J.
Wong tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK; 13141f08af52SDarrick J. Wong 13151f08af52SDarrick J. Wong offset_fsb = 0; 13161f08af52SDarrick J. Wong end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip))); 13171f08af52SDarrick J. Wong count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); 13181f08af52SDarrick J. Wong 13191f08af52SDarrick J. Wong while (count_fsb) { 13201f08af52SDarrick J. Wong /* Read extent from the donor file */ 13211f08af52SDarrick J. Wong nimaps = 1; 13221f08af52SDarrick J. Wong error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec, 13231f08af52SDarrick J. Wong &nimaps, 0); 13241f08af52SDarrick J. Wong if (error) 13251f08af52SDarrick J. Wong goto out; 13261f08af52SDarrick J. Wong ASSERT(nimaps == 1); 13271f08af52SDarrick J. Wong ASSERT(tirec.br_startblock != DELAYSTARTBLOCK); 13281f08af52SDarrick J. Wong 13291f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap(tip, &tirec); 13301f08af52SDarrick J. Wong ilen = tirec.br_blockcount; 13311f08af52SDarrick J. Wong 13321f08af52SDarrick J. Wong /* Unmap the old blocks in the source file. */ 13331f08af52SDarrick J. Wong while (tirec.br_blockcount) { 1334c8eac49eSBrian Foster ASSERT(tp->t_firstblock == NULLFSBLOCK); 13351f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); 13361f08af52SDarrick J. Wong 13371f08af52SDarrick J. Wong /* Read extent from the source file */ 13381f08af52SDarrick J. Wong nimaps = 1; 13391f08af52SDarrick J. Wong error = xfs_bmapi_read(ip, tirec.br_startoff, 13401f08af52SDarrick J. Wong tirec.br_blockcount, &irec, 13411f08af52SDarrick J. Wong &nimaps, 0); 13421f08af52SDarrick J. Wong if (error) 1343d5a2e289SBrian Foster goto out; 13441f08af52SDarrick J. Wong ASSERT(nimaps == 1); 13451f08af52SDarrick J. Wong ASSERT(tirec.br_startoff == irec.br_startoff); 13461f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); 13471f08af52SDarrick J. Wong 13481f08af52SDarrick J. Wong /* Trim the extent. */ 13491f08af52SDarrick J. Wong uirec = tirec; 13501f08af52SDarrick J. Wong uirec.br_blockcount = rlen = min_t(xfs_filblks_t, 13511f08af52SDarrick J. Wong tirec.br_blockcount, 13521f08af52SDarrick J. Wong irec.br_blockcount); 13531f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); 13541f08af52SDarrick J. Wong 13551f08af52SDarrick J. Wong /* Remove the mapping from the donor file. */ 13563e08f42aSDarrick J. Wong xfs_bmap_unmap_extent(tp, tip, &uirec); 13571f08af52SDarrick J. Wong 13581f08af52SDarrick J. Wong /* Remove the mapping from the source file. */ 13593e08f42aSDarrick J. Wong xfs_bmap_unmap_extent(tp, ip, &irec); 13601f08af52SDarrick J. Wong 13611f08af52SDarrick J. Wong /* Map the donor file's blocks into the source file. */ 13623e08f42aSDarrick J. Wong xfs_bmap_map_extent(tp, ip, &uirec); 13631f08af52SDarrick J. Wong 13641f08af52SDarrick J. Wong /* Map the source file's blocks into the donor file. */ 13653e08f42aSDarrick J. Wong xfs_bmap_map_extent(tp, tip, &irec); 13661f08af52SDarrick J. Wong 13679e28a242SBrian Foster error = xfs_defer_finish(tpp); 13687a7943c7SBrian Foster tp = *tpp; 13691f08af52SDarrick J. Wong if (error) 13709b1f4e98SBrian Foster goto out; 13711f08af52SDarrick J. Wong 13721f08af52SDarrick J. Wong tirec.br_startoff += rlen; 13731f08af52SDarrick J. Wong if (tirec.br_startblock != HOLESTARTBLOCK && 13741f08af52SDarrick J. Wong tirec.br_startblock != DELAYSTARTBLOCK) 13751f08af52SDarrick J. Wong tirec.br_startblock += rlen; 13761f08af52SDarrick J. Wong tirec.br_blockcount -= rlen; 13771f08af52SDarrick J. 
Wong 		}
13781f08af52SDarrick J. Wong 
13791f08af52SDarrick J. Wong 		/* Roll on... */
13801f08af52SDarrick J. Wong 		count_fsb -= ilen;
13811f08af52SDarrick J. Wong 		offset_fsb += ilen;
13821f08af52SDarrick J. Wong 	}
13831f08af52SDarrick J. Wong 
13841f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
13851f08af52SDarrick J. Wong 	return 0;
13861f08af52SDarrick J. Wong 
13871f08af52SDarrick J. Wong out:
13881f08af52SDarrick J. Wong 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
13891f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
13901f08af52SDarrick J. Wong 	return error;
13911f08af52SDarrick J. Wong }
13921f08af52SDarrick J. Wong 
139339aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
139439aff5fdSDarrick J. Wong STATIC int
139539aff5fdSDarrick J. Wong xfs_swap_extent_forks(
139639aff5fdSDarrick J. Wong 	struct xfs_trans	*tp,
139739aff5fdSDarrick J. Wong 	struct xfs_inode	*ip,
139839aff5fdSDarrick J. Wong 	struct xfs_inode	*tip,
139939aff5fdSDarrick J. Wong 	int			*src_log_flags,
140039aff5fdSDarrick J. Wong 	int			*target_log_flags)
140139aff5fdSDarrick J. Wong {
1402e7f5d5caSDarrick J. Wong 	xfs_filblks_t		aforkblks = 0;
1403e7f5d5caSDarrick J. Wong 	xfs_filblks_t		taforkblks = 0;
1404e7f5d5caSDarrick J. Wong 	xfs_extnum_t		junk;
1405c8ce540dSDarrick J. Wong 	uint64_t		tmp;
140639aff5fdSDarrick J. Wong 	int			error;
140739aff5fdSDarrick J. Wong 
140839aff5fdSDarrick J. Wong 	/*
140939aff5fdSDarrick J. Wong 	 * Count the number of extended attribute blocks
141039aff5fdSDarrick J. Wong 	 */
141139aff5fdSDarrick J. Wong 	if (XFS_IFORK_Q(ip) && ip->i_d.di_anextents > 0 &&
141239aff5fdSDarrick J. Wong 	    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
1413e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
141439aff5fdSDarrick J. Wong 				&aforkblks);
141539aff5fdSDarrick J. Wong 		if (error)
141639aff5fdSDarrick J. Wong 			return error;
141739aff5fdSDarrick J. Wong 	}
141839aff5fdSDarrick J. Wong 	if (XFS_IFORK_Q(tip) && tip->i_d.di_anextents > 0 &&
141939aff5fdSDarrick J. Wong 	    tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
1420e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
142139aff5fdSDarrick J. Wong 				&taforkblks);
142239aff5fdSDarrick J. Wong 		if (error)
142339aff5fdSDarrick J. Wong 			return error;
142439aff5fdSDarrick J. Wong 	}
142539aff5fdSDarrick J. Wong 
142639aff5fdSDarrick J. Wong 	/*
14276fb10d6dSBrian Foster 	 * Btree format (v3) inodes have the inode number stamped in the bmbt
14286fb10d6dSBrian Foster 	 * block headers. We can't start changing the bmbt blocks until the
14296fb10d6dSBrian Foster 	 * inode owner change is logged so recovery does the right thing in the
14306fb10d6dSBrian Foster 	 * event of a crash. Set the owner change log flags now and leave the
14316fb10d6dSBrian Foster 	 * bmbt scan as the last step.
143239aff5fdSDarrick J. Wong 	 */
143339aff5fdSDarrick J. Wong 	if (ip->i_d.di_version == 3 &&
14346fb10d6dSBrian Foster 	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE)
143539aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DOWNER;
143639aff5fdSDarrick J. Wong 	if (tip->i_d.di_version == 3 &&
14376fb10d6dSBrian Foster 	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
143839aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DOWNER;
143939aff5fdSDarrick J. Wong 
144039aff5fdSDarrick J. Wong 	/*
144139aff5fdSDarrick J. Wong 	 * Swap the data forks of the inodes
144239aff5fdSDarrick J. Wong 	 */
1443897992b7SGustavo A. R. Silva 	swap(ip->i_df, tip->i_df);
144439aff5fdSDarrick J.
Wong 144539aff5fdSDarrick J. Wong /* 144639aff5fdSDarrick J. Wong * Fix the on-disk inode values 144739aff5fdSDarrick J. Wong */ 1448c8ce540dSDarrick J. Wong tmp = (uint64_t)ip->i_d.di_nblocks; 144939aff5fdSDarrick J. Wong ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; 145039aff5fdSDarrick J. Wong tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; 145139aff5fdSDarrick J. Wong 1452897992b7SGustavo A. R. Silva swap(ip->i_d.di_nextents, tip->i_d.di_nextents); 1453897992b7SGustavo A. R. Silva swap(ip->i_d.di_format, tip->i_d.di_format); 145439aff5fdSDarrick J. Wong 145539aff5fdSDarrick J. Wong /* 145639aff5fdSDarrick J. Wong * The extents in the source inode could still contain speculative 145739aff5fdSDarrick J. Wong * preallocation beyond EOF (e.g. the file is open but not modified 145839aff5fdSDarrick J. Wong * while defrag is in progress). In that case, we need to copy over the 145939aff5fdSDarrick J. Wong * number of delalloc blocks the data fork in the source inode is 146039aff5fdSDarrick J. Wong * tracking beyond EOF so that when the fork is truncated away when the 146139aff5fdSDarrick J. Wong * temporary inode is unlinked we don't underrun the i_delayed_blks 146239aff5fdSDarrick J. Wong * counter on that inode. 146339aff5fdSDarrick J. Wong */ 146439aff5fdSDarrick J. Wong ASSERT(tip->i_delayed_blks == 0); 146539aff5fdSDarrick J. Wong tip->i_delayed_blks = ip->i_delayed_blks; 146639aff5fdSDarrick J. Wong ip->i_delayed_blks = 0; 146739aff5fdSDarrick J. Wong 146839aff5fdSDarrick J. Wong switch (ip->i_d.di_format) { 146939aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 147039aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DEXT; 147139aff5fdSDarrick J. Wong break; 147239aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 147339aff5fdSDarrick J. Wong ASSERT(ip->i_d.di_version < 3 || 147439aff5fdSDarrick J. Wong (*src_log_flags & XFS_ILOG_DOWNER)); 147539aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DBROOT; 147639aff5fdSDarrick J. Wong break; 147739aff5fdSDarrick J. Wong } 147839aff5fdSDarrick J. Wong 147939aff5fdSDarrick J. Wong switch (tip->i_d.di_format) { 148039aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 148139aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DEXT; 148239aff5fdSDarrick J. Wong break; 148339aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 148439aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DBROOT; 148539aff5fdSDarrick J. Wong ASSERT(tip->i_d.di_version < 3 || 148639aff5fdSDarrick J. Wong (*target_log_flags & XFS_ILOG_DOWNER)); 148739aff5fdSDarrick J. Wong break; 148839aff5fdSDarrick J. Wong } 148939aff5fdSDarrick J. Wong 149039aff5fdSDarrick J. Wong return 0; 149139aff5fdSDarrick J. Wong } 149239aff5fdSDarrick J. Wong 14932dd3d709SBrian Foster /* 14942dd3d709SBrian Foster * Fix up the owners of the bmbt blocks to refer to the current inode. The 14952dd3d709SBrian Foster * change owner scan attempts to order all modified buffers in the current 14962dd3d709SBrian Foster * transaction. In the event of ordered buffer failure, the offending buffer is 14972dd3d709SBrian Foster * physically logged as a fallback and the scan returns -EAGAIN. We must roll 14982dd3d709SBrian Foster * the transaction in this case to replenish the fallback log reservation and 14992dd3d709SBrian Foster * restart the scan. This process repeats until the scan completes. 
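 * In short, the loop below is: change owner -> -EAGAIN -> roll the
 * transaction -> rejoin and relog both inodes -> retry, until the scan
 * returns success or a fatal error.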
15002dd3d709SBrian Foster  */
15012dd3d709SBrian Foster static int
15022dd3d709SBrian Foster xfs_swap_change_owner(
15032dd3d709SBrian Foster 	struct xfs_trans	**tpp,
15042dd3d709SBrian Foster 	struct xfs_inode	*ip,
15052dd3d709SBrian Foster 	struct xfs_inode	*tmpip)
15062dd3d709SBrian Foster {
15072dd3d709SBrian Foster 	int			error;
15082dd3d709SBrian Foster 	struct xfs_trans	*tp = *tpp;
15092dd3d709SBrian Foster 
15102dd3d709SBrian Foster 	do {
15112dd3d709SBrian Foster 		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
15122dd3d709SBrian Foster 					      NULL);
15132dd3d709SBrian Foster 		/* success or fatal error */
15142dd3d709SBrian Foster 		if (error != -EAGAIN)
15152dd3d709SBrian Foster 			break;
15162dd3d709SBrian Foster 
15172dd3d709SBrian Foster 		error = xfs_trans_roll(tpp);
15182dd3d709SBrian Foster 		if (error)
15192dd3d709SBrian Foster 			break;
15202dd3d709SBrian Foster 		tp = *tpp;
15212dd3d709SBrian Foster 
15222dd3d709SBrian Foster 		/*
15232dd3d709SBrian Foster 		 * Redirty both inodes so they can relog and keep the log tail
15242dd3d709SBrian Foster 		 * moving forward.
15252dd3d709SBrian Foster 		 */
15262dd3d709SBrian Foster 		xfs_trans_ijoin(tp, ip, 0);
15272dd3d709SBrian Foster 		xfs_trans_ijoin(tp, tmpip, 0);
15282dd3d709SBrian Foster 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
15292dd3d709SBrian Foster 		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
15302dd3d709SBrian Foster 	} while (true);
15312dd3d709SBrian Foster 
15322dd3d709SBrian Foster 	return error;
15332dd3d709SBrian Foster }
15342dd3d709SBrian Foster 
15354ef897a2SDave Chinner int
1536a133d952SDave Chinner xfs_swap_extents(
1537e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1538e06259aaSDarrick J. Wong 	struct xfs_inode	*tip,	/* tmp inode */
1539e06259aaSDarrick J. Wong 	struct xfs_swapext	*sxp)
1540a133d952SDave Chinner {
1541e06259aaSDarrick J. Wong 	struct xfs_mount	*mp = ip->i_mount;
1542e06259aaSDarrick J. Wong 	struct xfs_trans	*tp;
1543e06259aaSDarrick J. Wong 	struct xfs_bstat	*sbp = &sxp->sx_stat;
1544a133d952SDave Chinner 	int			src_log_flags, target_log_flags;
1545a133d952SDave Chinner 	int			error = 0;
154681217683SDave Chinner 	int			lock_flags;
1547c8ce540dSDarrick J. Wong 	uint64_t		f;
15482dd3d709SBrian Foster 	int			resblks = 0;
1549a133d952SDave Chinner 
1550a133d952SDave Chinner 	/*
1551723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1552723cac48SDave Chinner 	 * begin with. Then we can safely ensure the inodes are flushed and
1553723cac48SDave Chinner 	 * have no page cache. Once we have done this we can take the ilocks
1554723cac48SDave Chinner 	 * and do the rest of the checks.
1555a133d952SDave Chinner 	 */
155665523218SChristoph Hellwig 	lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
155765523218SChristoph Hellwig 	lock_flags = XFS_MMAPLOCK_EXCL;
15587c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
1559a133d952SDave Chinner 
1560a133d952SDave Chinner 	/* Verify that both files have the same format */
1561c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
15622451337dSDave Chinner 		error = -EINVAL;
1563a133d952SDave Chinner 		goto out_unlock;
1564a133d952SDave Chinner 	}
1565a133d952SDave Chinner 
1566a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1567a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
15682451337dSDave Chinner 		error = -EINVAL;
1569a133d952SDave Chinner 		goto out_unlock;
1570a133d952SDave Chinner 	}
1571a133d952SDave Chinner 
1572*2713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(ip);
1573*2713fefaSDarrick J. Wong 	if (error)
1574*2713fefaSDarrick J. Wong 		goto out_unlock;
1575*2713fefaSDarrick J. Wong 
1576*2713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(tip);
1577*2713fefaSDarrick J. Wong 	if (error)
1578*2713fefaSDarrick J. Wong 		goto out_unlock;
1579*2713fefaSDarrick J. Wong 
15804ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1581a133d952SDave Chinner 	if (error)
1582a133d952SDave Chinner 		goto out_unlock;
15834ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
15844ef897a2SDave Chinner 	if (error)
15854ef897a2SDave Chinner 		goto out_unlock;
1586a133d952SDave Chinner 
158796987eeaSChristoph Hellwig 	if (xfs_inode_has_cow_data(tip)) {
158896987eeaSChristoph Hellwig 		error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
158996987eeaSChristoph Hellwig 		if (error)
159096987eeaSChristoph Hellwig 			goto out_unlock;
159196987eeaSChristoph Hellwig 	}
159296987eeaSChristoph Hellwig 
15931f08af52SDarrick J. Wong 	/*
15941f08af52SDarrick J. Wong 	 * Extent "swapping" with rmap requires a permanent reservation and
15951f08af52SDarrick J. Wong 	 * a block reservation because it's really just a remap operation
15961f08af52SDarrick J. Wong 	 * performed with log redo items!
15971f08af52SDarrick J. Wong 	 */
15981f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1599b3fed434SBrian Foster 		int		w = XFS_DATA_FORK;
1600b3fed434SBrian Foster 		uint32_t	ipnext = XFS_IFORK_NEXTENTS(ip, w);
1601b3fed434SBrian Foster 		uint32_t	tipnext = XFS_IFORK_NEXTENTS(tip, w);
1602b3fed434SBrian Foster 
16031f08af52SDarrick J. Wong 		/*
1604b3fed434SBrian Foster 		 * Conceptually this shouldn't affect the shape of either bmbt,
1605b3fed434SBrian Foster 		 * but since we atomically move extents one by one, we reserve
1606b3fed434SBrian Foster 		 * enough space to rebuild both trees.
16071f08af52SDarrick J. Wong 		 */
1608b3fed434SBrian Foster 		resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
1609b3fed434SBrian Foster 		resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1610b3fed434SBrian Foster 
1611b3fed434SBrian Foster 		/*
1612b3fed434SBrian Foster 		 * Handle the corner case where either inode might straddle the
1613b3fed434SBrian Foster 		 * btree format boundary. If so, the inode could bounce between
1614b3fed434SBrian Foster 		 * btree <-> extent format on unmap -> remap cycles, freeing and
1615b3fed434SBrian Foster 		 * allocating a bmapbt block each time.
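		 * For example, an inode holding exactly XFS_IFORK_MAXEXT + 1
		 * extents sits right on that boundary: each unmap may convert
		 * the fork btree -> extent and each remap may convert it back,
		 * so reserve an extra fork's worth of blocks for the repeated
		 * bmbt allocations.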
1616b3fed434SBrian Foster 		 */
1617b3fed434SBrian Foster 		if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1))
1618b3fed434SBrian Foster 			resblks += XFS_IFORK_MAXEXT(ip, w);
1619b3fed434SBrian Foster 		if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1))
1620b3fed434SBrian Foster 			resblks += XFS_IFORK_MAXEXT(tip, w);
16212dd3d709SBrian Foster 	}
16222dd3d709SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
1623253f4911SChristoph Hellwig 	if (error)
1624a133d952SDave Chinner 		goto out_unlock;
1625723cac48SDave Chinner 
1626723cac48SDave Chinner 	/*
1627723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction
1628723cac48SDave Chinner 	 * commit or cancel will unlock the inodes from this point onwards.
1629723cac48SDave Chinner 	 */
16307c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
16314ef897a2SDave Chinner 	lock_flags |= XFS_ILOCK_EXCL;
163239aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, ip, 0);
163339aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, tip, 0);
1634723cac48SDave Chinner 
1635a133d952SDave Chinner 
1636a133d952SDave Chinner 	/* Verify all data are being swapped */
1637a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
1638a133d952SDave Chinner 	    sxp->sx_length != ip->i_d.di_size ||
1639a133d952SDave Chinner 	    sxp->sx_length != tip->i_d.di_size) {
16402451337dSDave Chinner 		error = -EFAULT;
16414ef897a2SDave Chinner 		goto out_trans_cancel;
1642a133d952SDave Chinner 	}
1643a133d952SDave Chinner 
1644a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
1645a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
1646a133d952SDave Chinner 
1647a133d952SDave Chinner 	/* check inode formats now that data is flushed */
1648a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
1649a133d952SDave Chinner 	if (error) {
1650a133d952SDave Chinner 		xfs_notice(mp,
1651a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
1652a133d952SDave Chinner 				__func__, ip->i_ino);
1653a133d952SDave Chinner 		goto out_trans_cancel;
1654a133d952SDave Chinner 	}
1655a133d952SDave Chinner 
1656a133d952SDave Chinner 	/*
1657a133d952SDave Chinner 	 * Compare the current change & modify times with those
1658a133d952SDave Chinner 	 * passed in. If they differ, we abort this swap.
1659a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
1660a133d952SDave Chinner 	 * process that the file was not changed out from
1661a133d952SDave Chinner 	 * under it.
1662a133d952SDave Chinner 	 */
1663a133d952SDave Chinner 	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
1664a133d952SDave Chinner 	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
1665a133d952SDave Chinner 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1666a133d952SDave Chinner 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
16672451337dSDave Chinner 		error = -EBUSY;
166881217683SDave Chinner 		goto out_trans_cancel;
1669a133d952SDave Chinner 	}
1670a133d952SDave Chinner 
167121b5c978SDave Chinner 	/*
167221b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
167321b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
167421b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
167521b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
167621b5c978SDave Chinner 	 * not the pre-swapped inodes.
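	 * Concretely: if tip's data fork is a v3 btree, its bmbt blocks are
	 * stamped with tip's inode number; after the fork swap they belong
	 * to ip, so the owner flag is carried in src_log_flags and the
	 * owner-change scan is run against ip below.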
167721b5c978SDave Chinner */ 167821b5c978SDave Chinner src_log_flags = XFS_ILOG_CORE; 167921b5c978SDave Chinner target_log_flags = XFS_ILOG_CORE; 168039aff5fdSDarrick J. Wong 16811f08af52SDarrick J. Wong if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 16821f08af52SDarrick J. Wong error = xfs_swap_extent_rmap(&tp, ip, tip); 16831f08af52SDarrick J. Wong else 168439aff5fdSDarrick J. Wong error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, 168539aff5fdSDarrick J. Wong &target_log_flags); 168621b5c978SDave Chinner if (error) 168721b5c978SDave Chinner goto out_trans_cancel; 1688a133d952SDave Chinner 1689f0bc4d13SDarrick J. Wong /* Do we have to swap reflink flags? */ 1690f0bc4d13SDarrick J. Wong if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^ 1691f0bc4d13SDarrick J. Wong (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) { 1692f0bc4d13SDarrick J. Wong f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; 1693f0bc4d13SDarrick J. Wong ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1694f0bc4d13SDarrick J. Wong ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK; 1695f0bc4d13SDarrick J. Wong tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1696f0bc4d13SDarrick J. Wong tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK; 169752bfcdd7SDarrick J. Wong } 169852bfcdd7SDarrick J. Wong 169952bfcdd7SDarrick J. Wong /* Swap the cow forks. */ 170052bfcdd7SDarrick J. Wong if (xfs_sb_version_hasreflink(&mp->m_sb)) { 170152bfcdd7SDarrick J. Wong ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS); 170252bfcdd7SDarrick J. Wong ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS); 170352bfcdd7SDarrick J. Wong 1704897992b7SGustavo A. R. Silva swap(ip->i_cnextents, tip->i_cnextents); 1705897992b7SGustavo A. R. Silva swap(ip->i_cowfp, tip->i_cowfp); 170652bfcdd7SDarrick J. Wong 17075bcffe30SChristoph Hellwig if (ip->i_cowfp && ip->i_cowfp->if_bytes) 170883104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(ip); 170952bfcdd7SDarrick J. Wong else 171052bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(ip); 17115bcffe30SChristoph Hellwig if (tip->i_cowfp && tip->i_cowfp->if_bytes) 171283104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(tip); 171352bfcdd7SDarrick J. Wong else 171452bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(tip); 1715f0bc4d13SDarrick J. Wong } 1716f0bc4d13SDarrick J. Wong 1717a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags); 1718a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags); 1719a133d952SDave Chinner 1720a133d952SDave Chinner /* 17216fb10d6dSBrian Foster * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems 17226fb10d6dSBrian Foster * have inode number owner values in the bmbt blocks that still refer to 17236fb10d6dSBrian Foster * the old inode. Scan each bmbt to fix up the owner values with the 17246fb10d6dSBrian Foster * inode number of the current inode. 
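	 * (v4 inodes carry no owner field in their bmbt blocks, and rmapbt
	 * filesystems take the extent-remap path above rather than swapping
	 * forks, so only v5 fork swaps reach these scans.)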
17256fb10d6dSBrian Foster */ 17266fb10d6dSBrian Foster if (src_log_flags & XFS_ILOG_DOWNER) { 17272dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, ip, tip); 17286fb10d6dSBrian Foster if (error) 17296fb10d6dSBrian Foster goto out_trans_cancel; 17306fb10d6dSBrian Foster } 17316fb10d6dSBrian Foster if (target_log_flags & XFS_ILOG_DOWNER) { 17322dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, tip, ip); 17336fb10d6dSBrian Foster if (error) 17346fb10d6dSBrian Foster goto out_trans_cancel; 17356fb10d6dSBrian Foster } 17366fb10d6dSBrian Foster 17376fb10d6dSBrian Foster /* 1738a133d952SDave Chinner * If this is a synchronous mount, make sure that the 1739a133d952SDave Chinner * transaction goes to disk before returning to the user. 1740a133d952SDave Chinner */ 1741a133d952SDave Chinner if (mp->m_flags & XFS_MOUNT_WSYNC) 1742a133d952SDave Chinner xfs_trans_set_sync(tp); 1743a133d952SDave Chinner 174470393313SChristoph Hellwig error = xfs_trans_commit(tp); 1745a133d952SDave Chinner 1746a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0); 1747a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1); 174839aff5fdSDarrick J. Wong 174965523218SChristoph Hellwig out_unlock: 175039aff5fdSDarrick J. Wong xfs_iunlock(ip, lock_flags); 175139aff5fdSDarrick J. Wong xfs_iunlock(tip, lock_flags); 175265523218SChristoph Hellwig unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1753a133d952SDave Chinner return error; 1754a133d952SDave Chinner 175539aff5fdSDarrick J. Wong out_trans_cancel: 175639aff5fdSDarrick J. Wong xfs_trans_cancel(tp); 175765523218SChristoph Hellwig goto out_unlock; 1758a133d952SDave Chinner } 1759
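/*
 * Illustrative sketch, not part of this file: roughly how an xfs_fsr-style
 * defragmenter would drive xfs_swap_extents() from userspace through the
 * XFS_IOC_SWAPEXT ioctl. Field usage follows struct xfs_swapext in
 * xfs_fs.h; the temp file setup (O_DIRECT open, preallocation, data copy)
 * and error handling are elided, and target_fd/tmp_fd/file_size/bstat are
 * placeholders.
 *
 *	struct xfs_swapext	sx = { 0 };
 *
 *	sx.sx_version = XFS_SX_VERSION;
 *	sx.sx_fdtarget = target_fd;	// file being defragmented
 *	sx.sx_fdtmp = tmp_fd;		// temp file holding the new layout
 *	sx.sx_offset = 0;		// must cover the entire file
 *	sx.sx_length = file_size;	// must equal both inodes' di_size
 *
 *	// sx_stat must hold a bulkstat snapshot of the target (e.g. via
 *	// XFS_IOC_FSBULKSTAT_SINGLE) taken before the data copy; the
 *	// ctime/mtime check above fails the swap with EBUSY if the file
 *	// changed afterwards.
 *	sx.sx_stat = bstat;
 *
 *	if (ioctl(target_fd, XFS_IOC_SWAPEXT, &sx) < 0)
 *		// EINVAL: incompatible fork formats; EBUSY: file changed
 *		handle_error();
 */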