10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 268988114SDave Chinner /* 368988114SDave Chinner * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4c24b5dfaSDave Chinner * Copyright (c) 2012 Red Hat, Inc. 568988114SDave Chinner * All Rights Reserved. 668988114SDave Chinner */ 768988114SDave Chinner #include "xfs.h" 868988114SDave Chinner #include "xfs_fs.h" 970a9883cSDave Chinner #include "xfs_shared.h" 10239880efSDave Chinner #include "xfs_format.h" 11239880efSDave Chinner #include "xfs_log_format.h" 12239880efSDave Chinner #include "xfs_trans_resv.h" 1368988114SDave Chinner #include "xfs_bit.h" 1468988114SDave Chinner #include "xfs_mount.h" 153ab78df2SDarrick J. Wong #include "xfs_defer.h" 1668988114SDave Chinner #include "xfs_inode.h" 1768988114SDave Chinner #include "xfs_btree.h" 18239880efSDave Chinner #include "xfs_trans.h" 1968988114SDave Chinner #include "xfs_alloc.h" 2068988114SDave Chinner #include "xfs_bmap.h" 2168988114SDave Chinner #include "xfs_bmap_util.h" 22a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 2368988114SDave Chinner #include "xfs_rtalloc.h" 2468988114SDave Chinner #include "xfs_error.h" 2568988114SDave Chinner #include "xfs_quota.h" 2668988114SDave Chinner #include "xfs_trans_space.h" 2768988114SDave Chinner #include "xfs_trace.h" 28c24b5dfaSDave Chinner #include "xfs_icache.h" 29f86f4037SDarrick J. Wong #include "xfs_iomap.h" 30f86f4037SDarrick J. Wong #include "xfs_reflink.h" 31fa5a3872SDarrick J. Wong #include "xfs_rtbitmap.h" 3268988114SDave Chinner 3368988114SDave Chinner /* Kernel only BMAP related definitions and functions */ 3468988114SDave Chinner 3568988114SDave Chinner /* 3668988114SDave Chinner * Convert the given file system block to a disk block. We have to treat it 3768988114SDave Chinner * differently based on whether the file is a real time file or not, because the 3868988114SDave Chinner * bmap code does. 
3968988114SDave Chinner */ 4068988114SDave Chinner xfs_daddr_t 4168988114SDave Chinner xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb) 4268988114SDave Chinner { 43ecfc28a4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 44ecfc28a4SChristoph Hellwig return XFS_FSB_TO_BB(ip->i_mount, fsb); 45ecfc28a4SChristoph Hellwig return XFS_FSB_TO_DADDR(ip->i_mount, fsb); 4668988114SDave Chinner } 4768988114SDave Chinner 4868988114SDave Chinner /* 493fbbbea3SDave Chinner * Routine to zero an extent on disk allocated to the specific inode. 503fbbbea3SDave Chinner * 513fbbbea3SDave Chinner * The VFS functions take a linearised filesystem block offset, so we have to 523fbbbea3SDave Chinner * convert the sparse xfs fsb to the right format first. 533fbbbea3SDave Chinner * VFS types are real funky, too. 543fbbbea3SDave Chinner */ 553fbbbea3SDave Chinner int 563fbbbea3SDave Chinner xfs_zero_extent( 573fbbbea3SDave Chinner struct xfs_inode *ip, 583fbbbea3SDave Chinner xfs_fsblock_t start_fsb, 593fbbbea3SDave Chinner xfs_off_t count_fsb) 603fbbbea3SDave Chinner { 613fbbbea3SDave Chinner struct xfs_mount *mp = ip->i_mount; 6230fa529eSChristoph Hellwig struct xfs_buftarg *target = xfs_inode_buftarg(ip); 633fbbbea3SDave Chinner xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); 643fbbbea3SDave Chinner sector_t block = XFS_BB_TO_FSBT(mp, sector); 653fbbbea3SDave Chinner 6630fa529eSChristoph Hellwig return blkdev_issue_zeroout(target->bt_bdev, 673dc29161SMatthew Wilcox block << (mp->m_super->s_blocksize_bits - 9), 683dc29161SMatthew Wilcox count_fsb << (mp->m_super->s_blocksize_bits - 9), 690b3a76e9SDave Chinner GFP_KERNEL, 0); 703fbbbea3SDave Chinner } 713fbbbea3SDave Chinner 7268988114SDave Chinner /* 7368988114SDave Chinner * Extent tree block counting routines. 7468988114SDave Chinner */ 7568988114SDave Chinner 7668988114SDave Chinner /* 77d29cb3e4SDarrick J. Wong * Count leaf blocks given a range of extent records. Delayed allocation 78d29cb3e4SDarrick J. 
 * extents are not counted towards the totals.  Returns the number of real
 * (non-delalloc) records seen and accumulates their block counts into @count.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		/* delalloc records have a null startblock; skip them */
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.  On success *nextents holds the number
 * of real extent records and *count the allocated fsblocks (including
 * bmbt blocks for btree-format forks).  Returns 0 or a negative errno.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;
	xfs_extlen_t		btblocks = 0;
	int			error;

	*nextents = 0;
	*count = 0;

	/* No fork (e.g. no attr fork) means nothing to count. */
	if (!ifp)
		return 0;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;

		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_btree_count_blocks(cur, &btblocks);
		xfs_btree_del_cursor(cur, error);
		if (error)
			return error;

		/*
		 * xfs_btree_count_blocks includes the root block contained in
		 * the inode fork in @btblocks, so subtract one because we're
		 * only interested in allocated disk blocks.
		 */
		*count += btblocks - 1;

		/* fall through to also count the leaf extents */
		fallthrough;
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		break;
	}

	return 0;
}

/*
 * Emit one getbmapx record for the (possibly reflink-trimmed) extent @got
 * into out[bmv->bmv_entries] and advance bmv->bmv_offset/bmv_length/
 * bmv_entries accordingly.  Returns 0 or a negative errno from the
 * reflink shared-extent lookup.
 */
static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false;
	int			error;

	/* Trim @got so it covers only a uniformly shared/unshared range. */
	error = xfs_reflink_trim_around_shared(ip, got, &shared);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Take the flush completion as being a point-in-time snapshot
		 * where there are no delalloc extents, and if any new ones
		 * have been created racily, just skip them as being 'after'
		 * the flush and so don't get reported.
		 */
		if (!(bmv->bmv_iflags & BMV_IF_DELALLOC))
			return 0;

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;	/* delalloc marker per getbmapx ABI */
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	/* getbmap works in basic blocks (BBs), not fsblocks */
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}

/*
 * Emit a hole record covering fsblocks [bno, end) into the next output
 * slot, unless the caller asked to suppress holes (BMV_IF_NO_HOLES).
 */
static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;	/* hole marker per getbmapx ABI */
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

/*
 * True when no further records can be emitted: the requested range is
 * exhausted or only the final reserved output slot remains.
 */
static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

/*
 * Advance @rec past the part just reported, up to @total_end.  Returns
 * false when the record has been fully consumed.  Used to walk the
 * shared/unshared sub-ranges of a single bmbt record.
 */
static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}

/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 *
 * Takes IOLOCK_SHARED for the whole call and the fork-appropriate
 * mapping lock (recorded in @lock) once a fork has been selected;
 * both are dropped via the out_unlock_* labels.
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	/* bmv_length == -1 means "to end of file"; anything below is bad */
	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		lock = xfs_ilock_attr_map_shared(ip);
		if (!xfs_inode_has_attr_fork(ip))
			goto out_unlock_ilock;

		max_len = 1LL << 32;
		break;
	case XFS_COW_FORK:
		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);

		/* No CoW fork?  Just return */
		if (!xfs_ifork_ptr(ip, whichfork))
			goto out_unlock_ilock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_disk_size)) {
			/* Flush dirty data so delalloc becomes real extents. */
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_diflags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	ifp = xfs_ifork_ptr(ip, whichfork);

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

	if (bmv->bmv_length == -1) {
		/* round max_len up to a BB-aligned fsblock boundary */
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	error = xfs_iread_extents(NULL, ip, whichfork);
	if (error)
		goto out_unlock_ilock;

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			/* Mark the last record emitted, if any. */
			if (bmv->bmv_entries > 0)
				out[bmv->bmv_entries - 1].bmv_oflags |=
								BMV_OF_LAST;

			/* Report a trailing hole up to EOF on the data fork. */
			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.
 * This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
void
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_off_t		start_byte,
	xfs_off_t		end_byte)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;

	/* delalloc extents only exist in-core, so they must be loaded */
	ASSERT(!xfs_need_iread_extents(ifp));

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	/* Walk backwards from the last extent overlapping the range. */
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		goto out_unlock;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);

		/*
		 * A delete can push the cursor forward. Step back to the
		 * previous extent on non-delalloc or extents outside the
		 * target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks.  Returns true when a speculative-preallocation trim looks
 * worthwhile; takes and drops ILOCK_SHARED internally for the bmap probe.
 */
bool
xfs_can_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	int			nimaps = 1;
	int			error;

	/*
	 * Caller must either hold the exclusive io lock; or be inactivating
	 * the inode, which guarantees there are no other users of the inode.
	 */
	if (!(VFS_I(ip)->i_state & I_FREEING))
		xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);

	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (xfs_need_iread_extents(&ip->i_df))
		return false;

	/*
	 * Only free real extents for inodes with persistent preallocations or
	 * the append-only flag.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (ip->i_delayed_blks == 0)
			return false;

	/*
	 * Do not try to free post-EOF blocks if EOF is beyond the end of the
	 * range supported by the page cache, because the truncation will loop
	 * forever.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	if (xfs_inode_has_bigrtalloc(ip))
		end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return false;

	/*
	 * Look up the mapping for the first block past EOF.  If we can't find
	 * it, there's nothing to free.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
			0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (error || nimaps == 0)
		return false;

	/*
	 * If there's a real mapping there or there are delayed allocation
	 * reservations, then we have post-EOF blocks to try to free.
	 */
	return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
}

/*
 * This is called to free any blocks beyond eof. The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.  Returns 0 or a negative errno.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/* Attach the dquots to the inode up front. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/* Wait on dio to ensure i_size has settled. */
	inode_dio_wait(VFS_I(ip));

	/*
	 * For preallocated files only free delayed allocations.
	 *
	 * Note that this means we also leave speculative preallocations in
	 * place for preallocated files.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
		if (ip->i_delayed_blks) {
			/* punch only the delalloc range beyond i_size */
			xfs_bmap_punch_delalloc_range(ip,
				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
				LLONG_MAX);
		}
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Do not update the on-disk file size. If we update the on-disk file
	 * size and then the system crashes before the contents of the file are
	 * flushed to disk then the files may be full of holes (ie NULL files
	 * bug).
	 */
	error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
				XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
	if (error)
		goto err_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	xfs_inode_clear_eofblocks_tag(ip);
	goto out_unlock;

err_cancel:
	/*
	 * If we get an error at this point we simply don't
	 * bother truncating the file.
	 */
	xfs_trans_cancel(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;
Wong error = xfs_qm_dqattach(ip); 662c24b5dfaSDave Chinner if (error) 663c24b5dfaSDave Chinner return error; 664c24b5dfaSDave Chinner 665c24b5dfaSDave Chinner if (len <= 0) 6662451337dSDave Chinner return -EINVAL; 667c24b5dfaSDave Chinner 668c24b5dfaSDave Chinner rt = XFS_IS_REALTIME_INODE(ip); 669c24b5dfaSDave Chinner extsz = xfs_get_extsz_hint(ip); 670c24b5dfaSDave Chinner 671c24b5dfaSDave Chinner count = len; 672c24b5dfaSDave Chinner imapp = &imaps[0]; 673c24b5dfaSDave Chinner startoffset_fsb = XFS_B_TO_FSBT(mp, offset); 674e093c4beSMax Reitz endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); 675e093c4beSMax Reitz allocatesize_fsb = endoffset_fsb - startoffset_fsb; 676c24b5dfaSDave Chinner 677c24b5dfaSDave Chinner /* 678c24b5dfaSDave Chinner * Allocate file space until done or until there is an error 679c24b5dfaSDave Chinner */ 680c24b5dfaSDave Chinner while (allocatesize_fsb && !error) { 681c24b5dfaSDave Chinner xfs_fileoff_t s, e; 6823de4eb10SDarrick J. Wong unsigned int dblocks, rblocks, resblks; 68335dc55b9SChristoph Hellwig int nimaps = 1; 684c24b5dfaSDave Chinner 685c24b5dfaSDave Chinner /* 686c24b5dfaSDave Chinner * Determine space reservations for data/realtime. 
687c24b5dfaSDave Chinner */ 688c24b5dfaSDave Chinner if (unlikely(extsz)) { 689c24b5dfaSDave Chinner s = startoffset_fsb; 690c24b5dfaSDave Chinner do_div(s, extsz); 691c24b5dfaSDave Chinner s *= extsz; 692c24b5dfaSDave Chinner e = startoffset_fsb + allocatesize_fsb; 6930703a8e1SDave Chinner div_u64_rem(startoffset_fsb, extsz, &temp); 6940703a8e1SDave Chinner if (temp) 695c24b5dfaSDave Chinner e += temp; 6960703a8e1SDave Chinner div_u64_rem(e, extsz, &temp); 6970703a8e1SDave Chinner if (temp) 698c24b5dfaSDave Chinner e += extsz - temp; 699c24b5dfaSDave Chinner } else { 700c24b5dfaSDave Chinner s = 0; 701c24b5dfaSDave Chinner e = allocatesize_fsb; 702c24b5dfaSDave Chinner } 703c24b5dfaSDave Chinner 704c24b5dfaSDave Chinner /* 705c24b5dfaSDave Chinner * The transaction reservation is limited to a 32-bit block 706c24b5dfaSDave Chinner * count, hence we need to limit the number of blocks we are 707c24b5dfaSDave Chinner * trying to reserve to avoid an overflow. We can't allocate 708c24b5dfaSDave Chinner * more than @nimaps extents, and an extent is limited on disk 70995f0b95eSChandan Babu R * to XFS_BMBT_MAX_EXTLEN (21 bits), so use that to enforce the 71095f0b95eSChandan Babu R * limit. 711c24b5dfaSDave Chinner */ 71295f0b95eSChandan Babu R resblks = min_t(xfs_fileoff_t, (e - s), 71395f0b95eSChandan Babu R (XFS_MAX_BMBT_EXTLEN * nimaps)); 714c24b5dfaSDave Chinner if (unlikely(rt)) { 71502b7ee4eSDarrick J. Wong dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 71602b7ee4eSDarrick J. Wong rblocks = resblks; 717c24b5dfaSDave Chinner } else { 71802b7ee4eSDarrick J. Wong dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); 71902b7ee4eSDarrick J. Wong rblocks = 0; 720c24b5dfaSDave Chinner } 721c24b5dfaSDave Chinner 7223de4eb10SDarrick J. Wong error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 7233de4eb10SDarrick J. Wong dblocks, rblocks, false, &tp); 724c24b5dfaSDave Chinner if (error) 7253de4eb10SDarrick J. 
Wong break; 726c24b5dfaSDave Chinner 72725576c54SChristoph Hellwig error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, 7284f86bb4bSChandan Babu R XFS_IEXT_ADD_NOSPLIT_CNT); 729727e1acdSChandan Babu R if (error) 73035b11010SDarrick J. Wong goto error; 731727e1acdSChandan Babu R 7326773da87SChristoph Hellwig /* 7336773da87SChristoph Hellwig * If the allocator cannot find a single free extent large 7346773da87SChristoph Hellwig * enough to cover the start block of the requested range, 7356773da87SChristoph Hellwig * xfs_bmapi_write will return -ENOSR. 7366773da87SChristoph Hellwig * 7376773da87SChristoph Hellwig * In that case we simply need to keep looping with the same 7386773da87SChristoph Hellwig * startoffset_fsb so that one of the following allocations 7396773da87SChristoph Hellwig * will eventually reach the requested range. 7406773da87SChristoph Hellwig */ 741c24b5dfaSDave Chinner error = xfs_bmapi_write(tp, ip, startoffset_fsb, 7424d1b97f9SDarrick J. Wong allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp, 743da781e64SBrian Foster &nimaps); 7446773da87SChristoph Hellwig if (error) { 7456773da87SChristoph Hellwig if (error != -ENOSR) 74635b11010SDarrick J. Wong goto error; 7476773da87SChristoph Hellwig error = 0; 7486773da87SChristoph Hellwig } else { 7496773da87SChristoph Hellwig startoffset_fsb += imapp->br_blockcount; 7506773da87SChristoph Hellwig allocatesize_fsb -= imapp->br_blockcount; 7516773da87SChristoph Hellwig } 752c24b5dfaSDave Chinner 7530b02c8c0SDave Chinner ip->i_diflags |= XFS_DIFLAG_PREALLOC; 7540b02c8c0SDave Chinner xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 7550b02c8c0SDave Chinner 75670393313SChristoph Hellwig error = xfs_trans_commit(tp); 757c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 758c24b5dfaSDave Chinner } 759c24b5dfaSDave Chinner 760c24b5dfaSDave Chinner return error; 761c24b5dfaSDave Chinner 76235b11010SDarrick J. 
Wong error: 7634906e215SChristoph Hellwig xfs_trans_cancel(tp); 764c24b5dfaSDave Chinner xfs_iunlock(ip, XFS_ILOCK_EXCL); 765c24b5dfaSDave Chinner return error; 766c24b5dfaSDave Chinner } 767c24b5dfaSDave Chinner 768bdb0d04fSChristoph Hellwig static int 769bdb0d04fSChristoph Hellwig xfs_unmap_extent( 77083aee9e4SChristoph Hellwig struct xfs_inode *ip, 771bdb0d04fSChristoph Hellwig xfs_fileoff_t startoffset_fsb, 772bdb0d04fSChristoph Hellwig xfs_filblks_t len_fsb, 773bdb0d04fSChristoph Hellwig int *done) 774c24b5dfaSDave Chinner { 775bdb0d04fSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 776bdb0d04fSChristoph Hellwig struct xfs_trans *tp; 777bdb0d04fSChristoph Hellwig uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 778bdb0d04fSChristoph Hellwig int error; 779c24b5dfaSDave Chinner 7803de4eb10SDarrick J. Wong error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0, 7813a1af6c3SDarrick J. Wong false, &tp); 782bdb0d04fSChristoph Hellwig if (error) 7833a1af6c3SDarrick J. Wong return error; 784c24b5dfaSDave Chinner 78525576c54SChristoph Hellwig error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, 78685ef08b5SChandan Babu R XFS_IEXT_PUNCH_HOLE_CNT); 78785ef08b5SChandan Babu R if (error) 78885ef08b5SChandan Babu R goto out_trans_cancel; 78985ef08b5SChandan Babu R 7902af52842SBrian Foster error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done); 791bdb0d04fSChristoph Hellwig if (error) 792c8eac49eSBrian Foster goto out_trans_cancel; 793bdb0d04fSChristoph Hellwig 794bdb0d04fSChristoph Hellwig error = xfs_trans_commit(tp); 795bdb0d04fSChristoph Hellwig out_unlock: 796bdb0d04fSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 797bdb0d04fSChristoph Hellwig return error; 798bdb0d04fSChristoph Hellwig 799bdb0d04fSChristoph Hellwig out_trans_cancel: 800bdb0d04fSChristoph Hellwig xfs_trans_cancel(tp); 801bdb0d04fSChristoph Hellwig goto out_unlock; 802bdb0d04fSChristoph Hellwig } 803bdb0d04fSChristoph Hellwig 804249bd908SDave Chinner /* Caller must first 
 * wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/*
	 * Make sure we extend the flush out to extent alignment
	 * boundaries so any extent range overlapping the start/end
	 * of the modification we are about to do is clean and idle.
	 */
	rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
	start = rounddown_64(offset, rounding);
	end = roundup_64(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

/*
 * Punch out the given byte range: unmap all full blocks within it, then
 * zero any remaining partial blocks at either end, taking care never to
 * zero beyond EOF.  i_size is not changed.
 */
int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	/* Round inward: only whole blocks can be unmapped. */
	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* We can only free complete realtime extents. */
	if (xfs_inode_has_bigrtalloc(ip)) {
		startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
		endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.  Loop until
	 * xfs_unmap_extent() reports the whole range is unmapped; each call
	 * runs in its own transaction.
	 */
	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmapped all full blocks we'll have to zero out any
	 * partial block at the beginning and/or end.  xfs_zero_range is smart
	 * enough to skip any holes, including those we just created, but we
	 * must take care not to zero beyond EOF and enlarge i_size.
	 */
	if (offset >= XFS_ISIZE(ip))
		return 0;
	if (offset + len > XFS_ISIZE(ip))
		len = XFS_ISIZE(ip) - offset;
	error = xfs_zero_range(ip, offset, len, NULL);
	if (error)
		return error;

	/*
	 * If we zeroed right up to EOF and EOF straddles a page boundary we
	 * must make sure that the post-EOF area is also zeroed because the
	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
	 * Writeback of the eof page will do this, albeit clumsily.
	 */
	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
	}

	return error;
}

/*
 * Common setup before shifting extents (collapse/insert range): trim
 * post-EOF speculative preallocations, stabilize the start boundary, flush
 * and invalidate the page cache, and cancel stale COW fork extents.
 */
static int
xfs_prepare_shift(
	struct xfs_inode	*ip,
	loff_t			offset)
{
	unsigned int		rounding;
	int			error;

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip)) {
		error = xfs_free_eofblocks(ip);
		if (error)
			return error;
	}

	/*
	 * Shift operations must stabilize the start block offset boundary along
	 * with the full range of the operation. If we don't, a COW writeback
	 * completion could race with an insert, front merge with the start
	 * extent (after split) during the shift and corrupt the file. Start
	 * with the allocation unit just prior to the start to stabilize the
	 * boundary.
	 */
	rounding = xfs_inode_alloc_unitsize(ip);
	offset = rounddown_64(offset, rounding);
	if (offset)
		offset -= rounding;

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
	if (error)
		return error;

	/*
	 * Clean out anything hanging around in the cow fork now that
	 * we've flushed all the dirty data out to disk to avoid having
	 * CoW extents at the wrong offsets.
	 */
	if (xfs_inode_has_cow_data(ip)) {
		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
				true);
		if (error)
			return error;
	}

	return 0;
}

/*
 * xfs_collapse_file_space()
 *	This routine frees disk space and shifts extents for the given file.
 *	The first thing we do is to free data blocks in the specified range
 *	by calling xfs_free_file_space(). It would also sync dirty data
 *	and invalidate page cache over the region on which collapse range
 *	is working. Then it shifts extent records to the left to cover the
 *	hole.
 * RETURNS:
 *	0 on success
 *	errno on error
 *
 */
int
xfs_collapse_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
	bool			done = false;

	/* Caller must already hold the IO and mmap locks exclusively. */
	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);

	trace_xfs_collapse_file_space(ip);

	error = xfs_free_file_space(ip, offset, len);
	if (error)
		return error;

	error = xfs_prepare_shift(ip, offset);
	if (error)
		return error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* One transaction, rolled via xfs_defer_finish() between shifts. */
	while (!done) {
		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
				&done);
		if (error)
			goto out_trans_cancel;
		if (done)
			break;

		/* finish any deferred frees and roll the transaction */
		error = xfs_defer_finish(&tp);
		if (error)
			goto out_trans_cancel;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * xfs_insert_file_space()
 *	This routine creates hole space by shifting extents for the given file.
 *	The first thing we do is to sync dirty data and invalidate page cache
 *	over the region on which insert range is working.
 *	Then we split an extent
 *	in two at the given offset by calling xfs_bmap_split_extent,
 *	and shift all extent records which are laying between [offset,
 *	last allocated extent] to the right to reserve the hole range.
 * RETURNS:
 *	0 on success
 *	errno on error
 */
int
xfs_insert_file_space(
	struct xfs_inode	*ip,
	loff_t			offset,
	loff_t			len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
	bool			done = false;

	/* Caller must already hold the IO and mmap locks exclusively. */
	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);

	trace_xfs_insert_file_space(ip);

	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
	if (error)
		return error;

	error = xfs_prepare_shift(ip, offset);
	if (error)
		return error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	/*
	 * The extent shifting code works on extent granularity. So, if stop_fsb
	 * is not the starting block of extent, we need to split the extent at
	 * stop_fsb.
	 */
	error = xfs_bmap_split_extent(tp, ip, stop_fsb);
	if (error)
		goto out_trans_cancel;

	/*
	 * Finish deferred work (including from the split above) and roll the
	 * transaction before each shift, then shift until done.
	 */
	do {
		error = xfs_defer_finish(&tp);
		if (error)
			goto out_trans_cancel;

		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
				&done, stop_fsb);
		if (error)
			goto out_trans_cancel;
	} while (!done);

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * We need to check that the format of the data fork in the temporary inode is
 * valid for the target inode before doing the swap. This is not a problem with
 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
 * data fork depending on the space the attribute fork is taking so we can get
 * invalid formats on the target inode.
 *
 * E.g. target has space for 7 extents in extent format, temp inode only has
 * space for 6.
 * If we defragment down to 7 extents, then the tmp format is a
 * btree, but when swapped it needs to be in extent format. Hence we can't just
 * blindly swap data forks on attr2 filesystems.
 *
 * Note that we check the swap in both directions so that we don't end up with
 * a corrupt temporary inode, either.
 *
 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
 * inode will prevent this situation from occurring, so all we do here is
 * reject and log the attempt. Basically we are putting the responsibility on
 * userspace to get this right.
 */
static int
xfs_swap_extents_check_format(
	struct xfs_inode	*ip,	/* target inode */
	struct xfs_inode	*tip)	/* tmp inode */
{
	struct xfs_ifork	*ifp = &ip->i_df;
	struct xfs_ifork	*tifp = &tip->i_df;

	/* User/group/project quota ids must match if quotas are enforced. */
	if (XFS_IS_QUOTA_ON(ip->i_mount) &&
	    (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
	     !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
	     ip->i_projid != tip->i_projid))
		return -EINVAL;

	/* Should never get a local format */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    tifp->if_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	/*
	 * If the target inode has fewer extents than the temporary inode then
	 * why did userspace call us?
	 */
	if (ifp->if_nextents < tifp->if_nextents)
		return -EINVAL;

	/*
	 * If we have to use the (expensive) rmap swap method, we can
	 * handle any number of extents and any format.
	 */
	if (xfs_has_rmapbt(ip->i_mount))
		return 0;

	/*
	 * If the target inode is in extent form and the temp inode is in btree
	 * form then we will end up with the target inode in the wrong format
	 * as we already know there are less extents in the temp inode.
	 */
	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
	    tifp->if_format == XFS_DINODE_FMT_BTREE)
		return -EINVAL;

	/* Check temp in extent form to max in target */
	if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
	    tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
		return -EINVAL;

	/* Check target in extent form to max in temp */
	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
	    ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
		return -EINVAL;

	/*
	 * If we are in a btree format, check that the temp root block will fit
	 * in the target and that it has enough extents to be in btree format
	 * in the target.
	 *
	 * Note that we have to be careful to allow btree->extent conversions
	 * (a common defrag case) which will occur when the temp inode is in
	 * extent format...
	 */
	if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
		if (xfs_inode_has_attr_fork(ip) &&
		    XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
			return -EINVAL;
		if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
			return -EINVAL;
	}

	/* Reciprocal target->temp btree format checks */
	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		if (xfs_inode_has_attr_fork(tip) &&
		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
			return -EINVAL;
		if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
			return -EINVAL;
	}

	return 0;
}

/*
 * Flush and drop all cached data for a swap candidate file.  Any page cache
 * left after the flush means the file is not behaving like the O_DIRECT
 * ftmp file xfs_fsr hands us, so fail with -EINVAL.
 */
static int
xfs_swap_extent_flush(
	struct xfs_inode	*ip)
{
	int			error;

	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
	if (error)
		return error;
	truncate_pagecache_range(VFS_I(ip), 0, -1);

	/* Verify O_DIRECT for ftmp */
	if (VFS_I(ip)->i_mapping->nrpages)
		return -EINVAL;
	return 0;
}

/*
 * Move extents from one file to another, when rmap is enabled.
 */
STATIC int
xfs_swap_extent_rmap(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	struct xfs_inode	*tip)
{
	struct xfs_trans	*tp = *tpp;
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	uirec;
	struct xfs_bmbt_irec	tirec;
	xfs_fileoff_t		offset_fsb;
	xfs_fileoff_t		end_fsb;
	xfs_filblks_t		count_fsb;
	int			error;
	xfs_filblks_t		ilen;
	xfs_filblks_t		rlen;
	int			nimaps;
	uint64_t		tip_flags2;

	/*
	 * If the source file has shared blocks, we must flag the donor
	 * file as having shared blocks so that we get the shared-block
	 * rmap functions when we go to fix up the rmaps. The flags
	 * will be switched for real later.
	 */
	tip_flags2 = tip->i_diflags2;
	if (ip->i_diflags2 & XFS_DIFLAG2_REFLINK)
		tip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	offset_fsb = 0;
	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);

	/* Walk the donor file extent by extent from offset 0 to EOF. */
	while (count_fsb) {
		/* Read extent from the donor file */
		nimaps = 1;
		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
				&nimaps, 0);
		if (error)
			goto out;
		ASSERT(nimaps == 1);
		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);

		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
		ilen = tirec.br_blockcount;

		/* Unmap the old blocks in the source file. */
		while (tirec.br_blockcount) {
			ASSERT(tp->t_highest_agno == NULLAGNUMBER);
			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);

			/* Read extent from the source file */
			nimaps = 1;
			error = xfs_bmapi_read(ip, tirec.br_startoff,
					tirec.br_blockcount, &irec,
					&nimaps, 0);
			if (error)
				goto out;
			ASSERT(nimaps == 1);
			ASSERT(tirec.br_startoff == irec.br_startoff);
			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);

			/* Trim the extent to the smaller of the two pieces. */
			uirec = tirec;
			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
					tirec.br_blockcount,
					irec.br_blockcount);
			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);

			/* Reserve extent-count headroom for real extents. */
			if (xfs_bmap_is_real_extent(&uirec)) {
				error = xfs_iext_count_extend(tp, ip,
						XFS_DATA_FORK,
						XFS_IEXT_SWAP_RMAP_CNT);
				if (error)
					goto out;
			}

			if (xfs_bmap_is_real_extent(&irec)) {
				error = xfs_iext_count_extend(tp, tip,
						XFS_DATA_FORK,
						XFS_IEXT_SWAP_RMAP_CNT);
				if (error)
					goto out;
			}

			/* Remove the mapping from the donor file. */
			xfs_bmap_unmap_extent(tp, tip, XFS_DATA_FORK, &uirec);

			/* Remove the mapping from the source file. */
			xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &irec);

			/* Map the donor file's blocks into the source file. */
			xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, &uirec);

			/* Map the source file's blocks into the donor file. */
			xfs_bmap_map_extent(tp, tip, XFS_DATA_FORK, &irec);

			/* Finishing the deferred ops may roll *tpp. */
			error = xfs_defer_finish(tpp);
			tp = *tpp;
			if (error)
				goto out;

			/* Advance past the piece we just swapped. */
			tirec.br_startoff += rlen;
			if (tirec.br_startblock != HOLESTARTBLOCK &&
			    tirec.br_startblock != DELAYSTARTBLOCK)
				tirec.br_startblock += rlen;
			tirec.br_blockcount -= rlen;
		}

		/* Roll on... */
		count_fsb -= ilen;
		offset_fsb += ilen;
	}

	tip->i_diflags2 = tip_flags2;
	return 0;

out:
	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
	/* Restore the donor's original flags on failure. */
	tip->i_diflags2 = tip_flags2;
	return error;
}

/* Swap the extents of two files by swapping data forks. */
STATIC int
xfs_swap_extent_forks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_inode	*tip,
	int			*src_log_flags,
	int			*target_log_flags)
{
	xfs_filblks_t		aforkblks = 0;
	xfs_filblks_t		taforkblks = 0;
	xfs_extnum_t		junk;
	uint64_t		tmp;
	int			error;

	/*
	 * Count the number of extended attribute blocks
	 */
	if (xfs_inode_has_attr_fork(ip) && ip->i_af.if_nextents > 0 &&
	    ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
				&aforkblks);
		if (error)
			return error;
Wong } 1373932b42c6SDarrick J. Wong if (xfs_inode_has_attr_fork(tip) && tip->i_af.if_nextents > 0 && 13742ed5b09bSDarrick J. Wong tip->i_af.if_format != XFS_DINODE_FMT_LOCAL) { 1375e7f5d5caSDarrick J. Wong error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk, 137639aff5fdSDarrick J. Wong &taforkblks); 137739aff5fdSDarrick J. Wong if (error) 137839aff5fdSDarrick J. Wong return error; 137939aff5fdSDarrick J. Wong } 138039aff5fdSDarrick J. Wong 138139aff5fdSDarrick J. Wong /* 13826fb10d6dSBrian Foster * Btree format (v3) inodes have the inode number stamped in the bmbt 13836fb10d6dSBrian Foster * block headers. We can't start changing the bmbt blocks until the 13846fb10d6dSBrian Foster * inode owner change is logged so recovery does the right thing in the 13856fb10d6dSBrian Foster * event of a crash. Set the owner change log flags now and leave the 13866fb10d6dSBrian Foster * bmbt scan as the last step. 138739aff5fdSDarrick J. Wong */ 138838c26bfdSDave Chinner if (xfs_has_v3inodes(ip->i_mount)) { 1389f7e67b20SChristoph Hellwig if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) 139039aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DOWNER; 1391f7e67b20SChristoph Hellwig if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE) 139239aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DOWNER; 13936471e9c5SChristoph Hellwig } 139439aff5fdSDarrick J. Wong 139539aff5fdSDarrick J. Wong /* 139639aff5fdSDarrick J. Wong * Swap the data forks of the inodes 139739aff5fdSDarrick J. Wong */ 1398897992b7SGustavo A. R. Silva swap(ip->i_df, tip->i_df); 139939aff5fdSDarrick J. Wong 140039aff5fdSDarrick J. Wong /* 140139aff5fdSDarrick J. Wong * Fix the on-disk inode values 140239aff5fdSDarrick J. Wong */ 14036e73a545SChristoph Hellwig tmp = (uint64_t)ip->i_nblocks; 14046e73a545SChristoph Hellwig ip->i_nblocks = tip->i_nblocks - taforkblks + aforkblks; 14056e73a545SChristoph Hellwig tip->i_nblocks = tmp + taforkblks - aforkblks; 140639aff5fdSDarrick J. Wong 140739aff5fdSDarrick J. 
Wong /* 140839aff5fdSDarrick J. Wong * The extents in the source inode could still contain speculative 140939aff5fdSDarrick J. Wong * preallocation beyond EOF (e.g. the file is open but not modified 141039aff5fdSDarrick J. Wong * while defrag is in progress). In that case, we need to copy over the 141139aff5fdSDarrick J. Wong * number of delalloc blocks the data fork in the source inode is 141239aff5fdSDarrick J. Wong * tracking beyond EOF so that when the fork is truncated away when the 141339aff5fdSDarrick J. Wong * temporary inode is unlinked we don't underrun the i_delayed_blks 141439aff5fdSDarrick J. Wong * counter on that inode. 141539aff5fdSDarrick J. Wong */ 141639aff5fdSDarrick J. Wong ASSERT(tip->i_delayed_blks == 0); 141739aff5fdSDarrick J. Wong tip->i_delayed_blks = ip->i_delayed_blks; 141839aff5fdSDarrick J. Wong ip->i_delayed_blks = 0; 141939aff5fdSDarrick J. Wong 1420f7e67b20SChristoph Hellwig switch (ip->i_df.if_format) { 142139aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 142239aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DEXT; 142339aff5fdSDarrick J. Wong break; 142439aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 142538c26bfdSDave Chinner ASSERT(!xfs_has_v3inodes(ip->i_mount) || 142639aff5fdSDarrick J. Wong (*src_log_flags & XFS_ILOG_DOWNER)); 142739aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DBROOT; 142839aff5fdSDarrick J. Wong break; 142939aff5fdSDarrick J. Wong } 143039aff5fdSDarrick J. Wong 1431f7e67b20SChristoph Hellwig switch (tip->i_df.if_format) { 143239aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS: 143339aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DEXT; 143439aff5fdSDarrick J. Wong break; 143539aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE: 143639aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DBROOT; 143738c26bfdSDave Chinner ASSERT(!xfs_has_v3inodes(ip->i_mount) || 143839aff5fdSDarrick J. Wong (*target_log_flags & XFS_ILOG_DOWNER)); 143939aff5fdSDarrick J. 
Wong break; 144039aff5fdSDarrick J. Wong } 144139aff5fdSDarrick J. Wong 144239aff5fdSDarrick J. Wong return 0; 144339aff5fdSDarrick J. Wong } 144439aff5fdSDarrick J. Wong 14452dd3d709SBrian Foster /* 14462dd3d709SBrian Foster * Fix up the owners of the bmbt blocks to refer to the current inode. The 14472dd3d709SBrian Foster * change owner scan attempts to order all modified buffers in the current 14482dd3d709SBrian Foster * transaction. In the event of ordered buffer failure, the offending buffer is 14492dd3d709SBrian Foster * physically logged as a fallback and the scan returns -EAGAIN. We must roll 14502dd3d709SBrian Foster * the transaction in this case to replenish the fallback log reservation and 14512dd3d709SBrian Foster * restart the scan. This process repeats until the scan completes. 14522dd3d709SBrian Foster */ 14532dd3d709SBrian Foster static int 14542dd3d709SBrian Foster xfs_swap_change_owner( 14552dd3d709SBrian Foster struct xfs_trans **tpp, 14562dd3d709SBrian Foster struct xfs_inode *ip, 14572dd3d709SBrian Foster struct xfs_inode *tmpip) 14582dd3d709SBrian Foster { 14592dd3d709SBrian Foster int error; 14602dd3d709SBrian Foster struct xfs_trans *tp = *tpp; 14612dd3d709SBrian Foster 14622dd3d709SBrian Foster do { 14632dd3d709SBrian Foster error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino, 14642dd3d709SBrian Foster NULL); 14652dd3d709SBrian Foster /* success or fatal error */ 14662dd3d709SBrian Foster if (error != -EAGAIN) 14672dd3d709SBrian Foster break; 14682dd3d709SBrian Foster 14692dd3d709SBrian Foster error = xfs_trans_roll(tpp); 14702dd3d709SBrian Foster if (error) 14712dd3d709SBrian Foster break; 14722dd3d709SBrian Foster tp = *tpp; 14732dd3d709SBrian Foster 14742dd3d709SBrian Foster /* 14752dd3d709SBrian Foster * Redirty both inodes so they can relog and keep the log tail 14762dd3d709SBrian Foster * moving forward. 
14772dd3d709SBrian Foster */ 14782dd3d709SBrian Foster xfs_trans_ijoin(tp, ip, 0); 14792dd3d709SBrian Foster xfs_trans_ijoin(tp, tmpip, 0); 14802dd3d709SBrian Foster xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 14812dd3d709SBrian Foster xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE); 14822dd3d709SBrian Foster } while (true); 14832dd3d709SBrian Foster 14842dd3d709SBrian Foster return error; 14852dd3d709SBrian Foster } 14862dd3d709SBrian Foster 14874ef897a2SDave Chinner int 1488a133d952SDave Chinner xfs_swap_extents( 1489e06259aaSDarrick J. Wong struct xfs_inode *ip, /* target inode */ 1490e06259aaSDarrick J. Wong struct xfs_inode *tip, /* tmp inode */ 1491e06259aaSDarrick J. Wong struct xfs_swapext *sxp) 1492a133d952SDave Chinner { 1493e06259aaSDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 1494e06259aaSDarrick J. Wong struct xfs_trans *tp; 1495e06259aaSDarrick J. Wong struct xfs_bstat *sbp = &sxp->sx_stat; 1496a133d952SDave Chinner int src_log_flags, target_log_flags; 1497a133d952SDave Chinner int error = 0; 1498c8ce540dSDarrick J. Wong uint64_t f; 14992dd3d709SBrian Foster int resblks = 0; 1500f74681baSBrian Foster unsigned int flags = 0; 150175d1e312SJeff Layton struct timespec64 ctime, mtime; 1502a133d952SDave Chinner 1503a133d952SDave Chinner /* 1504723cac48SDave Chinner * Lock the inodes against other IO, page faults and truncate to 1505723cac48SDave Chinner * begin with. Then we can ensure the inodes are flushed and have no 1506723cac48SDave Chinner * page cache safely. Once we have done this we can take the ilocks and 1507723cac48SDave Chinner * do the rest of the checks. 
1508a133d952SDave Chinner */ 150965523218SChristoph Hellwig lock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1510d2c292d8SJan Kara filemap_invalidate_lock_two(VFS_I(ip)->i_mapping, 1511d2c292d8SJan Kara VFS_I(tip)->i_mapping); 1512a133d952SDave Chinner 1513a133d952SDave Chinner /* Verify that both files have the same format */ 1514c19b3b05SDave Chinner if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) { 15152451337dSDave Chinner error = -EINVAL; 1516a133d952SDave Chinner goto out_unlock; 1517a133d952SDave Chinner } 1518a133d952SDave Chinner 1519a133d952SDave Chinner /* Verify both files are either real-time or non-realtime */ 1520a133d952SDave Chinner if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { 15212451337dSDave Chinner error = -EINVAL; 1522a133d952SDave Chinner goto out_unlock; 1523a133d952SDave Chinner } 1524a133d952SDave Chinner 15252713fefaSDarrick J. Wong error = xfs_qm_dqattach(ip); 15262713fefaSDarrick J. Wong if (error) 15272713fefaSDarrick J. Wong goto out_unlock; 15282713fefaSDarrick J. Wong 15292713fefaSDarrick J. Wong error = xfs_qm_dqattach(tip); 15302713fefaSDarrick J. Wong if (error) 15312713fefaSDarrick J. Wong goto out_unlock; 15322713fefaSDarrick J. Wong 15334ef897a2SDave Chinner error = xfs_swap_extent_flush(ip); 1534a133d952SDave Chinner if (error) 1535a133d952SDave Chinner goto out_unlock; 15364ef897a2SDave Chinner error = xfs_swap_extent_flush(tip); 15374ef897a2SDave Chinner if (error) 15384ef897a2SDave Chinner goto out_unlock; 1539a133d952SDave Chinner 154096987eeaSChristoph Hellwig if (xfs_inode_has_cow_data(tip)) { 154196987eeaSChristoph Hellwig error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true); 154296987eeaSChristoph Hellwig if (error) 15438bc3b5e4SDarrick J. Wong goto out_unlock; 154496987eeaSChristoph Hellwig } 154596987eeaSChristoph Hellwig 15461f08af52SDarrick J. Wong /* 15471f08af52SDarrick J. 
Wong * Extent "swapping" with rmap requires a permanent reservation and 15481f08af52SDarrick J. Wong * a block reservation because it's really just a remap operation 15491f08af52SDarrick J. Wong * performed with log redo items! 15501f08af52SDarrick J. Wong */ 155138c26bfdSDave Chinner if (xfs_has_rmapbt(mp)) { 1552b3fed434SBrian Foster int w = XFS_DATA_FORK; 1553daf83964SChristoph Hellwig uint32_t ipnext = ip->i_df.if_nextents; 1554daf83964SChristoph Hellwig uint32_t tipnext = tip->i_df.if_nextents; 1555b3fed434SBrian Foster 15561f08af52SDarrick J. Wong /* 1557b3fed434SBrian Foster * Conceptually this shouldn't affect the shape of either bmbt, 1558b3fed434SBrian Foster * but since we atomically move extents one by one, we reserve 1559b3fed434SBrian Foster * enough space to rebuild both trees. 15601f08af52SDarrick J. Wong */ 1561b3fed434SBrian Foster resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w); 1562b3fed434SBrian Foster resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w); 1563b3fed434SBrian Foster 1564b3fed434SBrian Foster /* 1565f74681baSBrian Foster * If either inode straddles a bmapbt block allocation boundary, 1566f74681baSBrian Foster * the rmapbt algorithm triggers repeated allocs and frees as 1567f74681baSBrian Foster * extents are remapped. This can exhaust the block reservation 1568f74681baSBrian Foster * prematurely and cause shutdown. Return freed blocks to the 1569f74681baSBrian Foster * transaction reservation to counter this behavior. 
1570b3fed434SBrian Foster */ 1571f74681baSBrian Foster flags |= XFS_TRANS_RES_FDBLKS; 15722dd3d709SBrian Foster } 1573f74681baSBrian Foster error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags, 1574f74681baSBrian Foster &tp); 1575253f4911SChristoph Hellwig if (error) 1576a133d952SDave Chinner goto out_unlock; 1577723cac48SDave Chinner 1578723cac48SDave Chinner /* 1579723cac48SDave Chinner * Lock and join the inodes to the tansaction so that transaction commit 1580723cac48SDave Chinner * or cancel will unlock the inodes from this point onwards. 1581723cac48SDave Chinner */ 15827c2d238aSDarrick J. Wong xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL); 158339aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, ip, 0); 158439aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, tip, 0); 1585723cac48SDave Chinner 1586a133d952SDave Chinner 1587a133d952SDave Chinner /* Verify all data are being swapped */ 1588a133d952SDave Chinner if (sxp->sx_offset != 0 || 158913d2c10bSChristoph Hellwig sxp->sx_length != ip->i_disk_size || 159013d2c10bSChristoph Hellwig sxp->sx_length != tip->i_disk_size) { 15912451337dSDave Chinner error = -EFAULT; 15924ef897a2SDave Chinner goto out_trans_cancel; 1593a133d952SDave Chinner } 1594a133d952SDave Chinner 1595a133d952SDave Chinner trace_xfs_swap_extent_before(ip, 0); 1596a133d952SDave Chinner trace_xfs_swap_extent_before(tip, 1); 1597a133d952SDave Chinner 1598a133d952SDave Chinner /* check inode formats now that data is flushed */ 1599a133d952SDave Chinner error = xfs_swap_extents_check_format(ip, tip); 1600a133d952SDave Chinner if (error) { 1601a133d952SDave Chinner xfs_notice(mp, 1602a133d952SDave Chinner "%s: inode 0x%llx format is incompatible for exchanging.", 1603a133d952SDave Chinner __func__, ip->i_ino); 16044ef897a2SDave Chinner goto out_trans_cancel; 1605a133d952SDave Chinner } 1606a133d952SDave Chinner 1607a133d952SDave Chinner /* 1608a133d952SDave Chinner * Compare the current change & modify times with that 
1609a133d952SDave Chinner * passed in. If they differ, we abort this swap. 1610a133d952SDave Chinner * This is the mechanism used to ensure the calling 1611a133d952SDave Chinner * process that the file was not changed out from 1612a133d952SDave Chinner * under it. 1613a133d952SDave Chinner */ 1614a0a415e3SJeff Layton ctime = inode_get_ctime(VFS_I(ip)); 161575d1e312SJeff Layton mtime = inode_get_mtime(VFS_I(ip)); 1616a0a415e3SJeff Layton if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) || 1617a0a415e3SJeff Layton (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) || 161875d1e312SJeff Layton (sbp->bs_mtime.tv_sec != mtime.tv_sec) || 161975d1e312SJeff Layton (sbp->bs_mtime.tv_nsec != mtime.tv_nsec)) { 16202451337dSDave Chinner error = -EBUSY; 162181217683SDave Chinner goto out_trans_cancel; 1622a133d952SDave Chinner } 1623a133d952SDave Chinner 162421b5c978SDave Chinner /* 162521b5c978SDave Chinner * Note the trickiness in setting the log flags - we set the owner log 162621b5c978SDave Chinner * flag on the opposite inode (i.e. the inode we are setting the new 162721b5c978SDave Chinner * owner to be) because once we swap the forks and log that, log 162821b5c978SDave Chinner * recovery is going to see the fork as owned by the swapped inode, 162921b5c978SDave Chinner * not the pre-swapped inodes. 163021b5c978SDave Chinner */ 163121b5c978SDave Chinner src_log_flags = XFS_ILOG_CORE; 163221b5c978SDave Chinner target_log_flags = XFS_ILOG_CORE; 163339aff5fdSDarrick J. Wong 163438c26bfdSDave Chinner if (xfs_has_rmapbt(mp)) 16351f08af52SDarrick J. Wong error = xfs_swap_extent_rmap(&tp, ip, tip); 16361f08af52SDarrick J. Wong else 163739aff5fdSDarrick J. Wong error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags, 163839aff5fdSDarrick J. Wong &target_log_flags); 163921b5c978SDave Chinner if (error) 164021b5c978SDave Chinner goto out_trans_cancel; 1641a133d952SDave Chinner 1642f0bc4d13SDarrick J. Wong /* Do we have to swap reflink flags? 
*/ 16433e09ab8fSChristoph Hellwig if ((ip->i_diflags2 & XFS_DIFLAG2_REFLINK) ^ 16443e09ab8fSChristoph Hellwig (tip->i_diflags2 & XFS_DIFLAG2_REFLINK)) { 16453e09ab8fSChristoph Hellwig f = ip->i_diflags2 & XFS_DIFLAG2_REFLINK; 16463e09ab8fSChristoph Hellwig ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 16473e09ab8fSChristoph Hellwig ip->i_diflags2 |= tip->i_diflags2 & XFS_DIFLAG2_REFLINK; 16483e09ab8fSChristoph Hellwig tip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 16493e09ab8fSChristoph Hellwig tip->i_diflags2 |= f & XFS_DIFLAG2_REFLINK; 165052bfcdd7SDarrick J. Wong } 165152bfcdd7SDarrick J. Wong 165252bfcdd7SDarrick J. Wong /* Swap the cow forks. */ 165338c26bfdSDave Chinner if (xfs_has_reflink(mp)) { 1654f7e67b20SChristoph Hellwig ASSERT(!ip->i_cowfp || 1655f7e67b20SChristoph Hellwig ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); 1656f7e67b20SChristoph Hellwig ASSERT(!tip->i_cowfp || 1657f7e67b20SChristoph Hellwig tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS); 165852bfcdd7SDarrick J. Wong 1659897992b7SGustavo A. R. Silva swap(ip->i_cowfp, tip->i_cowfp); 166052bfcdd7SDarrick J. Wong 16615bcffe30SChristoph Hellwig if (ip->i_cowfp && ip->i_cowfp->if_bytes) 166283104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(ip); 166352bfcdd7SDarrick J. Wong else 166452bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(ip); 16655bcffe30SChristoph Hellwig if (tip->i_cowfp && tip->i_cowfp->if_bytes) 166683104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(tip); 166752bfcdd7SDarrick J. Wong else 166852bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(tip); 1669f0bc4d13SDarrick J. Wong } 1670f0bc4d13SDarrick J. 
Wong 1671a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags); 1672a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags); 1673a133d952SDave Chinner 1674a133d952SDave Chinner /* 16756fb10d6dSBrian Foster * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems 16766fb10d6dSBrian Foster * have inode number owner values in the bmbt blocks that still refer to 16776fb10d6dSBrian Foster * the old inode. Scan each bmbt to fix up the owner values with the 16786fb10d6dSBrian Foster * inode number of the current inode. 16796fb10d6dSBrian Foster */ 16806fb10d6dSBrian Foster if (src_log_flags & XFS_ILOG_DOWNER) { 16812dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, ip, tip); 16826fb10d6dSBrian Foster if (error) 16836fb10d6dSBrian Foster goto out_trans_cancel; 16846fb10d6dSBrian Foster } 16856fb10d6dSBrian Foster if (target_log_flags & XFS_ILOG_DOWNER) { 16862dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, tip, ip); 16876fb10d6dSBrian Foster if (error) 16886fb10d6dSBrian Foster goto out_trans_cancel; 16896fb10d6dSBrian Foster } 16906fb10d6dSBrian Foster 16916fb10d6dSBrian Foster /* 1692a133d952SDave Chinner * If this is a synchronous mount, make sure that the 1693a133d952SDave Chinner * transaction goes to disk before returning to the user. 1694a133d952SDave Chinner */ 16950560f31aSDave Chinner if (xfs_has_wsync(mp)) 1696a133d952SDave Chinner xfs_trans_set_sync(tp); 1697a133d952SDave Chinner 169870393313SChristoph Hellwig error = xfs_trans_commit(tp); 1699a133d952SDave Chinner 1700a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0); 1701a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1); 170239aff5fdSDarrick J. 
Wong 1703d2c292d8SJan Kara out_unlock_ilock: 1704d2c292d8SJan Kara xfs_iunlock(ip, XFS_ILOCK_EXCL); 1705d2c292d8SJan Kara xfs_iunlock(tip, XFS_ILOCK_EXCL); 170665523218SChristoph Hellwig out_unlock: 1707d2c292d8SJan Kara filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping, 1708d2c292d8SJan Kara VFS_I(tip)->i_mapping); 170965523218SChristoph Hellwig unlock_two_nondirectories(VFS_I(ip), VFS_I(tip)); 1710a133d952SDave Chinner return error; 1711a133d952SDave Chinner 171239aff5fdSDarrick J. Wong out_trans_cancel: 171339aff5fdSDarrick J. Wong xfs_trans_cancel(tp); 1714d2c292d8SJan Kara goto out_unlock_ilock; 1715a133d952SDave Chinner } 1716