// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
#include "xfs_rtgroup.h"

struct kmem_cache		*xfs_bmap_intent_cache;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	uint64_t	maxblocks;	/* max blocks at this level */
	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
	int		level;		/* btree level */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a fork, hence the maximum number of
	 * leaf entries, is controlled by the size of the on-disk extent count.
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that the
	 * fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's
	 * fixed but probably at various positions. Therefore, for both ATTR1
	 * and ATTR2 we have to assume the worst case scenario of a minimum
	 * size available.
	 */
	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
				whichfork);
	if (whichfork == XFS_DATA_FORK)
		sz = xfs_bmdr_space_calc(MINDBTPTRS);
	else
		sz = xfs_bmdr_space_calc(MINABTPTRS);

	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
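	/*
	 * Illustrative walk through the loop below (the fanout values are
	 * assumed for the sake of the example, not taken from any particular
	 * filesystem geometry): with 2^31 - 1 max leaf entries and a minimum
	 * leaf fanout of 125, the leaf level needs howmany_64(2^31 - 1, 125)
	 * blocks; each pass then divides by the minimum node fanout until the
	 * remaining blocks fit in the inode root, and the number of passes is
	 * the worst-case tree height.
	 */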
	maxblocks = howmany_64(maxleafents, minleafrecs);
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = howmany_64(maxblocks, minnoderecs);
	}
	mp->m_bm_maxlevels[whichfork] = level;
	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
}

unsigned int
xfs_bmap_compute_attr_offset(
	struct xfs_mount	*mp)
{
	if (mp->m_sb.sb_inodesize == 256)
		return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
	return xfs_bmdr_space_calc(6 * MINABTPTRS);
}

STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b = *irec;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

STATIC int				/* error */
xfs_bmbt_lookup_first(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = 0;
	cur->bc_rec.b.br_startblock = 0;
	cur->bc_rec.b.br_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);

	return whichfork != XFS_COW_FORK &&
		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);

	return whichfork != XFS_COW_FORK &&
		ifp->if_format == XFS_DINODE_FMT_BTREE &&
		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Update the record referred to by cur to the value given by irec.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
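	/*
	 * Each pass of the loop below converts a block count at one level
	 * into the number of btree blocks needed one level up, rounding up.
	 * For instance (assumed fanout of 250, for illustration only): 1000
	 * delalloc blocks need howmany(1000, 250) = 4 leaf blocks, the next
	 * pass collapses those four records into one node block, and one
	 * block is then charged for each remaining level of the worst-case
	 * tree height.
	 */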
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}

/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
		return roundup(sizeof(xfs_dev_t), 8);
	return M_IGEO(ip->i_mount)->attr_fork_offset;
}

/*
 * Helper routine to reset inode i_forkoff field when switching attribute fork
 * from local to extent format - we reset it where possible to make space
 * available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_forkoff)
			ip->i_forkoff = dfl_forkoff;
	}
}

static int
xfs_bmap_read_buf(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	struct xfs_buf		**bpp)		/* buffer for fsbno */
{
	struct xfs_buf		*bp;		/* return value */
	int			error;

	if (!xfs_verify_fsbno(mp, fsbno))
		return -EFSCORRUPTED;
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
			&xfs_bmbt_buf_ops);
	if (!error) {
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		*bpp = bp;
	}
	return error;
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item	*lip;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < cur->bc_maxlevels; i++) {
		if (!cur->bc_levels[i].bp)
			break;
		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
			return cur->bc_levels[i].bp;
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;

		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    xfs_buf_daddr(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = xfs_bmbt_key_addr(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz);
		else
			pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr);

		for (j = i + 1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz);
			else
				thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				xfs_err(mp, "%s: ptrs are equal in node\n",
					__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, and for such
 * large bmapbt constructs those will catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	struct xfs_btree_cur	*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	struct xfs_buf		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i = 0, j;	/* index into the extents list */
	int			level;	/* btree level, for checking */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0};	/* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
		return;

	/* skip large extent count inodes */
	if (ip->i_df.if_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
			if (xfs_metadata_is_sick(error))
				xfs_btree_mark_sick(cur);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */
		xfs_check_block(block, mp, 0, 0);
		pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right
	 * order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */
		ep = xfs_bmbt_rec_addr(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = xfs_bmbt_rec_addr(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
			if (xfs_metadata_is_sick(error))
				xfs_btree_mark_sick(cur);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
		__func__, i);
	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return;
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	uint32_t		flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define xfs_bmap_validate_ret(bno, len, flags, mval, onmap, nmap)	do { } while (0)
#endif /* DEBUG */

/*
 * Inode fork format manipulation functions
 */

/*
 * Convert the inode format to extent format if it currently is in btree
 * format, but the extent list is small enough that it fits into the extent
 * format.
 *
 * Since the extents are already in-core, all we have to do is give up the
 * space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	struct xfs_trans	*tp,	/* transaction pointer */
	struct xfs_inode	*ip,	/* incore inode pointer */
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*rblock = ifp->if_broot;
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	struct xfs_buf		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	__be64			*pp;	/* ptr to block address */
	struct xfs_owner_info	oinfo;

	/* check if we actually need the extent format first: */
	if (!xfs_bmap_wants_extents(ip, whichfork))
		return 0;

	ASSERT(cur);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1);

	pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
#ifdef DEBUG
	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}
#endif
	error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
	if (xfs_metadata_is_sick(error))
		xfs_btree_mark_sick(cur);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;

	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
			XFS_AG_RESV_NONE, 0);
	if (error)
		return error;

	ip->i_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_levels[0].bp == cbp)
		cur->bc_levels[0].bp = NULL;
	xfs_bmap_broot_realloc(ip, whichfork, 0);
	ASSERT(ifp->if_broot == NULL);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
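
/*
 * Note that the old child block above is handed to xfs_free_extent_later()
 * rather than freed directly: the free is queued as deferred work so that
 * it completes in a later transaction in the same chain, keeping the level
 * change and the space free consistent across log recovery.
 */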

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	struct xfs_buf		*abp;		/* buffer for ablock */
	struct xfs_alloc_arg	args;		/* allocation arguments */
	struct xfs_bmbt_rec	*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	struct xfs_ifork	*ifp;		/* inode fork pointer */
	struct xfs_bmbt_key	*kp;		/* root block key pointer */
	struct xfs_mount	*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = xfs_ifork_ptr(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore. This needs to be undone if we fail
	 * to expand the root.
	 */
	block = xfs_bmap_broot_realloc(ip, whichfork, 1);

	/*
	 * Fill in the root.
	 */
	xfs_bmbt_init_block(ip, block, NULL, 1, 1);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	if (wasdel)
		cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	ifp->if_format = XFS_DINODE_FMT_BTREE;
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);

	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	error = xfs_alloc_vextent_start_ag(&args,
			XFS_INO_TO_FSB(mp, ip->i_ino));
	if (error)
		goto out_root_realloc;

	/*
	 * Allocation can't fail, the space was reserved.
	 */
	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		error = -ENOSPC;
		goto out_root_realloc;
	}

	cur->bc_bmap.allocated++;
	ip->i_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, args.fsbno),
			mp->m_bsize, 0, &abp);
	if (error)
		goto out_unreserve_dquot;

	/*
	 * Fill in the child block.
	 */
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_bmbt_init_block(ip, ablock, abp, 0, 0);

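	/*
	 * Copy the real extents into the child block.  Delalloc extents
	 * (isnullstartblock() records) exist only incore and are not counted
	 * in if_nextents, so they are skipped here; hence the assert below
	 * that the copied count matches if_nextents.
	 */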
	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == ifp->if_nextents);
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = xfs_bmbt_key_addr(mp, block, 1);
	arp = xfs_bmbt_rec_addr(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;

out_unreserve_dquot:
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
out_root_realloc:
	xfs_bmap_broot_realloc(ip, whichfork, 0);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	ASSERT(ifp->if_broot == NULL);
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);

	return error;
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);

	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(ifp->if_nextents == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_data = NULL;
	ifp->if_height = 0;
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

int					/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp, void *priv),
	void		*priv)
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	struct xfs_ifork *ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	struct xfs_buf	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline
	 * yet.  So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = xfs_ifork_ptr(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);

	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent_start_ag(&args,
			XFS_INO_TO_FSB(args.mp, ip->i_ino));
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
			args.mp->m_bsize, 0, &bp);
	if (error)
		goto done;

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much
	 * to log here. Note that init_fn must also set the buffer log item
	 * type correctly.
	 */
	init_fn(tp, bp, ip, ifp, priv);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_data = NULL;
	ifp->if_height = 0;

	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	ifp->if_nextents = 1;
	ip->i_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
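
/*
 * For example, the symlink code passes xfs_symlink_local_to_remote() as the
 * init_fn callout when converting an inline symlink target to a remote
 * block; see xfs_bmap_add_attrfork_local() below for that exact pattern.
 */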

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_btree_block	*block = ip->i_df.if_broot;
	struct xfs_btree_cur	*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;

	if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		error = xfs_bmbt_lookup_first(cur, &stat);
		if (error)
			goto error0;
		/* must be at least one entry */
		if (XFS_IS_CORRUPT(mp, stat != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
	    xfs_inode_data_fork_size(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
			XFS_DATA_FORK);
	if (cur) {
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised
 * they handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_da_args	dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
		return 0;

	if (S_ISDIR(VFS_I(ip)->i_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		dargs.owner = ip->i_ino;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(VFS_I(ip)->i_mode))
		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
				XFS_DATA_FORK, xfs_symlink_local_to_remote,
				NULL);

	/* should only be called for types that support local format data */
	ASSERT(0);
	xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
	return -EFSCORRUPTED;
}

/*
 * Set an inode attr fork offset based on the format of the data fork.
 */
static int
xfs_bmap_set_attrforkoff(
	struct xfs_inode	*ip,
	int			size,
	int			*version)
{
	int			default_size = xfs_default_attroffset(ip) >> 3;

	switch (ip->i_df.if_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_forkoff = default_size;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_forkoff)
			ip->i_forkoff = default_size;
		else if (xfs_has_attr2(ip->i_mount) && version)
			*version = 2;
		break;
	default:
		ASSERT(0);
		return -EINVAL;
	}

	return 0;
}
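
/*
 * Note that i_forkoff is stored in 8-byte units: the byte offset returned
 * by xfs_default_attroffset() is shifted right by 3 in both
 * xfs_bmap_forkoff_reset() and xfs_bmap_set_attrforkoff() above.
 */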

/*
 * Convert inode from non-attributed to attributed. Caller must hold the
 * ILOCK_EXCL and the file cannot have an attr fork.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	struct xfs_mount	*mp = tp->t_mountp;
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
	if (!xfs_is_metadir_inode(ip))
		ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	ASSERT(!xfs_inode_has_attr_fork(ip));

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_bmap_set_attrforkoff(ip, size, &version);
	if (error)
		return error;

	xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
	logflags = 0;
	switch (ip->i_df.if_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		return error;
	if (!xfs_has_attr(mp) ||
	    (!xfs_has_attr2(mp) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_has_attr(mp)) {
			xfs_add_attr(mp);
			log_sb = true;
		}
		if (!xfs_has_attr2(mp) && version == 2) {
			xfs_add_attr2(mp);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	return 0;
}

/*
 * Internal and external extent tree search functions.
 */

struct xfs_iread_state {
	struct xfs_iext_cursor	icur;
	xfs_extnum_t		loaded;
};

int
xfs_bmap_complain_bad_rec(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_failaddr_t		fa,
	const struct xfs_bmbt_irec *irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	const char		*forkname;

	switch (whichfork) {
	case XFS_DATA_FORK:	forkname = "data"; break;
	case XFS_ATTR_FORK:	forkname = "attr"; break;
	case XFS_COW_FORK:	forkname = "CoW"; break;
	default:		forkname = "???"; break;
	}

	xfs_warn(mp,
		"Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
		ip->i_ino, forkname, fa);
	xfs_warn(mp,
		"Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
		irec->br_startoff, irec->br_startblock, irec->br_blockcount,
		irec->br_state);

	return -EFSCORRUPTED;
}

/* Stuff every bmbt record from this block into the incore extent map. */
static int
xfs_iread_bmbt_block(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*priv)
{
	struct xfs_iread_state	*ir = priv;
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_inode	*ip = cur->bc_ino.ip;
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	struct xfs_bmbt_rec	*frp;
	xfs_extnum_t		num_recs;
	xfs_extnum_t		j;
	int			whichfork = cur->bc_ino.whichfork;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);

	block = xfs_btree_get_block(cur, level, &bp);

	/* Abort if we find more records than nextents. */
	num_recs = xfs_btree_get_numrecs(block);
	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
				(unsigned long long)ip->i_ino);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
				sizeof(*block), __this_address);
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	/* Copy records into the incore cache. */
	frp = xfs_bmbt_rec_addr(mp, block, 1);
	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
		struct xfs_bmbt_irec	new;
		xfs_failaddr_t		fa;

		xfs_bmbt_disk_get_all(frp, &new);
		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
		if (fa) {
			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
					"xfs_iread_extents(2)", frp,
					sizeof(*frp), fa);
			xfs_bmap_mark_sick(ip, whichfork);
			return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
					&new);
		}
		xfs_iext_insert(ip, &ir->icur, &new,
				xfs_bmap_fork_to_state(whichfork));
		trace_xfs_read_extent(ip, &ir->icur,
				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
		xfs_iext_next(ifp, &ir->icur);
	}

	return 0;
}

/*
 * Read in extents from a btree-format inode.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_iread_state	ir;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_cur	*cur;
	int			error;

	if (!xfs_need_iread_extents(ifp))
		return 0;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);

	ir.loaded = 0;
	xfs_iext_first(ifp, &ir.icur);
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
			XFS_BTREE_VISIT_RECORDS, &ir);
	xfs_btree_del_cursor(cur, error);
	if (error)
		goto out;

	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(ir.loaded == xfs_iext_count(ifp));
	/*
	 * Use release semantics so that we can use acquire semantics in
	 * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
	 * after that load.
	 */
	smp_store_release(&ifp->if_needextents, 0);
	return 0;
out:
	if (xfs_metadata_is_sick(error))
		xfs_bmap_mark_sick(ip, whichfork);
	xfs_iext_destroy(ifp);
	return error;
}
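
/*
 * Callers that want to walk a fork's extent list are expected to populate
 * it first with the pattern used throughout this file, e.g.:
 *
 *	error = xfs_iread_extents(tp, ip, whichfork);
 *	if (error)
 *		return error;
 *
 * with the ILOCK held; xfs_bmap_first_unused() below is one such caller.
 */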

/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free. This is the
 * lowest-address hole if the fork has holes, else the first block past the
 * end of the fork. Return 0 if the fork is currently local (in-inode).
 */
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		lastaddr = 0;
	xfs_fileoff_t		lowest, max;
	int			error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	ASSERT(xfs_ifork_has_extents(ifp));

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	lowest = max = *first_unused;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * See if the hole before this extent will work.
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - max >= len)
			break;
		lastaddr = got.br_startoff + got.br_blockcount;
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}

	*first_unused = max;
	return 0;
}
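
/*
 * For example, with *first_unused = 0, len = 3 and mappings at offsets
 * [0, 10) and [15, 20): the first extent ends at block 10 and the second
 * starts at block 15, so the 5-block hole at offset 10 is large enough
 * and 10 is returned in *first_unused.
 */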

/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	int			error;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_LOCAL:
		*last_block = 0;
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		ASSERT(0);
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;
	return 0;
}

int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_iext_cursor	icur;
	int			error;

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	xfs_iext_last(ifp, &icur);
	if (!xfs_iext_get_extent(ifp, &icur, rec))
		*is_empty = 1;
	else
		*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file. When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Sets bma->aeof to true if the file (fork) is empty, as any new write will
 * be at, or past, the EOF.
 */
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = false;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
			&is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = true;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at least
	 * into the last delayed allocated extent.
	 */
	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
		(bma->offset >= rec.br_startoff &&
		 isnullstartblock(rec.br_startblock));
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	*last_block = 0;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
		return 0;

	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
	if (error || is_empty)
		return error;

	*last_block = rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Extent tree manipulation functions used during allocation.
 */

static inline bool
xfs_bmap_same_rtgroup(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*left,
	struct xfs_bmbt_irec	*right)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
		if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
		    xfs_rtb_to_rgno(mp, right->br_startblock))
			return false;
	}

	return true;
}
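
/*
 * The contiguity checks below use this helper so that two otherwise
 * mergeable mappings are kept separate when they sit in different realtime
 * groups, since a single extent must not span an rtgroup boundary.
 */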

/*
 * Convert a delayed allocation to a real allocation.
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_mount	*mp = bma->ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval = 0;	/* return value (logging flags) */
	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new;	/* new count del alloc blocks used */
	xfs_filblks_t		da_old;	/* old count del alloc blocks used */
	xfs_filblks_t		temp = 0;	/* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_bmbt_irec	old;

	ASSERT(whichfork != XFS_ATTR_FORK);
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(isnullstartblock(PREV.br_startblock));
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
	    xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= XFS_MAX_BMBT_EXTLEN) &&
	    xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
		state |= BMAP_RIGHT_CONTIG;
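
	/*
	 * At this point the FILLING bits say whether the new real extent
	 * covers the start and/or the end of the delalloc extent, and the
	 * CONTIG bits say whether it can merge with the mapping to its left
	 * and/or right; together they select one of the cases handled below.
	 */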

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		ifp->if_nextents--;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		old = LEFT;
		LEFT.br_blockcount += PREV.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not. Take care
		 * with delay -> unwritten extent allocation here because the
		 * delalloc record we are overwriting is always written.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;
		PREV.br_state = new->br_state;

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &PREV);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_state = new->br_state;
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		old = LEFT;
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		LEFT.br_blockcount += new->br_blockcount;

		PREV.br_blockcount = temp;
		PREV.br_startoff += new->br_blockcount;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock) -
				(bma->cur ? bma->cur->bc_bmap.allocated : 0));

		PREV.br_startoff = new_endoff;
		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_prev(ifp, &bma->icur);
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is contiguous with the new allocation.
		 */
		old = RIGHT;
		RIGHT.br_startoff = new->br_startoff;
		RIGHT.br_startblock = new->br_startblock;
		RIGHT.br_blockcount += new->br_blockcount;

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &RIGHT);
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
		ASSERT(da_new <= da_old);
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_bmap.allocated : 0));

		PREV.br_startblock = nullstartblock(da_new);
		PREV.br_blockcount = temp;
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_next(ifp, &bma->icur);
		ASSERT(da_new <= da_old);
		break;

	case 0:
		/*
		 * Filling in the middle part of a previous delayed allocation.
		 * Contiguity is impossible here.
		 * This case is avoided almost all the time.
		 *
		 * We start with a delayed allocation:
		 *
		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
		 *  PREV @ idx
		 *
		 * and we are allocating:
		 *                     +rrrrrrrrrrrrrrrrr+
		 *                            new
		 *
		 * and we set it up for insertion as:
		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
		 *                            new
		 *  PREV @ idx          LEFT              RIGHT
		 *                      inserted at idx + 1
		 */
		old = PREV;

		/* LEFT is the new middle */
		LEFT = *new;

		/* RIGHT is the new right */
		RIGHT.br_state = PREV.br_state;
		RIGHT.br_startoff = new_endoff;
		RIGHT.br_blockcount =
			PREV.br_startoff + PREV.br_blockcount - new_endoff;
		RIGHT.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					RIGHT.br_blockcount));

		/* truncate PREV */
		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
		PREV.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					PREV.br_blockcount));
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		da_new = startblockval(PREV.br_startblock) +
			 startblockval(RIGHT.br_startblock);
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_CONTIG:
	case BMAP_RIGHT_CONTIG:
		/*
		 * These cases are all impossible.
		 */
1969 */ 1970 ASSERT(0); 1971 } 1972 1973 /* add reverse mapping unless caller opted out */ 1974 if (!(bma->flags & XFS_BMAPI_NORMAP)) 1975 xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); 1976 1977 /* convert to a btree if necessary */ 1978 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 1979 int tmp_logflags; /* partial log flag return val */ 1980 1981 ASSERT(bma->cur == NULL); 1982 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 1983 &bma->cur, da_old > 0, &tmp_logflags, 1984 whichfork); 1985 bma->logflags |= tmp_logflags; 1986 if (error) 1987 goto done; 1988 } 1989 1990 if (da_new != da_old) 1991 xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old); 1992 1993 if (bma->cur) { 1994 da_new += bma->cur->bc_bmap.allocated; 1995 bma->cur->bc_bmap.allocated = 0; 1996 } 1997 1998 /* adjust for changes in reserved delayed indirect blocks */ 1999 if (da_new < da_old) 2000 xfs_add_fdblocks(mp, da_old - da_new); 2001 else if (da_new > da_old) 2002 error = xfs_dec_fdblocks(mp, da_new - da_old, true); 2003 2004 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 2005 done: 2006 if (whichfork != XFS_COW_FORK) 2007 bma->logflags |= rval; 2008 return error; 2009 #undef LEFT 2010 #undef RIGHT 2011 #undef PREV 2012 } 2013 2014 /* 2015 * Convert an unwritten allocation to a real allocation or vice versa. 2016 */ 2017 int /* error */ 2018 xfs_bmap_add_extent_unwritten_real( 2019 struct xfs_trans *tp, 2020 xfs_inode_t *ip, /* incore inode pointer */ 2021 int whichfork, 2022 struct xfs_iext_cursor *icur, 2023 struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ 2024 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2025 int *logflagsp) /* inode logging flags */ 2026 { 2027 struct xfs_btree_cur *cur; /* btree cursor */ 2028 int error; /* error return value */ 2029 int i; /* temp state */ 2030 struct xfs_ifork *ifp; /* inode fork pointer */ 2031 xfs_fileoff_t new_endoff; /* end offset of new entry */ 2032 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 2033 /* left is 0, right is 1, prev is 2 */ 2034 int rval=0; /* return value (logging flags) */ 2035 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2036 struct xfs_mount *mp = ip->i_mount; 2037 struct xfs_bmbt_irec old; 2038 2039 *logflagsp = 0; 2040 2041 cur = *curp; 2042 ifp = xfs_ifork_ptr(ip, whichfork); 2043 2044 ASSERT(!isnullstartblock(new->br_startblock)); 2045 2046 XFS_STATS_INC(mp, xs_add_exlist); 2047 2048 #define LEFT r[0] 2049 #define RIGHT r[1] 2050 #define PREV r[2] 2051 2052 /* 2053 * Set up a bunch of variables to make the tests simpler. 2054 */ 2055 error = 0; 2056 xfs_iext_get_extent(ifp, icur, &PREV); 2057 ASSERT(new->br_state != PREV.br_state); 2058 new_endoff = new->br_startoff + new->br_blockcount; 2059 ASSERT(PREV.br_startoff <= new->br_startoff); 2060 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 2061 2062 /* 2063 * Set flags determining what part of the previous oldext allocation 2064 * extent is being replaced by a newext allocation. 2065 */ 2066 if (PREV.br_startoff == new->br_startoff) 2067 state |= BMAP_LEFT_FILLING; 2068 if (PREV.br_startoff + PREV.br_blockcount == new_endoff) 2069 state |= BMAP_RIGHT_FILLING; 2070 2071 /* 2072 * Check and set flags if this segment has a left neighbor. 2073 * Don't set contiguous if the combined extent would be too large. 
2074 */ 2075 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { 2076 state |= BMAP_LEFT_VALID; 2077 if (isnullstartblock(LEFT.br_startblock)) 2078 state |= BMAP_LEFT_DELAY; 2079 } 2080 2081 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2082 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2083 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2084 LEFT.br_state == new->br_state && 2085 LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2086 xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new)) 2087 state |= BMAP_LEFT_CONTIG; 2088 2089 /* 2090 * Check and set flags if this segment has a right neighbor. 2091 * Don't set contiguous if the combined extent would be too large. 2092 * Also check for all-three-contiguous being too large. 2093 */ 2094 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { 2095 state |= BMAP_RIGHT_VALID; 2096 if (isnullstartblock(RIGHT.br_startblock)) 2097 state |= BMAP_RIGHT_DELAY; 2098 } 2099 2100 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2101 new_endoff == RIGHT.br_startoff && 2102 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2103 new->br_state == RIGHT.br_state && 2104 new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2105 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2106 BMAP_RIGHT_FILLING)) != 2107 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2108 BMAP_RIGHT_FILLING) || 2109 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 2110 <= XFS_MAX_BMBT_EXTLEN) && 2111 xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT)) 2112 state |= BMAP_RIGHT_CONTIG; 2113 2114 /* 2115 * Switch out based on the FILLING and CONTIG state bits. 2116 */ 2117 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2118 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { 2119 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2120 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2121 /* 2122 * Setting all of a previous oldext extent to newext. 2123 * The left and right neighbors are both contiguous with new. 
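 *
 * For example, with illustrative values: LEFT = [off 0, len 4, written],
 * PREV = [off 4, len 8, unwritten] and RIGHT = [off 12, len 4, written]
 * collapse into a single [off 0, len 16, written] record: two records
 * are deleted from the bmbt, LEFT is rewritten in place and if_nextents
 * drops by two.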
2124 */ 2125 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; 2126 2127 xfs_iext_remove(ip, icur, state); 2128 xfs_iext_remove(ip, icur, state); 2129 xfs_iext_prev(ifp, icur); 2130 xfs_iext_update_extent(ip, state, icur, &LEFT); 2131 ifp->if_nextents -= 2; 2132 if (cur == NULL) 2133 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2134 else { 2135 rval = XFS_ILOG_CORE; 2136 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2137 if (error) 2138 goto done; 2139 if (XFS_IS_CORRUPT(mp, i != 1)) { 2140 xfs_btree_mark_sick(cur); 2141 error = -EFSCORRUPTED; 2142 goto done; 2143 } 2144 if ((error = xfs_btree_delete(cur, &i))) 2145 goto done; 2146 if (XFS_IS_CORRUPT(mp, i != 1)) { 2147 xfs_btree_mark_sick(cur); 2148 error = -EFSCORRUPTED; 2149 goto done; 2150 } 2151 if ((error = xfs_btree_decrement(cur, 0, &i))) 2152 goto done; 2153 if (XFS_IS_CORRUPT(mp, i != 1)) { 2154 xfs_btree_mark_sick(cur); 2155 error = -EFSCORRUPTED; 2156 goto done; 2157 } 2158 if ((error = xfs_btree_delete(cur, &i))) 2159 goto done; 2160 if (XFS_IS_CORRUPT(mp, i != 1)) { 2161 xfs_btree_mark_sick(cur); 2162 error = -EFSCORRUPTED; 2163 goto done; 2164 } 2165 if ((error = xfs_btree_decrement(cur, 0, &i))) 2166 goto done; 2167 if (XFS_IS_CORRUPT(mp, i != 1)) { 2168 xfs_btree_mark_sick(cur); 2169 error = -EFSCORRUPTED; 2170 goto done; 2171 } 2172 error = xfs_bmbt_update(cur, &LEFT); 2173 if (error) 2174 goto done; 2175 } 2176 break; 2177 2178 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2179 /* 2180 * Setting all of a previous oldext extent to newext. 2181 * The left neighbor is contiguous, the right is not. 2182 */ 2183 LEFT.br_blockcount += PREV.br_blockcount; 2184 2185 xfs_iext_remove(ip, icur, state); 2186 xfs_iext_prev(ifp, icur); 2187 xfs_iext_update_extent(ip, state, icur, &LEFT); 2188 ifp->if_nextents--; 2189 if (cur == NULL) 2190 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2191 else { 2192 rval = XFS_ILOG_CORE; 2193 error = xfs_bmbt_lookup_eq(cur, &PREV, &i); 2194 if (error) 2195 goto done; 2196 if (XFS_IS_CORRUPT(mp, i != 1)) { 2197 xfs_btree_mark_sick(cur); 2198 error = -EFSCORRUPTED; 2199 goto done; 2200 } 2201 if ((error = xfs_btree_delete(cur, &i))) 2202 goto done; 2203 if (XFS_IS_CORRUPT(mp, i != 1)) { 2204 xfs_btree_mark_sick(cur); 2205 error = -EFSCORRUPTED; 2206 goto done; 2207 } 2208 if ((error = xfs_btree_decrement(cur, 0, &i))) 2209 goto done; 2210 if (XFS_IS_CORRUPT(mp, i != 1)) { 2211 xfs_btree_mark_sick(cur); 2212 error = -EFSCORRUPTED; 2213 goto done; 2214 } 2215 error = xfs_bmbt_update(cur, &LEFT); 2216 if (error) 2217 goto done; 2218 } 2219 break; 2220 2221 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2222 /* 2223 * Setting all of a previous oldext extent to newext. 2224 * The right neighbor is contiguous, the left is not. 
2225 */ 2226 PREV.br_blockcount += RIGHT.br_blockcount; 2227 PREV.br_state = new->br_state; 2228 2229 xfs_iext_next(ifp, icur); 2230 xfs_iext_remove(ip, icur, state); 2231 xfs_iext_prev(ifp, icur); 2232 xfs_iext_update_extent(ip, state, icur, &PREV); 2233 ifp->if_nextents--; 2234 2235 if (cur == NULL) 2236 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2237 else { 2238 rval = XFS_ILOG_CORE; 2239 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2240 if (error) 2241 goto done; 2242 if (XFS_IS_CORRUPT(mp, i != 1)) { 2243 xfs_btree_mark_sick(cur); 2244 error = -EFSCORRUPTED; 2245 goto done; 2246 } 2247 if ((error = xfs_btree_delete(cur, &i))) 2248 goto done; 2249 if (XFS_IS_CORRUPT(mp, i != 1)) { 2250 xfs_btree_mark_sick(cur); 2251 error = -EFSCORRUPTED; 2252 goto done; 2253 } 2254 if ((error = xfs_btree_decrement(cur, 0, &i))) 2255 goto done; 2256 if (XFS_IS_CORRUPT(mp, i != 1)) { 2257 xfs_btree_mark_sick(cur); 2258 error = -EFSCORRUPTED; 2259 goto done; 2260 } 2261 error = xfs_bmbt_update(cur, &PREV); 2262 if (error) 2263 goto done; 2264 } 2265 break; 2266 2267 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 2268 /* 2269 * Setting all of a previous oldext extent to newext. 2270 * Neither the left nor right neighbors are contiguous with 2271 * the new one. 2272 */ 2273 PREV.br_state = new->br_state; 2274 xfs_iext_update_extent(ip, state, icur, &PREV); 2275 2276 if (cur == NULL) 2277 rval = XFS_ILOG_DEXT; 2278 else { 2279 rval = 0; 2280 error = xfs_bmbt_lookup_eq(cur, new, &i); 2281 if (error) 2282 goto done; 2283 if (XFS_IS_CORRUPT(mp, i != 1)) { 2284 xfs_btree_mark_sick(cur); 2285 error = -EFSCORRUPTED; 2286 goto done; 2287 } 2288 error = xfs_bmbt_update(cur, &PREV); 2289 if (error) 2290 goto done; 2291 } 2292 break; 2293 2294 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 2295 /* 2296 * Setting the first part of a previous oldext extent to newext. 2297 * The left neighbor is contiguous. 2298 */ 2299 LEFT.br_blockcount += new->br_blockcount; 2300 2301 old = PREV; 2302 PREV.br_startoff += new->br_blockcount; 2303 PREV.br_startblock += new->br_blockcount; 2304 PREV.br_blockcount -= new->br_blockcount; 2305 2306 xfs_iext_update_extent(ip, state, icur, &PREV); 2307 xfs_iext_prev(ifp, icur); 2308 xfs_iext_update_extent(ip, state, icur, &LEFT); 2309 2310 if (cur == NULL) 2311 rval = XFS_ILOG_DEXT; 2312 else { 2313 rval = 0; 2314 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2315 if (error) 2316 goto done; 2317 if (XFS_IS_CORRUPT(mp, i != 1)) { 2318 xfs_btree_mark_sick(cur); 2319 error = -EFSCORRUPTED; 2320 goto done; 2321 } 2322 error = xfs_bmbt_update(cur, &PREV); 2323 if (error) 2324 goto done; 2325 error = xfs_btree_decrement(cur, 0, &i); 2326 if (error) 2327 goto done; 2328 error = xfs_bmbt_update(cur, &LEFT); 2329 if (error) 2330 goto done; 2331 } 2332 break; 2333 2334 case BMAP_LEFT_FILLING: 2335 /* 2336 * Setting the first part of a previous oldext extent to newext. 2337 * The left neighbor is not contiguous. 
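 *
 * For example, with illustrative values: converting the first 4 blocks
 * of PREV = [off 0, len 10, unwritten] leaves PREV = [off 4, len 6,
 * unwritten] (the start offset and start block both move up by 4) and
 * inserts new = [off 0, len 4, written] in front of it, so if_nextents
 * grows by one.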
2338 */ 2339 old = PREV; 2340 PREV.br_startoff += new->br_blockcount; 2341 PREV.br_startblock += new->br_blockcount; 2342 PREV.br_blockcount -= new->br_blockcount; 2343 2344 xfs_iext_update_extent(ip, state, icur, &PREV); 2345 xfs_iext_insert(ip, icur, new, state); 2346 ifp->if_nextents++; 2347 2348 if (cur == NULL) 2349 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2350 else { 2351 rval = XFS_ILOG_CORE; 2352 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2353 if (error) 2354 goto done; 2355 if (XFS_IS_CORRUPT(mp, i != 1)) { 2356 xfs_btree_mark_sick(cur); 2357 error = -EFSCORRUPTED; 2358 goto done; 2359 } 2360 error = xfs_bmbt_update(cur, &PREV); 2361 if (error) 2362 goto done; 2363 cur->bc_rec.b = *new; 2364 if ((error = xfs_btree_insert(cur, &i))) 2365 goto done; 2366 if (XFS_IS_CORRUPT(mp, i != 1)) { 2367 xfs_btree_mark_sick(cur); 2368 error = -EFSCORRUPTED; 2369 goto done; 2370 } 2371 } 2372 break; 2373 2374 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2375 /* 2376 * Setting the last part of a previous oldext extent to newext. 2377 * The right neighbor is contiguous with the new allocation. 2378 */ 2379 old = PREV; 2380 PREV.br_blockcount -= new->br_blockcount; 2381 2382 RIGHT.br_startoff = new->br_startoff; 2383 RIGHT.br_startblock = new->br_startblock; 2384 RIGHT.br_blockcount += new->br_blockcount; 2385 2386 xfs_iext_update_extent(ip, state, icur, &PREV); 2387 xfs_iext_next(ifp, icur); 2388 xfs_iext_update_extent(ip, state, icur, &RIGHT); 2389 2390 if (cur == NULL) 2391 rval = XFS_ILOG_DEXT; 2392 else { 2393 rval = 0; 2394 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2395 if (error) 2396 goto done; 2397 if (XFS_IS_CORRUPT(mp, i != 1)) { 2398 xfs_btree_mark_sick(cur); 2399 error = -EFSCORRUPTED; 2400 goto done; 2401 } 2402 error = xfs_bmbt_update(cur, &PREV); 2403 if (error) 2404 goto done; 2405 error = xfs_btree_increment(cur, 0, &i); 2406 if (error) 2407 goto done; 2408 error = xfs_bmbt_update(cur, &RIGHT); 2409 if (error) 2410 goto done; 2411 } 2412 break; 2413 2414 case BMAP_RIGHT_FILLING: 2415 /* 2416 * Setting the last part of a previous oldext extent to newext. 2417 * The right neighbor is not contiguous. 2418 */ 2419 old = PREV; 2420 PREV.br_blockcount -= new->br_blockcount; 2421 2422 xfs_iext_update_extent(ip, state, icur, &PREV); 2423 xfs_iext_next(ifp, icur); 2424 xfs_iext_insert(ip, icur, new, state); 2425 ifp->if_nextents++; 2426 2427 if (cur == NULL) 2428 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2429 else { 2430 rval = XFS_ILOG_CORE; 2431 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2432 if (error) 2433 goto done; 2434 if (XFS_IS_CORRUPT(mp, i != 1)) { 2435 xfs_btree_mark_sick(cur); 2436 error = -EFSCORRUPTED; 2437 goto done; 2438 } 2439 error = xfs_bmbt_update(cur, &PREV); 2440 if (error) 2441 goto done; 2442 error = xfs_bmbt_lookup_eq(cur, new, &i); 2443 if (error) 2444 goto done; 2445 if (XFS_IS_CORRUPT(mp, i != 0)) { 2446 xfs_btree_mark_sick(cur); 2447 error = -EFSCORRUPTED; 2448 goto done; 2449 } 2450 if ((error = xfs_btree_insert(cur, &i))) 2451 goto done; 2452 if (XFS_IS_CORRUPT(mp, i != 1)) { 2453 xfs_btree_mark_sick(cur); 2454 error = -EFSCORRUPTED; 2455 goto done; 2456 } 2457 } 2458 break; 2459 2460 case 0: 2461 /* 2462 * Setting the middle part of a previous oldext extent to 2463 * newext. Contiguity is impossible here. 2464 * One extent becomes three extents. 
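 *
 * For example, with illustrative values: converting blocks [3, 7) in
 * the middle of PREV = [off 0, len 10, unwritten] yields the three
 * records [off 0, len 3, unwritten], [off 3, len 4, written] and
 * [off 7, len 3, unwritten], so if_nextents grows by two.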
2465 */ 2466 old = PREV; 2467 PREV.br_blockcount = new->br_startoff - PREV.br_startoff; 2468 2469 r[0] = *new; 2470 r[1].br_startoff = new_endoff; 2471 r[1].br_blockcount = 2472 old.br_startoff + old.br_blockcount - new_endoff; 2473 r[1].br_startblock = new->br_startblock + new->br_blockcount; 2474 r[1].br_state = PREV.br_state; 2475 2476 xfs_iext_update_extent(ip, state, icur, &PREV); 2477 xfs_iext_next(ifp, icur); 2478 xfs_iext_insert(ip, icur, &r[1], state); 2479 xfs_iext_insert(ip, icur, &r[0], state); 2480 ifp->if_nextents += 2; 2481 2482 if (cur == NULL) 2483 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2484 else { 2485 rval = XFS_ILOG_CORE; 2486 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2487 if (error) 2488 goto done; 2489 if (XFS_IS_CORRUPT(mp, i != 1)) { 2490 xfs_btree_mark_sick(cur); 2491 error = -EFSCORRUPTED; 2492 goto done; 2493 } 2494 /* new right extent - oldext */ 2495 error = xfs_bmbt_update(cur, &r[1]); 2496 if (error) 2497 goto done; 2498 /* new left extent - oldext */ 2499 cur->bc_rec.b = PREV; 2500 if ((error = xfs_btree_insert(cur, &i))) 2501 goto done; 2502 if (XFS_IS_CORRUPT(mp, i != 1)) { 2503 xfs_btree_mark_sick(cur); 2504 error = -EFSCORRUPTED; 2505 goto done; 2506 } 2507 /* 2508 * Reset the cursor to the position of the new extent 2509 * we are about to insert as we can't trust it after 2510 * the previous insert. 2511 */ 2512 error = xfs_bmbt_lookup_eq(cur, new, &i); 2513 if (error) 2514 goto done; 2515 if (XFS_IS_CORRUPT(mp, i != 0)) { 2516 xfs_btree_mark_sick(cur); 2517 error = -EFSCORRUPTED; 2518 goto done; 2519 } 2520 /* new middle extent - newext */ 2521 if ((error = xfs_btree_insert(cur, &i))) 2522 goto done; 2523 if (XFS_IS_CORRUPT(mp, i != 1)) { 2524 xfs_btree_mark_sick(cur); 2525 error = -EFSCORRUPTED; 2526 goto done; 2527 } 2528 } 2529 break; 2530 2531 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2532 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2533 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: 2534 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2535 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2536 case BMAP_LEFT_CONTIG: 2537 case BMAP_RIGHT_CONTIG: 2538 /* 2539 * These cases are all impossible. 2540 */ 2541 ASSERT(0); 2542 } 2543 2544 /* update reverse mappings */ 2545 xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); 2546 2547 /* convert to a btree if necessary */ 2548 if (xfs_bmap_needs_btree(ip, whichfork)) { 2549 int tmp_logflags; /* partial log flag return val */ 2550 2551 ASSERT(cur == NULL); 2552 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, 2553 &tmp_logflags, whichfork); 2554 *logflagsp |= tmp_logflags; 2555 if (error) 2556 goto done; 2557 } 2558 2559 /* clear out the allocated field, done with it now in any case. */ 2560 if (cur) { 2561 cur->bc_bmap.allocated = 0; 2562 *curp = cur; 2563 } 2564 2565 xfs_bmap_check_leaf_extents(*curp, ip, whichfork); 2566 done: 2567 *logflagsp |= rval; 2568 return error; 2569 #undef LEFT 2570 #undef RIGHT 2571 #undef PREV 2572 } 2573 2574 /* 2575 * Convert a hole to a delayed allocation. 
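 *
 * Merging delayed extents can only shrink the total worst-case indirect
 * block reservation: the merged reservation is clamped to
 * min(xfs_bmap_worst_indlen(merged), sum of the old reservations), and
 * any excess is returned to the free block pool at the bottom of this
 * function. As an illustrative example, if the left extent and the new
 * extent each reserved 4 indirect blocks (oldlen = 8) but the merged
 * extent's worst case is 5, the remaining 3 blocks are handed back via
 * xfs_add_fdblocks().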
2576 */ 2577 STATIC void 2578 xfs_bmap_add_extent_hole_delay( 2579 xfs_inode_t *ip, /* incore inode pointer */ 2580 int whichfork, 2581 struct xfs_iext_cursor *icur, 2582 xfs_bmbt_irec_t *new) /* new data to add to file extents */ 2583 { 2584 struct xfs_ifork *ifp; /* inode fork pointer */ 2585 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2586 xfs_filblks_t newlen=0; /* new indirect size */ 2587 xfs_filblks_t oldlen=0; /* old indirect size */ 2588 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2589 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2590 xfs_filblks_t temp; /* temp for indirect calculations */ 2591 2592 ifp = xfs_ifork_ptr(ip, whichfork); 2593 ASSERT(isnullstartblock(new->br_startblock)); 2594 2595 /* 2596 * Check and set flags if this segment has a left neighbor 2597 */ 2598 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2599 state |= BMAP_LEFT_VALID; 2600 if (isnullstartblock(left.br_startblock)) 2601 state |= BMAP_LEFT_DELAY; 2602 } 2603 2604 /* 2605 * Check and set flags if the current (right) segment exists. 2606 * If it doesn't exist, we're converting the hole at end-of-file. 2607 */ 2608 if (xfs_iext_get_extent(ifp, icur, &right)) { 2609 state |= BMAP_RIGHT_VALID; 2610 if (isnullstartblock(right.br_startblock)) 2611 state |= BMAP_RIGHT_DELAY; 2612 } 2613 2614 /* 2615 * Set contiguity flags on the left and right neighbors. 2616 * Don't let extents get too large, even if the pieces are contiguous. 2617 */ 2618 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && 2619 left.br_startoff + left.br_blockcount == new->br_startoff && 2620 left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) 2621 state |= BMAP_LEFT_CONTIG; 2622 2623 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && 2624 new->br_startoff + new->br_blockcount == right.br_startoff && 2625 new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2626 (!(state & BMAP_LEFT_CONTIG) || 2627 (left.br_blockcount + new->br_blockcount + 2628 right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) 2629 state |= BMAP_RIGHT_CONTIG; 2630 2631 /* 2632 * Switch out based on the contiguity flags. 2633 */ 2634 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2635 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2636 /* 2637 * New allocation is contiguous with delayed allocations 2638 * on the left and on the right. 2639 * Merge all three into a single extent record. 2640 */ 2641 temp = left.br_blockcount + new->br_blockcount + 2642 right.br_blockcount; 2643 2644 oldlen = startblockval(left.br_startblock) + 2645 startblockval(new->br_startblock) + 2646 startblockval(right.br_startblock); 2647 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2648 oldlen); 2649 left.br_startblock = nullstartblock(newlen); 2650 left.br_blockcount = temp; 2651 2652 xfs_iext_remove(ip, icur, state); 2653 xfs_iext_prev(ifp, icur); 2654 xfs_iext_update_extent(ip, state, icur, &left); 2655 break; 2656 2657 case BMAP_LEFT_CONTIG: 2658 /* 2659 * New allocation is contiguous with a delayed allocation 2660 * on the left. 2661 * Merge the new allocation with the left neighbor. 
2662 */ 2663 temp = left.br_blockcount + new->br_blockcount; 2664 2665 oldlen = startblockval(left.br_startblock) + 2666 startblockval(new->br_startblock); 2667 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2668 oldlen); 2669 left.br_blockcount = temp; 2670 left.br_startblock = nullstartblock(newlen); 2671 2672 xfs_iext_prev(ifp, icur); 2673 xfs_iext_update_extent(ip, state, icur, &left); 2674 break; 2675 2676 case BMAP_RIGHT_CONTIG: 2677 /* 2678 * New allocation is contiguous with a delayed allocation 2679 * on the right. 2680 * Merge the new allocation with the right neighbor. 2681 */ 2682 temp = new->br_blockcount + right.br_blockcount; 2683 oldlen = startblockval(new->br_startblock) + 2684 startblockval(right.br_startblock); 2685 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2686 oldlen); 2687 right.br_startoff = new->br_startoff; 2688 right.br_startblock = nullstartblock(newlen); 2689 right.br_blockcount = temp; 2690 xfs_iext_update_extent(ip, state, icur, &right); 2691 break; 2692 2693 case 0: 2694 /* 2695 * New allocation is not contiguous with another 2696 * delayed allocation. 2697 * Insert a new entry. 2698 */ 2699 oldlen = newlen = 0; 2700 xfs_iext_insert(ip, icur, new, state); 2701 break; 2702 } 2703 if (oldlen != newlen) { 2704 ASSERT(oldlen > newlen); 2705 xfs_add_fdblocks(ip->i_mount, oldlen - newlen); 2706 2707 /* 2708 * Nothing to do for disk quota accounting here. 2709 */ 2710 xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen); 2711 } 2712 } 2713 2714 /* 2715 * Convert a hole to a real allocation. 2716 */ 2717 STATIC int /* error */ 2718 xfs_bmap_add_extent_hole_real( 2719 struct xfs_trans *tp, 2720 struct xfs_inode *ip, 2721 int whichfork, 2722 struct xfs_iext_cursor *icur, 2723 struct xfs_btree_cur **curp, 2724 struct xfs_bmbt_irec *new, 2725 int *logflagsp, 2726 uint32_t flags) 2727 { 2728 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 2729 struct xfs_mount *mp = ip->i_mount; 2730 struct xfs_btree_cur *cur = *curp; 2731 int error; /* error return value */ 2732 int i; /* temp state */ 2733 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2734 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2735 int rval=0; /* return value (logging flags) */ 2736 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2737 struct xfs_bmbt_irec old; 2738 2739 ASSERT(!isnullstartblock(new->br_startblock)); 2740 ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL)); 2741 2742 XFS_STATS_INC(mp, xs_add_exlist); 2743 2744 /* 2745 * Check and set flags if this segment has a left neighbor. 2746 */ 2747 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2748 state |= BMAP_LEFT_VALID; 2749 if (isnullstartblock(left.br_startblock)) 2750 state |= BMAP_LEFT_DELAY; 2751 } 2752 2753 /* 2754 * Check and set flags if this segment has a current value. 2755 * Not true if we're inserting into the "hole" at eof. 2756 */ 2757 if (xfs_iext_get_extent(ifp, icur, &right)) { 2758 state |= BMAP_RIGHT_VALID; 2759 if (isnullstartblock(right.br_startblock)) 2760 state |= BMAP_RIGHT_DELAY; 2761 } 2762 2763 /* 2764 * We're inserting a real allocation between "left" and "right". 2765 * Set the contiguity flags. Don't let extents get too large. 
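 *
 * For example, with illustrative values: left = [off 0, len 5, bno 100]
 * and new = [off 5, len 3, bno 105] are contiguous because both the
 * file offsets and the physical blocks line up and the extent states
 * match; the merge is skipped if the combined length would exceed
 * XFS_MAX_BMBT_EXTLEN or the pieces sit in different realtime groups.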
2766 */ 2767 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2768 left.br_startoff + left.br_blockcount == new->br_startoff && 2769 left.br_startblock + left.br_blockcount == new->br_startblock && 2770 left.br_state == new->br_state && 2771 left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2772 xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) 2773 state |= BMAP_LEFT_CONTIG; 2774 2775 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2776 new->br_startoff + new->br_blockcount == right.br_startoff && 2777 new->br_startblock + new->br_blockcount == right.br_startblock && 2778 new->br_state == right.br_state && 2779 new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2780 (!(state & BMAP_LEFT_CONTIG) || 2781 left.br_blockcount + new->br_blockcount + 2782 right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) && 2783 xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) 2784 state |= BMAP_RIGHT_CONTIG; 2785 2786 error = 0; 2787 /* 2788 * Select which case we're in here, and implement it. 2789 */ 2790 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2791 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2792 /* 2793 * New allocation is contiguous with real allocations on the 2794 * left and on the right. 2795 * Merge all three into a single extent record. 2796 */ 2797 left.br_blockcount += new->br_blockcount + right.br_blockcount; 2798 2799 xfs_iext_remove(ip, icur, state); 2800 xfs_iext_prev(ifp, icur); 2801 xfs_iext_update_extent(ip, state, icur, &left); 2802 ifp->if_nextents--; 2803 2804 if (cur == NULL) { 2805 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2806 } else { 2807 rval = XFS_ILOG_CORE; 2808 error = xfs_bmbt_lookup_eq(cur, &right, &i); 2809 if (error) 2810 goto done; 2811 if (XFS_IS_CORRUPT(mp, i != 1)) { 2812 xfs_btree_mark_sick(cur); 2813 error = -EFSCORRUPTED; 2814 goto done; 2815 } 2816 error = xfs_btree_delete(cur, &i); 2817 if (error) 2818 goto done; 2819 if (XFS_IS_CORRUPT(mp, i != 1)) { 2820 xfs_btree_mark_sick(cur); 2821 error = -EFSCORRUPTED; 2822 goto done; 2823 } 2824 error = xfs_btree_decrement(cur, 0, &i); 2825 if (error) 2826 goto done; 2827 if (XFS_IS_CORRUPT(mp, i != 1)) { 2828 xfs_btree_mark_sick(cur); 2829 error = -EFSCORRUPTED; 2830 goto done; 2831 } 2832 error = xfs_bmbt_update(cur, &left); 2833 if (error) 2834 goto done; 2835 } 2836 break; 2837 2838 case BMAP_LEFT_CONTIG: 2839 /* 2840 * New allocation is contiguous with a real allocation 2841 * on the left. 2842 * Merge the new allocation with the left neighbor. 2843 */ 2844 old = left; 2845 left.br_blockcount += new->br_blockcount; 2846 2847 xfs_iext_prev(ifp, icur); 2848 xfs_iext_update_extent(ip, state, icur, &left); 2849 2850 if (cur == NULL) { 2851 rval = xfs_ilog_fext(whichfork); 2852 } else { 2853 rval = 0; 2854 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2855 if (error) 2856 goto done; 2857 if (XFS_IS_CORRUPT(mp, i != 1)) { 2858 xfs_btree_mark_sick(cur); 2859 error = -EFSCORRUPTED; 2860 goto done; 2861 } 2862 error = xfs_bmbt_update(cur, &left); 2863 if (error) 2864 goto done; 2865 } 2866 break; 2867 2868 case BMAP_RIGHT_CONTIG: 2869 /* 2870 * New allocation is contiguous with a real allocation 2871 * on the right. 2872 * Merge the new allocation with the right neighbor. 
2873 */ 2874 old = right; 2875 2876 right.br_startoff = new->br_startoff; 2877 right.br_startblock = new->br_startblock; 2878 right.br_blockcount += new->br_blockcount; 2879 xfs_iext_update_extent(ip, state, icur, &right); 2880 2881 if (cur == NULL) { 2882 rval = xfs_ilog_fext(whichfork); 2883 } else { 2884 rval = 0; 2885 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2886 if (error) 2887 goto done; 2888 if (XFS_IS_CORRUPT(mp, i != 1)) { 2889 xfs_btree_mark_sick(cur); 2890 error = -EFSCORRUPTED; 2891 goto done; 2892 } 2893 error = xfs_bmbt_update(cur, &right); 2894 if (error) 2895 goto done; 2896 } 2897 break; 2898 2899 case 0: 2900 /* 2901 * New allocation is not contiguous with another 2902 * real allocation. 2903 * Insert a new entry. 2904 */ 2905 xfs_iext_insert(ip, icur, new, state); 2906 ifp->if_nextents++; 2907 2908 if (cur == NULL) { 2909 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2910 } else { 2911 rval = XFS_ILOG_CORE; 2912 error = xfs_bmbt_lookup_eq(cur, new, &i); 2913 if (error) 2914 goto done; 2915 if (XFS_IS_CORRUPT(mp, i != 0)) { 2916 xfs_btree_mark_sick(cur); 2917 error = -EFSCORRUPTED; 2918 goto done; 2919 } 2920 error = xfs_btree_insert(cur, &i); 2921 if (error) 2922 goto done; 2923 if (XFS_IS_CORRUPT(mp, i != 1)) { 2924 xfs_btree_mark_sick(cur); 2925 error = -EFSCORRUPTED; 2926 goto done; 2927 } 2928 } 2929 break; 2930 } 2931 2932 /* add reverse mapping unless caller opted out */ 2933 if (!(flags & XFS_BMAPI_NORMAP)) 2934 xfs_rmap_map_extent(tp, ip, whichfork, new); 2935 2936 /* convert to a btree if necessary */ 2937 if (xfs_bmap_needs_btree(ip, whichfork)) { 2938 int tmp_logflags; /* partial log flag return val */ 2939 2940 ASSERT(cur == NULL); 2941 error = xfs_bmap_extents_to_btree(tp, ip, curp, 0, 2942 &tmp_logflags, whichfork); 2943 *logflagsp |= tmp_logflags; 2944 cur = *curp; 2945 if (error) 2946 goto done; 2947 } 2948 2949 /* clear out the allocated field, done with it now in any case. */ 2950 if (cur) 2951 cur->bc_bmap.allocated = 0; 2952 2953 xfs_bmap_check_leaf_extents(cur, ip, whichfork); 2954 done: 2955 *logflagsp |= rval; 2956 return error; 2957 } 2958 2959 /* 2960 * Functions used in the extent read, allocate and remove paths 2961 */ 2962 2963 /* 2964 * Adjust the size of the new extent based on i_extsize and rt extsize. 2965 */ 2966 int 2967 xfs_bmap_extsize_align( 2968 xfs_mount_t *mp, 2969 xfs_bmbt_irec_t *gotp, /* next extent pointer */ 2970 xfs_bmbt_irec_t *prevp, /* previous extent pointer */ 2971 xfs_extlen_t extsz, /* align to this extent size */ 2972 int rt, /* is this a realtime inode? */ 2973 int eof, /* is extent at end-of-file? */ 2974 int delay, /* creating delalloc extent? */ 2975 int convert, /* overwriting unwritten extent? */ 2976 xfs_fileoff_t *offp, /* in/out: aligned offset */ 2977 xfs_extlen_t *lenp) /* in/out: aligned length */ 2978 { 2979 xfs_fileoff_t orig_off; /* original offset */ 2980 xfs_extlen_t orig_alen; /* original length */ 2981 xfs_fileoff_t orig_end; /* original off+len */ 2982 xfs_fileoff_t nexto; /* next file offset */ 2983 xfs_fileoff_t prevo; /* previous file offset */ 2984 xfs_fileoff_t align_off; /* temp for offset */ 2985 xfs_extlen_t align_alen; /* temp for length */ 2986 xfs_extlen_t temp; /* temp for calculations */ 2987 2988 if (convert) 2989 return 0; 2990 2991 orig_off = align_off = *offp; 2992 orig_alen = align_alen = *lenp; 2993 orig_end = orig_off + orig_alen; 2994 2995 /* 2996 * If this request overlaps an existing extent, then don't 2997 * attempt to perform any additional alignment. 
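 *
 * As an illustrative example: with extsz = 16, a request for offset 10,
 * length 4 first has its start rounded down (temp = 10, so align_off
 * becomes 0 and align_alen becomes 14) and then its end rounded up
 * (14 % 16 = 14, so align_alen grows to 16), giving one whole extsz
 * chunk that covers the original range.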
*/
2999 if (!delay && !eof &&
3000 (orig_off >= gotp->br_startoff) &&
3001 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
3002 return 0;
3003 }
3004
3005 /*
3006 * If the file offset is unaligned vs. the extent size
3007 * we need to align it. This will be possible unless
3008 * the file was previously written with a kernel that didn't
3009 * perform this alignment, or if a truncate shot us in the
3010 * foot.
3011 */
3012 div_u64_rem(orig_off, extsz, &temp);
3013 if (temp) {
3014 align_alen += temp;
3015 align_off -= temp;
3016 }
3017
3018 /* Same adjustment for the end of the requested area. */
3019 temp = (align_alen % extsz);
3020 if (temp)
3021 align_alen += extsz - temp;
3022
3023 /*
3024 * For large extent hint sizes, the aligned extent might be larger than
3025 * XFS_MAX_BMBT_EXTLEN. In that case, reduce the size by an extsz so
3026 * that it pulls the length back under XFS_MAX_BMBT_EXTLEN. The outer
3027 * allocation loops handle short allocation just fine, so it is safe to
3028 * do this. We only want to do it when we are forced to, though, because
3029 * it means more allocation operations are required.
3030 */
3031 while (align_alen > XFS_MAX_BMBT_EXTLEN)
3032 align_alen -= extsz;
3033 ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
3034
3035 /*
3036 * If the previous block overlaps with this proposed allocation
3037 * then move the start forward without adjusting the length.
3038 */
3039 if (prevp->br_startoff != NULLFILEOFF) {
3040 if (prevp->br_startblock == HOLESTARTBLOCK)
3041 prevo = prevp->br_startoff;
3042 else
3043 prevo = prevp->br_startoff + prevp->br_blockcount;
3044 } else
3045 prevo = 0;
3046 if (align_off != orig_off && align_off < prevo)
3047 align_off = prevo;
3048 /*
3049 * If the next block overlaps with this proposed allocation
3050 * then move the start back without adjusting the length,
3051 * but not before offset 0.
3052 * This may of course make the start overlap the previous block,
3053 * and if we hit the offset 0 limit then the next block
3054 * can still overlap too.
3055 */
3056 if (!eof && gotp->br_startoff != NULLFILEOFF) {
3057 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3058 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3059 nexto = gotp->br_startoff + gotp->br_blockcount;
3060 else
3061 nexto = gotp->br_startoff;
3062 } else
3063 nexto = NULLFILEOFF;
3064 if (!eof &&
3065 align_off + align_alen != orig_end &&
3066 align_off + align_alen > nexto)
3067 align_off = nexto > align_alen ? nexto - align_alen : 0;
3068 /*
3069 * If we're now overlapping the next or previous extent that
3070 * means we can't fit an extsz piece in this hole. Just move
3071 * the start forward to the first valid spot and set
3072 * the length so we hit the end.
3073 */
3074 if (align_off != orig_off && align_off < prevo)
3075 align_off = prevo;
3076 if (align_off + align_alen != orig_end &&
3077 align_off + align_alen > nexto &&
3078 nexto != NULLFILEOFF) {
3079 ASSERT(nexto > prevo);
3080 align_alen = nexto - align_off;
3081 }
3082
3083 /*
3084 * If realtime, and the result isn't a multiple of the realtime
3085 * extent size we need to remove blocks until it is.
3086 */
3087 if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
3088 /*
3089 * We're not covering the original request, or
3090 * we won't be able to once we fix the length.
3091 */
3092 if (orig_off < align_off ||
3093 orig_end > align_off + align_alen ||
3094 align_alen - temp < orig_alen)
3095 return -EINVAL;
3096 /*
3097 * Try to fix it by moving the start up.
*/
3099 if (align_off + temp <= orig_off) {
3100 align_alen -= temp;
3101 align_off += temp;
3102 }
3103 /*
3104 * Try to fix it by moving the end in.
3105 */
3106 else if (align_off + align_alen - temp >= orig_end)
3107 align_alen -= temp;
3108 /*
3109 * Set the start to the minimum then trim the length.
3110 */
3111 else {
3112 align_alen -= orig_off - align_off;
3113 align_off = orig_off;
3114 align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
3115 }
3116 /*
3117 * Result doesn't cover the request, fail it.
3118 */
3119 if (orig_off < align_off || orig_end > align_off + align_alen)
3120 return -EINVAL;
3121 } else {
3122 ASSERT(orig_off >= align_off);
3123 /* see XFS_MAX_BMBT_EXTLEN handling above */
3124 ASSERT(orig_end <= align_off + align_alen ||
3125 align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
3126 }
3127
3128 #ifdef DEBUG
3129 if (!eof && gotp->br_startoff != NULLFILEOFF)
3130 ASSERT(align_off + align_alen <= gotp->br_startoff);
3131 if (prevp->br_startoff != NULLFILEOFF)
3132 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3133 #endif
3134
3135 *lenp = align_alen;
3136 *offp = align_off;
3137 return 0;
3138 }
3139
3140 static inline bool
3141 xfs_bmap_adjacent_valid(
3142 struct xfs_bmalloca *ap,
3143 xfs_fsblock_t x,
3144 xfs_fsblock_t y)
3145 {
3146 struct xfs_mount *mp = ap->ip->i_mount;
3147
3148 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3149 (ap->datatype & XFS_ALLOC_USERDATA)) {
3150 if (!xfs_has_rtgroups(mp))
3151 return x < mp->m_sb.sb_rblocks;
3152
3153 return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
3154 xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
3155 xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
3156
3157 }
3158
3159 return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
3160 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
3161 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks;
3162 }
3163
3164 #define XFS_ALLOC_GAP_UNITS 4
3165
3166 /* returns true if ap->blkno was modified */
3167 bool
3168 xfs_bmap_adjacent(
3169 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3170 {
3171 xfs_fsblock_t adjust; /* adjustment to block numbers */
3172
3173 /*
3174 * If allocating at eof, and there's a previous real block,
3175 * try to use its last block as our starting point.
3176 */
3177 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3178 !isnullstartblock(ap->prev.br_startblock) &&
3179 xfs_bmap_adjacent_valid(ap,
3180 ap->prev.br_startblock + ap->prev.br_blockcount,
3181 ap->prev.br_startblock)) {
3182 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3183 /*
3184 * Adjust for the gap between prevp and us.
3185 */
3186 adjust = ap->offset -
3187 (ap->prev.br_startoff + ap->prev.br_blockcount);
3188 if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust,
3189 ap->prev.br_startblock))
3190 ap->blkno += adjust;
3191 return true;
3192 }
3193 /*
3194 * If not at eof, then compare the two neighbor blocks.
3195 * Figure out whether either one gives us a good starting point,
3196 * and pick the better one.
3197 */
3198 if (!ap->eof) {
3199 xfs_fsblock_t gotbno; /* right side block number */
3200 xfs_fsblock_t gotdiff = 0; /* right side difference */
3201 xfs_fsblock_t prevbno; /* left side block number */
3202 xfs_fsblock_t prevdiff = 0; /* left side difference */
3203
3204 /*
3205 * If there's a previous (left) block, select a requested
3206 * start block based on it.
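 *
 * As an illustrative example: if the previous mapping ends at block
 * 1000 and there is a 3-block gap in the file before ap->offset while
 * we are allocating 16 blocks, the gap is within
 * XFS_ALLOC_GAP_UNITS * 16, so the requested start becomes block 1003;
 * a much larger gap would make us aim at block 1000 instead and fold
 * the gap into prevdiff.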
3207 */ 3208 if (ap->prev.br_startoff != NULLFILEOFF && 3209 !isnullstartblock(ap->prev.br_startblock) && 3210 (prevbno = ap->prev.br_startblock + 3211 ap->prev.br_blockcount) && 3212 xfs_bmap_adjacent_valid(ap, prevbno, 3213 ap->prev.br_startblock)) { 3214 /* 3215 * Calculate gap to end of previous block. 3216 */ 3217 adjust = prevdiff = ap->offset - 3218 (ap->prev.br_startoff + 3219 ap->prev.br_blockcount); 3220 /* 3221 * Figure the startblock based on the previous block's 3222 * end and the gap size. 3223 * Heuristic! 3224 * If the gap is large relative to the piece we're 3225 * allocating, or using it gives us an invalid block 3226 * number, then just use the end of the previous block. 3227 */ 3228 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3229 xfs_bmap_adjacent_valid(ap, prevbno + prevdiff, 3230 ap->prev.br_startblock)) 3231 prevbno += adjust; 3232 else 3233 prevdiff += adjust; 3234 } 3235 /* 3236 * No previous block or can't follow it, just default. 3237 */ 3238 else 3239 prevbno = NULLFSBLOCK; 3240 /* 3241 * If there's a following (right) block, select a requested 3242 * start block based on it. 3243 */ 3244 if (!isnullstartblock(ap->got.br_startblock)) { 3245 /* 3246 * Calculate gap to start of next block. 3247 */ 3248 adjust = gotdiff = ap->got.br_startoff - ap->offset; 3249 /* 3250 * Figure the startblock based on the next block's 3251 * start and the gap size. 3252 */ 3253 gotbno = ap->got.br_startblock; 3254 /* 3255 * Heuristic! 3256 * If the gap is large relative to the piece we're 3257 * allocating, or using it gives us an invalid block 3258 * number, then just use the start of the next block 3259 * offset by our length. 3260 */ 3261 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3262 xfs_bmap_adjacent_valid(ap, gotbno - gotdiff, 3263 gotbno)) 3264 gotbno -= adjust; 3265 else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length, 3266 gotbno)) { 3267 gotbno -= ap->length; 3268 gotdiff += adjust - ap->length; 3269 } else 3270 gotdiff += adjust; 3271 } 3272 /* 3273 * No next block, just default. 3274 */ 3275 else 3276 gotbno = NULLFSBLOCK; 3277 /* 3278 * If both valid, pick the better one, else the only good 3279 * one, else ap->blkno is already set (to 0 or the inode block). 3280 */ 3281 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) { 3282 ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; 3283 return true; 3284 } 3285 if (prevbno != NULLFSBLOCK) { 3286 ap->blkno = prevbno; 3287 return true; 3288 } 3289 if (gotbno != NULLFSBLOCK) { 3290 ap->blkno = gotbno; 3291 return true; 3292 } 3293 } 3294 3295 return false; 3296 } 3297 3298 int 3299 xfs_bmap_longest_free_extent( 3300 struct xfs_perag *pag, 3301 struct xfs_trans *tp, 3302 xfs_extlen_t *blen) 3303 { 3304 xfs_extlen_t longest; 3305 int error = 0; 3306 3307 if (!xfs_perag_initialised_agf(pag)) { 3308 error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK, 3309 NULL); 3310 if (error) 3311 return error; 3312 } 3313 3314 longest = xfs_alloc_longest_free_extent(pag, 3315 xfs_alloc_min_freelist(pag_mount(pag), pag), 3316 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); 3317 if (*blen < longest) 3318 *blen = longest; 3319 3320 return 0; 3321 } 3322 3323 static xfs_extlen_t 3324 xfs_bmap_select_minlen( 3325 struct xfs_bmalloca *ap, 3326 struct xfs_alloc_arg *args, 3327 xfs_extlen_t blen) 3328 { 3329 3330 /* 3331 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is 3332 * possible that there is enough contiguous free space for this request. 
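 * The best length seen (blen) can therefore understate what is really
 * free, because AGs whose AGF could not be locked contributed nothing
 * to it. So if blen is shorter than the caller's minimum, still try
 * the allocation with ap->minlen rather than failing outright.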
3333 */ 3334 if (blen < ap->minlen) 3335 return ap->minlen; 3336 3337 /* 3338 * If the best seen length is less than the request length, 3339 * use the best as the minimum, otherwise we've got the maxlen we 3340 * were asked for. 3341 */ 3342 if (blen < args->maxlen) 3343 return blen; 3344 return args->maxlen; 3345 } 3346 3347 static int 3348 xfs_bmap_btalloc_select_lengths( 3349 struct xfs_bmalloca *ap, 3350 struct xfs_alloc_arg *args, 3351 xfs_extlen_t *blen) 3352 { 3353 struct xfs_mount *mp = args->mp; 3354 struct xfs_perag *pag; 3355 xfs_agnumber_t agno, startag; 3356 int error = 0; 3357 3358 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { 3359 args->total = ap->minlen; 3360 args->minlen = ap->minlen; 3361 return 0; 3362 } 3363 3364 args->total = ap->total; 3365 startag = XFS_FSB_TO_AGNO(mp, ap->blkno); 3366 if (startag == NULLAGNUMBER) 3367 startag = 0; 3368 3369 *blen = 0; 3370 for_each_perag_wrap(mp, startag, agno, pag) { 3371 error = xfs_bmap_longest_free_extent(pag, args->tp, blen); 3372 if (error && error != -EAGAIN) 3373 break; 3374 error = 0; 3375 if (*blen >= args->maxlen) 3376 break; 3377 } 3378 if (pag) 3379 xfs_perag_rele(pag); 3380 3381 args->minlen = xfs_bmap_select_minlen(ap, args, *blen); 3382 return error; 3383 } 3384 3385 /* Update all inode and quota accounting for the allocation we just did. */ 3386 void 3387 xfs_bmap_alloc_account( 3388 struct xfs_bmalloca *ap) 3389 { 3390 bool isrt = XFS_IS_REALTIME_INODE(ap->ip) && 3391 !(ap->flags & XFS_BMAPI_ATTRFORK); 3392 uint fld; 3393 3394 if (ap->flags & XFS_BMAPI_COWFORK) { 3395 /* 3396 * COW fork blocks are in-core only and thus are treated as 3397 * in-core quota reservation (like delalloc blocks) even when 3398 * converted to real blocks. The quota reservation is not 3399 * accounted to disk until blocks are remapped to the data 3400 * fork. So if these blocks were previously delalloc, we 3401 * already have quota reservation and there's nothing to do 3402 * yet. 3403 */ 3404 if (ap->wasdel) { 3405 xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); 3406 return; 3407 } 3408 3409 /* 3410 * Otherwise, we've allocated blocks in a hole. The transaction 3411 * has acquired in-core quota reservation for this extent. 3412 * Rather than account these as real blocks, however, we reduce 3413 * the transaction quota reservation based on the allocation. 3414 * This essentially transfers the transaction quota reservation 3415 * to that of a delalloc extent. 3416 */ 3417 ap->ip->i_delayed_blks += ap->length; 3418 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ? 3419 XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS, 3420 -(long)ap->length); 3421 return; 3422 } 3423 3424 /* data/attr fork only */ 3425 ap->ip->i_nblocks += ap->length; 3426 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3427 if (ap->wasdel) { 3428 ap->ip->i_delayed_blks -= ap->length; 3429 xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); 3430 fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT; 3431 } else { 3432 fld = isrt ? 
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 3433 } 3434 3435 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length); 3436 } 3437 3438 static int 3439 xfs_bmap_compute_alignments( 3440 struct xfs_bmalloca *ap, 3441 struct xfs_alloc_arg *args) 3442 { 3443 struct xfs_mount *mp = args->mp; 3444 xfs_extlen_t align = 0; /* minimum allocation alignment */ 3445 int stripe_align = 0; 3446 3447 /* stripe alignment for allocation is determined by mount parameters */ 3448 if (mp->m_swidth && xfs_has_swalloc(mp)) 3449 stripe_align = mp->m_swidth; 3450 else if (mp->m_dalign) 3451 stripe_align = mp->m_dalign; 3452 3453 if (ap->flags & XFS_BMAPI_COWFORK) 3454 align = xfs_get_cowextsz_hint(ap->ip); 3455 else if (ap->datatype & XFS_ALLOC_USERDATA) 3456 align = xfs_get_extsz_hint(ap->ip); 3457 if (align) { 3458 if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, 3459 ap->eof, 0, ap->conv, &ap->offset, 3460 &ap->length)) 3461 ASSERT(0); 3462 ASSERT(ap->length); 3463 } 3464 3465 /* apply extent size hints if obtained earlier */ 3466 if (align) { 3467 args->prod = align; 3468 div_u64_rem(ap->offset, args->prod, &args->mod); 3469 if (args->mod) 3470 args->mod = args->prod - args->mod; 3471 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) { 3472 args->prod = 1; 3473 args->mod = 0; 3474 } else { 3475 args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog; 3476 div_u64_rem(ap->offset, args->prod, &args->mod); 3477 if (args->mod) 3478 args->mod = args->prod - args->mod; 3479 } 3480 3481 return stripe_align; 3482 } 3483 3484 static void 3485 xfs_bmap_process_allocated_extent( 3486 struct xfs_bmalloca *ap, 3487 struct xfs_alloc_arg *args, 3488 xfs_fileoff_t orig_offset, 3489 xfs_extlen_t orig_length) 3490 { 3491 ap->blkno = args->fsbno; 3492 ap->length = args->len; 3493 /* 3494 * If the extent size hint is active, we tried to round the 3495 * caller's allocation request offset down to extsz and the 3496 * length up to another extsz boundary. If we found a free 3497 * extent we mapped it in starting at this new offset. If the 3498 * newly mapped space isn't long enough to cover any of the 3499 * range of offsets that was originally requested, move the 3500 * mapping up so that we can fill as much of the caller's 3501 * original request as possible. Free space is apparently 3502 * very fragmented so we're unlikely to be able to satisfy the 3503 * hints anyway. 3504 */ 3505 if (ap->length <= orig_length) 3506 ap->offset = orig_offset; 3507 else if (ap->offset + ap->length < orig_offset + orig_length) 3508 ap->offset = orig_offset + orig_length - ap->length; 3509 xfs_bmap_alloc_account(ap); 3510 } 3511 3512 static int 3513 xfs_bmap_exact_minlen_extent_alloc( 3514 struct xfs_bmalloca *ap, 3515 struct xfs_alloc_arg *args) 3516 { 3517 if (ap->minlen != 1) { 3518 args->fsbno = NULLFSBLOCK; 3519 return 0; 3520 } 3521 3522 args->alloc_minlen_only = 1; 3523 args->minlen = args->maxlen = ap->minlen; 3524 args->total = ap->total; 3525 3526 /* 3527 * Unlike the longest extent available in an AG, we don't track 3528 * the length of an AG's shortest extent. 3529 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and 3530 * hence we can afford to start traversing from the 0th AG since 3531 * we need not be concerned about a drop in performance in 3532 * "debug only" code paths. 
*/
3534 ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
3535
3536 /*
3537 * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
3538 * iteration and then drops args->total to args->minlen, which might be
3539 * required to find an allocation for the transaction reservation when
3540 * the file system is very full.
3541 */
3542 return xfs_bmap_btalloc_low_space(ap, args);
3543 }
3544
3545 /*
3546 * If we are not low on available data blocks and we are allocating at
3547 * EOF, optimise allocation for contiguous file extension and/or stripe
3548 * alignment of the new extent.
3549 *
3550 * NOTE: ap->aeof is only set if the allocation length is >= the
3551 * stripe unit and the allocation offset is at the end of file.
3552 */
3553 static int
3554 xfs_bmap_btalloc_at_eof(
3555 struct xfs_bmalloca *ap,
3556 struct xfs_alloc_arg *args,
3557 xfs_extlen_t blen,
3558 int stripe_align,
3559 bool ag_only)
3560 {
3561 struct xfs_mount *mp = args->mp;
3562 struct xfs_perag *caller_pag = args->pag;
3563 int error;
3564
3565 /*
3566 * If there are already extents in the file, try an exact EOF block
3567 * allocation to extend the file as a contiguous extent. If that fails,
3568 * or it's the first allocation in a file, just try for a stripe aligned
3569 * allocation.
3570 */
3571 if (ap->offset) {
3572 xfs_extlen_t nextminlen = 0;
3573
3574 /*
3575 * Compute the minlen+alignment for the next case. Set slop so
3576 * that the value of minlen+alignment+slop doesn't go up between
3577 * the calls.
3578 */
3579 args->alignment = 1;
3580 if (blen > stripe_align && blen <= args->maxlen)
3581 nextminlen = blen - stripe_align;
3582 else
3583 nextminlen = args->minlen;
3584 if (nextminlen + stripe_align > args->minlen + 1)
3585 args->minalignslop = nextminlen + stripe_align -
3586 args->minlen - 1;
3587 else
3588 args->minalignslop = 0;
3589
3590 if (!caller_pag)
3591 args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3592 error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3593 if (!caller_pag) {
3594 xfs_perag_put(args->pag);
3595 args->pag = NULL;
3596 }
3597 if (error)
3598 return error;
3599
3600 if (args->fsbno != NULLFSBLOCK)
3601 return 0;
3602 /*
3603 * Exact allocation failed. Reset to try an aligned allocation
3604 * according to the original allocation specification.
3605 */
3606 args->alignment = stripe_align;
3607 args->minlen = nextminlen;
3608 args->minalignslop = 0;
3609 } else {
3610 /*
3611 * Adjust minlen to try and preserve alignment if we
3612 * can't guarantee an aligned maxlen extent.
3613 */
3614 args->alignment = stripe_align;
3615 if (blen > args->alignment &&
3616 blen <= args->maxlen + args->alignment)
3617 args->minlen = blen - args->alignment;
3618 args->minalignslop = 0;
3619 }
3620
3621 if (ag_only) {
3622 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3623 } else {
3624 args->pag = NULL;
3625 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3626 ASSERT(args->pag == NULL);
3627 args->pag = caller_pag;
3628 }
3629 if (error)
3630 return error;
3631
3632 if (args->fsbno != NULLFSBLOCK)
3633 return 0;
3634
3635 /*
3636 * Allocation failed, so return the allocation args to their
3637 * original non-aligned state so the caller can proceed on allocation
3638 * failure as if this function was never called.
3639 */
3640 args->alignment = 1;
3641 return 0;
3642 }
3643
3644 /*
3645 * We have failed multiple allocation attempts, so we are now in a low space
3646 * allocation situation. Try a locality-first, full-filesystem, minimum-length
3647 * allocation while still maintaining the necessary total block reservation
3648 * requirements.
3649 *
3650 * If that fails, we are now critically low on space, so perform a last resort
3651 * allocation attempt: no reserve, no locality, blocking, minimum length, full
3652 * filesystem free space scan. We also indicate to future allocations in this
3653 * transaction that we are critically low on space so they don't waste time on
3654 * allocation modes that are unlikely to succeed.
3655 */
3656 int
3657 xfs_bmap_btalloc_low_space(
3658 struct xfs_bmalloca *ap,
3659 struct xfs_alloc_arg *args)
3660 {
3661 int error;
3662
3663 if (args->minlen > ap->minlen) {
3664 args->minlen = ap->minlen;
3665 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3666 if (error || args->fsbno != NULLFSBLOCK)
3667 return error;
3668 }
3669
3670 /* Last-ditch attempt before failure is declared. */
3671 args->total = ap->minlen;
3672 error = xfs_alloc_vextent_first_ag(args, 0);
3673 if (error)
3674 return error;
3675 ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3676 return 0;
3677 }
3678
3679 static int
3680 xfs_bmap_btalloc_filestreams(
3681 struct xfs_bmalloca *ap,
3682 struct xfs_alloc_arg *args,
3683 int stripe_align)
3684 {
3685 xfs_extlen_t blen = 0;
3686 int error = 0;
3687
3688
3689 error = xfs_filestream_select_ag(ap, args, &blen);
3690 if (error)
3691 return error;
3692 ASSERT(args->pag);
3693
3694 /*
3695 * If we are in low space mode, then optimal allocation will fail so
3696 * prepare for minimal allocation and jump to the low space algorithm
3697 * immediately.
3698 */
3699 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3700 args->minlen = ap->minlen;
3701 ASSERT(args->fsbno == NULLFSBLOCK);
3702 goto out_low_space;
3703 }
3704
3705 args->minlen = xfs_bmap_select_minlen(ap, args, blen);
3706 if (ap->aeof)
3707 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3708 true);
3709
3710 if (!error && args->fsbno == NULLFSBLOCK)
3711 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3712
3713 out_low_space:
3714 /*
3715 * We are now done with the perag reference for the filestreams
3716 * association provided by xfs_filestream_select_ag(). Release it now as
3717 * we've either succeeded, had a fatal error or we are out of space and
3718 * need to do a full filesystem scan for free space which will take its
3719 * own references.
3720 */
3721 xfs_perag_rele(args->pag);
3722 args->pag = NULL;
3723 if (error || args->fsbno != NULLFSBLOCK)
3724 return error;
3725
3726 return xfs_bmap_btalloc_low_space(ap, args);
3727 }
3728
3729 static int
3730 xfs_bmap_btalloc_best_length(
3731 struct xfs_bmalloca *ap,
3732 struct xfs_alloc_arg *args,
3733 int stripe_align)
3734 {
3735 xfs_extlen_t blen = 0;
3736 int error;
3737
3738 ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3739 xfs_bmap_adjacent(ap);
3740
3741 /*
3742 * Search for an allocation group with a single extent large enough for
3743 * the request. If one isn't found, then adjust the minimum allocation
3744 * size to the largest space found.
3745 */
3746 error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3747 if (error)
3748 return error;
3749
3750 /*
3751 * Don't attempt optimal EOF allocation if previous allocations barely
3752 * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3753 * optimal or even aligned allocations in this case, so don't waste time
3754 * trying.
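 *
 * The fallback order below is: (1) an exact/stripe-aligned allocation
 * at EOF, (2) a best-length allocation starting at the AG chosen above,
 * and finally (3) the low space algorithm.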
3755 */ 3756 if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) { 3757 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align, 3758 false); 3759 if (error || args->fsbno != NULLFSBLOCK) 3760 return error; 3761 } 3762 3763 error = xfs_alloc_vextent_start_ag(args, ap->blkno); 3764 if (error || args->fsbno != NULLFSBLOCK) 3765 return error; 3766 3767 return xfs_bmap_btalloc_low_space(ap, args); 3768 } 3769 3770 static int 3771 xfs_bmap_btalloc( 3772 struct xfs_bmalloca *ap) 3773 { 3774 struct xfs_mount *mp = ap->ip->i_mount; 3775 struct xfs_alloc_arg args = { 3776 .tp = ap->tp, 3777 .mp = mp, 3778 .fsbno = NULLFSBLOCK, 3779 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, 3780 .minleft = ap->minleft, 3781 .wasdel = ap->wasdel, 3782 .resv = XFS_AG_RESV_NONE, 3783 .datatype = ap->datatype, 3784 .alignment = 1, 3785 .minalignslop = 0, 3786 }; 3787 xfs_fileoff_t orig_offset; 3788 xfs_extlen_t orig_length; 3789 int error; 3790 int stripe_align; 3791 3792 ASSERT(ap->length); 3793 orig_offset = ap->offset; 3794 orig_length = ap->length; 3795 3796 stripe_align = xfs_bmap_compute_alignments(ap, &args); 3797 3798 /* Trim the allocation back to the maximum an AG can fit. */ 3799 args.maxlen = min(ap->length, mp->m_ag_max_usable); 3800 3801 if (unlikely(XFS_TEST_ERROR(false, mp, 3802 XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) 3803 error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); 3804 else if ((ap->datatype & XFS_ALLOC_USERDATA) && 3805 xfs_inode_is_filestream(ap->ip)) 3806 error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); 3807 else 3808 error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); 3809 if (error) 3810 return error; 3811 3812 if (args.fsbno != NULLFSBLOCK) { 3813 xfs_bmap_process_allocated_extent(ap, &args, orig_offset, 3814 orig_length); 3815 } else { 3816 ap->blkno = NULLFSBLOCK; 3817 ap->length = 0; 3818 } 3819 return 0; 3820 } 3821 3822 /* Trim extent to fit a logical block range. 
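 * As an illustrative example: trimming irec = [startoff 10, startblock
 * 100, blockcount 20] to bno = 15, len = 10 yields [startoff 15,
 * startblock 105, blockcount 10]; delalloc extents keep the special
 * DELAYSTARTBLOCK start instead of a shifted block number.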
*/
3823 void
3824 xfs_trim_extent(
3825 struct xfs_bmbt_irec *irec,
3826 xfs_fileoff_t bno,
3827 xfs_filblks_t len)
3828 {
3829 xfs_fileoff_t distance;
3830 xfs_fileoff_t end = bno + len;
3831
3832 if (irec->br_startoff + irec->br_blockcount <= bno ||
3833 irec->br_startoff >= end) {
3834 irec->br_blockcount = 0;
3835 return;
3836 }
3837
3838 if (irec->br_startoff < bno) {
3839 distance = bno - irec->br_startoff;
3840 if (isnullstartblock(irec->br_startblock))
3841 irec->br_startblock = DELAYSTARTBLOCK;
3842 if (irec->br_startblock != DELAYSTARTBLOCK &&
3843 irec->br_startblock != HOLESTARTBLOCK)
3844 irec->br_startblock += distance;
3845 irec->br_startoff += distance;
3846 irec->br_blockcount -= distance;
3847 }
3848
3849 if (end < irec->br_startoff + irec->br_blockcount) {
3850 distance = irec->br_startoff + irec->br_blockcount - end;
3851 irec->br_blockcount -= distance;
3852 }
3853 }
3854
3855 /*
3856 * Trim the returned map to the required bounds
3857 */
3858 STATIC void
3859 xfs_bmapi_trim_map(
3860 struct xfs_bmbt_irec *mval,
3861 struct xfs_bmbt_irec *got,
3862 xfs_fileoff_t *bno,
3863 xfs_filblks_t len,
3864 xfs_fileoff_t obno,
3865 xfs_fileoff_t end,
3866 int n,
3867 uint32_t flags)
3868 {
3869 if ((flags & XFS_BMAPI_ENTIRE) ||
3870 got->br_startoff + got->br_blockcount <= obno) {
3871 *mval = *got;
3872 if (isnullstartblock(got->br_startblock))
3873 mval->br_startblock = DELAYSTARTBLOCK;
3874 return;
3875 }
3876
3877 if (obno > *bno)
3878 *bno = obno;
3879 ASSERT((*bno >= obno) || (n == 0));
3880 ASSERT(*bno < end);
3881 mval->br_startoff = *bno;
3882 if (isnullstartblock(got->br_startblock))
3883 mval->br_startblock = DELAYSTARTBLOCK;
3884 else
3885 mval->br_startblock = got->br_startblock +
3886 (*bno - got->br_startoff);
3887 /*
3888 * Return the minimum of what we got and what we asked for as
3889 * the length. We can use the len variable here because it is
3890 * modified below and we could have already been here on an
3891 * earlier pass if the first part of the allocation didn't
3892 * overlap what was asked for.
3893 */ 3894 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, 3895 got->br_blockcount - (*bno - got->br_startoff)); 3896 mval->br_state = got->br_state; 3897 ASSERT(mval->br_blockcount <= len); 3898 return; 3899 } 3900 3901 /* 3902 * Update and validate the extent map to return 3903 */ 3904 STATIC void 3905 xfs_bmapi_update_map( 3906 struct xfs_bmbt_irec **map, 3907 xfs_fileoff_t *bno, 3908 xfs_filblks_t *len, 3909 xfs_fileoff_t obno, 3910 xfs_fileoff_t end, 3911 int *n, 3912 uint32_t flags) 3913 { 3914 xfs_bmbt_irec_t *mval = *map; 3915 3916 ASSERT((flags & XFS_BMAPI_ENTIRE) || 3917 ((mval->br_startoff + mval->br_blockcount) <= end)); 3918 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || 3919 (mval->br_startoff < obno)); 3920 3921 *bno = mval->br_startoff + mval->br_blockcount; 3922 *len = end - *bno; 3923 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { 3924 /* update previous map with new information */ 3925 ASSERT(mval->br_startblock == mval[-1].br_startblock); 3926 ASSERT(mval->br_blockcount > mval[-1].br_blockcount); 3927 ASSERT(mval->br_state == mval[-1].br_state); 3928 mval[-1].br_blockcount = mval->br_blockcount; 3929 mval[-1].br_state = mval->br_state; 3930 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && 3931 mval[-1].br_startblock != DELAYSTARTBLOCK && 3932 mval[-1].br_startblock != HOLESTARTBLOCK && 3933 mval->br_startblock == mval[-1].br_startblock + 3934 mval[-1].br_blockcount && 3935 mval[-1].br_state == mval->br_state) { 3936 ASSERT(mval->br_startoff == 3937 mval[-1].br_startoff + mval[-1].br_blockcount); 3938 mval[-1].br_blockcount += mval->br_blockcount; 3939 } else if (*n > 0 && 3940 mval->br_startblock == DELAYSTARTBLOCK && 3941 mval[-1].br_startblock == DELAYSTARTBLOCK && 3942 mval->br_startoff == 3943 mval[-1].br_startoff + mval[-1].br_blockcount) { 3944 mval[-1].br_blockcount += mval->br_blockcount; 3945 mval[-1].br_state = mval->br_state; 3946 } else if (!((*n == 0) && 3947 ((mval->br_startoff + mval->br_blockcount) <= 3948 obno))) { 3949 mval++; 3950 (*n)++; 3951 } 3952 *map = mval; 3953 } 3954 3955 /* 3956 * Map file blocks to filesystem blocks without allocation. 
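 *
 * A minimal caller sketch (hedged; FSB conversion and error handling
 * elided, offset_fsb/count_fsb are illustrative names):
 *
 *	struct xfs_bmbt_irec map;
 *	int nmaps = 1;
 *
 *	xfs_ilock(ip, XFS_ILOCK_SHARED);
 *	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &map, &nmaps, 0);
 *	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 *
 * Holes come back with br_startblock == HOLESTARTBLOCK and delalloc
 * reservations with br_startblock == DELAYSTARTBLOCK; nothing is ever
 * allocated here.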
3957 */ 3958 int 3959 xfs_bmapi_read( 3960 struct xfs_inode *ip, 3961 xfs_fileoff_t bno, 3962 xfs_filblks_t len, 3963 struct xfs_bmbt_irec *mval, 3964 int *nmap, 3965 uint32_t flags) 3966 { 3967 struct xfs_mount *mp = ip->i_mount; 3968 int whichfork = xfs_bmapi_whichfork(flags); 3969 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 3970 struct xfs_bmbt_irec got; 3971 xfs_fileoff_t obno; 3972 xfs_fileoff_t end; 3973 struct xfs_iext_cursor icur; 3974 int error; 3975 bool eof = false; 3976 int n = 0; 3977 3978 ASSERT(*nmap >= 1); 3979 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE))); 3980 xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); 3981 3982 if (WARN_ON_ONCE(!ifp)) { 3983 xfs_bmap_mark_sick(ip, whichfork); 3984 return -EFSCORRUPTED; 3985 } 3986 3987 if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || 3988 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { 3989 xfs_bmap_mark_sick(ip, whichfork); 3990 return -EFSCORRUPTED; 3991 } 3992 3993 if (xfs_is_shutdown(mp)) 3994 return -EIO; 3995 3996 XFS_STATS_INC(mp, xs_blk_mapr); 3997 3998 error = xfs_iread_extents(NULL, ip, whichfork); 3999 if (error) 4000 return error; 4001 4002 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) 4003 eof = true; 4004 end = bno + len; 4005 obno = bno; 4006 4007 while (bno < end && n < *nmap) { 4008 /* Reading past eof, act as though there's a hole up to end. */ 4009 if (eof) 4010 got.br_startoff = end; 4011 if (got.br_startoff > bno) { 4012 /* Reading in a hole. */ 4013 mval->br_startoff = bno; 4014 mval->br_startblock = HOLESTARTBLOCK; 4015 mval->br_blockcount = 4016 XFS_FILBLKS_MIN(len, got.br_startoff - bno); 4017 mval->br_state = XFS_EXT_NORM; 4018 bno += mval->br_blockcount; 4019 len -= mval->br_blockcount; 4020 mval++; 4021 n++; 4022 continue; 4023 } 4024 4025 /* set up the extent map to return. */ 4026 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); 4027 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); 4028 4029 /* If we're done, stop now. */ 4030 if (bno >= end || n >= *nmap) 4031 break; 4032 4033 /* Else go on to the next record. */ 4034 if (!xfs_iext_next_extent(ifp, &icur, &got)) 4035 eof = true; 4036 } 4037 *nmap = n; 4038 return 0; 4039 } 4040 4041 /* 4042 * Add a delayed allocation extent to an inode. Blocks are reserved from the 4043 * global pool and the extent inserted into the inode in-core extent tree. 4044 * 4045 * On entry, got refers to the first extent beyond the offset of the extent to 4046 * allocate or eof is specified if no such extent exists. On return, got refers 4047 * to the extent record that was inserted to the inode fork. 4048 * 4049 * Note that the allocated extent may have been merged with contiguous extents 4050 * during insertion into the inode fork. Thus, got does not reflect the current 4051 * state of the inode fork on return. If necessary, the caller can use lastx to 4052 * look up the updated record in the inode fork. 
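 *
 * A sketch of the expected calling convention (hedged, loosely modeled
 * on the buffered write iomap path; locking and error handling elided):
 *
 *	eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
 *	error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb,
 *			count_fsb, prealloc_blocks, &got, &icur, eof);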
4053 */
4054 int
4055 xfs_bmapi_reserve_delalloc(
4056 struct xfs_inode *ip,
4057 int whichfork,
4058 xfs_fileoff_t off,
4059 xfs_filblks_t len,
4060 xfs_filblks_t prealloc,
4061 struct xfs_bmbt_irec *got,
4062 struct xfs_iext_cursor *icur,
4063 int eof)
4064 {
4065 struct xfs_mount *mp = ip->i_mount;
4066 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
4067 xfs_extlen_t alen;
4068 xfs_extlen_t indlen;
4069 uint64_t fdblocks;
4070 int error;
4071 xfs_fileoff_t aoff;
4072 bool use_cowextszhint =
4073 whichfork == XFS_COW_FORK && !prealloc;
4074
4075 retry:
4076 /*
4077 * Cap the alloc length. Keep track of prealloc so we know whether to
4078 * tag the inode before we return.
4079 */
4080 aoff = off;
4081 alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
4082 if (!eof)
4083 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4084 if (prealloc && alen >= len)
4085 prealloc = alen - len;
4086
4087 /*
4088 * If we're targeting the COW fork but aren't creating a speculative
4089 * posteof preallocation, try to expand the reservation to align with
4090 * the COW extent size hint if there's sufficient free space.
4091 *
4092 * Unlike the data fork, the CoW cancellation functions will free all
4093 * the reservations at inactivation, so we don't require that every
4094 * delalloc reservation have a dirty pagecache.
4095 */
4096 if (use_cowextszhint) {
4097 struct xfs_bmbt_irec prev;
4098 xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
4099
4100 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4101 prev.br_startoff = NULLFILEOFF;
4102
4103 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4104 1, 0, &aoff, &alen);
4105 ASSERT(!error);
4106 }
4107
4108 /*
4109 * Make a transaction-less quota reservation for delayed allocation
4110 * blocks. This number gets adjusted later. We return if we haven't
4111 * allocated blocks already inside this loop.
4112 */
4113 error = xfs_quota_reserve_blkres(ip, alen);
4114 if (error)
4115 goto out;
4116
4117 /*
4118 * Account alen and indlen against the superblock counters separately,
4119 * since they can come from different pools.
4120 */
4121 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4122 ASSERT(indlen > 0);
4123
4124 fdblocks = indlen;
4125 if (XFS_IS_REALTIME_INODE(ip)) {
4126 error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
4127 if (error)
4128 goto out_unreserve_quota;
4129 } else {
4130 fdblocks += alen;
4131 }
4132
4133 error = xfs_dec_fdblocks(mp, fdblocks, false);
4134 if (error)
4135 goto out_unreserve_frextents;
4136
4137 ip->i_delayed_blks += alen;
4138 xfs_mod_delalloc(ip, alen, indlen);
4139
4140 got->br_startoff = aoff;
4141 got->br_startblock = nullstartblock(indlen);
4142 got->br_blockcount = alen;
4143 got->br_state = XFS_EXT_NORM;
4144
4145 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4146
4147 /*
4148 * Tag the inode if blocks were preallocated. Note that COW fork
4149 * preallocation can occur at the start or end of the extent, even when
4150 * prealloc == 0, so we must also check the aligned offset and length.
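 *
 * For example (illustrative numbers): with a CoW extent size hint of
 * 32 blocks, a request with off = 40, len = 4 may be aligned out to
 * aoff = 32, alen = 32; then aoff < off and alen > len both hold, so
 * the inode is tagged for cowblocks reclaim even though prealloc == 0.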
4151 */ 4152 if (whichfork == XFS_DATA_FORK && prealloc) 4153 xfs_inode_set_eofblocks_tag(ip); 4154 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4155 xfs_inode_set_cowblocks_tag(ip); 4156 4157 return 0; 4158 4159 out_unreserve_frextents: 4160 if (XFS_IS_REALTIME_INODE(ip)) 4161 xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen)); 4162 out_unreserve_quota: 4163 if (XFS_IS_QUOTA_ON(mp)) 4164 xfs_quota_unreserve_blkres(ip, alen); 4165 out: 4166 if (error == -ENOSPC || error == -EDQUOT) { 4167 trace_xfs_delalloc_enospc(ip, off, len); 4168 4169 if (prealloc || use_cowextszhint) { 4170 /* retry without any preallocation */ 4171 use_cowextszhint = false; 4172 prealloc = 0; 4173 goto retry; 4174 } 4175 } 4176 return error; 4177 } 4178 4179 static int 4180 xfs_bmapi_allocate( 4181 struct xfs_bmalloca *bma) 4182 { 4183 struct xfs_mount *mp = bma->ip->i_mount; 4184 int whichfork = xfs_bmapi_whichfork(bma->flags); 4185 struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork); 4186 int error; 4187 4188 ASSERT(bma->length > 0); 4189 ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN); 4190 4191 if (bma->flags & XFS_BMAPI_CONTIG) 4192 bma->minlen = bma->length; 4193 else 4194 bma->minlen = 1; 4195 4196 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4197 /* 4198 * For the data and COW fork, the first data in the file is 4199 * treated differently to all other allocations. For the 4200 * attribute fork, we only need to ensure the allocated range 4201 * is not on the busy list. 4202 */ 4203 bma->datatype = XFS_ALLOC_NOBUSY; 4204 if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { 4205 bma->datatype |= XFS_ALLOC_USERDATA; 4206 if (bma->offset == 0) 4207 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4208 4209 if (mp->m_dalign && bma->length >= mp->m_dalign) { 4210 error = xfs_bmap_isaeof(bma, whichfork); 4211 if (error) 4212 return error; 4213 } 4214 } 4215 } 4216 4217 if ((bma->datatype & XFS_ALLOC_USERDATA) && 4218 XFS_IS_REALTIME_INODE(bma->ip)) 4219 error = xfs_bmap_rtalloc(bma); 4220 else 4221 error = xfs_bmap_btalloc(bma); 4222 if (error) 4223 return error; 4224 if (bma->blkno == NULLFSBLOCK) 4225 return -ENOSPC; 4226 4227 if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) { 4228 xfs_bmap_mark_sick(bma->ip, whichfork); 4229 return -EFSCORRUPTED; 4230 } 4231 4232 if (bma->flags & XFS_BMAPI_ZERO) { 4233 error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); 4234 if (error) 4235 return error; 4236 } 4237 4238 if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) 4239 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4240 /* 4241 * Bump the number of extents we've allocated 4242 * in this call. 4243 */ 4244 bma->nallocs++; 4245 4246 if (bma->cur && bma->wasdel) 4247 bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL; 4248 4249 bma->got.br_startoff = bma->offset; 4250 bma->got.br_startblock = bma->blkno; 4251 bma->got.br_blockcount = bma->length; 4252 bma->got.br_state = XFS_EXT_NORM; 4253 4254 if (bma->flags & XFS_BMAPI_PREALLOC) 4255 bma->got.br_state = XFS_EXT_UNWRITTEN; 4256 4257 if (bma->wasdel) 4258 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4259 else 4260 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, 4261 whichfork, &bma->icur, &bma->cur, &bma->got, 4262 &bma->logflags, bma->flags); 4263 if (error) 4264 return error; 4265 4266 /* 4267 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real 4268 * or xfs_bmap_add_extent_hole_real might have merged it into one of 4269 * the neighbouring ones. 
4270 */ 4271 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4272 4273 ASSERT(bma->got.br_startoff <= bma->offset); 4274 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= 4275 bma->offset + bma->length); 4276 ASSERT(bma->got.br_state == XFS_EXT_NORM || 4277 bma->got.br_state == XFS_EXT_UNWRITTEN); 4278 return 0; 4279 } 4280 4281 STATIC int 4282 xfs_bmapi_convert_unwritten( 4283 struct xfs_bmalloca *bma, 4284 struct xfs_bmbt_irec *mval, 4285 xfs_filblks_t len, 4286 uint32_t flags) 4287 { 4288 int whichfork = xfs_bmapi_whichfork(flags); 4289 struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork); 4290 int tmp_logflags = 0; 4291 int error; 4292 4293 /* check if we need to do unwritten->real conversion */ 4294 if (mval->br_state == XFS_EXT_UNWRITTEN && 4295 (flags & XFS_BMAPI_PREALLOC)) 4296 return 0; 4297 4298 /* check if we need to do real->unwritten conversion */ 4299 if (mval->br_state == XFS_EXT_NORM && 4300 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != 4301 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) 4302 return 0; 4303 4304 /* 4305 * Modify (by adding) the state flag, if writing. 4306 */ 4307 ASSERT(mval->br_blockcount <= len); 4308 if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) { 4309 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4310 bma->ip, whichfork); 4311 } 4312 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4313 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4314 4315 /* 4316 * Before insertion into the bmbt, zero the range being converted 4317 * if required. 4318 */ 4319 if (flags & XFS_BMAPI_ZERO) { 4320 error = xfs_zero_extent(bma->ip, mval->br_startblock, 4321 mval->br_blockcount); 4322 if (error) 4323 return error; 4324 } 4325 4326 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, 4327 &bma->icur, &bma->cur, mval, &tmp_logflags); 4328 /* 4329 * Log the inode core unconditionally in the unwritten extent conversion 4330 * path because the conversion might not have done so (e.g., if the 4331 * extent count hasn't changed). We need to make sure the inode is dirty 4332 * in the transaction for the sake of fsync(), even if nothing has 4333 * changed, because fsync() will not force the log for this transaction 4334 * unless it sees the inode pinned. 4335 * 4336 * Note: If we're only converting cow fork extents, there aren't 4337 * any on-disk updates to make, so we don't need to log anything. 4338 */ 4339 if (whichfork != XFS_COW_FORK) 4340 bma->logflags |= tmp_logflags | XFS_ILOG_CORE; 4341 if (error) 4342 return error; 4343 4344 /* 4345 * Update our extent pointer, given that 4346 * xfs_bmap_add_extent_unwritten_real might have merged it into one 4347 * of the neighbouring ones. 4348 */ 4349 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4350 4351 /* 4352 * We may have combined previously unwritten space with written space, 4353 * so generate another request. 4354 */ 4355 if (mval->br_blockcount < len) 4356 return -EAGAIN; 4357 return 0; 4358 } 4359 4360 xfs_extlen_t 4361 xfs_bmapi_minleft( 4362 struct xfs_trans *tp, 4363 struct xfs_inode *ip, 4364 int fork) 4365 { 4366 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, fork); 4367 4368 if (tp && tp->t_highest_agno != NULLAGNUMBER) 4369 return 0; 4370 if (ifp->if_format != XFS_DINODE_FMT_BTREE) 4371 return 1; 4372 return be16_to_cpu(ifp->if_broot->bb_level) + 1; 4373 } 4374 4375 /* 4376 * Log whatever the flags say, even if error. 
Otherwise we might miss detecting
4377 * a case where the data is changed, there's an error, and it's not logged so we
4378 * don't shut down when we should. Don't bother logging extents/btree changes if
4379 * we converted to the other format.
4380 */
4381 static void
4382 xfs_bmapi_finish(
4383 struct xfs_bmalloca *bma,
4384 int whichfork,
4385 int error)
4386 {
4387 struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork);
4388
4389 if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4390 ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4391 bma->logflags &= ~xfs_ilog_fext(whichfork);
4392 else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4393 ifp->if_format != XFS_DINODE_FMT_BTREE)
4394 bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4395
4396 if (bma->logflags)
4397 xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4398 if (bma->cur)
4399 xfs_btree_del_cursor(bma->cur, error);
4400 }
4401
4402 /*
4403 * Map file blocks to filesystem blocks, and allocate blocks or convert the
4404 * extent state if necessary. Detailed behaviour is controlled by the flags
4405 * parameter. Only allocates blocks from a single allocation group, to avoid
4406 * locking problems.
4407 *
4408 * Returns 0 on success and places the extent mappings in mval. nmaps is used
4409 * as an input/output parameter where the caller specifies the maximum number
4410 * of mappings that may be returned and xfs_bmapi_write passes back the number
4411 * of mappings (including existing mappings) it found.
4412 *
4413 * Returns a negative error code on failure, including -ENOSPC when it could not
4414 * allocate any blocks and -ENOSR when it did allocate blocks to convert a
4415 * delalloc range, but those blocks were before the passed in range.
4416 */
4417 int
4418 xfs_bmapi_write(
4419 struct xfs_trans *tp, /* transaction pointer */
4420 struct xfs_inode *ip, /* incore inode */
4421 xfs_fileoff_t bno, /* starting file offs. mapped */
4422 xfs_filblks_t len, /* length to map in file */
4423 uint32_t flags, /* XFS_BMAPI_...
*/
4424 xfs_extlen_t total, /* total blocks needed */
4425 struct xfs_bmbt_irec *mval, /* output: map values */
4426 int *nmap) /* i/o: mval size/count */
4427 {
4428 struct xfs_bmalloca bma = {
4429 .tp = tp,
4430 .ip = ip,
4431 .total = total,
4432 };
4433 struct xfs_mount *mp = ip->i_mount;
4434 int whichfork = xfs_bmapi_whichfork(flags);
4435 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
4436 xfs_fileoff_t end; /* end of mapped file region */
4437 bool eof = false; /* after the end of extents */
4438 int error; /* error return */
4439 int n; /* current extent index */
4440 xfs_fileoff_t obno; /* old block number (offset) */
4441
4442 #ifdef DEBUG
4443 xfs_fileoff_t orig_bno; /* original block number value */
4444 int orig_flags; /* original flags arg value */
4445 xfs_filblks_t orig_len; /* original value of len arg */
4446 struct xfs_bmbt_irec *orig_mval; /* original value of mval */
4447 int orig_nmap; /* original value of *nmap */
4448
4449 orig_bno = bno;
4450 orig_len = len;
4451 orig_flags = flags;
4452 orig_mval = mval;
4453 orig_nmap = *nmap;
4454 #endif
4455
4456 ASSERT(*nmap >= 1);
4457 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4458 ASSERT(tp != NULL);
4459 ASSERT(len > 0);
4460 ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4461 xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4462 ASSERT(!(flags & XFS_BMAPI_REMAP));
4463
4464 /* zeroing is currently only for data extents, not metadata */
4465 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4466 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4467 /*
4468 * we can allocate unwritten extents or pre-zero allocated blocks,
4469 * but it makes no sense to do both at once. This would result in
4470 * zeroing the unwritten extent twice while it still remained an
4471 * unwritten extent.
4472 */
4473 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4474 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4475
4476 if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4477 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4478 xfs_bmap_mark_sick(ip, whichfork);
4479 return -EFSCORRUPTED;
4480 }
4481
4482 if (xfs_is_shutdown(mp))
4483 return -EIO;
4484
4485 XFS_STATS_INC(mp, xs_blk_mapw);
4486
4487 error = xfs_iread_extents(tp, ip, whichfork);
4488 if (error)
4489 goto error0;
4490
4491 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4492 eof = true;
4493 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4494 bma.prev.br_startoff = NULLFILEOFF;
4495 bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4496
4497 n = 0;
4498 end = bno + len;
4499 obno = bno;
4500 while (bno < end && n < *nmap) {
4501 bool need_alloc = false, wasdelay = false;
4502
4503 /* in hole or beyond EOF? */
4504 if (eof || bma.got.br_startoff > bno) {
4505 /*
4506 * CoW fork conversions should /never/ hit EOF or
4507 * holes. There should always be something for us
4508 * to work on.
4509 */
4510 ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4511 (flags & XFS_BMAPI_COWFORK)));
4512
4513 need_alloc = true;
4514 } else if (isnullstartblock(bma.got.br_startblock)) {
4515 wasdelay = true;
4516 }
4517
4518 /*
4519 * First, deal with the hole before the allocated space
4520 * that we found, if any.
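 *
 * For example (illustrative): with got = [50, 60) and bno = 40, the
 * hole [40, 50) must be allocated first (need_alloc); if got is
 * instead a delalloc reservation covering bno, it is converted to a
 * real extent in place (wasdelay).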
4521 */
4522 if (need_alloc || wasdelay) {
4523 bma.eof = eof;
4524 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4525 bma.wasdel = wasdelay;
4526 bma.offset = bno;
4527 bma.flags = flags;
4528
4529 /*
4530 * There's a 32/64 bit type mismatch between the
4531 * allocation length request (which can be 64 bits in
4532 * length) and the bma length request, which is
4533 * xfs_extlen_t and therefore 32 bits. Hence we have to
4534 * be careful and do the min() using the larger type to
4535 * avoid overflows.
4536 */
4537 bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
4538
4539 if (wasdelay) {
4540 bma.length = XFS_FILBLKS_MIN(bma.length,
4541 bma.got.br_blockcount -
4542 (bno - bma.got.br_startoff));
4543 } else {
4544 if (!eof)
4545 bma.length = XFS_FILBLKS_MIN(bma.length,
4546 bma.got.br_startoff - bno);
4547 }
4548
4549 ASSERT(bma.length > 0);
4550 error = xfs_bmapi_allocate(&bma);
4551 if (error) {
4552 /*
4553 * If we already allocated space in a previous
4554 * iteration, return what we got so far when
4555 * running out of space.
4556 */
4557 if (error == -ENOSPC && bma.nallocs)
4558 break;
4559 goto error0;
4560 }
4561
4562 /*
4563 * If this is a CoW allocation, record the data in
4564 * the refcount btree for orphan recovery.
4565 */
4566 if (whichfork == XFS_COW_FORK)
4567 xfs_refcount_alloc_cow_extent(tp,
4568 XFS_IS_REALTIME_INODE(ip),
4569 bma.blkno, bma.length);
4570 }
4571
4572 /* Deal with the allocated space we found. */
4573 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4574 end, n, flags);
4575
4576 /* Execute unwritten extent conversion if necessary */
4577 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4578 if (error == -EAGAIN)
4579 continue;
4580 if (error)
4581 goto error0;
4582
4583 /* update the extent map to return */
4584 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4585
4586 /*
4587 * If we're done, stop now. Stop when we've allocated
4588 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
4589 * the transaction may get too big.
4590 */
4591 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4592 break;
4593
4594 /* Else go on to the next record. */
4595 bma.prev = bma.got;
4596 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4597 eof = true;
4598 }
4599
4600 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4601 whichfork);
4602 if (error)
4603 goto error0;
4604
4605 ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4606 ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4607 xfs_bmapi_finish(&bma, whichfork, 0);
4608 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4609 orig_nmap, n);
4610
4611 /*
4612 * When converting delayed allocations, xfs_bmapi_allocate ignores
4613 * the passed in bno and always converts from the start of the found
4614 * delalloc extent.
4615 *
4616 * To avoid a successful return with *nmap set to 0, return the magic
4617 * -ENOSR error code for this particular case so that the caller can
4618 * handle it.
4619 */
4620 if (!n) {
4621 ASSERT(bma.nallocs >= *nmap);
4622 return -ENOSR;
4623 }
4624 *nmap = n;
4625 return 0;
4626 error0:
4627 xfs_bmapi_finish(&bma, whichfork, error);
4628 return error;
4629 }
4630
4631 /*
4632 * Convert an existing delalloc extent to real blocks based on file offset. This
4633 * attempts to allocate the entire delalloc extent and may require multiple
4634 * invocations to allocate the target offset if a large enough physical extent
4635 * is not available.
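 *
 * For example (illustrative): converting a delalloc reservation over
 * [0, 100) with badly fragmented free space may only produce a real
 * extent over [0, 40) on the first call; a caller targeting offset 50
 * must call again, which is what the loop in
 * xfs_bmapi_convert_delalloc() below does.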
4636 */
4637 static int
4638 xfs_bmapi_convert_one_delalloc(
4639 struct xfs_inode *ip,
4640 int whichfork,
4641 xfs_off_t offset,
4642 struct iomap *iomap,
4643 unsigned int *seq)
4644 {
4645 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
4646 struct xfs_mount *mp = ip->i_mount;
4647 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
4648 struct xfs_bmalloca bma = { NULL };
4649 uint16_t flags = 0;
4650 struct xfs_trans *tp;
4651 int error;
4652
4653 if (whichfork == XFS_COW_FORK)
4654 flags |= IOMAP_F_SHARED;
4655
4656 /*
4657 * Space for the extent and indirect blocks was reserved when the
4658 * delalloc extent was created so there's no need to do so here.
4659 */
4660 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4661 XFS_TRANS_RESERVE, &tp);
4662 if (error)
4663 return error;
4664
4665 xfs_ilock(ip, XFS_ILOCK_EXCL);
4666 xfs_trans_ijoin(tp, ip, 0);
4667
4668 error = xfs_iext_count_extend(tp, ip, whichfork,
4669 XFS_IEXT_ADD_NOSPLIT_CNT);
4670 if (error)
4671 goto out_trans_cancel;
4672
4673 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4674 bma.got.br_startoff > offset_fsb) {
4675 /*
4676 * No extent found in the range we are trying to convert. This
4677 * should only happen for the COW fork, where another thread
4678 * might have moved the extent to the data fork in the meantime.
4679 */
4680 WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4681 error = -EAGAIN;
4682 goto out_trans_cancel;
4683 }
4684
4685 /*
4686 * If we find a real extent here, we raced with another thread
4687 * converting the extent. Just return the real extent at this offset.
4688 */
4689 if (!isnullstartblock(bma.got.br_startblock)) {
4690 xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4691 xfs_iomap_inode_sequence(ip, flags));
4692 if (seq)
4693 *seq = READ_ONCE(ifp->if_seq);
4694 goto out_trans_cancel;
4695 }
4696
4697 bma.tp = tp;
4698 bma.ip = ip;
4699 bma.wasdel = true;
4700 bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4701
4702 /*
4703 * Always allocate and convert from the start of the delalloc extent,
4704 * even if that is outside the passed in range, to create large
4705 * contiguous extents on disk.
4706 */
4707 bma.offset = bma.got.br_startoff;
4708 bma.length = bma.got.br_blockcount;
4709
4710 /*
4711 * When we're converting the delalloc reservations backing dirty pages
4712 * in the page cache, we must be careful about how we create the new
4713 * extents:
4714 *
4715 * New CoW fork extents are created unwritten, turned into real extents
4716 * when we're about to write the data to disk, and mapped into the data
4717 * fork after the write finishes. End of story.
4718 *
4719 * New data fork extents must be mapped in as unwritten and converted
4720 * to real extents after the write succeeds to avoid exposing stale
4721 * disk contents if we crash.
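 *
 * Either way, the allocation below is made with XFS_BMAPI_PREALLOC so
 * that the new extent comes back in the unwritten state.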
4722 */
4723 bma.flags = XFS_BMAPI_PREALLOC;
4724 if (whichfork == XFS_COW_FORK)
4725 bma.flags |= XFS_BMAPI_COWFORK;
4726
4727 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4728 bma.prev.br_startoff = NULLFILEOFF;
4729
4730 error = xfs_bmapi_allocate(&bma);
4731 if (error)
4732 goto out_finish;
4733
4734 XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4735 XFS_STATS_INC(mp, xs_xstrat_quick);
4736
4737 ASSERT(!isnullstartblock(bma.got.br_startblock));
4738 xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4739 xfs_iomap_inode_sequence(ip, flags));
4740 if (seq)
4741 *seq = READ_ONCE(ifp->if_seq);
4742
4743 if (whichfork == XFS_COW_FORK)
4744 xfs_refcount_alloc_cow_extent(tp, XFS_IS_REALTIME_INODE(ip),
4745 bma.blkno, bma.length);
4746
4747 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4748 whichfork);
4749 if (error)
4750 goto out_finish;
4751
4752 xfs_bmapi_finish(&bma, whichfork, 0);
4753 error = xfs_trans_commit(tp);
4754 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4755 return error;
4756
4757 out_finish:
4758 xfs_bmapi_finish(&bma, whichfork, error);
4759 out_trans_cancel:
4760 xfs_trans_cancel(tp);
4761 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4762 return error;
4763 }
4764
4765 /*
4766 * Pass in a delalloc extent and convert it to real extents; return the real
4767 * extent that maps offset_fsb in iomap.
4768 */
4769 int
4770 xfs_bmapi_convert_delalloc(
4771 struct xfs_inode *ip,
4772 int whichfork,
4773 loff_t offset,
4774 struct iomap *iomap,
4775 unsigned int *seq)
4776 {
4777 int error;
4778
4779 /*
4780 * Attempt to allocate whatever delalloc extent currently backs offset
4781 * and put the result into iomap. Allocate in a loop because it may
4782 * take several attempts to allocate real blocks for a contiguous
4783 * delalloc extent if free space is sufficiently fragmented.
4784 */
4785 do {
4786 error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
4787 iomap, seq);
4788 if (error)
4789 return error;
4790 } while (iomap->offset + iomap->length <= offset);
4791
4792 return 0;
4793 }
4794
4795 int
4796 xfs_bmapi_remap(
4797 struct xfs_trans *tp,
4798 struct xfs_inode *ip,
4799 xfs_fileoff_t bno,
4800 xfs_filblks_t len,
4801 xfs_fsblock_t startblock,
4802 uint32_t flags)
4803 {
4804 struct xfs_mount *mp = ip->i_mount;
4805 struct xfs_ifork *ifp;
4806 struct xfs_btree_cur *cur = NULL;
4807 struct xfs_bmbt_irec got;
4808 struct xfs_iext_cursor icur;
4809 int whichfork = xfs_bmapi_whichfork(flags);
4810 int logflags = 0, error;
4811
4812 ifp = xfs_ifork_ptr(ip, whichfork);
4813 ASSERT(len > 0);
4814 ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4815 xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4816 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4817 XFS_BMAPI_NORMAP)));
4818 ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4819 (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4820
4821 if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4822 XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4823 xfs_bmap_mark_sick(ip, whichfork);
4824 return -EFSCORRUPTED;
4825 }
4826
4827 if (xfs_is_shutdown(mp))
4828 return -EIO;
4829
4830 error = xfs_iread_extents(tp, ip, whichfork);
4831 if (error)
4832 return error;
4833
4834 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4835 /* make sure we only reflink into a hole.
*/ 4836 ASSERT(got.br_startoff > bno); 4837 ASSERT(got.br_startoff - bno >= len); 4838 } 4839 4840 ip->i_nblocks += len; 4841 ip->i_delayed_blks -= len; /* see xfs_bmap_defer_add */ 4842 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4843 4844 if (ifp->if_format == XFS_DINODE_FMT_BTREE) 4845 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 4846 4847 got.br_startoff = bno; 4848 got.br_startblock = startblock; 4849 got.br_blockcount = len; 4850 if (flags & XFS_BMAPI_PREALLOC) 4851 got.br_state = XFS_EXT_UNWRITTEN; 4852 else 4853 got.br_state = XFS_EXT_NORM; 4854 4855 error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur, 4856 &cur, &got, &logflags, flags); 4857 if (error) 4858 goto error0; 4859 4860 error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork); 4861 4862 error0: 4863 if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS) 4864 logflags &= ~XFS_ILOG_DEXT; 4865 else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE) 4866 logflags &= ~XFS_ILOG_DBROOT; 4867 4868 if (logflags) 4869 xfs_trans_log_inode(tp, ip, logflags); 4870 if (cur) 4871 xfs_btree_del_cursor(cur, error); 4872 return error; 4873 } 4874 4875 /* 4876 * When a delalloc extent is split (e.g., due to a hole punch), the original 4877 * indlen reservation must be shared across the two new extents that are left 4878 * behind. 4879 * 4880 * Given the original reservation and the worst case indlen for the two new 4881 * extents (as calculated by xfs_bmap_worst_indlen()), split the original 4882 * reservation fairly across the two new extents. If necessary, steal available 4883 * blocks from a deleted extent to make up a reservation deficiency (e.g., if 4884 * ores == 1). The number of stolen blocks is returned. The availability and 4885 * subsequent accounting of stolen blocks is the responsibility of the caller. 4886 */ 4887 static void 4888 xfs_bmap_split_indlen( 4889 xfs_filblks_t ores, /* original res. */ 4890 xfs_filblks_t *indlen1, /* ext1 worst indlen */ 4891 xfs_filblks_t *indlen2) /* ext2 worst indlen */ 4892 { 4893 xfs_filblks_t len1 = *indlen1; 4894 xfs_filblks_t len2 = *indlen2; 4895 xfs_filblks_t nres = len1 + len2; /* new total res. */ 4896 xfs_filblks_t resfactor; 4897 4898 /* 4899 * We can't meet the total required reservation for the two extents. 4900 * Calculate the percent of the overall shortage between both extents 4901 * and apply this percentage to each of the requested indlen values. 4902 * This distributes the shortage fairly and reduces the chances that one 4903 * of the two extents is left with nothing when extents are repeatedly 4904 * split. 4905 */ 4906 resfactor = (ores * 100); 4907 do_div(resfactor, nres); 4908 len1 *= resfactor; 4909 do_div(len1, 100); 4910 len2 *= resfactor; 4911 do_div(len2, 100); 4912 ASSERT(len1 + len2 <= ores); 4913 ASSERT(len1 < *indlen1 && len2 < *indlen2); 4914 4915 /* 4916 * Hand out the remainder to each extent. If one of the two reservations 4917 * is zero, we want to make sure that one gets a block first. The loop 4918 * below starts with len1, so hand len2 a block right off the bat if it 4919 * is zero. 
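 *
 * A worked example (illustrative numbers): with ores = 5, *indlen1 = 4
 * and *indlen2 = 3, resfactor = 500 / 7 = 71, so len1 = 4 * 71 / 100 = 2
 * and len2 = 3 * 71 / 100 = 2. One block of the original reservation
 * remains and the loop hands it to len1, leaving a final split of 3
 * and 2 blocks.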
4920 */ 4921 ores -= (len1 + len2); 4922 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); 4923 if (ores && !len2 && *indlen2) { 4924 len2++; 4925 ores--; 4926 } 4927 while (ores) { 4928 if (len1 < *indlen1) { 4929 len1++; 4930 ores--; 4931 } 4932 if (!ores) 4933 break; 4934 if (len2 < *indlen2) { 4935 len2++; 4936 ores--; 4937 } 4938 } 4939 4940 *indlen1 = len1; 4941 *indlen2 = len2; 4942 } 4943 4944 void 4945 xfs_bmap_del_extent_delay( 4946 struct xfs_inode *ip, 4947 int whichfork, 4948 struct xfs_iext_cursor *icur, 4949 struct xfs_bmbt_irec *got, 4950 struct xfs_bmbt_irec *del) 4951 { 4952 struct xfs_mount *mp = ip->i_mount; 4953 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 4954 struct xfs_bmbt_irec new; 4955 int64_t da_old, da_new, da_diff = 0; 4956 xfs_fileoff_t del_endoff, got_endoff; 4957 xfs_filblks_t got_indlen, new_indlen, stolen = 0; 4958 uint32_t state = xfs_bmap_fork_to_state(whichfork); 4959 uint64_t fdblocks; 4960 bool isrt; 4961 4962 XFS_STATS_INC(mp, xs_del_exlist); 4963 4964 isrt = xfs_ifork_is_realtime(ip, whichfork); 4965 del_endoff = del->br_startoff + del->br_blockcount; 4966 got_endoff = got->br_startoff + got->br_blockcount; 4967 da_old = startblockval(got->br_startblock); 4968 da_new = 0; 4969 4970 ASSERT(del->br_blockcount > 0); 4971 ASSERT(got->br_startoff <= del->br_startoff); 4972 ASSERT(got_endoff >= del_endoff); 4973 4974 /* 4975 * Update the inode delalloc counter now and wait to update the 4976 * sb counters as we might have to borrow some blocks for the 4977 * indirect block accounting. 4978 */ 4979 xfs_quota_unreserve_blkres(ip, del->br_blockcount); 4980 ip->i_delayed_blks -= del->br_blockcount; 4981 4982 if (got->br_startoff == del->br_startoff) 4983 state |= BMAP_LEFT_FILLING; 4984 if (got_endoff == del_endoff) 4985 state |= BMAP_RIGHT_FILLING; 4986 4987 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4988 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4989 /* 4990 * Matches the whole extent. Delete the entry. 4991 */ 4992 xfs_iext_remove(ip, icur, state); 4993 xfs_iext_prev(ifp, icur); 4994 break; 4995 case BMAP_LEFT_FILLING: 4996 /* 4997 * Deleting the first part of the extent. 4998 */ 4999 got->br_startoff = del_endoff; 5000 got->br_blockcount -= del->br_blockcount; 5001 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 5002 got->br_blockcount), da_old); 5003 got->br_startblock = nullstartblock((int)da_new); 5004 xfs_iext_update_extent(ip, state, icur, got); 5005 break; 5006 case BMAP_RIGHT_FILLING: 5007 /* 5008 * Deleting the last part of the extent. 5009 */ 5010 got->br_blockcount = got->br_blockcount - del->br_blockcount; 5011 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 5012 got->br_blockcount), da_old); 5013 got->br_startblock = nullstartblock((int)da_new); 5014 xfs_iext_update_extent(ip, state, icur, got); 5015 break; 5016 case 0: 5017 /* 5018 * Deleting the middle of the extent. 5019 * 5020 * Distribute the original indlen reservation across the two new 5021 * extents. Steal blocks from the deleted extent if necessary. 5022 * Stealing blocks simply fudges the fdblocks accounting below. 5023 * Warn if either of the new indlen reservations is zero as this 5024 * can lead to delalloc problems. 
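 *
 * For example (illustrative): punching [40, 60) out of a delalloc
 * reservation covering [0, 100) leaves two reservations, [0, 40) and
 * [60, 100), which must share the original indirect reservation,
 * stealing freed data blocks to cover any shortfall on non-rt inodes.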
5025 */
5026 got->br_blockcount = del->br_startoff - got->br_startoff;
5027 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
5028
5029 new.br_blockcount = got_endoff - del_endoff;
5030 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
5031
5032 WARN_ON_ONCE(!got_indlen || !new_indlen);
5033 /*
5034 * Steal as many blocks as we can to try and satisfy the worst
5035 * case indlen for both new extents.
5036 *
5037 * However, we can't just steal reservations from the data
5038 * blocks if this is an RT inode, as the data and metadata
5039 * blocks come from different pools. We'll have to live with
5040 * an under-filled indirect reservation in this case.
5041 */
5042 da_new = got_indlen + new_indlen;
5043 if (da_new > da_old && !isrt) {
5044 stolen = XFS_FILBLKS_MIN(da_new - da_old,
5045 del->br_blockcount);
5046 da_old += stolen;
5047 }
5048 if (da_new > da_old)
5049 xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen);
5050 da_new = got_indlen + new_indlen;
5051
5052 got->br_startblock = nullstartblock((int)got_indlen);
5053
5054 new.br_startoff = del_endoff;
5055 new.br_state = got->br_state;
5056 new.br_startblock = nullstartblock((int)new_indlen);
5057
5058 xfs_iext_update_extent(ip, state, icur, got);
5059 xfs_iext_next(ifp, icur);
5060 xfs_iext_insert(ip, icur, &new, state);
5061
5062 del->br_blockcount -= stolen;
5063 break;
5064 }
5065
5066 ASSERT(da_old >= da_new);
5067 da_diff = da_old - da_new;
5068 fdblocks = da_diff;
5069
5070 if (isrt)
5071 xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
5072 else
5073 fdblocks += del->br_blockcount;
5074
5075 xfs_add_fdblocks(mp, fdblocks);
5076 xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
5077 }
5078
5079 void
5080 xfs_bmap_del_extent_cow(
5081 struct xfs_inode *ip,
5082 struct xfs_iext_cursor *icur,
5083 struct xfs_bmbt_irec *got,
5084 struct xfs_bmbt_irec *del)
5085 {
5086 struct xfs_mount *mp = ip->i_mount;
5087 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
5088 struct xfs_bmbt_irec new;
5089 xfs_fileoff_t del_endoff, got_endoff;
5090 uint32_t state = BMAP_COWFORK;
5091
5092 XFS_STATS_INC(mp, xs_del_exlist);
5093
5094 del_endoff = del->br_startoff + del->br_blockcount;
5095 got_endoff = got->br_startoff + got->br_blockcount;
5096
5097 ASSERT(del->br_blockcount > 0);
5098 ASSERT(got->br_startoff <= del->br_startoff);
5099 ASSERT(got_endoff >= del_endoff);
5100 ASSERT(!isnullstartblock(got->br_startblock));
5101
5102 if (got->br_startoff == del->br_startoff)
5103 state |= BMAP_LEFT_FILLING;
5104 if (got_endoff == del_endoff)
5105 state |= BMAP_RIGHT_FILLING;
5106
5107 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5108 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5109 /*
5110 * Matches the whole extent. Delete the entry.
5111 */
5112 xfs_iext_remove(ip, icur, state);
5113 xfs_iext_prev(ifp, icur);
5114 break;
5115 case BMAP_LEFT_FILLING:
5116 /*
5117 * Deleting the first part of the extent.
5118 */
5119 got->br_startoff = del_endoff;
5120 got->br_blockcount -= del->br_blockcount;
5121 got->br_startblock = del->br_startblock + del->br_blockcount;
5122 xfs_iext_update_extent(ip, state, icur, got);
5123 break;
5124 case BMAP_RIGHT_FILLING:
5125 /*
5126 * Deleting the last part of the extent.
5127 */
5128 got->br_blockcount -= del->br_blockcount;
5129 xfs_iext_update_extent(ip, state, icur, got);
5130 break;
5131 case 0:
5132 /*
5133 * Deleting the middle of the extent.
5134 */ 5135 got->br_blockcount = del->br_startoff - got->br_startoff; 5136 5137 new.br_startoff = del_endoff; 5138 new.br_blockcount = got_endoff - del_endoff; 5139 new.br_state = got->br_state; 5140 new.br_startblock = del->br_startblock + del->br_blockcount; 5141 5142 xfs_iext_update_extent(ip, state, icur, got); 5143 xfs_iext_next(ifp, icur); 5144 xfs_iext_insert(ip, icur, &new, state); 5145 break; 5146 } 5147 ip->i_delayed_blks -= del->br_blockcount; 5148 } 5149 5150 static int 5151 xfs_bmap_free_rtblocks( 5152 struct xfs_trans *tp, 5153 struct xfs_bmbt_irec *del) 5154 { 5155 struct xfs_rtgroup *rtg; 5156 int error; 5157 5158 rtg = xfs_rtgroup_grab(tp->t_mountp, 0); 5159 if (!rtg) 5160 return -EIO; 5161 5162 /* 5163 * Ensure the bitmap and summary inodes are locked and joined to the 5164 * transaction before modifying them. 5165 */ 5166 if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { 5167 tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; 5168 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); 5169 xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP); 5170 } 5171 5172 error = xfs_rtfree_blocks(tp, rtg, del->br_startblock, 5173 del->br_blockcount); 5174 xfs_rtgroup_rele(rtg); 5175 return error; 5176 } 5177 5178 /* 5179 * Called by xfs_bmapi to update file extent records and the btree 5180 * after removing space. 5181 */ 5182 STATIC int /* error */ 5183 xfs_bmap_del_extent_real( 5184 xfs_inode_t *ip, /* incore inode pointer */ 5185 xfs_trans_t *tp, /* current transaction pointer */ 5186 struct xfs_iext_cursor *icur, 5187 struct xfs_btree_cur *cur, /* if null, not a btree */ 5188 xfs_bmbt_irec_t *del, /* data to remove from extents */ 5189 int *logflagsp, /* inode logging flags */ 5190 int whichfork, /* data or attr fork */ 5191 uint32_t bflags) /* bmapi flags */ 5192 { 5193 xfs_fsblock_t del_endblock=0; /* first block past del */ 5194 xfs_fileoff_t del_endoff; /* first offset past del */ 5195 int error = 0; /* error return value */ 5196 struct xfs_bmbt_irec got; /* current extent entry */ 5197 xfs_fileoff_t got_endoff; /* first offset past got */ 5198 int i; /* temp state */ 5199 struct xfs_ifork *ifp; /* inode fork pointer */ 5200 xfs_mount_t *mp; /* mount structure */ 5201 xfs_filblks_t nblks; /* quota/sb block count */ 5202 xfs_bmbt_irec_t new; /* new record to be inserted */ 5203 /* REFERENCED */ 5204 uint qfield; /* quota field to update */ 5205 uint32_t state = xfs_bmap_fork_to_state(whichfork); 5206 struct xfs_bmbt_irec old; 5207 5208 *logflagsp = 0; 5209 5210 mp = ip->i_mount; 5211 XFS_STATS_INC(mp, xs_del_exlist); 5212 5213 ifp = xfs_ifork_ptr(ip, whichfork); 5214 ASSERT(del->br_blockcount > 0); 5215 xfs_iext_get_extent(ifp, icur, &got); 5216 ASSERT(got.br_startoff <= del->br_startoff); 5217 del_endoff = del->br_startoff + del->br_blockcount; 5218 got_endoff = got.br_startoff + got.br_blockcount; 5219 ASSERT(got_endoff >= del_endoff); 5220 ASSERT(!isnullstartblock(got.br_startblock)); 5221 qfield = 0; 5222 5223 /* 5224 * If it's the case where the directory code is running with no block 5225 * reservation, and the deleted block is in the middle of its extent, 5226 * and the resulting insert of an extent would cause transformation to 5227 * btree format, then reject it. The calling code will then swap blocks 5228 * around instead. We have to do this now, rather than waiting for the 5229 * conversion to btree format, since the transaction will be dirty then. 
5230 */
5231 if (tp->t_blk_res == 0 &&
5232 ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5233 ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5234 del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5235 return -ENOSPC;
5236
5237 *logflagsp = XFS_ILOG_CORE;
5238 if (xfs_ifork_is_realtime(ip, whichfork))
5239 qfield = XFS_TRANS_DQ_RTBCOUNT;
5240 else
5241 qfield = XFS_TRANS_DQ_BCOUNT;
5242 nblks = del->br_blockcount;
5243
5244 del_endblock = del->br_startblock + del->br_blockcount;
5245 if (cur) {
5246 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5247 if (error)
5248 return error;
5249 if (XFS_IS_CORRUPT(mp, i != 1)) {
5250 xfs_btree_mark_sick(cur);
5251 return -EFSCORRUPTED;
5252 }
5253 }
5254
5255 if (got.br_startoff == del->br_startoff)
5256 state |= BMAP_LEFT_FILLING;
5257 if (got_endoff == del_endoff)
5258 state |= BMAP_RIGHT_FILLING;
5259
5260 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5261 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5262 /*
5263 * Matches the whole extent. Delete the entry.
5264 */
5265 xfs_iext_remove(ip, icur, state);
5266 xfs_iext_prev(ifp, icur);
5267 ifp->if_nextents--;
5268
5269 *logflagsp |= XFS_ILOG_CORE;
5270 if (!cur) {
5271 *logflagsp |= xfs_ilog_fext(whichfork);
5272 break;
5273 }
5274 if ((error = xfs_btree_delete(cur, &i)))
5275 return error;
5276 if (XFS_IS_CORRUPT(mp, i != 1)) {
5277 xfs_btree_mark_sick(cur);
5278 return -EFSCORRUPTED;
5279 }
5280 break;
5281 case BMAP_LEFT_FILLING:
5282 /*
5283 * Deleting the first part of the extent.
5284 */
5285 got.br_startoff = del_endoff;
5286 got.br_startblock = del_endblock;
5287 got.br_blockcount -= del->br_blockcount;
5288 xfs_iext_update_extent(ip, state, icur, &got);
5289 if (!cur) {
5290 *logflagsp |= xfs_ilog_fext(whichfork);
5291 break;
5292 }
5293 error = xfs_bmbt_update(cur, &got);
5294 if (error)
5295 return error;
5296 break;
5297 case BMAP_RIGHT_FILLING:
5298 /*
5299 * Deleting the last part of the extent.
5300 */
5301 got.br_blockcount -= del->br_blockcount;
5302 xfs_iext_update_extent(ip, state, icur, &got);
5303 if (!cur) {
5304 *logflagsp |= xfs_ilog_fext(whichfork);
5305 break;
5306 }
5307 error = xfs_bmbt_update(cur, &got);
5308 if (error)
5309 return error;
5310 break;
5311 case 0:
5312 /*
5313 * Deleting the middle of the extent.
5314 */
5315
5316 old = got;
5317
5318 got.br_blockcount = del->br_startoff - got.br_startoff;
5319 xfs_iext_update_extent(ip, state, icur, &got);
5320
5321 new.br_startoff = del_endoff;
5322 new.br_blockcount = got_endoff - del_endoff;
5323 new.br_state = got.br_state;
5324 new.br_startblock = del_endblock;
5325
5326 *logflagsp |= XFS_ILOG_CORE;
5327 if (cur) {
5328 error = xfs_bmbt_update(cur, &got);
5329 if (error)
5330 return error;
5331 error = xfs_btree_increment(cur, 0, &i);
5332 if (error)
5333 return error;
5334 cur->bc_rec.b = new;
5335 error = xfs_btree_insert(cur, &i);
5336 if (error && error != -ENOSPC)
5337 return error;
5338 /*
5339 * If we get no-space back from the btree insert, it
5340 * tried a split and we have a zero block reservation.
5341 * Fix up our state and return the error.
5342 */
5343 if (error == -ENOSPC) {
5344 /*
5345 * Reset the cursor, don't trust it after any
5346 * insert operation.
5347 */
5348 error = xfs_bmbt_lookup_eq(cur, &got, &i);
5349 if (error)
5350 return error;
5351 if (XFS_IS_CORRUPT(mp, i != 1)) {
5352 xfs_btree_mark_sick(cur);
5353 return -EFSCORRUPTED;
5354 }
5355 /*
5356 * Update the btree record back
5357 * to the original value.
5358 */ 5359 error = xfs_bmbt_update(cur, &old); 5360 if (error) 5361 return error; 5362 /* 5363 * Reset the extent record back 5364 * to the original value. 5365 */ 5366 xfs_iext_update_extent(ip, state, icur, &old); 5367 *logflagsp = 0; 5368 return -ENOSPC; 5369 } 5370 if (XFS_IS_CORRUPT(mp, i != 1)) { 5371 xfs_btree_mark_sick(cur); 5372 return -EFSCORRUPTED; 5373 } 5374 } else 5375 *logflagsp |= xfs_ilog_fext(whichfork); 5376 5377 ifp->if_nextents++; 5378 xfs_iext_next(ifp, icur); 5379 xfs_iext_insert(ip, icur, &new, state); 5380 break; 5381 } 5382 5383 /* remove reverse mapping */ 5384 xfs_rmap_unmap_extent(tp, ip, whichfork, del); 5385 5386 /* 5387 * If we need to, add to list of extents to delete. 5388 */ 5389 if (!(bflags & XFS_BMAPI_REMAP)) { 5390 bool isrt = xfs_ifork_is_realtime(ip, whichfork); 5391 5392 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { 5393 xfs_refcount_decrease_extent(tp, isrt, del); 5394 } else if (isrt && !xfs_has_rtgroups(mp)) { 5395 error = xfs_bmap_free_rtblocks(tp, del); 5396 } else { 5397 unsigned int efi_flags = 0; 5398 5399 if ((bflags & XFS_BMAPI_NODISCARD) || 5400 del->br_state == XFS_EXT_UNWRITTEN) 5401 efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; 5402 5403 /* 5404 * Historically, we did not use EFIs to free realtime 5405 * extents. However, when reverse mapping is enabled, 5406 * we must maintain the same order of operations as the 5407 * data device, which is: Remove the file mapping, 5408 * remove the reverse mapping, and then free the 5409 * blocks. Reflink for realtime volumes requires the 5410 * same sort of ordering. Both features rely on 5411 * rtgroups, so let's gate rt EFI usage on rtgroups. 5412 */ 5413 if (isrt) 5414 efi_flags |= XFS_FREE_EXTENT_REALTIME; 5415 5416 error = xfs_free_extent_later(tp, del->br_startblock, 5417 del->br_blockcount, NULL, 5418 XFS_AG_RESV_NONE, efi_flags); 5419 } 5420 if (error) 5421 return error; 5422 } 5423 5424 /* 5425 * Adjust inode # blocks in the file. 5426 */ 5427 if (nblks) 5428 ip->i_nblocks -= nblks; 5429 /* 5430 * Adjust quota data. 5431 */ 5432 if (qfield && !(bflags & XFS_BMAPI_REMAP)) 5433 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); 5434 5435 return 0; 5436 } 5437 5438 /* 5439 * Unmap (remove) blocks from a file. 5440 * If nexts is nonzero then the number of extents to remove is limited to 5441 * that value. If not all extents in the block range can be removed then 5442 * *done is set. 
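 *
 * A minimal caller sketch via the xfs_bunmapi() wrapper below (hedged;
 * transaction rolling and error handling between passes elided):
 *
 *	int done = 0;
 *
 *	do {
 *		error = xfs_bunmapi(tp, ip, bno, len, 0, 2, &done);
 *		... roll the transaction between passes ...
 *	} while (!error && !done);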
5443 */ 5444 static int 5445 __xfs_bunmapi( 5446 struct xfs_trans *tp, /* transaction pointer */ 5447 struct xfs_inode *ip, /* incore inode */ 5448 xfs_fileoff_t start, /* first file offset deleted */ 5449 xfs_filblks_t *rlen, /* i/o: amount remaining */ 5450 uint32_t flags, /* misc flags */ 5451 xfs_extnum_t nexts) /* number of extents max */ 5452 { 5453 struct xfs_btree_cur *cur; /* bmap btree cursor */ 5454 struct xfs_bmbt_irec del; /* extent being deleted */ 5455 int error; /* error return value */ 5456 xfs_extnum_t extno; /* extent number in list */ 5457 struct xfs_bmbt_irec got; /* current extent record */ 5458 struct xfs_ifork *ifp; /* inode fork pointer */ 5459 int isrt; /* freeing in rt area */ 5460 int logflags; /* transaction logging flags */ 5461 xfs_extlen_t mod; /* rt extent offset */ 5462 struct xfs_mount *mp = ip->i_mount; 5463 int tmp_logflags; /* partial logging flags */ 5464 int wasdel; /* was a delayed alloc extent */ 5465 int whichfork; /* data or attribute fork */ 5466 xfs_filblks_t len = *rlen; /* length to unmap in file */ 5467 xfs_fileoff_t end; 5468 struct xfs_iext_cursor icur; 5469 bool done = false; 5470 5471 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_); 5472 5473 whichfork = xfs_bmapi_whichfork(flags); 5474 ASSERT(whichfork != XFS_COW_FORK); 5475 ifp = xfs_ifork_ptr(ip, whichfork); 5476 if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) { 5477 xfs_bmap_mark_sick(ip, whichfork); 5478 return -EFSCORRUPTED; 5479 } 5480 if (xfs_is_shutdown(mp)) 5481 return -EIO; 5482 5483 xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 5484 ASSERT(len > 0); 5485 ASSERT(nexts >= 0); 5486 5487 error = xfs_iread_extents(tp, ip, whichfork); 5488 if (error) 5489 return error; 5490 5491 if (xfs_iext_count(ifp) == 0) { 5492 *rlen = 0; 5493 return 0; 5494 } 5495 XFS_STATS_INC(mp, xs_blk_unmap); 5496 isrt = xfs_ifork_is_realtime(ip, whichfork); 5497 end = start + len; 5498 5499 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { 5500 *rlen = 0; 5501 return 0; 5502 } 5503 end--; 5504 5505 logflags = 0; 5506 if (ifp->if_format == XFS_DINODE_FMT_BTREE) { 5507 ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); 5508 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5509 } else 5510 cur = NULL; 5511 5512 extno = 0; 5513 while (end != (xfs_fileoff_t)-1 && end >= start && 5514 (nexts == 0 || extno < nexts)) { 5515 /* 5516 * Is the found extent after a hole in which end lives? 5517 * Just back up to the previous extent, if so. 5518 */ 5519 if (got.br_startoff > end && 5520 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5521 done = true; 5522 break; 5523 } 5524 /* 5525 * Is the last block of this extent before the range 5526 * we're supposed to delete? If so, we're done. 5527 */ 5528 end = XFS_FILEOFF_MIN(end, 5529 got.br_startoff + got.br_blockcount - 1); 5530 if (end < start) 5531 break; 5532 /* 5533 * Then deal with the (possibly delayed) allocated space 5534 * we found. 5535 */ 5536 del = got; 5537 wasdel = isnullstartblock(del.br_startblock); 5538 5539 if (got.br_startoff < start) { 5540 del.br_startoff = start; 5541 del.br_blockcount -= start - got.br_startoff; 5542 if (!wasdel) 5543 del.br_startblock += start - got.br_startoff; 5544 } 5545 if (del.br_startoff + del.br_blockcount > end + 1) 5546 del.br_blockcount = end + 1 - del.br_startoff; 5547 5548 if (!isrt || (flags & XFS_BMAPI_REMAP)) 5549 goto delete; 5550 5551 mod = xfs_rtb_to_rtxoff(mp, 5552 del.br_startblock + del.br_blockcount); 5553 if (mod) { 5554 /* 5555 * Realtime extent not lined up at the end. 
5556 * The extent could have been split into written 5557 * and unwritten pieces, or we could just be 5558 * unmapping part of it. But we can't really 5559 * get rid of part of a realtime extent. 5560 */ 5561 if (del.br_state == XFS_EXT_UNWRITTEN) { 5562 /* 5563 * This piece is unwritten, or we're not 5564 * using unwritten extents. Skip over it. 5565 */ 5566 ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod); 5567 end -= mod > del.br_blockcount ? 5568 del.br_blockcount : mod; 5569 if (end < got.br_startoff && 5570 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5571 done = true; 5572 break; 5573 } 5574 continue; 5575 } 5576 /* 5577 * It's written, turn it unwritten. 5578 * This is better than zeroing it. 5579 */ 5580 ASSERT(del.br_state == XFS_EXT_NORM); 5581 ASSERT(tp->t_blk_res > 0); 5582 /* 5583 * If this spans a realtime extent boundary, 5584 * chop it back to the start of the one we end at. 5585 */ 5586 if (del.br_blockcount > mod) { 5587 del.br_startoff += del.br_blockcount - mod; 5588 del.br_startblock += del.br_blockcount - mod; 5589 del.br_blockcount = mod; 5590 } 5591 del.br_state = XFS_EXT_UNWRITTEN; 5592 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5593 whichfork, &icur, &cur, &del, 5594 &logflags); 5595 if (error) 5596 goto error0; 5597 goto nodelete; 5598 } 5599 5600 mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); 5601 if (mod) { 5602 xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; 5603 5604 /* 5605 * Realtime extent is lined up at the end but not 5606 * at the front. We'll get rid of full extents if 5607 * we can. 5608 */ 5609 if (del.br_blockcount > off) { 5610 del.br_blockcount -= off; 5611 del.br_startoff += off; 5612 del.br_startblock += off; 5613 } else if (del.br_startoff == start && 5614 (del.br_state == XFS_EXT_UNWRITTEN || 5615 tp->t_blk_res == 0)) { 5616 /* 5617 * Can't make it unwritten. There isn't 5618 * a full extent here so just skip it. 5619 */ 5620 ASSERT(end >= del.br_blockcount); 5621 end -= del.br_blockcount; 5622 if (got.br_startoff > end && 5623 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5624 done = true; 5625 break; 5626 } 5627 continue; 5628 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5629 struct xfs_bmbt_irec prev; 5630 xfs_fileoff_t unwrite_start; 5631 5632 /* 5633 * This one is already unwritten. 5634 * It must have a written left neighbor. 5635 * Unwrite the killed part of that one and 5636 * try again. 
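 *
 * For example (illustrative numbers, rextsize = 8): if del starts
 * mod = 3 blocks into a realtime extent and the written neighbour
 * prev owns those three blocks, the tail of prev from
 * unwrite_start = max3(start, del.br_startoff - mod, prev.br_startoff)
 * onwards is flipped to unwritten and the loop retries, so the whole
 * realtime extent becomes unwritten before it can be freed.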
5637 */ 5638 if (!xfs_iext_prev_extent(ifp, &icur, &prev)) 5639 ASSERT(0); 5640 ASSERT(prev.br_state == XFS_EXT_NORM); 5641 ASSERT(!isnullstartblock(prev.br_startblock)); 5642 ASSERT(del.br_startblock == 5643 prev.br_startblock + prev.br_blockcount); 5644 unwrite_start = max3(start, 5645 del.br_startoff - mod, 5646 prev.br_startoff); 5647 mod = unwrite_start - prev.br_startoff; 5648 prev.br_startoff = unwrite_start; 5649 prev.br_startblock += mod; 5650 prev.br_blockcount -= mod; 5651 prev.br_state = XFS_EXT_UNWRITTEN; 5652 error = xfs_bmap_add_extent_unwritten_real(tp, 5653 ip, whichfork, &icur, &cur, 5654 &prev, &logflags); 5655 if (error) 5656 goto error0; 5657 goto nodelete; 5658 } else { 5659 ASSERT(del.br_state == XFS_EXT_NORM); 5660 del.br_state = XFS_EXT_UNWRITTEN; 5661 error = xfs_bmap_add_extent_unwritten_real(tp, 5662 ip, whichfork, &icur, &cur, 5663 &del, &logflags); 5664 if (error) 5665 goto error0; 5666 goto nodelete; 5667 } 5668 } 5669 5670 delete: 5671 if (wasdel) { 5672 xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del); 5673 } else { 5674 error = xfs_bmap_del_extent_real(ip, tp, &icur, cur, 5675 &del, &tmp_logflags, whichfork, 5676 flags); 5677 logflags |= tmp_logflags; 5678 if (error) 5679 goto error0; 5680 } 5681 5682 end = del.br_startoff - 1; 5683 nodelete: 5684 /* 5685 * If not done go on to the next (previous) record. 5686 */ 5687 if (end != (xfs_fileoff_t)-1 && end >= start) { 5688 if (!xfs_iext_get_extent(ifp, &icur, &got) || 5689 (got.br_startoff > end && 5690 !xfs_iext_prev_extent(ifp, &icur, &got))) { 5691 done = true; 5692 break; 5693 } 5694 extno++; 5695 } 5696 } 5697 if (done || end == (xfs_fileoff_t)-1 || end < start) 5698 *rlen = 0; 5699 else 5700 *rlen = end - start + 1; 5701 5702 /* 5703 * Convert to a btree if necessary. 5704 */ 5705 if (xfs_bmap_needs_btree(ip, whichfork)) { 5706 ASSERT(cur == NULL); 5707 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, 5708 &tmp_logflags, whichfork); 5709 logflags |= tmp_logflags; 5710 } else { 5711 error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, 5712 whichfork); 5713 } 5714 5715 error0: 5716 /* 5717 * Log everything. Do this after conversion, there's no point in 5718 * logging the extent records if we've converted to btree format. 5719 */ 5720 if ((logflags & xfs_ilog_fext(whichfork)) && 5721 ifp->if_format != XFS_DINODE_FMT_EXTENTS) 5722 logflags &= ~xfs_ilog_fext(whichfork); 5723 else if ((logflags & xfs_ilog_fbroot(whichfork)) && 5724 ifp->if_format != XFS_DINODE_FMT_BTREE) 5725 logflags &= ~xfs_ilog_fbroot(whichfork); 5726 /* 5727 * Log inode even in the error case, if the transaction 5728 * is dirty we'll need to shut down the filesystem. 5729 */ 5730 if (logflags) 5731 xfs_trans_log_inode(tp, ip, logflags); 5732 if (cur) { 5733 if (!error) 5734 cur->bc_bmap.allocated = 0; 5735 xfs_btree_del_cursor(cur, error); 5736 } 5737 return error; 5738 } 5739 5740 /* Unmap a range of a file. */ 5741 int 5742 xfs_bunmapi( 5743 xfs_trans_t *tp, 5744 struct xfs_inode *ip, 5745 xfs_fileoff_t bno, 5746 xfs_filblks_t len, 5747 uint32_t flags, 5748 xfs_extnum_t nexts, 5749 int *done) 5750 { 5751 int error; 5752 5753 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts); 5754 *done = (len == 0); 5755 return error; 5756 } 5757 5758 /* 5759 * Determine whether an extent shift can be accomplished by a merge with the 5760 * extent that precedes the target hole of the shift. 
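 *
 * For example (illustrative numbers): left = [0, 10) at startblock 100
 * and got = [15, 25) at startblock 110 can absorb a shift of 5: the
 * shifted startoff (10) abuts left in the file and startblock 110 abuts
 * it on disk, so the two extents merge instead of leaving got shifted
 * but separate.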
/*
 * Determine whether an extent shift can be accomplished by a merge with the
 * extent that precedes the target hole of the shift.
 */
STATIC bool
xfs_bmse_can_merge(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*left,	/* preceding extent */
	struct xfs_bmbt_irec	*got,	/* current extent to shift */
	xfs_fileoff_t		shift)	/* shift fsb */
{
	xfs_fileoff_t		startoff;

	startoff = got->br_startoff - shift;

	/*
	 * The extent, once shifted, must be adjacent in-file and on-disk with
	 * the preceding extent.
	 */
	if ((left->br_startoff + left->br_blockcount != startoff) ||
	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
	    (left->br_state != got->br_state) ||
	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
	    !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
		return false;

	return true;
}
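/*
 * Worked example for xfs_bmse_can_merge() (editorial illustration, not from
 * the original source): take left = { br_startoff 0, br_startblock 100,
 * br_blockcount 10 } and got = { br_startoff 15, br_startblock 110,
 * br_blockcount 5 }.  A shift of 5 moves got to file offset 10, which is
 * exactly left's end, and block 110 already follows block 109 on disk; if
 * the extent states match, the combined length stays within
 * XFS_MAX_BMBT_EXTLEN and both extents sit in the same rtgroup, the shift
 * can be done as a merge.
 */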
/*
 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
 * hole in the file. If an extent shift would result in the extent being fully
 * adjacent to the extent that currently precedes the hole, we can merge with
 * the preceding extent rather than do the shift.
 *
 * This function assumes the caller has verified a shift-by-merge is possible
 * with the provided extents via xfs_bmse_can_merge().
 */
STATIC int
xfs_bmse_merge(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fileoff_t		shift,		/* shift fsb */
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,		/* extent to shift */
	struct xfs_bmbt_irec	*left,		/* preceding extent */
	struct xfs_btree_cur	*cur,
	int			*logflags)	/* output */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	new;
	xfs_filblks_t		blockcount;
	int			error, i;
	struct xfs_mount	*mp = ip->i_mount;

	blockcount = left->br_blockcount + got->br_blockcount;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));

	new = *left;
	new.br_blockcount = blockcount;

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	ifp->if_nextents--;
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		goto done;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	error = xfs_bmbt_update(cur, &new);
	if (error)
		return error;

	/* change to extent format if required after extent removal */
	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
	if (error)
		return error;

done:
	xfs_iext_remove(ip, icur, 0);
	xfs_iext_prev(ifp, icur);
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			&new);

	/* update reverse mapping.  rmap functions merge the rmaps for us */
	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
	memcpy(&new, got, sizeof(new));
	new.br_startoff = left->br_startoff + left->br_blockcount;
	xfs_rmap_map_extent(tp, ip, whichfork, &new);
	return 0;
}

static int
xfs_bmap_shift_update_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,
	struct xfs_btree_cur	*cur,
	int			*logflags,
	xfs_fileoff_t		startoff)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	prev = *got;
	int			error, i;

	*logflags |= XFS_ILOG_CORE;

	got->br_startoff = startoff;

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}

		error = xfs_bmbt_update(cur, got);
		if (error)
			return error;
	} else {
		*logflags |= XFS_ILOG_DEXT;
	}

	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			got);

	/* update reverse mapping */
	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
	xfs_rmap_map_extent(tp, ip, whichfork, got);
	return 0;
}
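/*
 * Illustrative caller loop for xfs_bmap_collapse_extents() below (editorial
 * sketch, not from the original source; start_fsb and shift_fsb are
 * hypothetical names, and real callers also reserve, roll and commit
 * transactions between iterations):
 *
 *	bool		done = false;
 *	xfs_fileoff_t	next_fsb = start_fsb;
 *
 *	while (!done) {
 *		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
 *				shift_fsb, &done);
 *		if (error)
 *			break;
 *		...finish deferred ops and roll the transaction...
 *	}
 */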
int
xfs_bmap_collapse_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, prev;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);

	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}
	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	new_startoff = got.br_startoff - offset_shift_fsb;
	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
			error = -EINVAL;
			goto del_cursor;
		}

		if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
				offset_shift_fsb)) {
			error = xfs_bmse_merge(tp, ip, whichfork,
					offset_shift_fsb, &icur, &got, &prev,
					cur, &logflags);
			if (error)
				goto del_cursor;
			goto done;
		}
	} else {
		if (got.br_startoff < offset_shift_fsb) {
			error = -EINVAL;
			goto del_cursor;
		}
	}

	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
			cur, &logflags, new_startoff);
	if (error)
		goto del_cursor;

done:
	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur, error);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/* Make sure we won't be right-shifting an extent past the maximum bound. */
int
xfs_bmap_can_insert_extents(
	struct xfs_inode	*ip,
	xfs_fileoff_t		off,
	xfs_fileoff_t		shift)
{
	struct xfs_bmbt_irec	got;
	int			is_empty;
	int			error = 0;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);

	if (xfs_is_shutdown(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
	if (!error && !is_empty && got.br_startoff >= off &&
	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
		error = -EINVAL;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}
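/*
 * Editorial note (not from the original source): the masked comparison above
 * catches file offset overflow.  On-disk bmbt start offsets are confined to
 * BMBT_STARTOFF_MASK, so if adding the shift to the last extent's start
 * offset wraps the masked sum back below the old start offset, the shifted
 * extent could not be represented on disk and -EINVAL is returned.
 */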
int
xfs_bmap_insert_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fileoff_t		stop_fsb)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, next;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);

	if (*next_fsb == NULLFSBLOCK) {
		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
		    stop_fsb > got.br_startoff) {
			*done = true;
			goto del_cursor;
		}
	} else {
		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
			*done = true;
			goto del_cursor;
		}
	}
	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	new_startoff = got.br_startoff + offset_shift_fsb;
	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
		if (new_startoff + got.br_blockcount > next.br_startoff) {
			error = -EINVAL;
			goto del_cursor;
		}

		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way. We should
		 * never find mergeable extents in this scenario. Check anyway
		 * and warn if we encounter two extents that could be one.
		 */
		if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
				offset_shift_fsb))
			WARN_ON_ONCE(1);
	}

	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
			cur, &logflags, new_startoff);
	if (error)
		goto del_cursor;

	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
	    stop_fsb >= got.br_startoff + got.br_blockcount) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur, error);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
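/*
 * Editorial note (not from the original source): the usual consumer of the
 * right shift above is the insert-range fallocate path, which first splits
 * the extent at the insertion boundary using xfs_bmap_split_extent() below,
 * then repeatedly calls xfs_bmap_insert_extents() with *next_fsb set to
 * NULLFSBLOCK on the first iteration so the walk starts from the file's
 * last extent and works back toward stop_fsb.
 */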
/*
 * Split an extent into two extents at the block offset split_fsb, so that
 * split_fsb becomes the first block of the new extent.  @split_fsb is the
 * block where the extent is split.  If split_fsb lies in a hole or at the
 * first block of an extent, just return 0.
 */
int
xfs_bmap_split_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got;
	struct xfs_bmbt_irec	new; /* split extent */
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsblock_t		gotblkcnt; /* new block count for got */
	struct xfs_iext_cursor	icur;
	int			error = 0;
	int			logflags = 0;
	int			i = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* Read in all the extents */
	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	/*
	 * If there are no extents, or split_fsb lies in a hole, we are done.
	 */
	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
	    got.br_startoff >= split_fsb)
		return 0;

	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_bmbt_lookup_eq(cur, &got, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
	}

	got.br_blockcount = gotblkcnt;
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
			&got);

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, &got);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	xfs_iext_next(ifp, &icur);
	xfs_iext_insert(ip, &icur, &new, 0);
	ifp->if_nextents++;

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, &new, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags; /* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
				&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/* Record a bmap intent. */
static inline void
__xfs_bmap_add(
	struct xfs_trans		*tp,
	enum xfs_bmap_intent_type	type,
	struct xfs_inode		*ip,
	int				whichfork,
	struct xfs_bmbt_irec		*bmap)
{
	struct xfs_bmap_intent		*bi;

	if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
	    bmap->br_startblock == HOLESTARTBLOCK ||
	    bmap->br_startblock == DELAYSTARTBLOCK)
		return;

	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&bi->bi_list);
	bi->bi_type = type;
	bi->bi_owner = ip;
	bi->bi_whichfork = whichfork;
	bi->bi_bmap = *bmap;

	xfs_bmap_defer_add(tp, bi);
}

/* Map an extent into a file. */
void
xfs_bmap_map_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*PREV)
{
	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
}

/* Unmap an extent out of a file. */
void
xfs_bmap_unmap_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*PREV)
{
	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
}
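/*
 * Editorial sketch of the deferred-intent flow (not from the original
 * source): xfs_bmap_map_extent() and xfs_bmap_unmap_extent() above only
 * queue an xfs_bmap_intent on the transaction; the mapping change itself is
 * applied later, when deferred-op processing calls xfs_bmap_finish_one()
 * below for each queued intent:
 *
 *	xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &irec);
 *	error = xfs_defer_finish(&tp);
 *		(eventually invokes xfs_bmap_finish_one())
 */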
/*
 * Process one of the deferred bmap operations.  We pass back the
 * btree cursor to maintain our lock on the bmapbt between calls.
 */
int
xfs_bmap_finish_one(
	struct xfs_trans		*tp,
	struct xfs_bmap_intent		*bi)
{
	struct xfs_bmbt_irec		*bmap = &bi->bi_bmap;
	int				error = 0;
	int				flags = 0;

	if (bi->bi_whichfork == XFS_ATTR_FORK)
		flags |= XFS_BMAPI_ATTRFORK;

	ASSERT(tp->t_highest_agno == NULLAGNUMBER);

	trace_xfs_bmap_deferred(bi);

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
		return -EIO;

	switch (bi->bi_type) {
	case XFS_BMAP_MAP:
		if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
			flags |= XFS_BMAPI_PREALLOC;
		error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
				bmap->br_blockcount, bmap->br_startblock,
				flags);
		bmap->br_blockcount = 0;
		break;
	case XFS_BMAP_UNMAP:
		error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
				&bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
				1);
		break;
	default:
		ASSERT(0);
		xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
		error = -EFSCORRUPTED;
	}

	return error;
}

/* Check that an extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent_raw(
	struct xfs_mount	*mp,
	bool			rtfile,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		return __this_address;

	if (rtfile && whichfork == XFS_DATA_FORK) {
		if (!xfs_verify_rtbext(mp, irec->br_startblock,
				irec->br_blockcount))
			return __this_address;
	} else {
		if (!xfs_verify_fsbext(mp, irec->br_startblock,
				irec->br_blockcount))
			return __this_address;
	}
	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
		return __this_address;
	return NULL;
}

int __init
xfs_bmap_intent_init_cache(void)
{
	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
			sizeof(struct xfs_bmap_intent),
			0, 0, NULL);

	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_bmap_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_bmap_intent_cache);
	xfs_bmap_intent_cache = NULL;
}
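/*
 * Editorial note (not from the original source): the two cache hooks above
 * are expected to be paired at module init and teardown:
 *
 *	error = xfs_bmap_intent_init_cache();	(at module load)
 *	...
 *	xfs_bmap_intent_destroy_cache();	(at module unload)
 *
 * kmem_cache_destroy() tolerates a NULL pointer, so teardown stays safe
 * even if cache creation failed at init time.
 */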
/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	return xfs_bmap_validate_extent_raw(ip->i_mount,
			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
}

/*
 * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define	XFS_ITRUNC_MAX_EXTENTS	2

/*
 * Unmap every extent in part of an inode's fork.  We don't do any higher level
 * invalidation work at all.
 */
int
xfs_bunmapi_range(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	uint32_t		flags,
	xfs_fileoff_t		startoff,
	xfs_fileoff_t		endoff)
{
	xfs_filblks_t		unmap_len = endoff - startoff + 1;
	int			error = 0;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);

	while (unmap_len > 0) {
		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
				XFS_ITRUNC_MAX_EXTENTS);
		if (error)
			goto out;

		/* free the just unmapped extents */
		error = xfs_defer_finish(tpp);
		if (error)
			goto out;
		cond_resched();
	}
out:
	return error;
}

struct xfs_bmap_query_range {
	xfs_bmap_query_range_fn	fn;
	void			*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_bmap_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_bmap_query_range	*query = priv;
	struct xfs_bmbt_irec		irec;
	xfs_failaddr_t			fa;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
			&irec);
	if (fa) {
		xfs_btree_mark_sick(cur);
		return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
				cur->bc_ino.whichfork, fa, &irec);
	}

	return query->fn(cur, &irec, query->priv);
}

/* Find all bmaps. */
int
xfs_bmap_query_all(
	struct xfs_btree_cur		*cur,
	xfs_bmap_query_range_fn		fn,
	void				*priv)
{
	struct xfs_bmap_query_range	query = {
		.priv			= priv,
		.fn			= fn,
	};

	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
}

/* Helper function to extract extent size hint from inode */
xfs_extlen_t
xfs_get_extsz_hint(
	struct xfs_inode	*ip)
{
	/*
	 * No point in aligning allocations if we need to COW to actually
	 * write to them.
	 */
	if (!xfs_is_always_cow_inode(ip) &&
	    (ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
		return ip->i_extsize;
	if (XFS_IS_REALTIME_INODE(ip) &&
	    ip->i_mount->m_sb.sb_rextsize > 1)
		return ip->i_mount->m_sb.sb_rextsize;
	return 0;
}

/*
 * Helper function to extract CoW extent size hint from inode.
 * Between the extent size hint and the CoW extent size hint, we
 * return the greater of the two.  If the value is zero (automatic),
 * use the default size.
 */
xfs_extlen_t
xfs_get_cowextsz_hint(
	struct xfs_inode	*ip)
{
	xfs_extlen_t		a, b;

	a = 0;
	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
		a = ip->i_cowextsize;
	if (XFS_IS_REALTIME_INODE(ip)) {
		b = 0;
		if (ip->i_diflags & XFS_DIFLAG_EXTSIZE)
			b = ip->i_extsize;
	} else {
		b = xfs_get_extsz_hint(ip);
	}

	a = max(a, b);
	if (a == 0)
		return XFS_DEFAULT_COWEXTSZ_HINT;
	return a;
}
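/*
 * Worked example for xfs_get_cowextsz_hint() (editorial illustration, not
 * from the original source): for a non-realtime inode with
 * XFS_DIFLAG_EXTSIZE set and i_extsize == 16 but no XFS_DIFLAG2_COWEXTSIZE,
 * the helper returns max(0, 16) == 16 blocks; with neither hint set, both
 * values are zero and it falls back to XFS_DEFAULT_COWEXTSZ_HINT.
 */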