// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_health.h"
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
#include "xfs_rtgroup.h"

struct kmem_cache		*xfs_bmap_intent_cache;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	uint64_t	maxblocks;	/* max blocks at this level */
	xfs_extnum_t	maxleafents;	/* max leaf entries possible */
	int		level;		/* btree level */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a fork, hence the maximum number of
	 * leaf entries, is controlled by the size of the on-disk extent count.
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that the
	 * fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted with
	 * ATTR2 and then mounted back with ATTR1, keeping the i_forkoff's fixed
	 * but probably at various positions. Therefore, for both ATTR1 and
	 * ATTR2 we have to assume the worst case scenario of a minimum size
	 * available.
	 */
	maxleafents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
				whichfork);
	if (whichfork == XFS_DATA_FORK)
		sz = xfs_bmdr_space_calc(MINDBTPTRS);
	else
		sz = xfs_bmdr_space_calc(MINABTPTRS);

	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = howmany_64(maxleafents, minleafrecs);
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = howmany_64(maxblocks, minnoderecs);
	}
	mp->m_bm_maxlevels[whichfork] = level;
	ASSERT(mp->m_bm_maxlevels[whichfork] <= xfs_bmbt_maxlevels_ondisk());
}
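
/*
 * Illustrative walk through the loop above; the numbers are purely
 * hypothetical and not taken from any particular geometry.  With
 * maxleafents = 2^32 - 1 and minleafrecs = minnoderecs = 125, level 1
 * needs howmany_64(2^32 - 1, 125) = 34359739 leaf blocks.  Each pass
 * divides by 125 again (274878, then 2200, then 18) until the count
 * fits in maxrootrecs and collapses to 1, leaving m_bm_maxlevels = 5.
 */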

unsigned int
xfs_bmap_compute_attr_offset(
	struct xfs_mount	*mp)
{
	if (mp->m_sb.sb_inodesize == 256)
		return XFS_LITINO(mp) - xfs_bmdr_space_calc(MINABTPTRS);
	return xfs_bmdr_space_calc(6 * MINABTPTRS);
}

STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b = *irec;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

STATIC int				/* error */
xfs_bmbt_lookup_first(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = 0;
	cur->bc_rec.b.br_startblock = 0;
	cur->bc_rec.b.br_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);

	return whichfork != XFS_COW_FORK &&
		ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
		ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);

	return whichfork != XFS_COW_FORK &&
		ifp->if_format == XFS_DINODE_FMT_BTREE &&
		ifp->if_nextents <= XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Update the record referred to by cur to the value given by irec.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
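
/*
 * Worked example for the computation above, under assumed (not real)
 * geometry limits: for len = 10000, a leaf maxrecs of 250 gives
 * ceil(10000 / 250) = 40 leaf blocks; a node maxrecs of 125 then gives
 * ceil(40 / 125) = 1 node, so rval = 41 and, because len hit 1 early,
 * one more block is charged for each btree level not yet visited.
 */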

/*
 * Calculate the default attribute fork offset for newly created inodes.
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	if (ip->i_df.if_format == XFS_DINODE_FMT_DEV)
		return roundup(sizeof(xfs_dev_t), 8);
	return M_IGEO(ip->i_mount)->attr_fork_offset;
}

/*
 * Helper routine to reset inode i_forkoff field when switching attribute fork
 * from local to extent format - we reset it where possible to make space
 * available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_df.if_format != XFS_DINODE_FMT_DEV &&
	    ip->i_df.if_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_forkoff)
			ip->i_forkoff = dfl_forkoff;
	}
}

static int
xfs_bmap_read_buf(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	struct xfs_buf		**bpp)		/* buffer for fsbno */
{
	struct xfs_buf		*bp;		/* return value */
	int			error;

	if (!xfs_verify_fsbno(mp, fsbno))
		return -EFSCORRUPTED;
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
			&xfs_bmbt_buf_ops);
	if (!error) {
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		*bpp = bp;
	}
	return error;
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item	*lip;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < cur->bc_maxlevels; i++) {
		if (!cur->bc_levels[i].bp)
			break;
		if (xfs_buf_daddr(cur->bc_levels[i].bp) == bno)
			return cur->bc_levels[i].bp;
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;

		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    xfs_buf_daddr(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = xfs_bmbt_key_addr(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = xfs_bmap_broot_ptr_addr(mp, block, i, sz);
		else
			pp = xfs_bmbt_ptr_addr(mp, block, i, dmxr);

		for (j = i + 1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = xfs_bmap_broot_ptr_addr(mp, block, j, sz);
			else
				thispa = xfs_bmbt_ptr_addr(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %lld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				xfs_err(mp, "%s: ptrs are equal in node\n",
					__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such large
 * bmapbt constructs that is going to catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	struct xfs_btree_cur	*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	struct xfs_buf		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i = 0, j;	/* index into the extents list */
	int			level;	/* btree level, for checking */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (ifp->if_format != XFS_DINODE_FMT_BTREE)
		return;

	/* skip large extent count inodes */
	if (ip->i_df.if_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	block = ifp->if_broot;
	/*
	 * Root level must use xfs_bmap_broot_ptr_addr() to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = xfs_bmap_broot_ptr_addr(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
			if (xfs_metadata_is_sick(error))
				xfs_btree_mark_sick(cur);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */
		xfs_check_block(block, mp, 0, 0);
		pp = xfs_bmbt_ptr_addr(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */
		ep = xfs_bmbt_rec_addr(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = xfs_bmbt_rec_addr(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
			if (xfs_metadata_is_sick(error))
				xfs_btree_mark_sick(cur);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %llu extents",
		__func__, i);
	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return;
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	uint32_t		flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
#endif /* DEBUG */

/*
 * Inode fork format manipulation functions
 */

/*
 * Convert the inode format to extent format if it currently is in btree format,
 * but the extent list is small enough that it fits into the extent format.
 *
 * Since the extents are already in-core, all we have to do is give up the space
 * for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	struct xfs_trans	*tp,	/* transaction pointer */
	struct xfs_inode	*ip,	/* incore inode pointer */
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*rblock = ifp->if_broot;
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	struct xfs_buf		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	__be64			*pp;	/* ptr to block address */
	struct xfs_owner_info	oinfo;

	/* check if we actually need the extent format first: */
	if (!xfs_bmap_wants_extents(ip, whichfork))
		return 0;

	ASSERT(cur);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, false) == 1);

	pp = xfs_bmap_broot_ptr_addr(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
#ifdef DEBUG
	if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}
#endif
	error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
	if (xfs_metadata_is_sick(error))
		xfs_btree_mark_sick(cur);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;

	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
			XFS_AG_RESV_NONE, 0);
	if (error)
		return error;

	ip->i_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_levels[0].bp == cbp)
		cur->bc_levels[0].bp = NULL;
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	*logflagsp |= XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
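
/*
 * A rough before/after sketch of the conversion above, assuming the
 * minimal tree this function accepts (an incore root with exactly one
 * leaf child):
 *
 *   before:  inode root (1 record) ---> single leaf block
 *   after:   extent records held directly in the inode fork
 *
 * The leaf block is not freed immediately; xfs_free_extent_later()
 * queues it so the space is released when this transaction's deferred
 * ops are finished.
 */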

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	struct xfs_btree_cur	**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	struct xfs_buf		*abp;		/* buffer for ablock */
	struct xfs_alloc_arg	args;		/* allocation arguments */
	struct xfs_bmbt_rec	*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	struct xfs_ifork	*ifp;		/* inode fork pointer */
	struct xfs_bmbt_key	*kp;		/* root block key pointer */
	struct xfs_mount	*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = xfs_ifork_ptr(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore. This needs to be undone if we fail
	 * to expand the root.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	xfs_bmbt_init_block(ip, block, NULL, 1, 1);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	if (wasdel)
		cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
	/*
	 * Convert to a btree with two levels, one record in root.
	 */
	ifp->if_format = XFS_DINODE_FMT_BTREE;
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);

	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	error = xfs_alloc_vextent_start_ag(&args,
				XFS_INO_TO_FSB(mp, ip->i_ino));
	if (error)
		goto out_root_realloc;

	/*
	 * Allocation can't fail, the space was reserved.
	 */
	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		error = -ENOSPC;
		goto out_root_realloc;
	}

	cur->bc_bmap.allocated++;
	ip->i_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, args.fsbno),
			mp->m_bsize, 0, &abp);
	if (error)
		goto out_unreserve_dquot;

	/*
	 * Fill in the child block.
	 */
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_bmbt_init_block(ip, ablock, abp, 0, 0);

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = xfs_bmbt_rec_addr(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == ifp->if_nextents);
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = xfs_bmbt_key_addr(mp, block, 1);
	arp = xfs_bmbt_rec_addr(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = xfs_bmbt_ptr_addr(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;

out_unreserve_dquot:
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
out_root_realloc:
	xfs_iroot_realloc(ip, -1, whichfork);
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	ASSERT(ifp->if_broot == NULL);
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);

	return error;
}
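
/*
 * Sketch of the result built above, for orientation only: a two-level
 * tree whose root lives in the inode fork and whose single child leaf
 * holds every real extent record:
 *
 *   inode root:  [ key = startoff of first record | ptr = args.fsbno ]
 *   child leaf:  [ rec 1 | rec 2 | ... | rec cnt ]
 *
 * Note that delalloc records are skipped when the leaf is filled; they
 * exist only in the incore extent tree until converted to real extents.
 */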

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);

	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(ifp->if_nextents == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_data = NULL;
	ifp->if_height = 0;
	ifp->if_format = XFS_DINODE_FMT_EXTENTS;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}


int					/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp, void *priv),
	void		*priv)
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	struct xfs_ifork *ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	struct xfs_buf	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = xfs_ifork_ptr(ip, whichfork);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);

	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent_start_ag(&args,
			XFS_INO_TO_FSB(args.mp, ip->i_ino));
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(args.mp, args.fsbno),
			args.mp->m_bsize, 0, &bp);
	if (error)
		goto done;

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
	 */
	init_fn(tp, bp, ip, ifp, priv);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_data = NULL;
	ifp->if_height = 0;

	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	ifp->if_nextents = 1;
	ip->i_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
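
/*
 * Sketch of the init_fn contract used above (a summary, not a spec):
 * the callout gets the locked buffer for the newly allocated block and
 * is expected to (a) initialize the block from the local fork data,
 * (b) set the buffer log item type, and (c) log the range it wrote,
 * since only the callout knows the remote format and how much to log.
 * xfs_symlink_local_to_remote(), used for symlinks further down, is an
 * in-tree example of such a callout.
 */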

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_btree_block	*block = ip->i_df.if_broot;
	struct xfs_btree_cur	*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;

	if (xfs_bmap_bmdr_space(block) <= xfs_inode_data_fork_size(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		error = xfs_bmbt_lookup_first(cur, &stat);
		if (error)
			goto error0;
		/* must be at least one entry */
		if (XFS_IS_CORRUPT(mp, stat != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_btree_cur	*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_df.if_nextents * sizeof(struct xfs_bmbt_rec) <=
	    xfs_inode_data_fork_size(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
					  XFS_DATA_FORK);
	if (cur) {
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised
 * they handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			*flags)		/* inode logging flags */
{
	struct xfs_da_args	dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= xfs_inode_data_fork_size(ip))
		return 0;

	if (S_ISDIR(VFS_I(ip)->i_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		dargs.owner = ip->i_ino;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(VFS_I(ip)->i_mode))
		return xfs_bmap_local_to_extents(tp, ip, 1, flags,
				XFS_DATA_FORK, xfs_symlink_local_to_remote,
				NULL);

	/* should only be called for types that support local format data */
	ASSERT(0);
	xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
	return -EFSCORRUPTED;
}

/*
 * Set an inode attr fork offset based on the format of the data fork.
 */
static int
xfs_bmap_set_attrforkoff(
	struct xfs_inode	*ip,
	int			size,
	int			*version)
{
	int			default_size = xfs_default_attroffset(ip) >> 3;

	switch (ip->i_df.if_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_forkoff = default_size;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_forkoff)
			ip->i_forkoff = default_size;
		else if (xfs_has_attr2(ip->i_mount) && version)
			*version = 2;
		break;
	default:
		ASSERT(0);
		return -EINVAL;
	}

	return 0;
}
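
/*
 * Worked example for the helper above (the numbers are illustrative):
 * i_forkoff is kept in units of 8 bytes, so a returned offset of 13
 * places the attribute fork 13 * 8 = 104 bytes into the inode literal
 * area, leaving the first 104 bytes of it to the data fork.
 */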

/*
 * Convert inode from non-attributed to attributed. Caller must hold the
 * ILOCK_EXCL and the file cannot have an attr fork.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	struct xfs_mount	*mp = tp->t_mountp;
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
	if (xfs_is_metadir_inode(ip))
		ASSERT(XFS_IS_DQDETACHED(ip));
	else
		ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
	ASSERT(!xfs_inode_has_attr_fork(ip));

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_bmap_set_attrforkoff(ip, size, &version);
	if (error)
		return error;

	xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
	logflags = 0;
	switch (ip->i_df.if_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		return error;
	if (!xfs_has_attr(mp) ||
	    (!xfs_has_attr2(mp) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_has_attr(mp)) {
			xfs_add_attr(mp);
			log_sb = true;
		}
		if (!xfs_has_attr2(mp) && version == 2) {
			xfs_add_attr2(mp);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	return 0;
}

/*
 * Internal and external extent tree search functions.
 */

struct xfs_iread_state {
	struct xfs_iext_cursor	icur;
	xfs_extnum_t		loaded;
};

int
xfs_bmap_complain_bad_rec(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_failaddr_t		fa,
	const struct xfs_bmbt_irec *irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	const char		*forkname;

	switch (whichfork) {
	case XFS_DATA_FORK:	forkname = "data"; break;
	case XFS_ATTR_FORK:	forkname = "attr"; break;
	case XFS_COW_FORK:	forkname = "CoW"; break;
	default:		forkname = "???"; break;
	}

	xfs_warn(mp,
 "Bmap BTree record corruption in inode 0x%llx %s fork detected at %pS!",
				ip->i_ino, forkname, fa);
	xfs_warn(mp,
		"Offset 0x%llx, start block 0x%llx, block count 0x%llx state 0x%x",
		irec->br_startoff, irec->br_startblock, irec->br_blockcount,
		irec->br_state);

	return -EFSCORRUPTED;
}

/* Stuff every bmbt record from this block into the incore extent map. */
static int
xfs_iread_bmbt_block(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*priv)
{
	struct xfs_iread_state	*ir = priv;
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_inode	*ip = cur->bc_ino.ip;
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	struct xfs_bmbt_rec	*frp;
	xfs_extnum_t		num_recs;
	xfs_extnum_t		j;
	int			whichfork = cur->bc_ino.whichfork;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);

	block = xfs_btree_get_block(cur, level, &bp);

	/* Abort if we find more records than nextents. */
	num_recs = xfs_btree_get_numrecs(block);
	if (unlikely(ir->loaded + num_recs > ifp->if_nextents)) {
		xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
				(unsigned long long)ip->i_ino);
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
				sizeof(*block), __this_address);
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	/* Copy records into the incore cache. */
	frp = xfs_bmbt_rec_addr(mp, block, 1);
	for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
		struct xfs_bmbt_irec	new;
		xfs_failaddr_t		fa;

		xfs_bmbt_disk_get_all(frp, &new);
		fa = xfs_bmap_validate_extent(ip, whichfork, &new);
		if (fa) {
			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
					"xfs_iread_extents(2)", frp,
					sizeof(*frp), fa);
			xfs_bmap_mark_sick(ip, whichfork);
			return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
					&new);
		}
		xfs_iext_insert(ip, &ir->icur, &new,
				xfs_bmap_fork_to_state(whichfork));
		trace_xfs_read_extent(ip, &ir->icur,
				xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
		xfs_iext_next(ifp, &ir->icur);
	}

	return 0;
}

/*
 * Read in extents from a btree-format inode.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_iread_state	ir;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_cur	*cur;
	int			error;

	if (!xfs_need_iread_extents(ifp))
		return 0;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);

	ir.loaded = 0;
	xfs_iext_first(ifp, &ir.icur);
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
			XFS_BTREE_VISIT_RECORDS, &ir);
	xfs_btree_del_cursor(cur, error);
	if (error)
		goto out;

	if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(ir.loaded == xfs_iext_count(ifp));
	/*
	 * Use release semantics so that we can use acquire semantics in
	 * xfs_need_iread_extents and be guaranteed to see a valid mapping tree
	 * after that load.
	 */
	smp_store_release(&ifp->if_needextents, 0);
	return 0;
out:
	if (xfs_metadata_is_sick(error))
		xfs_bmap_mark_sick(ip, whichfork);
	xfs_iext_destroy(ifp);
	return error;
}

/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free. This is the
 * lowest-address hole if the fork has holes, else the first block past the end
 * of the fork. Return 0 if the fork is currently local (in-inode).
 */
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		lastaddr = 0;
	xfs_fileoff_t		lowest, max;
	int			error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	ASSERT(xfs_ifork_has_extents(ifp));

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	lowest = max = *first_unused;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * See if the hole before this extent will work.
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - max >= len)
			break;
		lastaddr = got.br_startoff + got.br_blockcount;
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}

	*first_unused = max;
	return 0;
}
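
/*
 * Illustrative example of the search above (offsets are made up): with
 * extents covering [0, 5) and [10, 15) and len = 3, the hole starting
 * at offset 5 is 5 blocks long and satisfies the request, so
 * *first_unused is set to 5.  With len = 6, neither hole fits and the
 * search falls through past the last extent, returning 15.
 */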

/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	int			error;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_LOCAL:
		*last_block = 0;
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		ASSERT(0);
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;
	return 0;
}

int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_iext_cursor	icur;
	int			error;

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	xfs_iext_last(ifp, &icur);
	if (!xfs_iext_get_extent(ifp, &icur, rec))
		*is_empty = 1;
	else
		*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file. When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
 * at, or past the EOF.
 */
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = false;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = true;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at least
	 * into the last delayed allocated extent.
	 */
	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
		(bma->offset >= rec.br_startoff &&
		 isnullstartblock(rec.br_startblock));
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	*last_block = 0;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
		return 0;

	if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
	if (error || is_empty)
		return error;

	*last_block = rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Extent tree manipulation functions used during allocation.
 */

static inline bool
xfs_bmap_same_rtgroup(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*left,
	struct xfs_bmbt_irec	*right)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
		if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
		    xfs_rtb_to_rgno(mp, right->br_startblock))
			return false;
	}

	return true;
}

/*
 * Convert a delayed allocation to a real allocation.
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_mount	*mp = bma->ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval = 0;	/* return value (logging flags) */
	uint32_t		state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new;	/* new count del alloc blocks used */
	xfs_filblks_t		da_old;	/* old count del alloc blocks used */
	xfs_filblks_t		temp = 0;	/* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_bmbt_irec	old;

	ASSERT(whichfork != XFS_ATTR_FORK);
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(isnullstartblock(PREV.br_startblock));
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
	    xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= XFS_MAX_BMBT_EXTLEN) &&
	    xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
		state |= BMAP_RIGHT_CONTIG;
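
	/*
	 * An illustrative summary of the four state bits driving the
	 * switch below.  LEFT_FILLING / RIGHT_FILLING say whether the new
	 * real extent starts / ends exactly at the start / end of the
	 * delalloc extent PREV; LEFT_CONTIG / RIGHT_CONTIG say whether it
	 * can be merged with the real neighbor on that side:
	 *
	 *	LEFT	       PREV (delalloc)	       RIGHT
	 *   +--------+------------------------------+--------+
	 *	      |<------------ new ------------>|
	 *
	 * All four bits set: new replaces PREV entirely and fuses LEFT,
	 * new and RIGHT into one extent.  No bits set: new carves the
	 * middle out of PREV, leaving delalloc on both sides (case 0).
	 */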

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		ifp->if_nextents--;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		old = LEFT;
		LEFT.br_blockcount += PREV.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not. Take care
		 * with delay -> unwritten extent allocation here because the
		 * delalloc record we are overwriting is always written.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;
		PREV.br_state = new->br_state;

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &PREV);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_state = new->br_state;
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		old = LEFT;
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		LEFT.br_blockcount += new->br_blockcount;

		PREV.br_blockcount = temp;
		PREV.br_startoff += new->br_blockcount;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		ASSERT(da_new <= da_old);
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock) -
				(bma->cur ? bma->cur->bc_bmap.allocated : 0));

		PREV.br_startoff = new_endoff;
		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_prev(ifp, &bma->icur);
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is contiguous with the new allocation.
		 */
		old = RIGHT;
		RIGHT.br_startoff = new->br_startoff;
		RIGHT.br_startblock = new->br_startblock;
		RIGHT.br_blockcount += new->br_blockcount;

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_bmbt_update(bma->cur, &RIGHT);
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock));

		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
		ASSERT(da_new <= da_old);
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_bmap.allocated : 0));

		PREV.br_startblock = nullstartblock(da_new);
		PREV.br_blockcount = temp;
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_next(ifp, &bma->icur);
		ASSERT(da_new <= da_old);
		break;

	case 0:
		/*
		 * Filling in the middle part of a previous delayed allocation.
		 * Contiguity is impossible here.
		 * This case is avoided almost all the time.
		 *
		 * We start with a delayed allocation:
		 *
		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
		 *  PREV @ idx
		 *
		 * and we are allocating:
		 *                     +rrrrrrrrrrrrrrrrr+
		 *                            new
		 *
		 * and we set it up for insertion as:
		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
		 *                            new
		 *  PREV @ idx          LEFT              RIGHT
		 *                      inserted at idx + 1
		 */
		old = PREV;

		/* LEFT is the new middle */
		LEFT = *new;

		/* RIGHT is the new right */
		RIGHT.br_state = PREV.br_state;
		RIGHT.br_startoff = new_endoff;
		RIGHT.br_blockcount =
			PREV.br_startoff + PREV.br_blockcount - new_endoff;
		RIGHT.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					RIGHT.br_blockcount));

		/* truncate PREV */
		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
		PREV.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					PREV.br_blockcount));
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
		ifp->if_nextents++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 0)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			if (XFS_IS_CORRUPT(mp, i != 1)) {
				xfs_btree_mark_sick(bma->cur);
				error = -EFSCORRUPTED;
				goto done;
			}
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		da_new = startblockval(PREV.br_startblock) +
			 startblockval(RIGHT.br_startblock);
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_CONTIG:
	case BMAP_RIGHT_CONTIG:
		/*
		 * These cases are all impossible.
		 */
		ASSERT(0);
	}
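
	/*
	 * The cases converge here.  A worked example of the accounting
	 * below, with illustrative numbers: if PREV had reserved
	 * da_old = 5 worst-case indirect blocks and the remaining
	 * delalloc now needs only da_new = 2, the surplus 3 blocks are
	 * returned to fdblocks; if da_new grew instead, the difference
	 * is taken out of fdblocks.
	 */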
1972 */ 1973 ASSERT(0); 1974 } 1975 1976 /* add reverse mapping unless caller opted out */ 1977 if (!(bma->flags & XFS_BMAPI_NORMAP)) 1978 xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new); 1979 1980 /* convert to a btree if necessary */ 1981 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 1982 int tmp_logflags; /* partial log flag return val */ 1983 1984 ASSERT(bma->cur == NULL); 1985 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 1986 &bma->cur, da_old > 0, &tmp_logflags, 1987 whichfork); 1988 bma->logflags |= tmp_logflags; 1989 if (error) 1990 goto done; 1991 } 1992 1993 if (da_new != da_old) 1994 xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old); 1995 1996 if (bma->cur) { 1997 da_new += bma->cur->bc_bmap.allocated; 1998 bma->cur->bc_bmap.allocated = 0; 1999 } 2000 2001 /* adjust for changes in reserved delayed indirect blocks */ 2002 if (da_new < da_old) 2003 xfs_add_fdblocks(mp, da_old - da_new); 2004 else if (da_new > da_old) 2005 error = xfs_dec_fdblocks(mp, da_new - da_old, true); 2006 2007 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 2008 done: 2009 if (whichfork != XFS_COW_FORK) 2010 bma->logflags |= rval; 2011 return error; 2012 #undef LEFT 2013 #undef RIGHT 2014 #undef PREV 2015 } 2016 2017 /* 2018 * Convert an unwritten allocation to a real allocation or vice versa. 2019 */ 2020 int /* error */ 2021 xfs_bmap_add_extent_unwritten_real( 2022 struct xfs_trans *tp, 2023 xfs_inode_t *ip, /* incore inode pointer */ 2024 int whichfork, 2025 struct xfs_iext_cursor *icur, 2026 struct xfs_btree_cur **curp, /* if *curp is null, not a btree */ 2027 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2028 int *logflagsp) /* inode logging flags */ 2029 { 2030 struct xfs_btree_cur *cur; /* btree cursor */ 2031 int error; /* error return value */ 2032 int i; /* temp state */ 2033 struct xfs_ifork *ifp; /* inode fork pointer */ 2034 xfs_fileoff_t new_endoff; /* end offset of new entry */ 2035 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 2036 /* left is 0, right is 1, prev is 2 */ 2037 int rval=0; /* return value (logging flags) */ 2038 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2039 struct xfs_mount *mp = ip->i_mount; 2040 struct xfs_bmbt_irec old; 2041 2042 *logflagsp = 0; 2043 2044 cur = *curp; 2045 ifp = xfs_ifork_ptr(ip, whichfork); 2046 2047 ASSERT(!isnullstartblock(new->br_startblock)); 2048 2049 XFS_STATS_INC(mp, xs_add_exlist); 2050 2051 #define LEFT r[0] 2052 #define RIGHT r[1] 2053 #define PREV r[2] 2054 2055 /* 2056 * Set up a bunch of variables to make the tests simpler. 2057 */ 2058 error = 0; 2059 xfs_iext_get_extent(ifp, icur, &PREV); 2060 ASSERT(new->br_state != PREV.br_state); 2061 new_endoff = new->br_startoff + new->br_blockcount; 2062 ASSERT(PREV.br_startoff <= new->br_startoff); 2063 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 2064 2065 /* 2066 * Set flags determining what part of the previous oldext allocation 2067 * extent is being replaced by a newext allocation. 2068 */ 2069 if (PREV.br_startoff == new->br_startoff) 2070 state |= BMAP_LEFT_FILLING; 2071 if (PREV.br_startoff + PREV.br_blockcount == new_endoff) 2072 state |= BMAP_RIGHT_FILLING; 2073 2074 /* 2075 * Check and set flags if this segment has a left neighbor. 2076 * Don't set contiguous if the combined extent would be too large. 
2077 */ 2078 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { 2079 state |= BMAP_LEFT_VALID; 2080 if (isnullstartblock(LEFT.br_startblock)) 2081 state |= BMAP_LEFT_DELAY; 2082 } 2083 2084 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2085 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2086 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2087 LEFT.br_state == new->br_state && 2088 LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2089 xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new)) 2090 state |= BMAP_LEFT_CONTIG; 2091 2092 /* 2093 * Check and set flags if this segment has a right neighbor. 2094 * Don't set contiguous if the combined extent would be too large. 2095 * Also check for all-three-contiguous being too large. 2096 */ 2097 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { 2098 state |= BMAP_RIGHT_VALID; 2099 if (isnullstartblock(RIGHT.br_startblock)) 2100 state |= BMAP_RIGHT_DELAY; 2101 } 2102 2103 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2104 new_endoff == RIGHT.br_startoff && 2105 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2106 new->br_state == RIGHT.br_state && 2107 new->br_blockcount + RIGHT.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2108 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2109 BMAP_RIGHT_FILLING)) != 2110 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2111 BMAP_RIGHT_FILLING) || 2112 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 2113 <= XFS_MAX_BMBT_EXTLEN) && 2114 xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT)) 2115 state |= BMAP_RIGHT_CONTIG; 2116 2117 /* 2118 * Switch out based on the FILLING and CONTIG state bits. 2119 */ 2120 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2121 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { 2122 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2123 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2124 /* 2125 * Setting all of a previous oldext extent to newext. 2126 * The left and right neighbors are both contiguous with new. 
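 *
 * Illustration, in the style of the diagrams above (o = oldext,
 * n = newext; LEFT and RIGHT are already in the new state):
 *
 *	+nnnnnnn+ooooooo+nnnnnnn+	LEFT, PREV, RIGHT before
 *	+nnnnnnnnnnnnnnnnnnnnnnn+	one record (LEFT) after
 *
 * The two right-hand btree records are deleted below and LEFT absorbs
 * their blocks.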
2127 */ 2128 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; 2129 2130 xfs_iext_remove(ip, icur, state); 2131 xfs_iext_remove(ip, icur, state); 2132 xfs_iext_prev(ifp, icur); 2133 xfs_iext_update_extent(ip, state, icur, &LEFT); 2134 ifp->if_nextents -= 2; 2135 if (cur == NULL) 2136 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2137 else { 2138 rval = XFS_ILOG_CORE; 2139 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2140 if (error) 2141 goto done; 2142 if (XFS_IS_CORRUPT(mp, i != 1)) { 2143 xfs_btree_mark_sick(cur); 2144 error = -EFSCORRUPTED; 2145 goto done; 2146 } 2147 if ((error = xfs_btree_delete(cur, &i))) 2148 goto done; 2149 if (XFS_IS_CORRUPT(mp, i != 1)) { 2150 xfs_btree_mark_sick(cur); 2151 error = -EFSCORRUPTED; 2152 goto done; 2153 } 2154 if ((error = xfs_btree_decrement(cur, 0, &i))) 2155 goto done; 2156 if (XFS_IS_CORRUPT(mp, i != 1)) { 2157 xfs_btree_mark_sick(cur); 2158 error = -EFSCORRUPTED; 2159 goto done; 2160 } 2161 if ((error = xfs_btree_delete(cur, &i))) 2162 goto done; 2163 if (XFS_IS_CORRUPT(mp, i != 1)) { 2164 xfs_btree_mark_sick(cur); 2165 error = -EFSCORRUPTED; 2166 goto done; 2167 } 2168 if ((error = xfs_btree_decrement(cur, 0, &i))) 2169 goto done; 2170 if (XFS_IS_CORRUPT(mp, i != 1)) { 2171 xfs_btree_mark_sick(cur); 2172 error = -EFSCORRUPTED; 2173 goto done; 2174 } 2175 error = xfs_bmbt_update(cur, &LEFT); 2176 if (error) 2177 goto done; 2178 } 2179 break; 2180 2181 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2182 /* 2183 * Setting all of a previous oldext extent to newext. 2184 * The left neighbor is contiguous, the right is not. 2185 */ 2186 LEFT.br_blockcount += PREV.br_blockcount; 2187 2188 xfs_iext_remove(ip, icur, state); 2189 xfs_iext_prev(ifp, icur); 2190 xfs_iext_update_extent(ip, state, icur, &LEFT); 2191 ifp->if_nextents--; 2192 if (cur == NULL) 2193 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2194 else { 2195 rval = XFS_ILOG_CORE; 2196 error = xfs_bmbt_lookup_eq(cur, &PREV, &i); 2197 if (error) 2198 goto done; 2199 if (XFS_IS_CORRUPT(mp, i != 1)) { 2200 xfs_btree_mark_sick(cur); 2201 error = -EFSCORRUPTED; 2202 goto done; 2203 } 2204 if ((error = xfs_btree_delete(cur, &i))) 2205 goto done; 2206 if (XFS_IS_CORRUPT(mp, i != 1)) { 2207 xfs_btree_mark_sick(cur); 2208 error = -EFSCORRUPTED; 2209 goto done; 2210 } 2211 if ((error = xfs_btree_decrement(cur, 0, &i))) 2212 goto done; 2213 if (XFS_IS_CORRUPT(mp, i != 1)) { 2214 xfs_btree_mark_sick(cur); 2215 error = -EFSCORRUPTED; 2216 goto done; 2217 } 2218 error = xfs_bmbt_update(cur, &LEFT); 2219 if (error) 2220 goto done; 2221 } 2222 break; 2223 2224 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2225 /* 2226 * Setting all of a previous oldext extent to newext. 2227 * The right neighbor is contiguous, the left is not. 
2228 */ 2229 PREV.br_blockcount += RIGHT.br_blockcount; 2230 PREV.br_state = new->br_state; 2231 2232 xfs_iext_next(ifp, icur); 2233 xfs_iext_remove(ip, icur, state); 2234 xfs_iext_prev(ifp, icur); 2235 xfs_iext_update_extent(ip, state, icur, &PREV); 2236 ifp->if_nextents--; 2237 2238 if (cur == NULL) 2239 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2240 else { 2241 rval = XFS_ILOG_CORE; 2242 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2243 if (error) 2244 goto done; 2245 if (XFS_IS_CORRUPT(mp, i != 1)) { 2246 xfs_btree_mark_sick(cur); 2247 error = -EFSCORRUPTED; 2248 goto done; 2249 } 2250 if ((error = xfs_btree_delete(cur, &i))) 2251 goto done; 2252 if (XFS_IS_CORRUPT(mp, i != 1)) { 2253 xfs_btree_mark_sick(cur); 2254 error = -EFSCORRUPTED; 2255 goto done; 2256 } 2257 if ((error = xfs_btree_decrement(cur, 0, &i))) 2258 goto done; 2259 if (XFS_IS_CORRUPT(mp, i != 1)) { 2260 xfs_btree_mark_sick(cur); 2261 error = -EFSCORRUPTED; 2262 goto done; 2263 } 2264 error = xfs_bmbt_update(cur, &PREV); 2265 if (error) 2266 goto done; 2267 } 2268 break; 2269 2270 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 2271 /* 2272 * Setting all of a previous oldext extent to newext. 2273 * Neither the left nor right neighbors are contiguous with 2274 * the new one. 2275 */ 2276 PREV.br_state = new->br_state; 2277 xfs_iext_update_extent(ip, state, icur, &PREV); 2278 2279 if (cur == NULL) 2280 rval = XFS_ILOG_DEXT; 2281 else { 2282 rval = 0; 2283 error = xfs_bmbt_lookup_eq(cur, new, &i); 2284 if (error) 2285 goto done; 2286 if (XFS_IS_CORRUPT(mp, i != 1)) { 2287 xfs_btree_mark_sick(cur); 2288 error = -EFSCORRUPTED; 2289 goto done; 2290 } 2291 error = xfs_bmbt_update(cur, &PREV); 2292 if (error) 2293 goto done; 2294 } 2295 break; 2296 2297 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 2298 /* 2299 * Setting the first part of a previous oldext extent to newext. 2300 * The left neighbor is contiguous. 2301 */ 2302 LEFT.br_blockcount += new->br_blockcount; 2303 2304 old = PREV; 2305 PREV.br_startoff += new->br_blockcount; 2306 PREV.br_startblock += new->br_blockcount; 2307 PREV.br_blockcount -= new->br_blockcount; 2308 2309 xfs_iext_update_extent(ip, state, icur, &PREV); 2310 xfs_iext_prev(ifp, icur); 2311 xfs_iext_update_extent(ip, state, icur, &LEFT); 2312 2313 if (cur == NULL) 2314 rval = XFS_ILOG_DEXT; 2315 else { 2316 rval = 0; 2317 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2318 if (error) 2319 goto done; 2320 if (XFS_IS_CORRUPT(mp, i != 1)) { 2321 xfs_btree_mark_sick(cur); 2322 error = -EFSCORRUPTED; 2323 goto done; 2324 } 2325 error = xfs_bmbt_update(cur, &PREV); 2326 if (error) 2327 goto done; 2328 error = xfs_btree_decrement(cur, 0, &i); 2329 if (error) 2330 goto done; 2331 error = xfs_bmbt_update(cur, &LEFT); 2332 if (error) 2333 goto done; 2334 } 2335 break; 2336 2337 case BMAP_LEFT_FILLING: 2338 /* 2339 * Setting the first part of a previous oldext extent to newext. 2340 * The left neighbor is not contiguous. 
2341 */ 2342 old = PREV; 2343 PREV.br_startoff += new->br_blockcount; 2344 PREV.br_startblock += new->br_blockcount; 2345 PREV.br_blockcount -= new->br_blockcount; 2346 2347 xfs_iext_update_extent(ip, state, icur, &PREV); 2348 xfs_iext_insert(ip, icur, new, state); 2349 ifp->if_nextents++; 2350 2351 if (cur == NULL) 2352 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2353 else { 2354 rval = XFS_ILOG_CORE; 2355 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2356 if (error) 2357 goto done; 2358 if (XFS_IS_CORRUPT(mp, i != 1)) { 2359 xfs_btree_mark_sick(cur); 2360 error = -EFSCORRUPTED; 2361 goto done; 2362 } 2363 error = xfs_bmbt_update(cur, &PREV); 2364 if (error) 2365 goto done; 2366 cur->bc_rec.b = *new; 2367 if ((error = xfs_btree_insert(cur, &i))) 2368 goto done; 2369 if (XFS_IS_CORRUPT(mp, i != 1)) { 2370 xfs_btree_mark_sick(cur); 2371 error = -EFSCORRUPTED; 2372 goto done; 2373 } 2374 } 2375 break; 2376 2377 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2378 /* 2379 * Setting the last part of a previous oldext extent to newext. 2380 * The right neighbor is contiguous with the new allocation. 2381 */ 2382 old = PREV; 2383 PREV.br_blockcount -= new->br_blockcount; 2384 2385 RIGHT.br_startoff = new->br_startoff; 2386 RIGHT.br_startblock = new->br_startblock; 2387 RIGHT.br_blockcount += new->br_blockcount; 2388 2389 xfs_iext_update_extent(ip, state, icur, &PREV); 2390 xfs_iext_next(ifp, icur); 2391 xfs_iext_update_extent(ip, state, icur, &RIGHT); 2392 2393 if (cur == NULL) 2394 rval = XFS_ILOG_DEXT; 2395 else { 2396 rval = 0; 2397 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2398 if (error) 2399 goto done; 2400 if (XFS_IS_CORRUPT(mp, i != 1)) { 2401 xfs_btree_mark_sick(cur); 2402 error = -EFSCORRUPTED; 2403 goto done; 2404 } 2405 error = xfs_bmbt_update(cur, &PREV); 2406 if (error) 2407 goto done; 2408 error = xfs_btree_increment(cur, 0, &i); 2409 if (error) 2410 goto done; 2411 error = xfs_bmbt_update(cur, &RIGHT); 2412 if (error) 2413 goto done; 2414 } 2415 break; 2416 2417 case BMAP_RIGHT_FILLING: 2418 /* 2419 * Setting the last part of a previous oldext extent to newext. 2420 * The right neighbor is not contiguous. 2421 */ 2422 old = PREV; 2423 PREV.br_blockcount -= new->br_blockcount; 2424 2425 xfs_iext_update_extent(ip, state, icur, &PREV); 2426 xfs_iext_next(ifp, icur); 2427 xfs_iext_insert(ip, icur, new, state); 2428 ifp->if_nextents++; 2429 2430 if (cur == NULL) 2431 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2432 else { 2433 rval = XFS_ILOG_CORE; 2434 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2435 if (error) 2436 goto done; 2437 if (XFS_IS_CORRUPT(mp, i != 1)) { 2438 xfs_btree_mark_sick(cur); 2439 error = -EFSCORRUPTED; 2440 goto done; 2441 } 2442 error = xfs_bmbt_update(cur, &PREV); 2443 if (error) 2444 goto done; 2445 error = xfs_bmbt_lookup_eq(cur, new, &i); 2446 if (error) 2447 goto done; 2448 if (XFS_IS_CORRUPT(mp, i != 0)) { 2449 xfs_btree_mark_sick(cur); 2450 error = -EFSCORRUPTED; 2451 goto done; 2452 } 2453 if ((error = xfs_btree_insert(cur, &i))) 2454 goto done; 2455 if (XFS_IS_CORRUPT(mp, i != 1)) { 2456 xfs_btree_mark_sick(cur); 2457 error = -EFSCORRUPTED; 2458 goto done; 2459 } 2460 } 2461 break; 2462 2463 case 0: 2464 /* 2465 * Setting the middle part of a previous oldext extent to 2466 * newext. Contiguity is impossible here. 2467 * One extent becomes three extents. 
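 *
 * Illustration (o = oldext, n = newext):
 *
 *	+oooooooooooooooooooooo+	PREV before
 *	       +nnnnnnn+
 *	         new
 *	+oooooo+nnnnnnn+ooooooo+	PREV, r[0], r[1] after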
2468 */ 2469 old = PREV; 2470 PREV.br_blockcount = new->br_startoff - PREV.br_startoff; 2471 2472 r[0] = *new; 2473 r[1].br_startoff = new_endoff; 2474 r[1].br_blockcount = 2475 old.br_startoff + old.br_blockcount - new_endoff; 2476 r[1].br_startblock = new->br_startblock + new->br_blockcount; 2477 r[1].br_state = PREV.br_state; 2478 2479 xfs_iext_update_extent(ip, state, icur, &PREV); 2480 xfs_iext_next(ifp, icur); 2481 xfs_iext_insert(ip, icur, &r[1], state); 2482 xfs_iext_insert(ip, icur, &r[0], state); 2483 ifp->if_nextents += 2; 2484 2485 if (cur == NULL) 2486 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2487 else { 2488 rval = XFS_ILOG_CORE; 2489 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2490 if (error) 2491 goto done; 2492 if (XFS_IS_CORRUPT(mp, i != 1)) { 2493 xfs_btree_mark_sick(cur); 2494 error = -EFSCORRUPTED; 2495 goto done; 2496 } 2497 /* new right extent - oldext */ 2498 error = xfs_bmbt_update(cur, &r[1]); 2499 if (error) 2500 goto done; 2501 /* new left extent - oldext */ 2502 cur->bc_rec.b = PREV; 2503 if ((error = xfs_btree_insert(cur, &i))) 2504 goto done; 2505 if (XFS_IS_CORRUPT(mp, i != 1)) { 2506 xfs_btree_mark_sick(cur); 2507 error = -EFSCORRUPTED; 2508 goto done; 2509 } 2510 /* 2511 * Reset the cursor to the position of the new extent 2512 * we are about to insert as we can't trust it after 2513 * the previous insert. 2514 */ 2515 error = xfs_bmbt_lookup_eq(cur, new, &i); 2516 if (error) 2517 goto done; 2518 if (XFS_IS_CORRUPT(mp, i != 0)) { 2519 xfs_btree_mark_sick(cur); 2520 error = -EFSCORRUPTED; 2521 goto done; 2522 } 2523 /* new middle extent - newext */ 2524 if ((error = xfs_btree_insert(cur, &i))) 2525 goto done; 2526 if (XFS_IS_CORRUPT(mp, i != 1)) { 2527 xfs_btree_mark_sick(cur); 2528 error = -EFSCORRUPTED; 2529 goto done; 2530 } 2531 } 2532 break; 2533 2534 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2535 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2536 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: 2537 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2538 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2539 case BMAP_LEFT_CONTIG: 2540 case BMAP_RIGHT_CONTIG: 2541 /* 2542 * These cases are all impossible. 2543 */ 2544 ASSERT(0); 2545 } 2546 2547 /* update reverse mappings */ 2548 xfs_rmap_convert_extent(mp, tp, ip, whichfork, new); 2549 2550 /* convert to a btree if necessary */ 2551 if (xfs_bmap_needs_btree(ip, whichfork)) { 2552 int tmp_logflags; /* partial log flag return val */ 2553 2554 ASSERT(cur == NULL); 2555 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, 2556 &tmp_logflags, whichfork); 2557 *logflagsp |= tmp_logflags; 2558 if (error) 2559 goto done; 2560 } 2561 2562 /* clear out the allocated field, done with it now in any case. */ 2563 if (cur) { 2564 cur->bc_bmap.allocated = 0; 2565 *curp = cur; 2566 } 2567 2568 xfs_bmap_check_leaf_extents(*curp, ip, whichfork); 2569 done: 2570 *logflagsp |= rval; 2571 return error; 2572 #undef LEFT 2573 #undef RIGHT 2574 #undef PREV 2575 } 2576 2577 /* 2578 * Convert a hole to a delayed allocation. 
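 *
 * A minimal caller sketch (illustrative only; the values and the "indlen"
 * local are assumptions, not taken from any real caller): to turn an
 * 8-block hole at file offset 100 into a delalloc reservation, build the
 * irec and hand it in with the cursor positioned at the hole:
 *
 *	struct xfs_bmbt_irec new = {
 *		.br_startoff	= 100,
 *		.br_blockcount	= 8,
 *		.br_startblock	= nullstartblock(indlen),
 *		.br_state	= XFS_EXT_NORM,
 *	};
 *	xfs_bmap_add_extent_hole_delay(ip, XFS_DATA_FORK, &icur, &new);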
2579 */ 2580 STATIC void 2581 xfs_bmap_add_extent_hole_delay( 2582 xfs_inode_t *ip, /* incore inode pointer */ 2583 int whichfork, 2584 struct xfs_iext_cursor *icur, 2585 xfs_bmbt_irec_t *new) /* new data to add to file extents */ 2586 { 2587 struct xfs_ifork *ifp; /* inode fork pointer */ 2588 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2589 xfs_filblks_t newlen=0; /* new indirect size */ 2590 xfs_filblks_t oldlen=0; /* old indirect size */ 2591 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2592 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2593 xfs_filblks_t temp; /* temp for indirect calculations */ 2594 2595 ifp = xfs_ifork_ptr(ip, whichfork); 2596 ASSERT(isnullstartblock(new->br_startblock)); 2597 2598 /* 2599 * Check and set flags if this segment has a left neighbor 2600 */ 2601 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2602 state |= BMAP_LEFT_VALID; 2603 if (isnullstartblock(left.br_startblock)) 2604 state |= BMAP_LEFT_DELAY; 2605 } 2606 2607 /* 2608 * Check and set flags if the current (right) segment exists. 2609 * If it doesn't exist, we're converting the hole at end-of-file. 2610 */ 2611 if (xfs_iext_get_extent(ifp, icur, &right)) { 2612 state |= BMAP_RIGHT_VALID; 2613 if (isnullstartblock(right.br_startblock)) 2614 state |= BMAP_RIGHT_DELAY; 2615 } 2616 2617 /* 2618 * Set contiguity flags on the left and right neighbors. 2619 * Don't let extents get too large, even if the pieces are contiguous. 2620 */ 2621 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && 2622 left.br_startoff + left.br_blockcount == new->br_startoff && 2623 left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN) 2624 state |= BMAP_LEFT_CONTIG; 2625 2626 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && 2627 new->br_startoff + new->br_blockcount == right.br_startoff && 2628 new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2629 (!(state & BMAP_LEFT_CONTIG) || 2630 (left.br_blockcount + new->br_blockcount + 2631 right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))) 2632 state |= BMAP_RIGHT_CONTIG; 2633 2634 /* 2635 * Switch out based on the contiguity flags. 2636 */ 2637 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2638 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2639 /* 2640 * New allocation is contiguous with delayed allocations 2641 * on the left and on the right. 2642 * Merge all three into a single extent record. 2643 */ 2644 temp = left.br_blockcount + new->br_blockcount + 2645 right.br_blockcount; 2646 2647 oldlen = startblockval(left.br_startblock) + 2648 startblockval(new->br_startblock) + 2649 startblockval(right.br_startblock); 2650 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2651 oldlen); 2652 left.br_startblock = nullstartblock(newlen); 2653 left.br_blockcount = temp; 2654 2655 xfs_iext_remove(ip, icur, state); 2656 xfs_iext_prev(ifp, icur); 2657 xfs_iext_update_extent(ip, state, icur, &left); 2658 break; 2659 2660 case BMAP_LEFT_CONTIG: 2661 /* 2662 * New allocation is contiguous with a delayed allocation 2663 * on the left. 2664 * Merge the new allocation with the left neighbor. 
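 *
 * Worked example (illustrative numbers): if left covers 8 blocks with 2
 * indirect blocks reserved and new covers 4 blocks with 1 reserved, the
 * merged extent covers 12 blocks and keeps
 * min(xfs_bmap_worst_indlen(ip, 12), 3) blocks reserved; any excess is
 * returned to the free pool after the switch below.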
2665 */ 2666 temp = left.br_blockcount + new->br_blockcount; 2667 2668 oldlen = startblockval(left.br_startblock) + 2669 startblockval(new->br_startblock); 2670 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2671 oldlen); 2672 left.br_blockcount = temp; 2673 left.br_startblock = nullstartblock(newlen); 2674 2675 xfs_iext_prev(ifp, icur); 2676 xfs_iext_update_extent(ip, state, icur, &left); 2677 break; 2678 2679 case BMAP_RIGHT_CONTIG: 2680 /* 2681 * New allocation is contiguous with a delayed allocation 2682 * on the right. 2683 * Merge the new allocation with the right neighbor. 2684 */ 2685 temp = new->br_blockcount + right.br_blockcount; 2686 oldlen = startblockval(new->br_startblock) + 2687 startblockval(right.br_startblock); 2688 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2689 oldlen); 2690 right.br_startoff = new->br_startoff; 2691 right.br_startblock = nullstartblock(newlen); 2692 right.br_blockcount = temp; 2693 xfs_iext_update_extent(ip, state, icur, &right); 2694 break; 2695 2696 case 0: 2697 /* 2698 * New allocation is not contiguous with another 2699 * delayed allocation. 2700 * Insert a new entry. 2701 */ 2702 oldlen = newlen = 0; 2703 xfs_iext_insert(ip, icur, new, state); 2704 break; 2705 } 2706 if (oldlen != newlen) { 2707 ASSERT(oldlen > newlen); 2708 xfs_add_fdblocks(ip->i_mount, oldlen - newlen); 2709 2710 /* 2711 * Nothing to do for disk quota accounting here. 2712 */ 2713 xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen); 2714 } 2715 } 2716 2717 /* 2718 * Convert a hole to a real allocation. 2719 */ 2720 STATIC int /* error */ 2721 xfs_bmap_add_extent_hole_real( 2722 struct xfs_trans *tp, 2723 struct xfs_inode *ip, 2724 int whichfork, 2725 struct xfs_iext_cursor *icur, 2726 struct xfs_btree_cur **curp, 2727 struct xfs_bmbt_irec *new, 2728 int *logflagsp, 2729 uint32_t flags) 2730 { 2731 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 2732 struct xfs_mount *mp = ip->i_mount; 2733 struct xfs_btree_cur *cur = *curp; 2734 int error; /* error return value */ 2735 int i; /* temp state */ 2736 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2737 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2738 int rval=0; /* return value (logging flags) */ 2739 uint32_t state = xfs_bmap_fork_to_state(whichfork); 2740 struct xfs_bmbt_irec old; 2741 2742 ASSERT(!isnullstartblock(new->br_startblock)); 2743 ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL)); 2744 2745 XFS_STATS_INC(mp, xs_add_exlist); 2746 2747 /* 2748 * Check and set flags if this segment has a left neighbor. 2749 */ 2750 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2751 state |= BMAP_LEFT_VALID; 2752 if (isnullstartblock(left.br_startblock)) 2753 state |= BMAP_LEFT_DELAY; 2754 } 2755 2756 /* 2757 * Check and set flags if this segment has a current value. 2758 * Not true if we're inserting into the "hole" at eof. 2759 */ 2760 if (xfs_iext_get_extent(ifp, icur, &right)) { 2761 state |= BMAP_RIGHT_VALID; 2762 if (isnullstartblock(right.br_startblock)) 2763 state |= BMAP_RIGHT_DELAY; 2764 } 2765 2766 /* 2767 * We're inserting a real allocation between "left" and "right". 2768 * Set the contiguity flags. Don't let extents get too large. 
2769 */ 2770 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2771 left.br_startoff + left.br_blockcount == new->br_startoff && 2772 left.br_startblock + left.br_blockcount == new->br_startblock && 2773 left.br_state == new->br_state && 2774 left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2775 xfs_bmap_same_rtgroup(ip, whichfork, &left, new)) 2776 state |= BMAP_LEFT_CONTIG; 2777 2778 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2779 new->br_startoff + new->br_blockcount == right.br_startoff && 2780 new->br_startblock + new->br_blockcount == right.br_startblock && 2781 new->br_state == right.br_state && 2782 new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN && 2783 (!(state & BMAP_LEFT_CONTIG) || 2784 left.br_blockcount + new->br_blockcount + 2785 right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) && 2786 xfs_bmap_same_rtgroup(ip, whichfork, new, &right)) 2787 state |= BMAP_RIGHT_CONTIG; 2788 2789 error = 0; 2790 /* 2791 * Select which case we're in here, and implement it. 2792 */ 2793 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2794 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2795 /* 2796 * New allocation is contiguous with real allocations on the 2797 * left and on the right. 2798 * Merge all three into a single extent record. 2799 */ 2800 left.br_blockcount += new->br_blockcount + right.br_blockcount; 2801 2802 xfs_iext_remove(ip, icur, state); 2803 xfs_iext_prev(ifp, icur); 2804 xfs_iext_update_extent(ip, state, icur, &left); 2805 ifp->if_nextents--; 2806 2807 if (cur == NULL) { 2808 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2809 } else { 2810 rval = XFS_ILOG_CORE; 2811 error = xfs_bmbt_lookup_eq(cur, &right, &i); 2812 if (error) 2813 goto done; 2814 if (XFS_IS_CORRUPT(mp, i != 1)) { 2815 xfs_btree_mark_sick(cur); 2816 error = -EFSCORRUPTED; 2817 goto done; 2818 } 2819 error = xfs_btree_delete(cur, &i); 2820 if (error) 2821 goto done; 2822 if (XFS_IS_CORRUPT(mp, i != 1)) { 2823 xfs_btree_mark_sick(cur); 2824 error = -EFSCORRUPTED; 2825 goto done; 2826 } 2827 error = xfs_btree_decrement(cur, 0, &i); 2828 if (error) 2829 goto done; 2830 if (XFS_IS_CORRUPT(mp, i != 1)) { 2831 xfs_btree_mark_sick(cur); 2832 error = -EFSCORRUPTED; 2833 goto done; 2834 } 2835 error = xfs_bmbt_update(cur, &left); 2836 if (error) 2837 goto done; 2838 } 2839 break; 2840 2841 case BMAP_LEFT_CONTIG: 2842 /* 2843 * New allocation is contiguous with a real allocation 2844 * on the left. 2845 * Merge the new allocation with the left neighbor. 2846 */ 2847 old = left; 2848 left.br_blockcount += new->br_blockcount; 2849 2850 xfs_iext_prev(ifp, icur); 2851 xfs_iext_update_extent(ip, state, icur, &left); 2852 2853 if (cur == NULL) { 2854 rval = xfs_ilog_fext(whichfork); 2855 } else { 2856 rval = 0; 2857 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2858 if (error) 2859 goto done; 2860 if (XFS_IS_CORRUPT(mp, i != 1)) { 2861 xfs_btree_mark_sick(cur); 2862 error = -EFSCORRUPTED; 2863 goto done; 2864 } 2865 error = xfs_bmbt_update(cur, &left); 2866 if (error) 2867 goto done; 2868 } 2869 break; 2870 2871 case BMAP_RIGHT_CONTIG: 2872 /* 2873 * New allocation is contiguous with a real allocation 2874 * on the right. 2875 * Merge the new allocation with the right neighbor. 
2876 */ 2877 old = right; 2878 2879 right.br_startoff = new->br_startoff; 2880 right.br_startblock = new->br_startblock; 2881 right.br_blockcount += new->br_blockcount; 2882 xfs_iext_update_extent(ip, state, icur, &right); 2883 2884 if (cur == NULL) { 2885 rval = xfs_ilog_fext(whichfork); 2886 } else { 2887 rval = 0; 2888 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2889 if (error) 2890 goto done; 2891 if (XFS_IS_CORRUPT(mp, i != 1)) { 2892 xfs_btree_mark_sick(cur); 2893 error = -EFSCORRUPTED; 2894 goto done; 2895 } 2896 error = xfs_bmbt_update(cur, &right); 2897 if (error) 2898 goto done; 2899 } 2900 break; 2901 2902 case 0: 2903 /* 2904 * New allocation is not contiguous with another 2905 * real allocation. 2906 * Insert a new entry. 2907 */ 2908 xfs_iext_insert(ip, icur, new, state); 2909 ifp->if_nextents++; 2910 2911 if (cur == NULL) { 2912 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2913 } else { 2914 rval = XFS_ILOG_CORE; 2915 error = xfs_bmbt_lookup_eq(cur, new, &i); 2916 if (error) 2917 goto done; 2918 if (XFS_IS_CORRUPT(mp, i != 0)) { 2919 xfs_btree_mark_sick(cur); 2920 error = -EFSCORRUPTED; 2921 goto done; 2922 } 2923 error = xfs_btree_insert(cur, &i); 2924 if (error) 2925 goto done; 2926 if (XFS_IS_CORRUPT(mp, i != 1)) { 2927 xfs_btree_mark_sick(cur); 2928 error = -EFSCORRUPTED; 2929 goto done; 2930 } 2931 } 2932 break; 2933 } 2934 2935 /* add reverse mapping unless caller opted out */ 2936 if (!(flags & XFS_BMAPI_NORMAP)) 2937 xfs_rmap_map_extent(tp, ip, whichfork, new); 2938 2939 /* convert to a btree if necessary */ 2940 if (xfs_bmap_needs_btree(ip, whichfork)) { 2941 int tmp_logflags; /* partial log flag return val */ 2942 2943 ASSERT(cur == NULL); 2944 error = xfs_bmap_extents_to_btree(tp, ip, curp, 0, 2945 &tmp_logflags, whichfork); 2946 *logflagsp |= tmp_logflags; 2947 cur = *curp; 2948 if (error) 2949 goto done; 2950 } 2951 2952 /* clear out the allocated field, done with it now in any case. */ 2953 if (cur) 2954 cur->bc_bmap.allocated = 0; 2955 2956 xfs_bmap_check_leaf_extents(cur, ip, whichfork); 2957 done: 2958 *logflagsp |= rval; 2959 return error; 2960 } 2961 2962 /* 2963 * Functions used in the extent read, allocate and remove paths 2964 */ 2965 2966 /* 2967 * Adjust the size of the new extent based on i_extsize and rt extsize. 2968 */ 2969 int 2970 xfs_bmap_extsize_align( 2971 xfs_mount_t *mp, 2972 xfs_bmbt_irec_t *gotp, /* next extent pointer */ 2973 xfs_bmbt_irec_t *prevp, /* previous extent pointer */ 2974 xfs_extlen_t extsz, /* align to this extent size */ 2975 int rt, /* is this a realtime inode? */ 2976 int eof, /* is extent at end-of-file? */ 2977 int delay, /* creating delalloc extent? */ 2978 int convert, /* overwriting unwritten extent? */ 2979 xfs_fileoff_t *offp, /* in/out: aligned offset */ 2980 xfs_extlen_t *lenp) /* in/out: aligned length */ 2981 { 2982 xfs_fileoff_t orig_off; /* original offset */ 2983 xfs_extlen_t orig_alen; /* original length */ 2984 xfs_fileoff_t orig_end; /* original off+len */ 2985 xfs_fileoff_t nexto; /* next file offset */ 2986 xfs_fileoff_t prevo; /* previous file offset */ 2987 xfs_fileoff_t align_off; /* temp for offset */ 2988 xfs_extlen_t align_alen; /* temp for length */ 2989 xfs_extlen_t temp; /* temp for calculations */ 2990 2991 if (convert) 2992 return 0; 2993 2994 orig_off = align_off = *offp; 2995 orig_alen = align_alen = *lenp; 2996 orig_end = orig_off + orig_alen; 2997 2998 /* 2999 * If this request overlaps an existing extent, then don't 3000 * attempt to perform any additional alignment. 
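 * (If the requested range already sits wholly inside the mapped extent
 * at gotp, the blocks are allocated and realignment cannot change them,
 * so the request is returned untouched.)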
3001 */
3002 if (!delay && !eof &&
3003 (orig_off >= gotp->br_startoff) &&
3004 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
3005 return 0;
3006 }
3007
3008 /*
3009 * If the file offset is unaligned vs. the extent size
3010 * we need to align it. This will be possible unless
3011 * the file was previously written with a kernel that didn't
3012 * perform this alignment, or if a truncate shot us in the
3013 * foot.
3014 */
3015 div_u64_rem(orig_off, extsz, &temp);
3016 if (temp) {
3017 align_alen += temp;
3018 align_off -= temp;
3019 }
3020
3021 /* Same adjustment for the end of the requested area. */
3022 temp = (align_alen % extsz);
3023 if (temp)
3024 align_alen += extsz - temp;
3025
3026 /*
3027 * For large extent hint sizes, the aligned extent might be larger than
3028 * XFS_MAX_BMBT_EXTLEN. In that case, reduce the size by an extsz so
3029 * that it pulls the length back under XFS_MAX_BMBT_EXTLEN. The outer
3030 * allocation loops handle short allocation just fine, so it is safe to
3031 * do this. We only want to do it when we are forced to, though, because
3032 * it means more allocation operations are required.
3033 */
3034 while (align_alen > XFS_MAX_BMBT_EXTLEN)
3035 align_alen -= extsz;
3036 ASSERT(align_alen <= XFS_MAX_BMBT_EXTLEN);
3037
3038 /*
3039 * If the previous block overlaps with this proposed allocation
3040 * then move the start forward without adjusting the length.
3041 */
3042 if (prevp->br_startoff != NULLFILEOFF) {
3043 if (prevp->br_startblock == HOLESTARTBLOCK)
3044 prevo = prevp->br_startoff;
3045 else
3046 prevo = prevp->br_startoff + prevp->br_blockcount;
3047 } else
3048 prevo = 0;
3049 if (align_off != orig_off && align_off < prevo)
3050 align_off = prevo;
3051 /*
3052 * If the next block overlaps with this proposed allocation
3053 * then move the start back without adjusting the length,
3054 * but not before offset 0.
3055 * This may of course make the start overlap the previous block,
3056 * and if we hit the offset 0 limit then the next block
3057 * can still overlap too.
3058 */
3059 if (!eof && gotp->br_startoff != NULLFILEOFF) {
3060 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
3061 (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
3062 nexto = gotp->br_startoff + gotp->br_blockcount;
3063 else
3064 nexto = gotp->br_startoff;
3065 } else
3066 nexto = NULLFILEOFF;
3067 if (!eof &&
3068 align_off + align_alen != orig_end &&
3069 align_off + align_alen > nexto)
3070 align_off = nexto > align_alen ? nexto - align_alen : 0;
3071 /*
3072 * If we're now overlapping the next or previous extent, that
3073 * means we can't fit an extsz piece in this hole. Just move
3074 * the start forward to the first valid spot and set
3075 * the length so we hit the end.
3076 */
3077 if (align_off != orig_off && align_off < prevo)
3078 align_off = prevo;
3079 if (align_off + align_alen != orig_end &&
3080 align_off + align_alen > nexto &&
3081 nexto != NULLFILEOFF) {
3082 ASSERT(nexto > prevo);
3083 align_alen = nexto - align_off;
3084 }
3085
3086 /*
3087 * If realtime, and the result isn't a multiple of the realtime
3088 * extent size, we need to remove blocks until it is.
3089 */
3090 if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
3091 /*
3092 * We're not covering the original request, or
3093 * we won't be able to once we fix the length.
3094 */
3095 if (orig_off < align_off ||
3096 orig_end > align_off + align_alen ||
3097 align_alen - temp < orig_alen)
3098 return -EINVAL;
3099 /*
3100 * Try to fix it by moving the start up.
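 * (That is, drop the leading "temp" blocks from the aligned range; this
 * is only done when align_off + temp still sits at or before orig_off,
 * so the original request remains covered.)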
3101 */
3102 if (align_off + temp <= orig_off) {
3103 align_alen -= temp;
3104 align_off += temp;
3105 }
3106 /*
3107 * Try to fix it by moving the end in.
3108 */
3109 else if (align_off + align_alen - temp >= orig_end)
3110 align_alen -= temp;
3111 /*
3112 * Set the start to the minimum then trim the length.
3113 */
3114 else {
3115 align_alen -= orig_off - align_off;
3116 align_off = orig_off;
3117 align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
3118 }
3119 /*
3120 * Result doesn't cover the request, fail it.
3121 */
3122 if (orig_off < align_off || orig_end > align_off + align_alen)
3123 return -EINVAL;
3124 } else {
3125 ASSERT(orig_off >= align_off);
3126 /* see XFS_MAX_BMBT_EXTLEN handling above */
3127 ASSERT(orig_end <= align_off + align_alen ||
3128 align_alen + extsz > XFS_MAX_BMBT_EXTLEN);
3129 }
3130
3131 #ifdef DEBUG
3132 if (!eof && gotp->br_startoff != NULLFILEOFF)
3133 ASSERT(align_off + align_alen <= gotp->br_startoff);
3134 if (prevp->br_startoff != NULLFILEOFF)
3135 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
3136 #endif
3137
3138 *lenp = align_alen;
3139 *offp = align_off;
3140 return 0;
3141 }
3142
3143 static inline bool
3144 xfs_bmap_adjacent_valid(
3145 struct xfs_bmalloca *ap,
3146 xfs_fsblock_t x,
3147 xfs_fsblock_t y)
3148 {
3149 struct xfs_mount *mp = ap->ip->i_mount;
3150
3151 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3152 (ap->datatype & XFS_ALLOC_USERDATA)) {
3153 if (!xfs_has_rtgroups(mp))
3154 return x < mp->m_sb.sb_rblocks;
3155
3156 return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
3157 xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
3158 xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
3159
3160 }
3161
3162 return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
3163 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
3164 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks;
3165 }
3166
3167 #define XFS_ALLOC_GAP_UNITS 4
3168
3169 /* returns true if ap->blkno was modified */
3170 bool
3171 xfs_bmap_adjacent(
3172 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3173 {
3174 xfs_fsblock_t adjust; /* adjustment to block numbers */
3175
3176 /*
3177 * If allocating at eof, and there's a previous real block,
3178 * try to use its last block as our starting point.
3179 */
3180 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3181 !isnullstartblock(ap->prev.br_startblock) &&
3182 xfs_bmap_adjacent_valid(ap,
3183 ap->prev.br_startblock + ap->prev.br_blockcount,
3184 ap->prev.br_startblock)) {
3185 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3186 /*
3187 * Adjust for the gap between prevp and us.
3188 */
3189 adjust = ap->offset -
3190 (ap->prev.br_startoff + ap->prev.br_blockcount);
3191 if (adjust && xfs_bmap_adjacent_valid(ap, ap->blkno + adjust,
3192 ap->prev.br_startblock))
3193 ap->blkno += adjust;
3194 return true;
3195 }
3196 /*
3197 * If not at eof, then compare the two neighbor blocks.
3198 * Figure out whether either one gives us a good starting point,
3199 * and pick the better one.
3200 */
3201 if (!ap->eof) {
3202 xfs_fsblock_t gotbno; /* right side block number */
3203 xfs_fsblock_t gotdiff=0; /* right side difference */
3204 xfs_fsblock_t prevbno; /* left side block number */
3205 xfs_fsblock_t prevdiff=0; /* left side difference */
3206
3207 /*
3208 * If there's a previous (left) block, select a requested
3209 * start block based on it.
3210 */ 3211 if (ap->prev.br_startoff != NULLFILEOFF && 3212 !isnullstartblock(ap->prev.br_startblock) && 3213 (prevbno = ap->prev.br_startblock + 3214 ap->prev.br_blockcount) && 3215 xfs_bmap_adjacent_valid(ap, prevbno, 3216 ap->prev.br_startblock)) { 3217 /* 3218 * Calculate gap to end of previous block. 3219 */ 3220 adjust = prevdiff = ap->offset - 3221 (ap->prev.br_startoff + 3222 ap->prev.br_blockcount); 3223 /* 3224 * Figure the startblock based on the previous block's 3225 * end and the gap size. 3226 * Heuristic! 3227 * If the gap is large relative to the piece we're 3228 * allocating, or using it gives us an invalid block 3229 * number, then just use the end of the previous block. 3230 */ 3231 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3232 xfs_bmap_adjacent_valid(ap, prevbno + prevdiff, 3233 ap->prev.br_startblock)) 3234 prevbno += adjust; 3235 else 3236 prevdiff += adjust; 3237 } 3238 /* 3239 * No previous block or can't follow it, just default. 3240 */ 3241 else 3242 prevbno = NULLFSBLOCK; 3243 /* 3244 * If there's a following (right) block, select a requested 3245 * start block based on it. 3246 */ 3247 if (!isnullstartblock(ap->got.br_startblock)) { 3248 /* 3249 * Calculate gap to start of next block. 3250 */ 3251 adjust = gotdiff = ap->got.br_startoff - ap->offset; 3252 /* 3253 * Figure the startblock based on the next block's 3254 * start and the gap size. 3255 */ 3256 gotbno = ap->got.br_startblock; 3257 /* 3258 * Heuristic! 3259 * If the gap is large relative to the piece we're 3260 * allocating, or using it gives us an invalid block 3261 * number, then just use the start of the next block 3262 * offset by our length. 3263 */ 3264 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3265 xfs_bmap_adjacent_valid(ap, gotbno - gotdiff, 3266 gotbno)) 3267 gotbno -= adjust; 3268 else if (xfs_bmap_adjacent_valid(ap, gotbno - ap->length, 3269 gotbno)) { 3270 gotbno -= ap->length; 3271 gotdiff += adjust - ap->length; 3272 } else 3273 gotdiff += adjust; 3274 } 3275 /* 3276 * No next block, just default. 3277 */ 3278 else 3279 gotbno = NULLFSBLOCK; 3280 /* 3281 * If both valid, pick the better one, else the only good 3282 * one, else ap->blkno is already set (to 0 or the inode block). 3283 */ 3284 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) { 3285 ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; 3286 return true; 3287 } 3288 if (prevbno != NULLFSBLOCK) { 3289 ap->blkno = prevbno; 3290 return true; 3291 } 3292 if (gotbno != NULLFSBLOCK) { 3293 ap->blkno = gotbno; 3294 return true; 3295 } 3296 } 3297 3298 return false; 3299 } 3300 3301 int 3302 xfs_bmap_longest_free_extent( 3303 struct xfs_perag *pag, 3304 struct xfs_trans *tp, 3305 xfs_extlen_t *blen) 3306 { 3307 xfs_extlen_t longest; 3308 int error = 0; 3309 3310 if (!xfs_perag_initialised_agf(pag)) { 3311 error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK, 3312 NULL); 3313 if (error) 3314 return error; 3315 } 3316 3317 longest = xfs_alloc_longest_free_extent(pag, 3318 xfs_alloc_min_freelist(pag_mount(pag), pag), 3319 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); 3320 if (*blen < longest) 3321 *blen = longest; 3322 3323 return 0; 3324 } 3325 3326 static xfs_extlen_t 3327 xfs_bmap_select_minlen( 3328 struct xfs_bmalloca *ap, 3329 struct xfs_alloc_arg *args, 3330 xfs_extlen_t blen) 3331 { 3332 3333 /* 3334 * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is 3335 * possible that there is enough contiguous free space for this request. 
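 * That is, AGs whose AGF lock could not be taken were skipped, so blen
 * may underestimate the longest free extent in the filesystem; a value
 * below the minimum length does not prove the allocation must fail.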
3336 */ 3337 if (blen < ap->minlen) 3338 return ap->minlen; 3339 3340 /* 3341 * If the best seen length is less than the request length, 3342 * use the best as the minimum, otherwise we've got the maxlen we 3343 * were asked for. 3344 */ 3345 if (blen < args->maxlen) 3346 return blen; 3347 return args->maxlen; 3348 } 3349 3350 static int 3351 xfs_bmap_btalloc_select_lengths( 3352 struct xfs_bmalloca *ap, 3353 struct xfs_alloc_arg *args, 3354 xfs_extlen_t *blen) 3355 { 3356 struct xfs_mount *mp = args->mp; 3357 struct xfs_perag *pag; 3358 xfs_agnumber_t agno, startag; 3359 int error = 0; 3360 3361 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { 3362 args->total = ap->minlen; 3363 args->minlen = ap->minlen; 3364 return 0; 3365 } 3366 3367 args->total = ap->total; 3368 startag = XFS_FSB_TO_AGNO(mp, ap->blkno); 3369 if (startag == NULLAGNUMBER) 3370 startag = 0; 3371 3372 *blen = 0; 3373 for_each_perag_wrap(mp, startag, agno, pag) { 3374 error = xfs_bmap_longest_free_extent(pag, args->tp, blen); 3375 if (error && error != -EAGAIN) 3376 break; 3377 error = 0; 3378 if (*blen >= args->maxlen) 3379 break; 3380 } 3381 if (pag) 3382 xfs_perag_rele(pag); 3383 3384 args->minlen = xfs_bmap_select_minlen(ap, args, *blen); 3385 return error; 3386 } 3387 3388 /* Update all inode and quota accounting for the allocation we just did. */ 3389 void 3390 xfs_bmap_alloc_account( 3391 struct xfs_bmalloca *ap) 3392 { 3393 bool isrt = XFS_IS_REALTIME_INODE(ap->ip) && 3394 !(ap->flags & XFS_BMAPI_ATTRFORK); 3395 uint fld; 3396 3397 if (ap->flags & XFS_BMAPI_COWFORK) { 3398 /* 3399 * COW fork blocks are in-core only and thus are treated as 3400 * in-core quota reservation (like delalloc blocks) even when 3401 * converted to real blocks. The quota reservation is not 3402 * accounted to disk until blocks are remapped to the data 3403 * fork. So if these blocks were previously delalloc, we 3404 * already have quota reservation and there's nothing to do 3405 * yet. 3406 */ 3407 if (ap->wasdel) { 3408 xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); 3409 return; 3410 } 3411 3412 /* 3413 * Otherwise, we've allocated blocks in a hole. The transaction 3414 * has acquired in-core quota reservation for this extent. 3415 * Rather than account these as real blocks, however, we reduce 3416 * the transaction quota reservation based on the allocation. 3417 * This essentially transfers the transaction quota reservation 3418 * to that of a delalloc extent. 3419 */ 3420 ap->ip->i_delayed_blks += ap->length; 3421 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ? 3422 XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS, 3423 -(long)ap->length); 3424 return; 3425 } 3426 3427 /* data/attr fork only */ 3428 ap->ip->i_nblocks += ap->length; 3429 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3430 if (ap->wasdel) { 3431 ap->ip->i_delayed_blks -= ap->length; 3432 xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0); 3433 fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT; 3434 } else { 3435 fld = isrt ? 
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 3436 } 3437 3438 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length); 3439 } 3440 3441 static int 3442 xfs_bmap_compute_alignments( 3443 struct xfs_bmalloca *ap, 3444 struct xfs_alloc_arg *args) 3445 { 3446 struct xfs_mount *mp = args->mp; 3447 xfs_extlen_t align = 0; /* minimum allocation alignment */ 3448 int stripe_align = 0; 3449 3450 /* stripe alignment for allocation is determined by mount parameters */ 3451 if (mp->m_swidth && xfs_has_swalloc(mp)) 3452 stripe_align = mp->m_swidth; 3453 else if (mp->m_dalign) 3454 stripe_align = mp->m_dalign; 3455 3456 if (ap->flags & XFS_BMAPI_COWFORK) 3457 align = xfs_get_cowextsz_hint(ap->ip); 3458 else if (ap->datatype & XFS_ALLOC_USERDATA) 3459 align = xfs_get_extsz_hint(ap->ip); 3460 if (align) { 3461 if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, 3462 ap->eof, 0, ap->conv, &ap->offset, 3463 &ap->length)) 3464 ASSERT(0); 3465 ASSERT(ap->length); 3466 } 3467 3468 /* apply extent size hints if obtained earlier */ 3469 if (align) { 3470 args->prod = align; 3471 div_u64_rem(ap->offset, args->prod, &args->mod); 3472 if (args->mod) 3473 args->mod = args->prod - args->mod; 3474 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) { 3475 args->prod = 1; 3476 args->mod = 0; 3477 } else { 3478 args->prod = PAGE_SIZE >> mp->m_sb.sb_blocklog; 3479 div_u64_rem(ap->offset, args->prod, &args->mod); 3480 if (args->mod) 3481 args->mod = args->prod - args->mod; 3482 } 3483 3484 return stripe_align; 3485 } 3486 3487 static void 3488 xfs_bmap_process_allocated_extent( 3489 struct xfs_bmalloca *ap, 3490 struct xfs_alloc_arg *args, 3491 xfs_fileoff_t orig_offset, 3492 xfs_extlen_t orig_length) 3493 { 3494 ap->blkno = args->fsbno; 3495 ap->length = args->len; 3496 /* 3497 * If the extent size hint is active, we tried to round the 3498 * caller's allocation request offset down to extsz and the 3499 * length up to another extsz boundary. If we found a free 3500 * extent we mapped it in starting at this new offset. If the 3501 * newly mapped space isn't long enough to cover any of the 3502 * range of offsets that was originally requested, move the 3503 * mapping up so that we can fill as much of the caller's 3504 * original request as possible. Free space is apparently 3505 * very fragmented so we're unlikely to be able to satisfy the 3506 * hints anyway. 3507 */ 3508 if (ap->length <= orig_length) 3509 ap->offset = orig_offset; 3510 else if (ap->offset + ap->length < orig_offset + orig_length) 3511 ap->offset = orig_offset + orig_length - ap->length; 3512 xfs_bmap_alloc_account(ap); 3513 } 3514 3515 static int 3516 xfs_bmap_exact_minlen_extent_alloc( 3517 struct xfs_bmalloca *ap, 3518 struct xfs_alloc_arg *args) 3519 { 3520 if (ap->minlen != 1) { 3521 args->fsbno = NULLFSBLOCK; 3522 return 0; 3523 } 3524 3525 args->alloc_minlen_only = 1; 3526 args->minlen = args->maxlen = ap->minlen; 3527 args->total = ap->total; 3528 3529 /* 3530 * Unlike the longest extent available in an AG, we don't track 3531 * the length of an AG's shortest extent. 3532 * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and 3533 * hence we can afford to start traversing from the 0th AG since 3534 * we need not be concerned about a drop in performance in 3535 * "debug only" code paths. 
3536 */
3537 ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
3538
3539 /*
3540 * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
3541 * iteration and then drops args->total to args->minlen, which might be
3542 * required to find an allocation for the transaction reservation when
3543 * the file system is very full.
3544 */
3545 return xfs_bmap_btalloc_low_space(ap, args);
3546 }
3547
3548 /*
3549 * If we are not low on available data blocks and we are allocating at
3550 * EOF, optimise allocation for contiguous file extension and/or stripe
3551 * alignment of the new extent.
3552 *
3553 * NOTE: ap->aeof is only set if the allocation length is >= the
3554 * stripe unit and the allocation offset is at the end of file.
3555 */
3556 static int
3557 xfs_bmap_btalloc_at_eof(
3558 struct xfs_bmalloca *ap,
3559 struct xfs_alloc_arg *args,
3560 xfs_extlen_t blen,
3561 int stripe_align,
3562 bool ag_only)
3563 {
3564 struct xfs_mount *mp = args->mp;
3565 struct xfs_perag *caller_pag = args->pag;
3566 int error;
3567
3568 /*
3569 * If there are already extents in the file, try an exact EOF block
3570 * allocation to extend the file as a contiguous extent. If that fails,
3571 * or it's the first allocation in a file, just try for a stripe aligned
3572 * allocation.
3573 */
3574 if (ap->offset) {
3575 xfs_extlen_t nextminlen = 0;
3576
3577 /*
3578 * Compute the minlen+alignment for the next case. Set slop so
3579 * that the value of minlen+alignment+slop doesn't go up between
3580 * the calls.
3581 */
3582 args->alignment = 1;
3583 if (blen > stripe_align && blen <= args->maxlen)
3584 nextminlen = blen - stripe_align;
3585 else
3586 nextminlen = args->minlen;
3587 if (nextminlen + stripe_align > args->minlen + 1)
3588 args->minalignslop = nextminlen + stripe_align -
3589 args->minlen - 1;
3590 else
3591 args->minalignslop = 0;
3592
3593 if (!caller_pag)
3594 args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
3595 error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
3596 if (!caller_pag) {
3597 xfs_perag_put(args->pag);
3598 args->pag = NULL;
3599 }
3600 if (error)
3601 return error;
3602
3603 if (args->fsbno != NULLFSBLOCK)
3604 return 0;
3605 /*
3606 * Exact allocation failed. Reset to try an aligned allocation
3607 * according to the original allocation specification.
3608 */
3609 args->alignment = stripe_align;
3610 args->minlen = nextminlen;
3611 args->minalignslop = 0;
3612 } else {
3613 /*
3614 * Adjust minlen to try to preserve alignment if we
3615 * can't guarantee an aligned maxlen extent.
3616 */
3617 args->alignment = stripe_align;
3618 if (blen > args->alignment &&
3619 blen <= args->maxlen + args->alignment)
3620 args->minlen = blen - args->alignment;
3621 args->minalignslop = 0;
3622 }
3623
3624 if (ag_only) {
3625 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3626 } else {
3627 args->pag = NULL;
3628 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3629 ASSERT(args->pag == NULL);
3630 args->pag = caller_pag;
3631 }
3632 if (error)
3633 return error;
3634
3635 if (args->fsbno != NULLFSBLOCK)
3636 return 0;
3637
3638 /*
3639 * Allocation failed, so return the allocation args to their
3640 * original non-aligned state so the caller can proceed on allocation
3641 * failure as if this function was never called.
3642 */
3643 args->alignment = 1;
3644 return 0;
3645 }
3646
3647 /*
3648 * We have failed multiple allocation attempts so we are now in a low
3649 * space allocation situation. Try a locality-first, full-filesystem,
3650 * minimum-length allocation whilst still maintaining the necessary
3651 * total block reservation requirements.
3652 *
3653 * If that fails, we are now critically low on space, so perform a last resort
3654 * allocation attempt: no reserve, no locality, blocking, minimum length, full
3655 * filesystem free space scan. We also indicate to future allocations in this
3656 * transaction that we are critically low on space so they don't waste time on
3657 * allocation modes that are unlikely to succeed.
3658 */
3659 int
3660 xfs_bmap_btalloc_low_space(
3661 struct xfs_bmalloca *ap,
3662 struct xfs_alloc_arg *args)
3663 {
3664 int error;
3665
3666 if (args->minlen > ap->minlen) {
3667 args->minlen = ap->minlen;
3668 error = xfs_alloc_vextent_start_ag(args, ap->blkno);
3669 if (error || args->fsbno != NULLFSBLOCK)
3670 return error;
3671 }
3672
3673 /* Last ditch attempt before failure is declared. */
3674 args->total = ap->minlen;
3675 error = xfs_alloc_vextent_first_ag(args, 0);
3676 if (error)
3677 return error;
3678 ap->tp->t_flags |= XFS_TRANS_LOWMODE;
3679 return 0;
3680 }
3681
3682 static int
3683 xfs_bmap_btalloc_filestreams(
3684 struct xfs_bmalloca *ap,
3685 struct xfs_alloc_arg *args,
3686 int stripe_align)
3687 {
3688 xfs_extlen_t blen = 0;
3689 int error = 0;
3690
3691
3692 error = xfs_filestream_select_ag(ap, args, &blen);
3693 if (error)
3694 return error;
3695 ASSERT(args->pag);
3696
3697 /*
3698 * If we are in low space mode, then optimal allocation will fail so
3699 * prepare for minimal allocation and jump to the low space algorithm
3700 * immediately.
3701 */
3702 if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
3703 args->minlen = ap->minlen;
3704 ASSERT(args->fsbno == NULLFSBLOCK);
3705 goto out_low_space;
3706 }
3707
3708 args->minlen = xfs_bmap_select_minlen(ap, args, blen);
3709 if (ap->aeof)
3710 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align,
3711 true);
3712
3713 if (!error && args->fsbno == NULLFSBLOCK)
3714 error = xfs_alloc_vextent_near_bno(args, ap->blkno);
3715
3716 out_low_space:
3717 /*
3718 * We are now done with the perag reference for the filestreams
3719 * association provided by xfs_filestream_select_ag(). Release it now as
3720 * we've either succeeded, had a fatal error, or we are out of space and
3721 * need to do a full filesystem scan for free space, which will take its
3722 * own references.
3723 */
3724 xfs_perag_rele(args->pag);
3725 args->pag = NULL;
3726 if (error || args->fsbno != NULLFSBLOCK)
3727 return error;
3728
3729 return xfs_bmap_btalloc_low_space(ap, args);
3730 }
3731
3732 static int
3733 xfs_bmap_btalloc_best_length(
3734 struct xfs_bmalloca *ap,
3735 struct xfs_alloc_arg *args,
3736 int stripe_align)
3737 {
3738 xfs_extlen_t blen = 0;
3739 int error;
3740
3741 ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino);
3742 xfs_bmap_adjacent(ap);
3743
3744 /*
3745 * Search for an allocation group with a single extent large enough for
3746 * the request. If one isn't found, then adjust the minimum allocation
3747 * size to the largest space found.
3748 */
3749 error = xfs_bmap_btalloc_select_lengths(ap, args, &blen);
3750 if (error)
3751 return error;
3752
3753 /*
3754 * Don't attempt optimal EOF allocation if previous allocations barely
3755 * succeeded due to being near ENOSPC. It is highly unlikely we'll get
3756 * optimal or even aligned allocations in this case, so don't waste time
3757 * trying.
3758 */ 3759 if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) { 3760 error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align, 3761 false); 3762 if (error || args->fsbno != NULLFSBLOCK) 3763 return error; 3764 } 3765 3766 error = xfs_alloc_vextent_start_ag(args, ap->blkno); 3767 if (error || args->fsbno != NULLFSBLOCK) 3768 return error; 3769 3770 return xfs_bmap_btalloc_low_space(ap, args); 3771 } 3772 3773 static int 3774 xfs_bmap_btalloc( 3775 struct xfs_bmalloca *ap) 3776 { 3777 struct xfs_mount *mp = ap->ip->i_mount; 3778 struct xfs_alloc_arg args = { 3779 .tp = ap->tp, 3780 .mp = mp, 3781 .fsbno = NULLFSBLOCK, 3782 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, 3783 .minleft = ap->minleft, 3784 .wasdel = ap->wasdel, 3785 .resv = XFS_AG_RESV_NONE, 3786 .datatype = ap->datatype, 3787 .alignment = 1, 3788 .minalignslop = 0, 3789 }; 3790 xfs_fileoff_t orig_offset; 3791 xfs_extlen_t orig_length; 3792 int error; 3793 int stripe_align; 3794 3795 ASSERT(ap->length); 3796 orig_offset = ap->offset; 3797 orig_length = ap->length; 3798 3799 stripe_align = xfs_bmap_compute_alignments(ap, &args); 3800 3801 /* Trim the allocation back to the maximum an AG can fit. */ 3802 args.maxlen = min(ap->length, mp->m_ag_max_usable); 3803 3804 if (unlikely(XFS_TEST_ERROR(false, mp, 3805 XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) 3806 error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); 3807 else if ((ap->datatype & XFS_ALLOC_USERDATA) && 3808 xfs_inode_is_filestream(ap->ip)) 3809 error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); 3810 else 3811 error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); 3812 if (error) 3813 return error; 3814 3815 if (args.fsbno != NULLFSBLOCK) { 3816 xfs_bmap_process_allocated_extent(ap, &args, orig_offset, 3817 orig_length); 3818 } else { 3819 ap->blkno = NULLFSBLOCK; 3820 ap->length = 0; 3821 } 3822 return 0; 3823 } 3824 3825 /* Trim extent to fit a logical block range. 
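 * For example (illustrative numbers): an irec covering file blocks
 * [10, 30) trimmed to bno = 15, len = 10 comes back as [15, 25), with
 * br_startblock advanced by the same five blocks unless the mapping is
 * a hole or delalloc; an irec entirely outside [bno, bno + len) comes
 * back with br_blockcount = 0.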
*/
3826 void
3827 xfs_trim_extent(
3828 struct xfs_bmbt_irec *irec,
3829 xfs_fileoff_t bno,
3830 xfs_filblks_t len)
3831 {
3832 xfs_fileoff_t distance;
3833 xfs_fileoff_t end = bno + len;
3834
3835 if (irec->br_startoff + irec->br_blockcount <= bno ||
3836 irec->br_startoff >= end) {
3837 irec->br_blockcount = 0;
3838 return;
3839 }
3840
3841 if (irec->br_startoff < bno) {
3842 distance = bno - irec->br_startoff;
3843 if (isnullstartblock(irec->br_startblock))
3844 irec->br_startblock = DELAYSTARTBLOCK;
3845 if (irec->br_startblock != DELAYSTARTBLOCK &&
3846 irec->br_startblock != HOLESTARTBLOCK)
3847 irec->br_startblock += distance;
3848 irec->br_startoff += distance;
3849 irec->br_blockcount -= distance;
3850 }
3851
3852 if (end < irec->br_startoff + irec->br_blockcount) {
3853 distance = irec->br_startoff + irec->br_blockcount - end;
3854 irec->br_blockcount -= distance;
3855 }
3856 }
3857
3858 /*
3859 * Trim the returned map to the required bounds
3860 */
3861 STATIC void
3862 xfs_bmapi_trim_map(
3863 struct xfs_bmbt_irec *mval,
3864 struct xfs_bmbt_irec *got,
3865 xfs_fileoff_t *bno,
3866 xfs_filblks_t len,
3867 xfs_fileoff_t obno,
3868 xfs_fileoff_t end,
3869 int n,
3870 uint32_t flags)
3871 {
3872 if ((flags & XFS_BMAPI_ENTIRE) ||
3873 got->br_startoff + got->br_blockcount <= obno) {
3874 *mval = *got;
3875 if (isnullstartblock(got->br_startblock))
3876 mval->br_startblock = DELAYSTARTBLOCK;
3877 return;
3878 }
3879
3880 if (obno > *bno)
3881 *bno = obno;
3882 ASSERT((*bno >= obno) || (n == 0));
3883 ASSERT(*bno < end);
3884 mval->br_startoff = *bno;
3885 if (isnullstartblock(got->br_startblock))
3886 mval->br_startblock = DELAYSTARTBLOCK;
3887 else
3888 mval->br_startblock = got->br_startblock +
3889 (*bno - got->br_startoff);
3890 /*
3891 * Return the minimum of what we got and what we asked for as
3892 * the length. We can use the len variable here because it is
3893 * modified below and we could have been there before coming
3894 * here if the first part of the allocation didn't overlap what
3895 * was asked for.
3896 */ 3897 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, 3898 got->br_blockcount - (*bno - got->br_startoff)); 3899 mval->br_state = got->br_state; 3900 ASSERT(mval->br_blockcount <= len); 3901 return; 3902 } 3903 3904 /* 3905 * Update and validate the extent map to return 3906 */ 3907 STATIC void 3908 xfs_bmapi_update_map( 3909 struct xfs_bmbt_irec **map, 3910 xfs_fileoff_t *bno, 3911 xfs_filblks_t *len, 3912 xfs_fileoff_t obno, 3913 xfs_fileoff_t end, 3914 int *n, 3915 uint32_t flags) 3916 { 3917 xfs_bmbt_irec_t *mval = *map; 3918 3919 ASSERT((flags & XFS_BMAPI_ENTIRE) || 3920 ((mval->br_startoff + mval->br_blockcount) <= end)); 3921 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || 3922 (mval->br_startoff < obno)); 3923 3924 *bno = mval->br_startoff + mval->br_blockcount; 3925 *len = end - *bno; 3926 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { 3927 /* update previous map with new information */ 3928 ASSERT(mval->br_startblock == mval[-1].br_startblock); 3929 ASSERT(mval->br_blockcount > mval[-1].br_blockcount); 3930 ASSERT(mval->br_state == mval[-1].br_state); 3931 mval[-1].br_blockcount = mval->br_blockcount; 3932 mval[-1].br_state = mval->br_state; 3933 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && 3934 mval[-1].br_startblock != DELAYSTARTBLOCK && 3935 mval[-1].br_startblock != HOLESTARTBLOCK && 3936 mval->br_startblock == mval[-1].br_startblock + 3937 mval[-1].br_blockcount && 3938 mval[-1].br_state == mval->br_state) { 3939 ASSERT(mval->br_startoff == 3940 mval[-1].br_startoff + mval[-1].br_blockcount); 3941 mval[-1].br_blockcount += mval->br_blockcount; 3942 } else if (*n > 0 && 3943 mval->br_startblock == DELAYSTARTBLOCK && 3944 mval[-1].br_startblock == DELAYSTARTBLOCK && 3945 mval->br_startoff == 3946 mval[-1].br_startoff + mval[-1].br_blockcount) { 3947 mval[-1].br_blockcount += mval->br_blockcount; 3948 mval[-1].br_state = mval->br_state; 3949 } else if (!((*n == 0) && 3950 ((mval->br_startoff + mval->br_blockcount) <= 3951 obno))) { 3952 mval++; 3953 (*n)++; 3954 } 3955 *map = mval; 3956 } 3957 3958 /* 3959 * Map file blocks to filesystem blocks without allocation. 
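 *
 * Minimal caller-side sketch (illustrative only; offset_fsb and count_fsb
 * are assumed to have been computed by the caller, which must hold the
 * ILOCK in at least shared mode):
 *
 *	struct xfs_bmbt_irec	map;
 *	int			nmaps = 1;
 *	int			error;
 *
 *	xfs_ilock(ip, XFS_ILOCK_SHARED);
 *	error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &map, &nmaps, 0);
 *	xfs_iunlock(ip, XFS_ILOCK_SHARED);
 *	// on success, map describes the extent, delalloc reservation, or
 *	// hole (br_startblock == HOLESTARTBLOCK) covering offset_fsb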
3960  */
3961 int
3962 xfs_bmapi_read(
3963 	struct xfs_inode	*ip,
3964 	xfs_fileoff_t		bno,
3965 	xfs_filblks_t		len,
3966 	struct xfs_bmbt_irec	*mval,
3967 	int			*nmap,
3968 	uint32_t		flags)
3969 {
3970 	struct xfs_mount	*mp = ip->i_mount;
3971 	int			whichfork = xfs_bmapi_whichfork(flags);
3972 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
3973 	struct xfs_bmbt_irec	got;
3974 	xfs_fileoff_t		obno;
3975 	xfs_fileoff_t		end;
3976 	struct xfs_iext_cursor	icur;
3977 	int			error;
3978 	bool			eof = false;
3979 	int			n = 0;
3980 
3981 	ASSERT(*nmap >= 1);
3982 	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
3983 	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
3984 
3985 	if (WARN_ON_ONCE(!ifp)) {
3986 		xfs_bmap_mark_sick(ip, whichfork);
3987 		return -EFSCORRUPTED;
3988 	}
3989 
3990 	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
3991 	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
3992 		xfs_bmap_mark_sick(ip, whichfork);
3993 		return -EFSCORRUPTED;
3994 	}
3995 
3996 	if (xfs_is_shutdown(mp))
3997 		return -EIO;
3998 
3999 	XFS_STATS_INC(mp, xs_blk_mapr);
4000 
4001 	error = xfs_iread_extents(NULL, ip, whichfork);
4002 	if (error)
4003 		return error;
4004 
4005 	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got))
4006 		eof = true;
4007 	end = bno + len;
4008 	obno = bno;
4009 
4010 	while (bno < end && n < *nmap) {
4011 		/* Reading past eof, act as though there's a hole up to end. */
4012 		if (eof)
4013 			got.br_startoff = end;
4014 		if (got.br_startoff > bno) {
4015 			/* Reading in a hole. */
4016 			mval->br_startoff = bno;
4017 			mval->br_startblock = HOLESTARTBLOCK;
4018 			mval->br_blockcount =
4019 				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4020 			mval->br_state = XFS_EXT_NORM;
4021 			bno += mval->br_blockcount;
4022 			len -= mval->br_blockcount;
4023 			mval++;
4024 			n++;
4025 			continue;
4026 		}
4027 
4028 		/* set up the extent map to return. */
4029 		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
4030 		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4031 
4032 		/* If we're done, stop now. */
4033 		if (bno >= end || n >= *nmap)
4034 			break;
4035 
4036 		/* Else go on to the next record. */
4037 		if (!xfs_iext_next_extent(ifp, &icur, &got))
4038 			eof = true;
4039 	}
4040 	*nmap = n;
4041 	return 0;
4042 }
4043 
4044 /*
4045  * Add a delayed allocation extent to an inode. Blocks are reserved from the
4046  * global pool and the extent inserted into the inode in-core extent tree.
4047  *
4048  * On entry, got refers to the first extent beyond the offset of the extent to
4049  * allocate or eof is specified if no such extent exists. On return, got refers
4050  * to the extent record that was inserted to the inode fork.
4051  *
4052  * Note that the allocated extent may have been merged with contiguous extents
4053  * during insertion into the inode fork. Thus, got does not reflect the current
4054  * state of the inode fork on return. If necessary, the caller can use icur to
4055  * look up the updated record in the inode fork.
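 *
 * Sketch of the usual call pattern (illustrative; it mirrors the buffered
 * write path, which looks up the extent at @off first and only makes the
 * reservation when the lookup found a hole or ran off the end of the
 * extent list):
 *
 *	eof = !xfs_iext_lookup_extent(ip, ifp, off, &icur, &got);
 *	if (eof || got.br_startoff > off)
 *		error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, off,
 *				len, prealloc, &got, &icur, eof);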
4056  */
4057 int
4058 xfs_bmapi_reserve_delalloc(
4059 	struct xfs_inode	*ip,
4060 	int			whichfork,
4061 	xfs_fileoff_t		off,
4062 	xfs_filblks_t		len,
4063 	xfs_filblks_t		prealloc,
4064 	struct xfs_bmbt_irec	*got,
4065 	struct xfs_iext_cursor	*icur,
4066 	int			eof)
4067 {
4068 	struct xfs_mount	*mp = ip->i_mount;
4069 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4070 	xfs_extlen_t		alen;
4071 	xfs_extlen_t		indlen;
4072 	uint64_t		fdblocks;
4073 	int			error;
4074 	xfs_fileoff_t		aoff;
4075 	bool			use_cowextszhint =
4076 					whichfork == XFS_COW_FORK && !prealloc;
4077 
4078 retry:
4079 	/*
4080 	 * Cap the alloc length. Keep track of prealloc so we know whether to
4081 	 * tag the inode before we return.
4082 	 */
4083 	aoff = off;
4084 	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
4085 	if (!eof)
4086 		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
4087 	if (prealloc && alen >= len)
4088 		prealloc = alen - len;
4089 
4090 	/*
4091 	 * If we're targeting the COW fork but aren't creating a speculative
4092 	 * posteof preallocation, try to expand the reservation to align with
4093 	 * the COW extent size hint if there's sufficient free space.
4094 	 *
4095 	 * Unlike the data fork, the CoW cancellation functions will free all
4096 	 * the reservations at inactivation, so we don't require that every
4097 	 * delalloc reservation have a dirty pagecache.
4098 	 */
4099 	if (use_cowextszhint) {
4100 		struct xfs_bmbt_irec	prev;
4101 		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
4102 
4103 		if (!xfs_iext_peek_prev_extent(ifp, icur, &prev))
4104 			prev.br_startoff = NULLFILEOFF;
4105 
4106 		error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof,
4107 				1, 0, &aoff, &alen);
4108 		ASSERT(!error);
4109 	}
4110 
4111 	/*
4112 	 * Make a transaction-less quota reservation for delayed allocation
4113 	 * blocks. This number gets adjusted later. We return if we haven't
4114 	 * allocated blocks already inside this loop.
4115 	 */
4116 	error = xfs_quota_reserve_blkres(ip, alen);
4117 	if (error)
4118 		goto out;
4119 
4120 	/*
4121 	 * Update the superblock counters for alen and indlen separately, as
4122 	 * they may be accounted to different pools.
4123 	 */
4124 	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
4125 	ASSERT(indlen > 0);
4126 
4127 	fdblocks = indlen;
4128 	if (XFS_IS_REALTIME_INODE(ip)) {
4129 		error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
4130 		if (error)
4131 			goto out_unreserve_quota;
4132 	} else {
4133 		fdblocks += alen;
4134 	}
4135 
4136 	error = xfs_dec_fdblocks(mp, fdblocks, false);
4137 	if (error)
4138 		goto out_unreserve_frextents;
4139 
4140 	ip->i_delayed_blks += alen;
4141 	xfs_mod_delalloc(ip, alen, indlen);
4142 
4143 	got->br_startoff = aoff;
4144 	got->br_startblock = nullstartblock(indlen);
4145 	got->br_blockcount = alen;
4146 	got->br_state = XFS_EXT_NORM;
4147 
4148 	xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got);
4149 
4150 	/*
4151 	 * Tag the inode if blocks were preallocated. Note that COW fork
4152 	 * preallocation can occur at the start or end of the extent, even when
4153 	 * prealloc == 0, so we must also check the aligned offset and length.
4154 */ 4155 if (whichfork == XFS_DATA_FORK && prealloc) 4156 xfs_inode_set_eofblocks_tag(ip); 4157 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4158 xfs_inode_set_cowblocks_tag(ip); 4159 4160 return 0; 4161 4162 out_unreserve_frextents: 4163 if (XFS_IS_REALTIME_INODE(ip)) 4164 xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen)); 4165 out_unreserve_quota: 4166 if (XFS_IS_QUOTA_ON(mp)) 4167 xfs_quota_unreserve_blkres(ip, alen); 4168 out: 4169 if (error == -ENOSPC || error == -EDQUOT) { 4170 trace_xfs_delalloc_enospc(ip, off, len); 4171 4172 if (prealloc || use_cowextszhint) { 4173 /* retry without any preallocation */ 4174 use_cowextszhint = false; 4175 prealloc = 0; 4176 goto retry; 4177 } 4178 } 4179 return error; 4180 } 4181 4182 static int 4183 xfs_bmapi_allocate( 4184 struct xfs_bmalloca *bma) 4185 { 4186 struct xfs_mount *mp = bma->ip->i_mount; 4187 int whichfork = xfs_bmapi_whichfork(bma->flags); 4188 struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork); 4189 int error; 4190 4191 ASSERT(bma->length > 0); 4192 ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN); 4193 4194 if (bma->flags & XFS_BMAPI_CONTIG) 4195 bma->minlen = bma->length; 4196 else 4197 bma->minlen = 1; 4198 4199 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4200 /* 4201 * For the data and COW fork, the first data in the file is 4202 * treated differently to all other allocations. For the 4203 * attribute fork, we only need to ensure the allocated range 4204 * is not on the busy list. 4205 */ 4206 bma->datatype = XFS_ALLOC_NOBUSY; 4207 if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { 4208 bma->datatype |= XFS_ALLOC_USERDATA; 4209 if (bma->offset == 0) 4210 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4211 4212 if (mp->m_dalign && bma->length >= mp->m_dalign) { 4213 error = xfs_bmap_isaeof(bma, whichfork); 4214 if (error) 4215 return error; 4216 } 4217 } 4218 } 4219 4220 if ((bma->datatype & XFS_ALLOC_USERDATA) && 4221 XFS_IS_REALTIME_INODE(bma->ip)) 4222 error = xfs_bmap_rtalloc(bma); 4223 else 4224 error = xfs_bmap_btalloc(bma); 4225 if (error) 4226 return error; 4227 if (bma->blkno == NULLFSBLOCK) 4228 return -ENOSPC; 4229 4230 if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) { 4231 xfs_bmap_mark_sick(bma->ip, whichfork); 4232 return -EFSCORRUPTED; 4233 } 4234 4235 if (bma->flags & XFS_BMAPI_ZERO) { 4236 error = xfs_zero_extent(bma->ip, bma->blkno, bma->length); 4237 if (error) 4238 return error; 4239 } 4240 4241 if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) 4242 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4243 /* 4244 * Bump the number of extents we've allocated 4245 * in this call. 4246 */ 4247 bma->nallocs++; 4248 4249 if (bma->cur && bma->wasdel) 4250 bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL; 4251 4252 bma->got.br_startoff = bma->offset; 4253 bma->got.br_startblock = bma->blkno; 4254 bma->got.br_blockcount = bma->length; 4255 bma->got.br_state = XFS_EXT_NORM; 4256 4257 if (bma->flags & XFS_BMAPI_PREALLOC) 4258 bma->got.br_state = XFS_EXT_UNWRITTEN; 4259 4260 if (bma->wasdel) 4261 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4262 else 4263 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, 4264 whichfork, &bma->icur, &bma->cur, &bma->got, 4265 &bma->logflags, bma->flags); 4266 if (error) 4267 return error; 4268 4269 /* 4270 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real 4271 * or xfs_bmap_add_extent_hole_real might have merged it into one of 4272 * the neighbouring ones. 
4273 */ 4274 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4275 4276 ASSERT(bma->got.br_startoff <= bma->offset); 4277 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= 4278 bma->offset + bma->length); 4279 ASSERT(bma->got.br_state == XFS_EXT_NORM || 4280 bma->got.br_state == XFS_EXT_UNWRITTEN); 4281 return 0; 4282 } 4283 4284 STATIC int 4285 xfs_bmapi_convert_unwritten( 4286 struct xfs_bmalloca *bma, 4287 struct xfs_bmbt_irec *mval, 4288 xfs_filblks_t len, 4289 uint32_t flags) 4290 { 4291 int whichfork = xfs_bmapi_whichfork(flags); 4292 struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork); 4293 int tmp_logflags = 0; 4294 int error; 4295 4296 /* check if we need to do unwritten->real conversion */ 4297 if (mval->br_state == XFS_EXT_UNWRITTEN && 4298 (flags & XFS_BMAPI_PREALLOC)) 4299 return 0; 4300 4301 /* check if we need to do real->unwritten conversion */ 4302 if (mval->br_state == XFS_EXT_NORM && 4303 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != 4304 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) 4305 return 0; 4306 4307 /* 4308 * Modify (by adding) the state flag, if writing. 4309 */ 4310 ASSERT(mval->br_blockcount <= len); 4311 if (ifp->if_format == XFS_DINODE_FMT_BTREE && !bma->cur) { 4312 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4313 bma->ip, whichfork); 4314 } 4315 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4316 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4317 4318 /* 4319 * Before insertion into the bmbt, zero the range being converted 4320 * if required. 4321 */ 4322 if (flags & XFS_BMAPI_ZERO) { 4323 error = xfs_zero_extent(bma->ip, mval->br_startblock, 4324 mval->br_blockcount); 4325 if (error) 4326 return error; 4327 } 4328 4329 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, 4330 &bma->icur, &bma->cur, mval, &tmp_logflags); 4331 /* 4332 * Log the inode core unconditionally in the unwritten extent conversion 4333 * path because the conversion might not have done so (e.g., if the 4334 * extent count hasn't changed). We need to make sure the inode is dirty 4335 * in the transaction for the sake of fsync(), even if nothing has 4336 * changed, because fsync() will not force the log for this transaction 4337 * unless it sees the inode pinned. 4338 * 4339 * Note: If we're only converting cow fork extents, there aren't 4340 * any on-disk updates to make, so we don't need to log anything. 4341 */ 4342 if (whichfork != XFS_COW_FORK) 4343 bma->logflags |= tmp_logflags | XFS_ILOG_CORE; 4344 if (error) 4345 return error; 4346 4347 /* 4348 * Update our extent pointer, given that 4349 * xfs_bmap_add_extent_unwritten_real might have merged it into one 4350 * of the neighbouring ones. 4351 */ 4352 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4353 4354 /* 4355 * We may have combined previously unwritten space with written space, 4356 * so generate another request. 4357 */ 4358 if (mval->br_blockcount < len) 4359 return -EAGAIN; 4360 return 0; 4361 } 4362 4363 xfs_extlen_t 4364 xfs_bmapi_minleft( 4365 struct xfs_trans *tp, 4366 struct xfs_inode *ip, 4367 int fork) 4368 { 4369 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, fork); 4370 4371 if (tp && tp->t_highest_agno != NULLAGNUMBER) 4372 return 0; 4373 if (ifp->if_format != XFS_DINODE_FMT_BTREE) 4374 return 1; 4375 return be16_to_cpu(ifp->if_broot->bb_level) + 1; 4376 } 4377 4378 /* 4379 * Log whatever the flags say, even if error. 
Otherwise we might miss detecting
4380  * a case where the data is changed, there's an error, and it's not logged so we
4381  * don't shut down when we should. Don't bother logging extents/btree changes if
4382  * we converted to the other format.
4383  */
4384 static void
4385 xfs_bmapi_finish(
4386 	struct xfs_bmalloca	*bma,
4387 	int			whichfork,
4388 	int			error)
4389 {
4390 	struct xfs_ifork	*ifp = xfs_ifork_ptr(bma->ip, whichfork);
4391 
4392 	if ((bma->logflags & xfs_ilog_fext(whichfork)) &&
4393 	    ifp->if_format != XFS_DINODE_FMT_EXTENTS)
4394 		bma->logflags &= ~xfs_ilog_fext(whichfork);
4395 	else if ((bma->logflags & xfs_ilog_fbroot(whichfork)) &&
4396 		 ifp->if_format != XFS_DINODE_FMT_BTREE)
4397 		bma->logflags &= ~xfs_ilog_fbroot(whichfork);
4398 
4399 	if (bma->logflags)
4400 		xfs_trans_log_inode(bma->tp, bma->ip, bma->logflags);
4401 	if (bma->cur)
4402 		xfs_btree_del_cursor(bma->cur, error);
4403 }
4404 
4405 /*
4406  * Map file blocks to filesystem blocks, and allocate blocks or convert the
4407  * extent state if necessary. Detailed behaviour is controlled by the flags
4408  * parameter. Only allocates blocks from a single allocation group, to avoid
4409  * locking problems.
4410  *
4411  * Returns 0 on success and places the extent mappings in mval. nmaps is used
4412  * as an input/output parameter where the caller specifies the maximum number
4413  * of mappings that may be returned and xfs_bmapi_write passes back the number
4414  * of mappings (including existing mappings) it found.
4415  *
4416  * Returns a negative error code on failure, including -ENOSPC when it could not
4417  * allocate any blocks and -ENOSR when it did allocate blocks to convert a
4418  * delalloc range, but those blocks were before the passed in range.
4419  */
4420 int
4421 xfs_bmapi_write(
4422 	struct xfs_trans	*tp,		/* transaction pointer */
4423 	struct xfs_inode	*ip,		/* incore inode */
4424 	xfs_fileoff_t		bno,		/* starting file offs. mapped */
4425 	xfs_filblks_t		len,		/* length to map in file */
4426 	uint32_t		flags,		/* XFS_BMAPI_...
*/
4427 	xfs_extlen_t		total,		/* total blocks needed */
4428 	struct xfs_bmbt_irec	*mval,		/* output: map values */
4429 	int			*nmap)		/* i/o: mval size/count */
4430 {
4431 	struct xfs_bmalloca	bma = {
4432 		.tp		= tp,
4433 		.ip		= ip,
4434 		.total		= total,
4435 	};
4436 	struct xfs_mount	*mp = ip->i_mount;
4437 	int			whichfork = xfs_bmapi_whichfork(flags);
4438 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4439 	xfs_fileoff_t		end;		/* end of mapped file region */
4440 	bool			eof = false;	/* after the end of extents */
4441 	int			error;		/* error return */
4442 	int			n;		/* current extent index */
4443 	xfs_fileoff_t		obno;		/* old block number (offset) */
4444 
4445 #ifdef DEBUG
4446 	xfs_fileoff_t		orig_bno;	/* original block number value */
4447 	int			orig_flags;	/* original flags arg value */
4448 	xfs_filblks_t		orig_len;	/* original value of len arg */
4449 	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
4450 	int			orig_nmap;	/* original value of *nmap */
4451 
4452 	orig_bno = bno;
4453 	orig_len = len;
4454 	orig_flags = flags;
4455 	orig_mval = mval;
4456 	orig_nmap = *nmap;
4457 #endif
4458 
4459 	ASSERT(*nmap >= 1);
4460 	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4461 	ASSERT(tp != NULL);
4462 	ASSERT(len > 0);
4463 	ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
4464 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4465 	ASSERT(!(flags & XFS_BMAPI_REMAP));
4466 
4467 	/* zeroing is currently only for data extents, not metadata */
4468 	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4469 			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4470 	/*
4471 	 * We can allocate unwritten extents or pre-zero allocated blocks,
4472 	 * but it makes no sense to do both at once: we would zero data that
4473 	 * the unwritten state already makes read back as zeroes, and the
4474 	 * extent would still remain unwritten.
4475 	 */
4476 	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4477 			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4478 
4479 	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4480 	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4481 		xfs_bmap_mark_sick(ip, whichfork);
4482 		return -EFSCORRUPTED;
4483 	}
4484 
4485 	if (xfs_is_shutdown(mp))
4486 		return -EIO;
4487 
4488 	XFS_STATS_INC(mp, xs_blk_mapw);
4489 
4490 	error = xfs_iread_extents(tp, ip, whichfork);
4491 	if (error)
4492 		goto error0;
4493 
4494 	if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got))
4495 		eof = true;
4496 	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4497 		bma.prev.br_startoff = NULLFILEOFF;
4498 	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4499 
4500 	n = 0;
4501 	end = bno + len;
4502 	obno = bno;
4503 	while (bno < end && n < *nmap) {
4504 		bool			need_alloc = false, wasdelay = false;
4505 
4506 		/* in hole or beyond EOF? */
4507 		if (eof || bma.got.br_startoff > bno) {
4508 			/*
4509 			 * CoW fork conversions should /never/ hit EOF or
4510 			 * holes. There should always be something for us
4511 			 * to work on.
4512 			 */
4513 			ASSERT(!((flags & XFS_BMAPI_CONVERT) &&
4514 				 (flags & XFS_BMAPI_COWFORK)));
4515 
4516 			need_alloc = true;
4517 		} else if (isnullstartblock(bma.got.br_startblock)) {
4518 			wasdelay = true;
4519 		}
4520 
4521 		/*
4522 		 * First, deal with the hole before the allocated space
4523 		 * that we found, if any.
4524 		 */
4525 		if (need_alloc || wasdelay) {
4526 			bma.eof = eof;
4527 			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4528 			bma.wasdel = wasdelay;
4529 			bma.offset = bno;
4530 			bma.flags = flags;
4531 
4532 			/*
4533 			 * There's a 32/64 bit type mismatch between the
4534 			 * allocation length request (which can be 64 bits in
4535 			 * length) and the bma length request, which is
4536 			 * xfs_extlen_t and therefore 32 bits. Hence we have to
4537 			 * be careful and do the min() using the larger type to
4538 			 * avoid overflows.
4539 			 */
4540 			bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
4541 
4542 			if (wasdelay) {
4543 				bma.length = XFS_FILBLKS_MIN(bma.length,
4544 					bma.got.br_blockcount -
4545 					(bno - bma.got.br_startoff));
4546 			} else {
4547 				if (!eof)
4548 					bma.length = XFS_FILBLKS_MIN(bma.length,
4549 						bma.got.br_startoff - bno);
4550 			}
4551 
4552 			ASSERT(bma.length > 0);
4553 			error = xfs_bmapi_allocate(&bma);
4554 			if (error) {
4555 				/*
4556 				 * If we already allocated space in a previous
4557 				 * iteration, return what we got so far when
4558 				 * running out of space.
4559 				 */
4560 				if (error == -ENOSPC && bma.nallocs)
4561 					break;
4562 				goto error0;
4563 			}
4564 
4565 			/*
4566 			 * If this is a CoW allocation, record the data in
4567 			 * the refcount btree for orphan recovery.
4568 			 */
4569 			if (whichfork == XFS_COW_FORK)
4570 				xfs_refcount_alloc_cow_extent(tp, bma.blkno,
4571 						bma.length);
4572 		}
4573 
4574 		/* Deal with the allocated space we found. */
4575 		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
4576 				end, n, flags);
4577 
4578 		/* Execute unwritten extent conversion if necessary */
4579 		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
4580 		if (error == -EAGAIN)
4581 			continue;
4582 		if (error)
4583 			goto error0;
4584 
4585 		/* update the extent map to return */
4586 		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
4587 
4588 		/*
4589 		 * If we're done, stop now. Stop when we've allocated
4590 		 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
4591 		 * the transaction may get too big.
4592 		 */
4593 		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
4594 			break;
4595 
4596 		/* Else go on to the next record. */
4597 		bma.prev = bma.got;
4598 		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
4599 			eof = true;
4600 	}
4601 
4602 	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4603 			whichfork);
4604 	if (error)
4605 		goto error0;
4606 
4607 	ASSERT(ifp->if_format != XFS_DINODE_FMT_BTREE ||
4608 	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
4609 	xfs_bmapi_finish(&bma, whichfork, 0);
4610 	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
4611 		orig_nmap, n);
4612 
4613 	/*
4614 	 * When converting delayed allocations, xfs_bmapi_allocate ignores
4615 	 * the passed in bno and always converts from the start of the found
4616 	 * delalloc extent.
4617 	 *
4618 	 * To avoid a successful return with *nmap set to 0, return the magic
4619 	 * -ENOSR error code for this particular case so that the caller can
4620 	 * handle it.
4621 	 */
4622 	if (!n) {
4623 		ASSERT(bma.nallocs >= *nmap);
4624 		return -ENOSR;
4625 	}
4626 	*nmap = n;
4627 	return 0;
4628 error0:
4629 	xfs_bmapi_finish(&bma, whichfork, error);
4630 	return error;
4631 }
4632 
4633 /*
4634  * Convert an existing delalloc extent to real blocks based on file offset. This
4635  * attempts to allocate the entire delalloc extent and may require multiple
4636  * invocations to allocate the target offset if a large enough physical extent
4637  * is not available.
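 *
 * The wrapper xfs_bmapi_convert_delalloc() below keeps calling this helper
 * until the returned iomap actually covers the byte at @offset, i.e. until
 * iomap->offset + iomap->length > offset.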
4638  */
4639 static int
4640 xfs_bmapi_convert_one_delalloc(
4641 	struct xfs_inode	*ip,
4642 	int			whichfork,
4643 	xfs_off_t		offset,
4644 	struct iomap		*iomap,
4645 	unsigned int		*seq)
4646 {
4647 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
4648 	struct xfs_mount	*mp = ip->i_mount;
4649 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4650 	struct xfs_bmalloca	bma = { NULL };
4651 	uint16_t		flags = 0;
4652 	struct xfs_trans	*tp;
4653 	int			error;
4654 
4655 	if (whichfork == XFS_COW_FORK)
4656 		flags |= IOMAP_F_SHARED;
4657 
4658 	/*
4659 	 * Space for the extent and indirect blocks was reserved when the
4660 	 * delalloc extent was created so there's no need to do so here.
4661 	 */
4662 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
4663 				XFS_TRANS_RESERVE, &tp);
4664 	if (error)
4665 		return error;
4666 
4667 	xfs_ilock(ip, XFS_ILOCK_EXCL);
4668 	xfs_trans_ijoin(tp, ip, 0);
4669 
4670 	error = xfs_iext_count_extend(tp, ip, whichfork,
4671 			XFS_IEXT_ADD_NOSPLIT_CNT);
4672 	if (error)
4673 		goto out_trans_cancel;
4674 
4675 	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &bma.icur, &bma.got) ||
4676 	    bma.got.br_startoff > offset_fsb) {
4677 		/*
4678 		 * No extent found in the range we are trying to convert. This
4679 		 * should only happen for the COW fork, where another thread
4680 		 * might have moved the extent to the data fork in the meantime.
4681 		 */
4682 		WARN_ON_ONCE(whichfork != XFS_COW_FORK);
4683 		error = -EAGAIN;
4684 		goto out_trans_cancel;
4685 	}
4686 
4687 	/*
4688 	 * If we find a real extent here we raced with another thread converting
4689 	 * the extent. Just return the real extent at this offset.
4690 	 */
4691 	if (!isnullstartblock(bma.got.br_startblock)) {
4692 		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4693 				xfs_iomap_inode_sequence(ip, flags));
4694 		if (seq)
4695 			*seq = READ_ONCE(ifp->if_seq);
4696 		goto out_trans_cancel;
4697 	}
4698 
4699 	bma.tp = tp;
4700 	bma.ip = ip;
4701 	bma.wasdel = true;
4702 	bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
4703 
4704 	/*
4705 	 * Always convert from the start of the delalloc extent, even if that
4706 	 * is outside the passed in range, to create large contiguous extents
4707 	 * on disk.
4708 	 */
4709 	bma.offset = bma.got.br_startoff;
4710 	bma.length = bma.got.br_blockcount;
4711 
4712 	/*
4713 	 * When we're converting the delalloc reservations backing dirty pages
4714 	 * in the page cache, we must be careful about how we create the new
4715 	 * extents:
4716 	 *
4717 	 * New CoW fork extents are created unwritten, turned into real extents
4718 	 * when we're about to write the data to disk, and mapped into the data
4719 	 * fork after the write finishes. End of story.
4720 	 *
4721 	 * New data fork extents must be mapped in as unwritten and converted
4722 	 * to real extents after the write succeeds to avoid exposing stale
4723 	 * disk contents if we crash.
4724 	 */
4725 	bma.flags = XFS_BMAPI_PREALLOC;
4726 	if (whichfork == XFS_COW_FORK)
4727 		bma.flags |= XFS_BMAPI_COWFORK;
4728 
4729 	if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev))
4730 		bma.prev.br_startoff = NULLFILEOFF;
4731 
4732 	error = xfs_bmapi_allocate(&bma);
4733 	if (error)
4734 		goto out_finish;
4735 
4736 	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
4737 	XFS_STATS_INC(mp, xs_xstrat_quick);
4738 
4739 	ASSERT(!isnullstartblock(bma.got.br_startblock));
4740 	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
4741 			xfs_iomap_inode_sequence(ip, flags));
4742 	if (seq)
4743 		*seq = READ_ONCE(ifp->if_seq);
4744 
4745 	if (whichfork == XFS_COW_FORK)
4746 		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
4747 
4748 	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
4749 			whichfork);
4750 	if (error)
4751 		goto out_finish;
4752 
4753 	xfs_bmapi_finish(&bma, whichfork, 0);
4754 	error = xfs_trans_commit(tp);
4755 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4756 	return error;
4757 
4758 out_finish:
4759 	xfs_bmapi_finish(&bma, whichfork, error);
4760 out_trans_cancel:
4761 	xfs_trans_cancel(tp);
4762 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4763 	return error;
4764 }
4765 
4766 /*
4767  * Pass in a delalloc extent and convert it to real extents; return the real
4768  * extent that maps offset_fsb in iomap.
4769  */
4770 int
4771 xfs_bmapi_convert_delalloc(
4772 	struct xfs_inode	*ip,
4773 	int			whichfork,
4774 	loff_t			offset,
4775 	struct iomap		*iomap,
4776 	unsigned int		*seq)
4777 {
4778 	int			error;
4779 
4780 	/*
4781 	 * Attempt to allocate whatever delalloc extent currently backs offset
4782 	 * and put the result into iomap. Allocate in a loop because it may
4783 	 * take several attempts to allocate real blocks for a contiguous
4784 	 * delalloc extent if free space is sufficiently fragmented.
4785 	 */
4786 	do {
4787 		error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
4788 				iomap, seq);
4789 		if (error)
4790 			return error;
4791 	} while (iomap->offset + iomap->length <= offset);
4792 
4793 	return 0;
4794 }
4795 
4796 int
4797 xfs_bmapi_remap(
4798 	struct xfs_trans	*tp,
4799 	struct xfs_inode	*ip,
4800 	xfs_fileoff_t		bno,
4801 	xfs_filblks_t		len,
4802 	xfs_fsblock_t		startblock,
4803 	uint32_t		flags)
4804 {
4805 	struct xfs_mount	*mp = ip->i_mount;
4806 	struct xfs_ifork	*ifp;
4807 	struct xfs_btree_cur	*cur = NULL;
4808 	struct xfs_bmbt_irec	got;
4809 	struct xfs_iext_cursor	icur;
4810 	int			whichfork = xfs_bmapi_whichfork(flags);
4811 	int			logflags = 0, error;
4812 
4813 	ifp = xfs_ifork_ptr(ip, whichfork);
4814 	ASSERT(len > 0);
4815 	ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
4816 	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
4817 	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
4818 			   XFS_BMAPI_NORMAP)));
4819 	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
4820 			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
4821 
4822 	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
4823 	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
4824 		xfs_bmap_mark_sick(ip, whichfork);
4825 		return -EFSCORRUPTED;
4826 	}
4827 
4828 	if (xfs_is_shutdown(mp))
4829 		return -EIO;
4830 
4831 	error = xfs_iread_extents(tp, ip, whichfork);
4832 	if (error)
4833 		return error;
4834 
4835 	if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
4836 		/* make sure we only reflink into a hole.
*/
4837 		ASSERT(got.br_startoff > bno);
4838 		ASSERT(got.br_startoff - bno >= len);
4839 	}
4840 
4841 	ip->i_nblocks += len;
4842 	ip->i_delayed_blks -= len;	/* see xfs_bmap_defer_add */
4843 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4844 
4845 	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
4846 		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
4847 
4848 	got.br_startoff = bno;
4849 	got.br_startblock = startblock;
4850 	got.br_blockcount = len;
4851 	if (flags & XFS_BMAPI_PREALLOC)
4852 		got.br_state = XFS_EXT_UNWRITTEN;
4853 	else
4854 		got.br_state = XFS_EXT_NORM;
4855 
4856 	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
4857 			&cur, &got, &logflags, flags);
4858 	if (error)
4859 		goto error0;
4860 
4861 	error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, whichfork);
4862 
4863 error0:
4864 	if (ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS)
4865 		logflags &= ~XFS_ILOG_DEXT;
4866 	else if (ip->i_df.if_format != XFS_DINODE_FMT_BTREE)
4867 		logflags &= ~XFS_ILOG_DBROOT;
4868 
4869 	if (logflags)
4870 		xfs_trans_log_inode(tp, ip, logflags);
4871 	if (cur)
4872 		xfs_btree_del_cursor(cur, error);
4873 	return error;
4874 }
4875 
4876 /*
4877  * When a delalloc extent is split (e.g., due to a hole punch), the original
4878  * indlen reservation must be shared across the two new extents that are left
4879  * behind.
4880  *
4881  * Given the original reservation and the worst case indlen for the two new
4882  * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4883  * reservation fairly across the two new extents. The caller may already
4884  * have stolen available blocks from the deleted extent to make up a
4885  * reservation deficiency (e.g., if ores == 1); the availability and
4886  * accounting of stolen blocks is entirely the caller's responsibility.
4887  */
4888 static void
4889 xfs_bmap_split_indlen(
4890 	xfs_filblks_t		ores,		/* original res. */
4891 	xfs_filblks_t		*indlen1,	/* ext1 worst indlen */
4892 	xfs_filblks_t		*indlen2)	/* ext2 worst indlen */
4893 {
4894 	xfs_filblks_t		len1 = *indlen1;
4895 	xfs_filblks_t		len2 = *indlen2;
4896 	xfs_filblks_t		nres = len1 + len2; /* new total res. */
4897 	xfs_filblks_t		resfactor;
4898 
4899 	/*
4900 	 * We can't meet the total required reservation for the two extents.
4901 	 * Calculate the percent of the overall shortage between both extents
4902 	 * and apply this percentage to each of the requested indlen values.
4903 	 * This distributes the shortage fairly and reduces the chances that one
4904 	 * of the two extents is left with nothing when extents are repeatedly
4905 	 * split.
4906 	 */
4907 	resfactor = (ores * 100);
4908 	do_div(resfactor, nres);
4909 	len1 *= resfactor;
4910 	do_div(len1, 100);
4911 	len2 *= resfactor;
4912 	do_div(len2, 100);
4913 	ASSERT(len1 + len2 <= ores);
4914 	ASSERT(len1 < *indlen1 && len2 < *indlen2);
4915 
4916 	/*
4917 	 * Hand out the remainder to each extent. If one of the two reservations
4918 	 * is zero, we want to make sure that one gets a block first. The loop
4919 	 * below starts with len1, so hand len2 a block right off the bat if it
4920 	 * is zero.
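 *
 * Worked example with hypothetical numbers: for ores = 10, *indlen1 = 8
 * and *indlen2 = 6, nres = 14 and resfactor = 71, so the proportional
 * pass above leaves len1 = 5 and len2 = 4. One block of the original
 * reservation is still unassigned, and the loop below hands it to len1,
 * giving a final split of 6 and 4, exactly the original 10 blocks.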
4921 */ 4922 ores -= (len1 + len2); 4923 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); 4924 if (ores && !len2 && *indlen2) { 4925 len2++; 4926 ores--; 4927 } 4928 while (ores) { 4929 if (len1 < *indlen1) { 4930 len1++; 4931 ores--; 4932 } 4933 if (!ores) 4934 break; 4935 if (len2 < *indlen2) { 4936 len2++; 4937 ores--; 4938 } 4939 } 4940 4941 *indlen1 = len1; 4942 *indlen2 = len2; 4943 } 4944 4945 void 4946 xfs_bmap_del_extent_delay( 4947 struct xfs_inode *ip, 4948 int whichfork, 4949 struct xfs_iext_cursor *icur, 4950 struct xfs_bmbt_irec *got, 4951 struct xfs_bmbt_irec *del) 4952 { 4953 struct xfs_mount *mp = ip->i_mount; 4954 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 4955 struct xfs_bmbt_irec new; 4956 int64_t da_old, da_new, da_diff = 0; 4957 xfs_fileoff_t del_endoff, got_endoff; 4958 xfs_filblks_t got_indlen, new_indlen, stolen = 0; 4959 uint32_t state = xfs_bmap_fork_to_state(whichfork); 4960 uint64_t fdblocks; 4961 bool isrt; 4962 4963 XFS_STATS_INC(mp, xs_del_exlist); 4964 4965 isrt = xfs_ifork_is_realtime(ip, whichfork); 4966 del_endoff = del->br_startoff + del->br_blockcount; 4967 got_endoff = got->br_startoff + got->br_blockcount; 4968 da_old = startblockval(got->br_startblock); 4969 da_new = 0; 4970 4971 ASSERT(del->br_blockcount > 0); 4972 ASSERT(got->br_startoff <= del->br_startoff); 4973 ASSERT(got_endoff >= del_endoff); 4974 4975 /* 4976 * Update the inode delalloc counter now and wait to update the 4977 * sb counters as we might have to borrow some blocks for the 4978 * indirect block accounting. 4979 */ 4980 xfs_quota_unreserve_blkres(ip, del->br_blockcount); 4981 ip->i_delayed_blks -= del->br_blockcount; 4982 4983 if (got->br_startoff == del->br_startoff) 4984 state |= BMAP_LEFT_FILLING; 4985 if (got_endoff == del_endoff) 4986 state |= BMAP_RIGHT_FILLING; 4987 4988 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4989 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4990 /* 4991 * Matches the whole extent. Delete the entry. 4992 */ 4993 xfs_iext_remove(ip, icur, state); 4994 xfs_iext_prev(ifp, icur); 4995 break; 4996 case BMAP_LEFT_FILLING: 4997 /* 4998 * Deleting the first part of the extent. 4999 */ 5000 got->br_startoff = del_endoff; 5001 got->br_blockcount -= del->br_blockcount; 5002 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 5003 got->br_blockcount), da_old); 5004 got->br_startblock = nullstartblock((int)da_new); 5005 xfs_iext_update_extent(ip, state, icur, got); 5006 break; 5007 case BMAP_RIGHT_FILLING: 5008 /* 5009 * Deleting the last part of the extent. 5010 */ 5011 got->br_blockcount = got->br_blockcount - del->br_blockcount; 5012 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 5013 got->br_blockcount), da_old); 5014 got->br_startblock = nullstartblock((int)da_new); 5015 xfs_iext_update_extent(ip, state, icur, got); 5016 break; 5017 case 0: 5018 /* 5019 * Deleting the middle of the extent. 5020 * 5021 * Distribute the original indlen reservation across the two new 5022 * extents. Steal blocks from the deleted extent if necessary. 5023 * Stealing blocks simply fudges the fdblocks accounting below. 5024 * Warn if either of the new indlen reservations is zero as this 5025 * can lead to delalloc problems. 
5026 		 */
5027 		got->br_blockcount = del->br_startoff - got->br_startoff;
5028 		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
5029 
5030 		new.br_blockcount = got_endoff - del_endoff;
5031 		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
5032 
5033 		WARN_ON_ONCE(!got_indlen || !new_indlen);
5034 		/*
5035 		 * Steal as many blocks as we can to try and satisfy the worst
5036 		 * case indlen for both new extents.
5037 		 *
5038 		 * However, we can't just steal reservations from the data
5039 		 * blocks if this is an RT inode, as the data and metadata
5040 		 * blocks come from different pools. We'll have to live with
5041 		 * an under-filled indirect reservation in this case.
5042 		 */
5043 		da_new = got_indlen + new_indlen;
5044 		if (da_new > da_old && !isrt) {
5045 			stolen = XFS_FILBLKS_MIN(da_new - da_old,
5046 						 del->br_blockcount);
5047 			da_old += stolen;
5048 		}
5049 		if (da_new > da_old)
5050 			xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen);
5051 		da_new = got_indlen + new_indlen;
5052 
5053 		got->br_startblock = nullstartblock((int)got_indlen);
5054 
5055 		new.br_startoff = del_endoff;
5056 		new.br_state = got->br_state;
5057 		new.br_startblock = nullstartblock((int)new_indlen);
5058 
5059 		xfs_iext_update_extent(ip, state, icur, got);
5060 		xfs_iext_next(ifp, icur);
5061 		xfs_iext_insert(ip, icur, &new, state);
5062 
5063 		del->br_blockcount -= stolen;
5064 		break;
5065 	}
5066 
5067 	ASSERT(da_old >= da_new);
5068 	da_diff = da_old - da_new;
5069 	fdblocks = da_diff;
5070 
5071 	if (isrt)
5072 		xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
5073 	else
5074 		fdblocks += del->br_blockcount;
5075 
5076 	xfs_add_fdblocks(mp, fdblocks);
5077 	xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
5078 }
5079 
5080 void
5081 xfs_bmap_del_extent_cow(
5082 	struct xfs_inode	*ip,
5083 	struct xfs_iext_cursor	*icur,
5084 	struct xfs_bmbt_irec	*got,
5085 	struct xfs_bmbt_irec	*del)
5086 {
5087 	struct xfs_mount	*mp = ip->i_mount;
5088 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
5089 	struct xfs_bmbt_irec	new;
5090 	xfs_fileoff_t		del_endoff, got_endoff;
5091 	uint32_t		state = BMAP_COWFORK;
5092 
5093 	XFS_STATS_INC(mp, xs_del_exlist);
5094 
5095 	del_endoff = del->br_startoff + del->br_blockcount;
5096 	got_endoff = got->br_startoff + got->br_blockcount;
5097 
5098 	ASSERT(del->br_blockcount > 0);
5099 	ASSERT(got->br_startoff <= del->br_startoff);
5100 	ASSERT(got_endoff >= del_endoff);
5101 	ASSERT(!isnullstartblock(got->br_startblock));
5102 
5103 	if (got->br_startoff == del->br_startoff)
5104 		state |= BMAP_LEFT_FILLING;
5105 	if (got_endoff == del_endoff)
5106 		state |= BMAP_RIGHT_FILLING;
5107 
5108 	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5109 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5110 		/*
5111 		 * Matches the whole extent. Delete the entry.
5112 		 */
5113 		xfs_iext_remove(ip, icur, state);
5114 		xfs_iext_prev(ifp, icur);
5115 		break;
5116 	case BMAP_LEFT_FILLING:
5117 		/*
5118 		 * Deleting the first part of the extent.
5119 		 */
5120 		got->br_startoff = del_endoff;
5121 		got->br_blockcount -= del->br_blockcount;
5122 		got->br_startblock = del->br_startblock + del->br_blockcount;
5123 		xfs_iext_update_extent(ip, state, icur, got);
5124 		break;
5125 	case BMAP_RIGHT_FILLING:
5126 		/*
5127 		 * Deleting the last part of the extent.
5128 		 */
5129 		got->br_blockcount -= del->br_blockcount;
5130 		xfs_iext_update_extent(ip, state, icur, got);
5131 		break;
5132 	case 0:
5133 		/*
5134 		 * Deleting the middle of the extent.
5135 */ 5136 got->br_blockcount = del->br_startoff - got->br_startoff; 5137 5138 new.br_startoff = del_endoff; 5139 new.br_blockcount = got_endoff - del_endoff; 5140 new.br_state = got->br_state; 5141 new.br_startblock = del->br_startblock + del->br_blockcount; 5142 5143 xfs_iext_update_extent(ip, state, icur, got); 5144 xfs_iext_next(ifp, icur); 5145 xfs_iext_insert(ip, icur, &new, state); 5146 break; 5147 } 5148 ip->i_delayed_blks -= del->br_blockcount; 5149 } 5150 5151 static int 5152 xfs_bmap_free_rtblocks( 5153 struct xfs_trans *tp, 5154 struct xfs_bmbt_irec *del) 5155 { 5156 struct xfs_rtgroup *rtg; 5157 int error; 5158 5159 rtg = xfs_rtgroup_grab(tp->t_mountp, 0); 5160 if (!rtg) 5161 return -EIO; 5162 5163 /* 5164 * Ensure the bitmap and summary inodes are locked and joined to the 5165 * transaction before modifying them. 5166 */ 5167 if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) { 5168 tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED; 5169 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP); 5170 xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP); 5171 } 5172 5173 error = xfs_rtfree_blocks(tp, rtg, del->br_startblock, 5174 del->br_blockcount); 5175 xfs_rtgroup_rele(rtg); 5176 return error; 5177 } 5178 5179 /* 5180 * Called by xfs_bmapi to update file extent records and the btree 5181 * after removing space. 5182 */ 5183 STATIC int /* error */ 5184 xfs_bmap_del_extent_real( 5185 xfs_inode_t *ip, /* incore inode pointer */ 5186 xfs_trans_t *tp, /* current transaction pointer */ 5187 struct xfs_iext_cursor *icur, 5188 struct xfs_btree_cur *cur, /* if null, not a btree */ 5189 xfs_bmbt_irec_t *del, /* data to remove from extents */ 5190 int *logflagsp, /* inode logging flags */ 5191 int whichfork, /* data or attr fork */ 5192 uint32_t bflags) /* bmapi flags */ 5193 { 5194 xfs_fsblock_t del_endblock=0; /* first block past del */ 5195 xfs_fileoff_t del_endoff; /* first offset past del */ 5196 int error = 0; /* error return value */ 5197 struct xfs_bmbt_irec got; /* current extent entry */ 5198 xfs_fileoff_t got_endoff; /* first offset past got */ 5199 int i; /* temp state */ 5200 struct xfs_ifork *ifp; /* inode fork pointer */ 5201 xfs_mount_t *mp; /* mount structure */ 5202 xfs_filblks_t nblks; /* quota/sb block count */ 5203 xfs_bmbt_irec_t new; /* new record to be inserted */ 5204 /* REFERENCED */ 5205 uint qfield; /* quota field to update */ 5206 uint32_t state = xfs_bmap_fork_to_state(whichfork); 5207 struct xfs_bmbt_irec old; 5208 5209 *logflagsp = 0; 5210 5211 mp = ip->i_mount; 5212 XFS_STATS_INC(mp, xs_del_exlist); 5213 5214 ifp = xfs_ifork_ptr(ip, whichfork); 5215 ASSERT(del->br_blockcount > 0); 5216 xfs_iext_get_extent(ifp, icur, &got); 5217 ASSERT(got.br_startoff <= del->br_startoff); 5218 del_endoff = del->br_startoff + del->br_blockcount; 5219 got_endoff = got.br_startoff + got.br_blockcount; 5220 ASSERT(got_endoff >= del_endoff); 5221 ASSERT(!isnullstartblock(got.br_startblock)); 5222 qfield = 0; 5223 5224 /* 5225 * If it's the case where the directory code is running with no block 5226 * reservation, and the deleted block is in the middle of its extent, 5227 * and the resulting insert of an extent would cause transformation to 5228 * btree format, then reject it. The calling code will then swap blocks 5229 * around instead. We have to do this now, rather than waiting for the 5230 * conversion to btree format, since the transaction will be dirty then. 
5231 	 */
5232 	if (tp->t_blk_res == 0 &&
5233 	    ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
5234 	    ifp->if_nextents >= XFS_IFORK_MAXEXT(ip, whichfork) &&
5235 	    del->br_startoff > got.br_startoff && del_endoff < got_endoff)
5236 		return -ENOSPC;
5237 
5238 	*logflagsp = XFS_ILOG_CORE;
5239 	if (xfs_ifork_is_realtime(ip, whichfork))
5240 		qfield = XFS_TRANS_DQ_RTBCOUNT;
5241 	else
5242 		qfield = XFS_TRANS_DQ_BCOUNT;
5243 	nblks = del->br_blockcount;
5244 
5245 	del_endblock = del->br_startblock + del->br_blockcount;
5246 	if (cur) {
5247 		error = xfs_bmbt_lookup_eq(cur, &got, &i);
5248 		if (error)
5249 			return error;
5250 		if (XFS_IS_CORRUPT(mp, i != 1)) {
5251 			xfs_btree_mark_sick(cur);
5252 			return -EFSCORRUPTED;
5253 		}
5254 	}
5255 
5256 	if (got.br_startoff == del->br_startoff)
5257 		state |= BMAP_LEFT_FILLING;
5258 	if (got_endoff == del_endoff)
5259 		state |= BMAP_RIGHT_FILLING;
5260 
5261 	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) {
5262 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
5263 		/*
5264 		 * Matches the whole extent. Delete the entry.
5265 		 */
5266 		xfs_iext_remove(ip, icur, state);
5267 		xfs_iext_prev(ifp, icur);
5268 		ifp->if_nextents--;
5269 
5270 		*logflagsp |= XFS_ILOG_CORE;
5271 		if (!cur) {
5272 			*logflagsp |= xfs_ilog_fext(whichfork);
5273 			break;
5274 		}
5275 		if ((error = xfs_btree_delete(cur, &i)))
5276 			return error;
5277 		if (XFS_IS_CORRUPT(mp, i != 1)) {
5278 			xfs_btree_mark_sick(cur);
5279 			return -EFSCORRUPTED;
5280 		}
5281 		break;
5282 	case BMAP_LEFT_FILLING:
5283 		/*
5284 		 * Deleting the first part of the extent.
5285 		 */
5286 		got.br_startoff = del_endoff;
5287 		got.br_startblock = del_endblock;
5288 		got.br_blockcount -= del->br_blockcount;
5289 		xfs_iext_update_extent(ip, state, icur, &got);
5290 		if (!cur) {
5291 			*logflagsp |= xfs_ilog_fext(whichfork);
5292 			break;
5293 		}
5294 		error = xfs_bmbt_update(cur, &got);
5295 		if (error)
5296 			return error;
5297 		break;
5298 	case BMAP_RIGHT_FILLING:
5299 		/*
5300 		 * Deleting the last part of the extent.
5301 		 */
5302 		got.br_blockcount -= del->br_blockcount;
5303 		xfs_iext_update_extent(ip, state, icur, &got);
5304 		if (!cur) {
5305 			*logflagsp |= xfs_ilog_fext(whichfork);
5306 			break;
5307 		}
5308 		error = xfs_bmbt_update(cur, &got);
5309 		if (error)
5310 			return error;
5311 		break;
5312 	case 0:
5313 		/*
5314 		 * Deleting the middle of the extent.
5315 		 */
5316 
5317 		old = got;
5318 
5319 		got.br_blockcount = del->br_startoff - got.br_startoff;
5320 		xfs_iext_update_extent(ip, state, icur, &got);
5321 
5322 		new.br_startoff = del_endoff;
5323 		new.br_blockcount = got_endoff - del_endoff;
5324 		new.br_state = got.br_state;
5325 		new.br_startblock = del_endblock;
5326 
5327 		*logflagsp |= XFS_ILOG_CORE;
5328 		if (cur) {
5329 			error = xfs_bmbt_update(cur, &got);
5330 			if (error)
5331 				return error;
5332 			error = xfs_btree_increment(cur, 0, &i);
5333 			if (error)
5334 				return error;
5335 			cur->bc_rec.b = new;
5336 			error = xfs_btree_insert(cur, &i);
5337 			if (error && error != -ENOSPC)
5338 				return error;
5339 			/*
5340 			 * If we get no space back from the btree insert, it
5341 			 * tried a split and we have a zero block reservation.
5342 			 * Fix up our state and return the error.
5343 			 */
5344 			if (error == -ENOSPC) {
5345 				/*
5346 				 * Reset the cursor, don't trust it after any
5347 				 * insert operation.
5348 				 */
5349 				error = xfs_bmbt_lookup_eq(cur, &got, &i);
5350 				if (error)
5351 					return error;
5352 				if (XFS_IS_CORRUPT(mp, i != 1)) {
5353 					xfs_btree_mark_sick(cur);
5354 					return -EFSCORRUPTED;
5355 				}
5356 				/*
5357 				 * Update the btree record back
5358 				 * to the original value.
5359 */ 5360 error = xfs_bmbt_update(cur, &old); 5361 if (error) 5362 return error; 5363 /* 5364 * Reset the extent record back 5365 * to the original value. 5366 */ 5367 xfs_iext_update_extent(ip, state, icur, &old); 5368 *logflagsp = 0; 5369 return -ENOSPC; 5370 } 5371 if (XFS_IS_CORRUPT(mp, i != 1)) { 5372 xfs_btree_mark_sick(cur); 5373 return -EFSCORRUPTED; 5374 } 5375 } else 5376 *logflagsp |= xfs_ilog_fext(whichfork); 5377 5378 ifp->if_nextents++; 5379 xfs_iext_next(ifp, icur); 5380 xfs_iext_insert(ip, icur, &new, state); 5381 break; 5382 } 5383 5384 /* remove reverse mapping */ 5385 xfs_rmap_unmap_extent(tp, ip, whichfork, del); 5386 5387 /* 5388 * If we need to, add to list of extents to delete. 5389 */ 5390 if (!(bflags & XFS_BMAPI_REMAP)) { 5391 bool isrt = xfs_ifork_is_realtime(ip, whichfork); 5392 5393 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { 5394 xfs_refcount_decrease_extent(tp, del); 5395 } else if (isrt && !xfs_has_rtgroups(mp)) { 5396 error = xfs_bmap_free_rtblocks(tp, del); 5397 } else { 5398 unsigned int efi_flags = 0; 5399 5400 if ((bflags & XFS_BMAPI_NODISCARD) || 5401 del->br_state == XFS_EXT_UNWRITTEN) 5402 efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; 5403 5404 /* 5405 * Historically, we did not use EFIs to free realtime 5406 * extents. However, when reverse mapping is enabled, 5407 * we must maintain the same order of operations as the 5408 * data device, which is: Remove the file mapping, 5409 * remove the reverse mapping, and then free the 5410 * blocks. Reflink for realtime volumes requires the 5411 * same sort of ordering. Both features rely on 5412 * rtgroups, so let's gate rt EFI usage on rtgroups. 5413 */ 5414 if (isrt) 5415 efi_flags |= XFS_FREE_EXTENT_REALTIME; 5416 5417 error = xfs_free_extent_later(tp, del->br_startblock, 5418 del->br_blockcount, NULL, 5419 XFS_AG_RESV_NONE, efi_flags); 5420 } 5421 if (error) 5422 return error; 5423 } 5424 5425 /* 5426 * Adjust inode # blocks in the file. 5427 */ 5428 if (nblks) 5429 ip->i_nblocks -= nblks; 5430 /* 5431 * Adjust quota data. 5432 */ 5433 if (qfield && !(bflags & XFS_BMAPI_REMAP)) 5434 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); 5435 5436 return 0; 5437 } 5438 5439 /* 5440 * Unmap (remove) blocks from a file. 5441 * If nexts is nonzero then the number of extents to remove is limited to 5442 * that value. If not all extents in the block range can be removed then 5443 * *done is set. 
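 *
 * Caller-side sketch (illustrative; real callers also roll the
 * transaction between iterations to keep it from growing too large):
 *
 *	int done = 0;
 *
 *	do {
 *		error = xfs_bunmapi(tp, ip, bno, len, 0, 1, &done);
 *	} while (!done && !error);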
5444 */ 5445 static int 5446 __xfs_bunmapi( 5447 struct xfs_trans *tp, /* transaction pointer */ 5448 struct xfs_inode *ip, /* incore inode */ 5449 xfs_fileoff_t start, /* first file offset deleted */ 5450 xfs_filblks_t *rlen, /* i/o: amount remaining */ 5451 uint32_t flags, /* misc flags */ 5452 xfs_extnum_t nexts) /* number of extents max */ 5453 { 5454 struct xfs_btree_cur *cur; /* bmap btree cursor */ 5455 struct xfs_bmbt_irec del; /* extent being deleted */ 5456 int error; /* error return value */ 5457 xfs_extnum_t extno; /* extent number in list */ 5458 struct xfs_bmbt_irec got; /* current extent record */ 5459 struct xfs_ifork *ifp; /* inode fork pointer */ 5460 int isrt; /* freeing in rt area */ 5461 int logflags; /* transaction logging flags */ 5462 xfs_extlen_t mod; /* rt extent offset */ 5463 struct xfs_mount *mp = ip->i_mount; 5464 int tmp_logflags; /* partial logging flags */ 5465 int wasdel; /* was a delayed alloc extent */ 5466 int whichfork; /* data or attribute fork */ 5467 xfs_filblks_t len = *rlen; /* length to unmap in file */ 5468 xfs_fileoff_t end; 5469 struct xfs_iext_cursor icur; 5470 bool done = false; 5471 5472 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_); 5473 5474 whichfork = xfs_bmapi_whichfork(flags); 5475 ASSERT(whichfork != XFS_COW_FORK); 5476 ifp = xfs_ifork_ptr(ip, whichfork); 5477 if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) { 5478 xfs_bmap_mark_sick(ip, whichfork); 5479 return -EFSCORRUPTED; 5480 } 5481 if (xfs_is_shutdown(mp)) 5482 return -EIO; 5483 5484 xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); 5485 ASSERT(len > 0); 5486 ASSERT(nexts >= 0); 5487 5488 error = xfs_iread_extents(tp, ip, whichfork); 5489 if (error) 5490 return error; 5491 5492 if (xfs_iext_count(ifp) == 0) { 5493 *rlen = 0; 5494 return 0; 5495 } 5496 XFS_STATS_INC(mp, xs_blk_unmap); 5497 isrt = xfs_ifork_is_realtime(ip, whichfork); 5498 end = start + len; 5499 5500 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { 5501 *rlen = 0; 5502 return 0; 5503 } 5504 end--; 5505 5506 logflags = 0; 5507 if (ifp->if_format == XFS_DINODE_FMT_BTREE) { 5508 ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); 5509 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5510 } else 5511 cur = NULL; 5512 5513 extno = 0; 5514 while (end != (xfs_fileoff_t)-1 && end >= start && 5515 (nexts == 0 || extno < nexts)) { 5516 /* 5517 * Is the found extent after a hole in which end lives? 5518 * Just back up to the previous extent, if so. 5519 */ 5520 if (got.br_startoff > end && 5521 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5522 done = true; 5523 break; 5524 } 5525 /* 5526 * Is the last block of this extent before the range 5527 * we're supposed to delete? If so, we're done. 5528 */ 5529 end = XFS_FILEOFF_MIN(end, 5530 got.br_startoff + got.br_blockcount - 1); 5531 if (end < start) 5532 break; 5533 /* 5534 * Then deal with the (possibly delayed) allocated space 5535 * we found. 5536 */ 5537 del = got; 5538 wasdel = isnullstartblock(del.br_startblock); 5539 5540 if (got.br_startoff < start) { 5541 del.br_startoff = start; 5542 del.br_blockcount -= start - got.br_startoff; 5543 if (!wasdel) 5544 del.br_startblock += start - got.br_startoff; 5545 } 5546 if (del.br_startoff + del.br_blockcount > end + 1) 5547 del.br_blockcount = end + 1 - del.br_startoff; 5548 5549 if (!isrt || (flags & XFS_BMAPI_REMAP)) 5550 goto delete; 5551 5552 mod = xfs_rtb_to_rtxoff(mp, 5553 del.br_startblock + del.br_blockcount); 5554 if (mod) { 5555 /* 5556 * Realtime extent not lined up at the end. 
5557 * The extent could have been split into written 5558 * and unwritten pieces, or we could just be 5559 * unmapping part of it. But we can't really 5560 * get rid of part of a realtime extent. 5561 */ 5562 if (del.br_state == XFS_EXT_UNWRITTEN) { 5563 /* 5564 * This piece is unwritten, or we're not 5565 * using unwritten extents. Skip over it. 5566 */ 5567 ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod); 5568 end -= mod > del.br_blockcount ? 5569 del.br_blockcount : mod; 5570 if (end < got.br_startoff && 5571 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5572 done = true; 5573 break; 5574 } 5575 continue; 5576 } 5577 /* 5578 * It's written, turn it unwritten. 5579 * This is better than zeroing it. 5580 */ 5581 ASSERT(del.br_state == XFS_EXT_NORM); 5582 ASSERT(tp->t_blk_res > 0); 5583 /* 5584 * If this spans a realtime extent boundary, 5585 * chop it back to the start of the one we end at. 5586 */ 5587 if (del.br_blockcount > mod) { 5588 del.br_startoff += del.br_blockcount - mod; 5589 del.br_startblock += del.br_blockcount - mod; 5590 del.br_blockcount = mod; 5591 } 5592 del.br_state = XFS_EXT_UNWRITTEN; 5593 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5594 whichfork, &icur, &cur, &del, 5595 &logflags); 5596 if (error) 5597 goto error0; 5598 goto nodelete; 5599 } 5600 5601 mod = xfs_rtb_to_rtxoff(mp, del.br_startblock); 5602 if (mod) { 5603 xfs_extlen_t off = mp->m_sb.sb_rextsize - mod; 5604 5605 /* 5606 * Realtime extent is lined up at the end but not 5607 * at the front. We'll get rid of full extents if 5608 * we can. 5609 */ 5610 if (del.br_blockcount > off) { 5611 del.br_blockcount -= off; 5612 del.br_startoff += off; 5613 del.br_startblock += off; 5614 } else if (del.br_startoff == start && 5615 (del.br_state == XFS_EXT_UNWRITTEN || 5616 tp->t_blk_res == 0)) { 5617 /* 5618 * Can't make it unwritten. There isn't 5619 * a full extent here so just skip it. 5620 */ 5621 ASSERT(end >= del.br_blockcount); 5622 end -= del.br_blockcount; 5623 if (got.br_startoff > end && 5624 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5625 done = true; 5626 break; 5627 } 5628 continue; 5629 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5630 struct xfs_bmbt_irec prev; 5631 xfs_fileoff_t unwrite_start; 5632 5633 /* 5634 * This one is already unwritten. 5635 * It must have a written left neighbor. 5636 * Unwrite the killed part of that one and 5637 * try again. 
5638 */ 5639 if (!xfs_iext_prev_extent(ifp, &icur, &prev)) 5640 ASSERT(0); 5641 ASSERT(prev.br_state == XFS_EXT_NORM); 5642 ASSERT(!isnullstartblock(prev.br_startblock)); 5643 ASSERT(del.br_startblock == 5644 prev.br_startblock + prev.br_blockcount); 5645 unwrite_start = max3(start, 5646 del.br_startoff - mod, 5647 prev.br_startoff); 5648 mod = unwrite_start - prev.br_startoff; 5649 prev.br_startoff = unwrite_start; 5650 prev.br_startblock += mod; 5651 prev.br_blockcount -= mod; 5652 prev.br_state = XFS_EXT_UNWRITTEN; 5653 error = xfs_bmap_add_extent_unwritten_real(tp, 5654 ip, whichfork, &icur, &cur, 5655 &prev, &logflags); 5656 if (error) 5657 goto error0; 5658 goto nodelete; 5659 } else { 5660 ASSERT(del.br_state == XFS_EXT_NORM); 5661 del.br_state = XFS_EXT_UNWRITTEN; 5662 error = xfs_bmap_add_extent_unwritten_real(tp, 5663 ip, whichfork, &icur, &cur, 5664 &del, &logflags); 5665 if (error) 5666 goto error0; 5667 goto nodelete; 5668 } 5669 } 5670 5671 delete: 5672 if (wasdel) { 5673 xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del); 5674 } else { 5675 error = xfs_bmap_del_extent_real(ip, tp, &icur, cur, 5676 &del, &tmp_logflags, whichfork, 5677 flags); 5678 logflags |= tmp_logflags; 5679 if (error) 5680 goto error0; 5681 } 5682 5683 end = del.br_startoff - 1; 5684 nodelete: 5685 /* 5686 * If not done go on to the next (previous) record. 5687 */ 5688 if (end != (xfs_fileoff_t)-1 && end >= start) { 5689 if (!xfs_iext_get_extent(ifp, &icur, &got) || 5690 (got.br_startoff > end && 5691 !xfs_iext_prev_extent(ifp, &icur, &got))) { 5692 done = true; 5693 break; 5694 } 5695 extno++; 5696 } 5697 } 5698 if (done || end == (xfs_fileoff_t)-1 || end < start) 5699 *rlen = 0; 5700 else 5701 *rlen = end - start + 1; 5702 5703 /* 5704 * Convert to a btree if necessary. 5705 */ 5706 if (xfs_bmap_needs_btree(ip, whichfork)) { 5707 ASSERT(cur == NULL); 5708 error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, 5709 &tmp_logflags, whichfork); 5710 logflags |= tmp_logflags; 5711 } else { 5712 error = xfs_bmap_btree_to_extents(tp, ip, cur, &logflags, 5713 whichfork); 5714 } 5715 5716 error0: 5717 /* 5718 * Log everything. Do this after conversion, there's no point in 5719 * logging the extent records if we've converted to btree format. 5720 */ 5721 if ((logflags & xfs_ilog_fext(whichfork)) && 5722 ifp->if_format != XFS_DINODE_FMT_EXTENTS) 5723 logflags &= ~xfs_ilog_fext(whichfork); 5724 else if ((logflags & xfs_ilog_fbroot(whichfork)) && 5725 ifp->if_format != XFS_DINODE_FMT_BTREE) 5726 logflags &= ~xfs_ilog_fbroot(whichfork); 5727 /* 5728 * Log inode even in the error case, if the transaction 5729 * is dirty we'll need to shut down the filesystem. 5730 */ 5731 if (logflags) 5732 xfs_trans_log_inode(tp, ip, logflags); 5733 if (cur) { 5734 if (!error) 5735 cur->bc_bmap.allocated = 0; 5736 xfs_btree_del_cursor(cur, error); 5737 } 5738 return error; 5739 } 5740 5741 /* Unmap a range of a file. */ 5742 int 5743 xfs_bunmapi( 5744 xfs_trans_t *tp, 5745 struct xfs_inode *ip, 5746 xfs_fileoff_t bno, 5747 xfs_filblks_t len, 5748 uint32_t flags, 5749 xfs_extnum_t nexts, 5750 int *done) 5751 { 5752 int error; 5753 5754 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts); 5755 *done = (len == 0); 5756 return error; 5757 } 5758 5759 /* 5760 * Determine whether an extent shift can be accomplished by a merge with the 5761 * extent that precedes the target hole of the shift. 
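 *
 * Illustration with hypothetical extents: left = [startoff 0, 4 blocks,
 * startblock 100] and got = [startoff 7, 2 blocks, startblock 104] can be
 * merged for a shift of 3 blocks, because got would then start at file
 * offset 4, immediately after left, while block 104 already adjoins
 * left's last block on disk. The extent states must also match, and the
 * combined length must not exceed XFS_MAX_BMBT_EXTLEN.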

/*
 * Determine whether an extent shift can be accomplished by a merge with the
 * extent that precedes the target hole of the shift.
 */
STATIC bool
xfs_bmse_can_merge(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*left,	/* preceding extent */
	struct xfs_bmbt_irec	*got,	/* current extent to shift */
	xfs_fileoff_t		shift)	/* shift fsb */
{
	xfs_fileoff_t		startoff;

	startoff = got->br_startoff - shift;

	/*
	 * The extent, once shifted, must be adjacent in-file and on-disk with
	 * the preceding extent.
	 */
	if ((left->br_startoff + left->br_blockcount != startoff) ||
	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
	    (left->br_state != got->br_state) ||
	    (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
	    !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
		return false;

	return true;
}
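
/*
 * Worked example for xfs_bmse_can_merge() (hypothetical numbers): with
 * left = { br_startoff 100, br_blockcount 10, br_startblock 500 } and
 * got = { br_startoff 120, br_blockcount 5, br_startblock 510 }, a shift
 * of 10 would move got to file offset 110.  That is contiguous with left
 * both in-file (100 + 10 == 110) and on-disk (500 + 10 == 510), so if the
 * extent states match and the combined length fits in XFS_MAX_BMBT_EXTLEN,
 * the shift can be performed as a merge.
 */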

/*
 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
 * hole in the file.  If an extent shift would result in the extent being fully
 * adjacent to the extent that currently precedes the hole, we can merge with
 * the preceding extent rather than do the shift.
 *
 * This function assumes the caller has verified a shift-by-merge is possible
 * with the provided extents via xfs_bmse_can_merge().
 */
STATIC int
xfs_bmse_merge(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_fileoff_t		shift,		/* shift fsb */
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,		/* extent to shift */
	struct xfs_bmbt_irec	*left,		/* preceding extent */
	struct xfs_btree_cur	*cur,
	int			*logflags)	/* output */
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_bmbt_irec	new;
	xfs_filblks_t		blockcount;
	int			error, i;
	struct xfs_mount	*mp = ip->i_mount;

	blockcount = left->br_blockcount + got->br_blockcount;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
	ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));

	new = *left;
	new.br_blockcount = blockcount;

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	ifp->if_nextents--;
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		goto done;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	error = xfs_bmbt_update(cur, &new);
	if (error)
		return error;

	/* change to extent format if required after extent removal */
	error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
	if (error)
		return error;

done:
	xfs_iext_remove(ip, icur, 0);
	xfs_iext_prev(ifp, icur);
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			&new);

	/* update reverse mapping; the rmap functions merge the rmaps for us */
	xfs_rmap_unmap_extent(tp, ip, whichfork, got);
	memcpy(&new, got, sizeof(new));
	new.br_startoff = left->br_startoff + left->br_blockcount;
	xfs_rmap_map_extent(tp, ip, whichfork, &new);
	return 0;
}

static int
xfs_bmap_shift_update_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,
	struct xfs_btree_cur	*cur,
	int			*logflags,
	xfs_fileoff_t		startoff)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	prev = *got;
	int			error, i;

	*logflags |= XFS_ILOG_CORE;

	got->br_startoff = startoff;

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			return -EFSCORRUPTED;
		}

		error = xfs_bmbt_update(cur, got);
		if (error)
			return error;
	} else {
		*logflags |= XFS_ILOG_DEXT;
	}

	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			got);

	/* update reverse mapping */
	xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
	xfs_rmap_map_extent(tp, ip, whichfork, got);
	return 0;
}
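
/*
 * Note on the rmap replay in xfs_bmap_shift_update_extent() (illustrative,
 * hypothetical numbers): a shift moves no data, but it changes the file
 * offset the blocks are mapped at, so the old reverse mapping must be
 * deleted and a new one created.  Shifting { br_startoff 200,
 * br_startblock 800, br_blockcount 8 } down to offset 150 queues an rmap
 * unmap for the (offset 200, block 800) mapping and an rmap map for
 * (offset 150, block 800).
 */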

int
xfs_bmap_collapse_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, prev;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);

	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}
	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	new_startoff = got.br_startoff - offset_shift_fsb;
	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
			error = -EINVAL;
			goto del_cursor;
		}

		if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
				offset_shift_fsb)) {
			error = xfs_bmse_merge(tp, ip, whichfork,
					offset_shift_fsb, &icur, &got, &prev,
					cur, &logflags);
			if (error)
				goto del_cursor;
			goto done;
		}
	} else {
		if (got.br_startoff < offset_shift_fsb) {
			error = -EINVAL;
			goto del_cursor;
		}
	}

	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
			cur, &logflags, new_startoff);
	if (error)
		goto del_cursor;

done:
	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur, error);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
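
/*
 * Caller sketch for xfs_bmap_collapse_extents() (hypothetical helper; the
 * in-tree driver also rolls or commits the transaction between calls, which
 * is omitted here): each call merges or shifts at most one extent and
 * advances @next_fsb, so the caller simply loops until @done is set.
 */
static inline int
xfs_collapse_loop_sketch(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		shift_fsb)
{
	xfs_fileoff_t		next_fsb = start_fsb;
	bool			done = false;
	int			error = 0;

	while (!done && !error)
		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
				shift_fsb, &done);
	return error;
}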

/* Make sure we won't be right-shifting an extent past the maximum bound. */
int
xfs_bmap_can_insert_extents(
	struct xfs_inode	*ip,
	xfs_fileoff_t		off,
	xfs_fileoff_t		shift)
{
	struct xfs_bmbt_irec	got;
	int			is_empty;
	int			error = 0;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);

	if (xfs_is_shutdown(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
	if (!error && !is_empty && got.br_startoff >= off &&
	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
		error = -EINVAL;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}

int
xfs_bmap_insert_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fileoff_t		stop_fsb)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, next;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);

	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE)
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);

	if (*next_fsb == NULLFSBLOCK) {
		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
		    stop_fsb > got.br_startoff) {
			*done = true;
			goto del_cursor;
		}
	} else {
		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
			*done = true;
			goto del_cursor;
		}
	}
	if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
		xfs_bmap_mark_sick(ip, whichfork);
		error = -EFSCORRUPTED;
		goto del_cursor;
	}

	new_startoff = got.br_startoff + offset_shift_fsb;
	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
		if (new_startoff + got.br_blockcount > next.br_startoff) {
			error = -EINVAL;
			goto del_cursor;
		}

		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way.  We
		 * should never find mergeable extents in this scenario.
		 * Check anyway and warn if we encounter two extents that
		 * could be one.
		 */
		if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
				offset_shift_fsb))
			WARN_ON_ONCE(1);
	}

	error = xfs_bmap_shift_update_extent(tp, ip, whichfork, &icur, &got,
			cur, &logflags, new_startoff);
	if (error)
		goto del_cursor;

	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
	    stop_fsb >= got.br_startoff + got.br_blockcount) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur, error);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
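
/*
 * Caller sketch for xfs_bmap_insert_extents() (hypothetical helper, same
 * single-transaction simplification as above): a right shift walks the fork
 * backwards from the last extent, so the loop seeds @next_fsb with
 * NULLFSBLOCK and runs until everything at or beyond @stop_fsb has been
 * shifted.  Real callers bound the shift first, e.g. with
 * xfs_bmap_can_insert_extents().
 */
static inline int
xfs_insert_loop_sketch(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		stop_fsb,
	xfs_fileoff_t		shift_fsb)
{
	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
	bool			done = false;
	int			error = 0;

	while (!done && !error)
		error = xfs_bmap_insert_extents(tp, ip, &next_fsb,
				shift_fsb, &done, stop_fsb);
	return error;
}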

/*
 * Split an extent at the block offset @split_fsb so that @split_fsb becomes
 * the first block of a new, separate extent.  If @split_fsb lies in a hole
 * or at the first block of an existing extent, there is nothing to split
 * and we just return 0.
 */
int
xfs_bmap_split_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got;
	struct xfs_bmbt_irec	new;		/* split extent */
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsblock_t		gotblkcnt;	/* new block count for got */
	struct xfs_iext_cursor	icur;
	int			error = 0;
	int			logflags = 0;
	int			i = 0;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		xfs_bmap_mark_sick(ip, whichfork);
		return -EFSCORRUPTED;
	}

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* Read in all the extents */
	error = xfs_iread_extents(tp, ip, whichfork);
	if (error)
		return error;

	/*
	 * If there are no extents, or split_fsb lies in a hole, we are done.
	 */
	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
	    got.br_startoff >= split_fsb)
		return 0;

	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_bmbt_lookup_eq(cur, &got, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
	}

	got.br_blockcount = gotblkcnt;
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
			&got);

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, &got);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	xfs_iext_next(ifp, &icur);
	xfs_iext_insert(ip, &icur, &new, 0);
	ifp->if_nextents++;

	if (cur) {
		error = xfs_bmbt_lookup_eq(cur, &new, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto del_cursor;
		}
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags;	/* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0,
				&tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_bmap.allocated = 0;
		xfs_btree_del_cursor(cur, error);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/* Record a bmap intent. */
static inline void
__xfs_bmap_add(
	struct xfs_trans		*tp,
	enum xfs_bmap_intent_type	type,
	struct xfs_inode		*ip,
	int				whichfork,
	struct xfs_bmbt_irec		*bmap)
{
	struct xfs_bmap_intent		*bi;

	if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
	    bmap->br_startblock == HOLESTARTBLOCK ||
	    bmap->br_startblock == DELAYSTARTBLOCK)
		return;

	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&bi->bi_list);
	bi->bi_type = type;
	bi->bi_owner = ip;
	bi->bi_whichfork = whichfork;
	bi->bi_bmap = *bmap;

	xfs_bmap_defer_add(tp, bi);
}

/* Map an extent into a file. */
void
xfs_bmap_map_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*PREV)
{
	__xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
}

/* Unmap an extent out of a file. */
void
xfs_bmap_unmap_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*PREV)
{
	__xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
}
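
/*
 * Deferred-mapping sketch (illustrative): the two wrappers above do not
 * modify the fork directly; they queue intents that xfs_bmap_finish_one()
 * below replays when the deferred operations are finished.  A caller moving
 * a mapping from one inode to another might therefore do, roughly:
 *
 *	xfs_bmap_unmap_extent(tp, ip1, XFS_DATA_FORK, &irec);
 *	xfs_bmap_map_extent(tp, ip2, XFS_DATA_FORK, &irec);
 *	error = xfs_defer_finish(&tp);
 *
 * with the intent log items making the pair atomic across a crash.
 */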

/*
 * Process one of the deferred bmap operations.  We pass back the
 * btree cursor to maintain our lock on the bmapbt between calls.
 */
int
xfs_bmap_finish_one(
	struct xfs_trans	*tp,
	struct xfs_bmap_intent	*bi)
{
	struct xfs_bmbt_irec	*bmap = &bi->bi_bmap;
	int			error = 0;
	int			flags = 0;

	if (bi->bi_whichfork == XFS_ATTR_FORK)
		flags |= XFS_BMAPI_ATTRFORK;

	ASSERT(tp->t_highest_agno == NULLAGNUMBER);

	trace_xfs_bmap_deferred(bi);

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
		return -EIO;

	switch (bi->bi_type) {
	case XFS_BMAP_MAP:
		if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
			flags |= XFS_BMAPI_PREALLOC;
		error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
				bmap->br_blockcount, bmap->br_startblock,
				flags);
		bmap->br_blockcount = 0;
		break;
	case XFS_BMAP_UNMAP:
		error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
				&bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
				1);
		break;
	default:
		ASSERT(0);
		xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
		error = -EFSCORRUPTED;
	}

	return error;
}

/* Check that an extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent_raw(
	struct xfs_mount	*mp,
	bool			rtfile,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		return __this_address;

	if (rtfile && whichfork == XFS_DATA_FORK) {
		if (!xfs_verify_rtbext(mp, irec->br_startblock,
					   irec->br_blockcount))
			return __this_address;
	} else {
		if (!xfs_verify_fsbext(mp, irec->br_startblock,
					   irec->br_blockcount))
			return __this_address;
	}
	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
		return __this_address;
	return NULL;
}

int __init
xfs_bmap_intent_init_cache(void)
{
	xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent",
			sizeof(struct xfs_bmap_intent),
			0, 0, NULL);

	return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_bmap_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_bmap_intent_cache);
	xfs_bmap_intent_cache = NULL;
}

/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	return xfs_bmap_validate_extent_raw(ip->i_mount,
			XFS_IS_REALTIME_INODE(ip), whichfork, irec);
}
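
/*
 * Usage note (illustrative): a non-NULL xfs_failaddr_t from the validators
 * above means the record is corrupt, e.g.:
 *
 *	fa = xfs_bmap_validate_extent(ip, XFS_DATA_FORK, &irec);
 *	if (fa)
 *		return -EFSCORRUPTED;
 *
 * xfs_bmap_query_range_helper() below follows this pattern and additionally
 * marks the btree sick and complains about the bad record.
 */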

/*
 * Used in xfs_itruncate_extents().  This is the maximum number of extents
 * freed from a file in a single transaction.
 */
#define XFS_ITRUNC_MAX_EXTENTS	2

/*
 * Unmap every extent in part of an inode's fork.  We don't do any higher level
 * invalidation work at all.
 */
int
xfs_bunmapi_range(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	uint32_t		flags,
	xfs_fileoff_t		startoff,
	xfs_fileoff_t		endoff)
{
	xfs_filblks_t		unmap_len = endoff - startoff + 1;
	int			error = 0;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);

	while (unmap_len > 0) {
		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
		error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags,
				XFS_ITRUNC_MAX_EXTENTS);
		if (error)
			goto out;

		/* free the just unmapped extents */
		error = xfs_defer_finish(tpp);
		if (error)
			goto out;
		cond_resched();
	}
out:
	return error;
}

struct xfs_bmap_query_range {
	xfs_bmap_query_range_fn	fn;
	void			*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_bmap_query_range_helper(
	struct xfs_btree_cur	*cur,
	const union xfs_btree_rec *rec,
	void			*priv)
{
	struct xfs_bmap_query_range *query = priv;
	struct xfs_bmbt_irec	irec;
	xfs_failaddr_t		fa;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
			&irec);
	if (fa) {
		xfs_btree_mark_sick(cur);
		return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
				cur->bc_ino.whichfork, fa, &irec);
	}

	return query->fn(cur, &irec, query->priv);
}

/* Find all bmaps. */
int
xfs_bmap_query_all(
	struct xfs_btree_cur	*cur,
	xfs_bmap_query_range_fn	fn,
	void			*priv)
{
	struct xfs_bmap_query_range	query = {
		.priv			= priv,
		.fn			= fn,
	};

	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
}

/* Helper function to extract extent size hint from inode */
xfs_extlen_t
xfs_get_extsz_hint(
	struct xfs_inode	*ip)
{
	/*
	 * No point in aligning allocations if we need to COW to actually
	 * write to them.
	 */
	if (xfs_is_always_cow_inode(ip))
		return 0;
	if ((ip->i_diflags & XFS_DIFLAG_EXTSIZE) && ip->i_extsize)
		return ip->i_extsize;
	if (XFS_IS_REALTIME_INODE(ip) &&
	    ip->i_mount->m_sb.sb_rextsize > 1)
		return ip->i_mount->m_sb.sb_rextsize;
	return 0;
}

/*
 * Helper function to extract CoW extent size hint from inode.
 * Between the extent size hint and the CoW extent size hint, we
 * return the greater of the two.  If the value is zero (automatic),
 * use the default size.
 */
xfs_extlen_t
xfs_get_cowextsz_hint(
	struct xfs_inode	*ip)
{
	xfs_extlen_t		a, b;

	a = 0;
	if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
		a = ip->i_cowextsize;
	b = xfs_get_extsz_hint(ip);

	a = max(a, b);
	if (a == 0)
		return XFS_DEFAULT_COWEXTSZ_HINT;
	return a;
}
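
/*
 * Worked example for the hint helpers above (hypothetical values): an inode
 * with XFS_DIFLAG_EXTSIZE set and i_extsize = 16, but no CoW hint, yields a
 * CoW extent size hint of max(0, 16) = 16 blocks; with neither hint set the
 * result falls back to XFS_DEFAULT_COWEXTSZ_HINT.  An always-cow inode
 * reports an extent size hint of 0, so only the CoW hint (or the default)
 * matters there.
 */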