1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_defer.h" 14 #include "xfs_btree.h" 15 #include "xfs_bmap.h" 16 #include "xfs_refcount_btree.h" 17 #include "xfs_alloc.h" 18 #include "xfs_errortag.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_trans.h" 22 #include "xfs_bit.h" 23 #include "xfs_refcount.h" 24 #include "xfs_rmap.h" 25 #include "xfs_ag.h" 26 27 struct kmem_cache *xfs_refcount_intent_cache; 28 29 /* Allowable refcount adjustment amounts. */ 30 enum xfs_refc_adjust_op { 31 XFS_REFCOUNT_ADJUST_INCREASE = 1, 32 XFS_REFCOUNT_ADJUST_DECREASE = -1, 33 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 34 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 35 }; 36 37 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 38 xfs_agblock_t agbno, xfs_extlen_t aglen); 39 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 40 xfs_agblock_t agbno, xfs_extlen_t aglen); 41 42 /* 43 * Look up the first record less than or equal to [bno, len] in the btree 44 * given by cur. 45 */ 46 int 47 xfs_refcount_lookup_le( 48 struct xfs_btree_cur *cur, 49 xfs_agblock_t bno, 50 int *stat) 51 { 52 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 53 XFS_LOOKUP_LE); 54 cur->bc_rec.rc.rc_startblock = bno; 55 cur->bc_rec.rc.rc_blockcount = 0; 56 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 57 } 58 59 /* 60 * Look up the first record greater than or equal to [bno, len] in the btree 61 * given by cur. 62 */ 63 int 64 xfs_refcount_lookup_ge( 65 struct xfs_btree_cur *cur, 66 xfs_agblock_t bno, 67 int *stat) 68 { 69 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 70 XFS_LOOKUP_GE); 71 cur->bc_rec.rc.rc_startblock = bno; 72 cur->bc_rec.rc.rc_blockcount = 0; 73 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 74 } 75 76 /* 77 * Look up the first record equal to [bno, len] in the btree 78 * given by cur. 79 */ 80 int 81 xfs_refcount_lookup_eq( 82 struct xfs_btree_cur *cur, 83 xfs_agblock_t bno, 84 int *stat) 85 { 86 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 87 XFS_LOOKUP_LE); 88 cur->bc_rec.rc.rc_startblock = bno; 89 cur->bc_rec.rc.rc_blockcount = 0; 90 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 91 } 92 93 /* Convert on-disk record to in-core format. */ 94 void 95 xfs_refcount_btrec_to_irec( 96 const union xfs_btree_rec *rec, 97 struct xfs_refcount_irec *irec) 98 { 99 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 100 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 101 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 102 } 103 104 /* 105 * Get the data from the pointed-to record. 106 */ 107 int 108 xfs_refcount_get_rec( 109 struct xfs_btree_cur *cur, 110 struct xfs_refcount_irec *irec, 111 int *stat) 112 { 113 struct xfs_mount *mp = cur->bc_mp; 114 struct xfs_perag *pag = cur->bc_ag.pag; 115 union xfs_btree_rec *rec; 116 int error; 117 xfs_agblock_t realstart; 118 119 error = xfs_btree_get_rec(cur, &rec, stat); 120 if (error || !*stat) 121 return error; 122 123 xfs_refcount_btrec_to_irec(rec, irec); 124 if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) 125 goto out_bad_rec; 126 127 /* handle special COW-staging state */ 128 realstart = irec->rc_startblock; 129 if (realstart & XFS_REFC_COW_START) { 130 if (irec->rc_refcount != 1) 131 goto out_bad_rec; 132 realstart &= ~XFS_REFC_COW_START; 133 } else if (irec->rc_refcount < 2) { 134 goto out_bad_rec; 135 } 136 137 /* check for valid extent range, including overflow */ 138 if (!xfs_verify_agbno(pag, realstart)) 139 goto out_bad_rec; 140 if (realstart > realstart + irec->rc_blockcount) 141 goto out_bad_rec; 142 if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) 143 goto out_bad_rec; 144 145 if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) 146 goto out_bad_rec; 147 148 trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec); 149 return 0; 150 151 out_bad_rec: 152 xfs_warn(mp, 153 "Refcount BTree record corruption in AG %d detected!", 154 pag->pag_agno); 155 xfs_warn(mp, 156 "Start block 0x%x, block count 0x%x, references 0x%x", 157 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); 158 return -EFSCORRUPTED; 159 } 160 161 /* 162 * Update the record referred to by cur to the value given 163 * by [bno, len, refcount]. 164 * This either works (return 0) or gets an EFSCORRUPTED error. 165 */ 166 STATIC int 167 xfs_refcount_update( 168 struct xfs_btree_cur *cur, 169 struct xfs_refcount_irec *irec) 170 { 171 union xfs_btree_rec rec; 172 int error; 173 174 trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 175 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 176 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 177 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 178 error = xfs_btree_update(cur, &rec); 179 if (error) 180 trace_xfs_refcount_update_error(cur->bc_mp, 181 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 182 return error; 183 } 184 185 /* 186 * Insert the record referred to by cur to the value given 187 * by [bno, len, refcount]. 188 * This either works (return 0) or gets an EFSCORRUPTED error. 189 */ 190 int 191 xfs_refcount_insert( 192 struct xfs_btree_cur *cur, 193 struct xfs_refcount_irec *irec, 194 int *i) 195 { 196 int error; 197 198 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 199 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 200 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 201 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 202 error = xfs_btree_insert(cur, i); 203 if (error) 204 goto out_error; 205 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 206 error = -EFSCORRUPTED; 207 goto out_error; 208 } 209 210 out_error: 211 if (error) 212 trace_xfs_refcount_insert_error(cur->bc_mp, 213 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 214 return error; 215 } 216 217 /* 218 * Remove the record referred to by cur, then set the pointer to the spot 219 * where the record could be re-inserted, in case we want to increment or 220 * decrement the cursor. 221 * This either works (return 0) or gets an EFSCORRUPTED error. 222 */ 223 STATIC int 224 xfs_refcount_delete( 225 struct xfs_btree_cur *cur, 226 int *i) 227 { 228 struct xfs_refcount_irec irec; 229 int found_rec; 230 int error; 231 232 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 233 if (error) 234 goto out_error; 235 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 236 error = -EFSCORRUPTED; 237 goto out_error; 238 } 239 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); 240 error = xfs_btree_delete(cur, i); 241 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 242 error = -EFSCORRUPTED; 243 goto out_error; 244 } 245 if (error) 246 goto out_error; 247 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 248 out_error: 249 if (error) 250 trace_xfs_refcount_delete_error(cur->bc_mp, 251 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 252 return error; 253 } 254 255 /* 256 * Adjusting the Reference Count 257 * 258 * As stated elsewhere, the reference count btree (refcbt) stores 259 * >1 reference counts for extents of physical blocks. In this 260 * operation, we're either raising or lowering the reference count of 261 * some subrange stored in the tree: 262 * 263 * <------ adjustment range ------> 264 * ----+ +---+-----+ +--+--------+--------- 265 * 2 | | 3 | 4 | |17| 55 | 10 266 * ----+ +---+-----+ +--+--------+--------- 267 * X axis is physical blocks number; 268 * reference counts are the numbers inside the rectangles 269 * 270 * The first thing we need to do is to ensure that there are no 271 * refcount extents crossing either boundary of the range to be 272 * adjusted. For any extent that does cross a boundary, split it into 273 * two extents so that we can increment the refcount of one of the 274 * pieces later: 275 * 276 * <------ adjustment range ------> 277 * ----+ +---+-----+ +--+--------+----+---- 278 * 2 | | 3 | 2 | |17| 55 | 10 | 10 279 * ----+ +---+-----+ +--+--------+----+---- 280 * 281 * For this next step, let's assume that all the physical blocks in 282 * the adjustment range are mapped to a file and are therefore in use 283 * at least once. Therefore, we can infer that any gap in the 284 * refcount tree within the adjustment range represents a physical 285 * extent with refcount == 1: 286 * 287 * <------ adjustment range ------> 288 * ----+---+---+-----+-+--+--------+----+---- 289 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 290 * ----+---+---+-----+-+--+--------+----+---- 291 * ^ 292 * 293 * For each extent that falls within the interval range, figure out 294 * which extent is to the left or the right of that extent. Now we 295 * have a left, current, and right extent. If the new reference count 296 * of the center extent enables us to merge left, center, and right 297 * into one record covering all three, do so. If the center extent is 298 * at the left end of the range, abuts the left extent, and its new 299 * reference count matches the left extent's record, then merge them. 300 * If the center extent is at the right end of the range, abuts the 301 * right extent, and the reference counts match, merge those. In the 302 * example, we can left merge (assuming an increment operation): 303 * 304 * <------ adjustment range ------> 305 * --------+---+-----+-+--+--------+----+---- 306 * 2 | 3 | 2 |1|17| 55 | 10 | 10 307 * --------+---+-----+-+--+--------+----+---- 308 * ^ 309 * 310 * For all other extents within the range, adjust the reference count 311 * or delete it if the refcount falls below 2. If we were 312 * incrementing, the end result looks like this: 313 * 314 * <------ adjustment range ------> 315 * --------+---+-----+-+--+--------+----+---- 316 * 2 | 4 | 3 |2|18| 56 | 11 | 10 317 * --------+---+-----+-+--+--------+----+---- 318 * 319 * The result of a decrement operation looks as such: 320 * 321 * <------ adjustment range ------> 322 * ----+ +---+ +--+--------+----+---- 323 * 2 | | 2 | |16| 54 | 9 | 10 324 * ----+ +---+ +--+--------+----+---- 325 * DDDD 111111DD 326 * 327 * The blocks marked "D" are freed; the blocks marked "1" are only 328 * referenced once and therefore the record is removed from the 329 * refcount btree. 330 */ 331 332 /* Next block after this extent. */ 333 static inline xfs_agblock_t 334 xfs_refc_next( 335 struct xfs_refcount_irec *rc) 336 { 337 return rc->rc_startblock + rc->rc_blockcount; 338 } 339 340 /* 341 * Split a refcount extent that crosses agbno. 342 */ 343 STATIC int 344 xfs_refcount_split_extent( 345 struct xfs_btree_cur *cur, 346 xfs_agblock_t agbno, 347 bool *shape_changed) 348 { 349 struct xfs_refcount_irec rcext, tmp; 350 int found_rec; 351 int error; 352 353 *shape_changed = false; 354 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 355 if (error) 356 goto out_error; 357 if (!found_rec) 358 return 0; 359 360 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 361 if (error) 362 goto out_error; 363 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 364 error = -EFSCORRUPTED; 365 goto out_error; 366 } 367 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 368 return 0; 369 370 *shape_changed = true; 371 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 372 &rcext, agbno); 373 374 /* Establish the right extent. */ 375 tmp = rcext; 376 tmp.rc_startblock = agbno; 377 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 378 error = xfs_refcount_update(cur, &tmp); 379 if (error) 380 goto out_error; 381 382 /* Insert the left extent. */ 383 tmp = rcext; 384 tmp.rc_blockcount = agbno - rcext.rc_startblock; 385 error = xfs_refcount_insert(cur, &tmp, &found_rec); 386 if (error) 387 goto out_error; 388 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 389 error = -EFSCORRUPTED; 390 goto out_error; 391 } 392 return error; 393 394 out_error: 395 trace_xfs_refcount_split_extent_error(cur->bc_mp, 396 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 397 return error; 398 } 399 400 /* 401 * Merge the left, center, and right extents. 402 */ 403 STATIC int 404 xfs_refcount_merge_center_extents( 405 struct xfs_btree_cur *cur, 406 struct xfs_refcount_irec *left, 407 struct xfs_refcount_irec *center, 408 struct xfs_refcount_irec *right, 409 unsigned long long extlen, 410 xfs_extlen_t *aglen) 411 { 412 int error; 413 int found_rec; 414 415 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 416 cur->bc_ag.pag->pag_agno, left, center, right); 417 418 /* 419 * Make sure the center and right extents are not in the btree. 420 * If the center extent was synthesized, the first delete call 421 * removes the right extent and we skip the second deletion. 422 * If center and right were in the btree, then the first delete 423 * call removes the center and the second one removes the right 424 * extent. 425 */ 426 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 427 &found_rec); 428 if (error) 429 goto out_error; 430 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 431 error = -EFSCORRUPTED; 432 goto out_error; 433 } 434 435 error = xfs_refcount_delete(cur, &found_rec); 436 if (error) 437 goto out_error; 438 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 439 error = -EFSCORRUPTED; 440 goto out_error; 441 } 442 443 if (center->rc_refcount > 1) { 444 error = xfs_refcount_delete(cur, &found_rec); 445 if (error) 446 goto out_error; 447 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 448 error = -EFSCORRUPTED; 449 goto out_error; 450 } 451 } 452 453 /* Enlarge the left extent. */ 454 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 455 &found_rec); 456 if (error) 457 goto out_error; 458 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 459 error = -EFSCORRUPTED; 460 goto out_error; 461 } 462 463 left->rc_blockcount = extlen; 464 error = xfs_refcount_update(cur, left); 465 if (error) 466 goto out_error; 467 468 *aglen = 0; 469 return error; 470 471 out_error: 472 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 473 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 474 return error; 475 } 476 477 /* 478 * Merge with the left extent. 479 */ 480 STATIC int 481 xfs_refcount_merge_left_extent( 482 struct xfs_btree_cur *cur, 483 struct xfs_refcount_irec *left, 484 struct xfs_refcount_irec *cleft, 485 xfs_agblock_t *agbno, 486 xfs_extlen_t *aglen) 487 { 488 int error; 489 int found_rec; 490 491 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 492 cur->bc_ag.pag->pag_agno, left, cleft); 493 494 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 495 if (cleft->rc_refcount > 1) { 496 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 497 &found_rec); 498 if (error) 499 goto out_error; 500 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 501 error = -EFSCORRUPTED; 502 goto out_error; 503 } 504 505 error = xfs_refcount_delete(cur, &found_rec); 506 if (error) 507 goto out_error; 508 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 509 error = -EFSCORRUPTED; 510 goto out_error; 511 } 512 } 513 514 /* Enlarge the left extent. */ 515 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 516 &found_rec); 517 if (error) 518 goto out_error; 519 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 520 error = -EFSCORRUPTED; 521 goto out_error; 522 } 523 524 left->rc_blockcount += cleft->rc_blockcount; 525 error = xfs_refcount_update(cur, left); 526 if (error) 527 goto out_error; 528 529 *agbno += cleft->rc_blockcount; 530 *aglen -= cleft->rc_blockcount; 531 return error; 532 533 out_error: 534 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 535 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 536 return error; 537 } 538 539 /* 540 * Merge with the right extent. 541 */ 542 STATIC int 543 xfs_refcount_merge_right_extent( 544 struct xfs_btree_cur *cur, 545 struct xfs_refcount_irec *right, 546 struct xfs_refcount_irec *cright, 547 xfs_extlen_t *aglen) 548 { 549 int error; 550 int found_rec; 551 552 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 553 cur->bc_ag.pag->pag_agno, cright, right); 554 555 /* 556 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 557 * remove it. 558 */ 559 if (cright->rc_refcount > 1) { 560 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 561 &found_rec); 562 if (error) 563 goto out_error; 564 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 565 error = -EFSCORRUPTED; 566 goto out_error; 567 } 568 569 error = xfs_refcount_delete(cur, &found_rec); 570 if (error) 571 goto out_error; 572 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 573 error = -EFSCORRUPTED; 574 goto out_error; 575 } 576 } 577 578 /* Enlarge the right extent. */ 579 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 580 &found_rec); 581 if (error) 582 goto out_error; 583 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 584 error = -EFSCORRUPTED; 585 goto out_error; 586 } 587 588 right->rc_startblock -= cright->rc_blockcount; 589 right->rc_blockcount += cright->rc_blockcount; 590 error = xfs_refcount_update(cur, right); 591 if (error) 592 goto out_error; 593 594 *aglen -= cright->rc_blockcount; 595 return error; 596 597 out_error: 598 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 599 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 600 return error; 601 } 602 603 #define XFS_FIND_RCEXT_SHARED 1 604 #define XFS_FIND_RCEXT_COW 2 605 /* 606 * Find the left extent and the one after it (cleft). This function assumes 607 * that we've already split any extent crossing agbno. 608 */ 609 STATIC int 610 xfs_refcount_find_left_extents( 611 struct xfs_btree_cur *cur, 612 struct xfs_refcount_irec *left, 613 struct xfs_refcount_irec *cleft, 614 xfs_agblock_t agbno, 615 xfs_extlen_t aglen, 616 int flags) 617 { 618 struct xfs_refcount_irec tmp; 619 int error; 620 int found_rec; 621 622 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 623 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 624 if (error) 625 goto out_error; 626 if (!found_rec) 627 return 0; 628 629 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 630 if (error) 631 goto out_error; 632 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 633 error = -EFSCORRUPTED; 634 goto out_error; 635 } 636 637 if (xfs_refc_next(&tmp) != agbno) 638 return 0; 639 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 640 return 0; 641 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 642 return 0; 643 /* We have a left extent; retrieve (or invent) the next right one */ 644 *left = tmp; 645 646 error = xfs_btree_increment(cur, 0, &found_rec); 647 if (error) 648 goto out_error; 649 if (found_rec) { 650 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 651 if (error) 652 goto out_error; 653 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 654 error = -EFSCORRUPTED; 655 goto out_error; 656 } 657 658 /* if tmp starts at the end of our range, just use that */ 659 if (tmp.rc_startblock == agbno) 660 *cleft = tmp; 661 else { 662 /* 663 * There's a gap in the refcntbt at the start of the 664 * range we're interested in (refcount == 1) so 665 * synthesize the implied extent and pass it back. 666 * We assume here that the agbno/aglen range was 667 * passed in from a data fork extent mapping and 668 * therefore is allocated to exactly one owner. 669 */ 670 cleft->rc_startblock = agbno; 671 cleft->rc_blockcount = min(aglen, 672 tmp.rc_startblock - agbno); 673 cleft->rc_refcount = 1; 674 } 675 } else { 676 /* 677 * No extents, so pretend that there's one covering the whole 678 * range. 679 */ 680 cleft->rc_startblock = agbno; 681 cleft->rc_blockcount = aglen; 682 cleft->rc_refcount = 1; 683 } 684 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 685 left, cleft, agbno); 686 return error; 687 688 out_error: 689 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 690 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 691 return error; 692 } 693 694 /* 695 * Find the right extent and the one before it (cright). This function 696 * assumes that we've already split any extents crossing agbno + aglen. 697 */ 698 STATIC int 699 xfs_refcount_find_right_extents( 700 struct xfs_btree_cur *cur, 701 struct xfs_refcount_irec *right, 702 struct xfs_refcount_irec *cright, 703 xfs_agblock_t agbno, 704 xfs_extlen_t aglen, 705 int flags) 706 { 707 struct xfs_refcount_irec tmp; 708 int error; 709 int found_rec; 710 711 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 712 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 713 if (error) 714 goto out_error; 715 if (!found_rec) 716 return 0; 717 718 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 719 if (error) 720 goto out_error; 721 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 722 error = -EFSCORRUPTED; 723 goto out_error; 724 } 725 726 if (tmp.rc_startblock != agbno + aglen) 727 return 0; 728 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 729 return 0; 730 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 731 return 0; 732 /* We have a right extent; retrieve (or invent) the next left one */ 733 *right = tmp; 734 735 error = xfs_btree_decrement(cur, 0, &found_rec); 736 if (error) 737 goto out_error; 738 if (found_rec) { 739 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 740 if (error) 741 goto out_error; 742 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 743 error = -EFSCORRUPTED; 744 goto out_error; 745 } 746 747 /* if tmp ends at the end of our range, just use that */ 748 if (xfs_refc_next(&tmp) == agbno + aglen) 749 *cright = tmp; 750 else { 751 /* 752 * There's a gap in the refcntbt at the end of the 753 * range we're interested in (refcount == 1) so 754 * create the implied extent and pass it back. 755 * We assume here that the agbno/aglen range was 756 * passed in from a data fork extent mapping and 757 * therefore is allocated to exactly one owner. 758 */ 759 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 760 cright->rc_blockcount = right->rc_startblock - 761 cright->rc_startblock; 762 cright->rc_refcount = 1; 763 } 764 } else { 765 /* 766 * No extents, so pretend that there's one covering the whole 767 * range. 768 */ 769 cright->rc_startblock = agbno; 770 cright->rc_blockcount = aglen; 771 cright->rc_refcount = 1; 772 } 773 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 774 cright, right, agbno + aglen); 775 return error; 776 777 out_error: 778 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 779 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 780 return error; 781 } 782 783 /* Is this extent valid? */ 784 static inline bool 785 xfs_refc_valid( 786 struct xfs_refcount_irec *rc) 787 { 788 return rc->rc_startblock != NULLAGBLOCK; 789 } 790 791 /* 792 * Try to merge with any extents on the boundaries of the adjustment range. 793 */ 794 STATIC int 795 xfs_refcount_merge_extents( 796 struct xfs_btree_cur *cur, 797 xfs_agblock_t *agbno, 798 xfs_extlen_t *aglen, 799 enum xfs_refc_adjust_op adjust, 800 int flags, 801 bool *shape_changed) 802 { 803 struct xfs_refcount_irec left = {0}, cleft = {0}; 804 struct xfs_refcount_irec cright = {0}, right = {0}; 805 int error; 806 unsigned long long ulen; 807 bool cequal; 808 809 *shape_changed = false; 810 /* 811 * Find the extent just below agbno [left], just above agbno [cleft], 812 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 813 * [right]. 814 */ 815 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 816 *aglen, flags); 817 if (error) 818 return error; 819 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 820 *aglen, flags); 821 if (error) 822 return error; 823 824 /* No left or right extent to merge; exit. */ 825 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 826 return 0; 827 828 cequal = (cleft.rc_startblock == cright.rc_startblock) && 829 (cleft.rc_blockcount == cright.rc_blockcount); 830 831 /* Try to merge left, cleft, and right. cleft must == cright. */ 832 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 833 right.rc_blockcount; 834 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 835 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 836 left.rc_refcount == cleft.rc_refcount + adjust && 837 right.rc_refcount == cleft.rc_refcount + adjust && 838 ulen < MAXREFCEXTLEN) { 839 *shape_changed = true; 840 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 841 &right, ulen, aglen); 842 } 843 844 /* Try to merge left and cleft. */ 845 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 846 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 847 left.rc_refcount == cleft.rc_refcount + adjust && 848 ulen < MAXREFCEXTLEN) { 849 *shape_changed = true; 850 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 851 agbno, aglen); 852 if (error) 853 return error; 854 855 /* 856 * If we just merged left + cleft and cleft == cright, 857 * we no longer have a cright to merge with right. We're done. 858 */ 859 if (cequal) 860 return 0; 861 } 862 863 /* Try to merge cright and right. */ 864 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 865 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 866 right.rc_refcount == cright.rc_refcount + adjust && 867 ulen < MAXREFCEXTLEN) { 868 *shape_changed = true; 869 return xfs_refcount_merge_right_extent(cur, &right, &cright, 870 aglen); 871 } 872 873 return error; 874 } 875 876 /* 877 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 878 * true incorrectly is a shutdown FS; the penalty for guessing false 879 * incorrectly is more transaction rolls than might be necessary. 880 * Be conservative here. 881 */ 882 static bool 883 xfs_refcount_still_have_space( 884 struct xfs_btree_cur *cur) 885 { 886 unsigned long overhead; 887 888 /* 889 * Worst case estimate: full splits of the free space and rmap btrees 890 * to handle each of the shape changes to the refcount btree. 891 */ 892 overhead = xfs_allocfree_block_count(cur->bc_mp, 893 cur->bc_ag.refc.shape_changes); 894 overhead += cur->bc_mp->m_refc_maxlevels; 895 overhead *= cur->bc_mp->m_sb.sb_blocksize; 896 897 /* 898 * Only allow 2 refcount extent updates per transaction if the 899 * refcount continue update "error" has been injected. 900 */ 901 if (cur->bc_ag.refc.nr_ops > 2 && 902 XFS_TEST_ERROR(false, cur->bc_mp, 903 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) 904 return false; 905 906 if (cur->bc_ag.refc.nr_ops == 0) 907 return true; 908 else if (overhead > cur->bc_tp->t_log_res) 909 return false; 910 return cur->bc_tp->t_log_res - overhead > 911 cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; 912 } 913 914 /* 915 * Adjust the refcounts of middle extents. At this point we should have 916 * split extents that crossed the adjustment range; merged with adjacent 917 * extents; and updated agbno/aglen to reflect the merges. Therefore, 918 * all we have to do is update the extents inside [agbno, agbno + aglen]. 919 */ 920 STATIC int 921 xfs_refcount_adjust_extents( 922 struct xfs_btree_cur *cur, 923 xfs_agblock_t *agbno, 924 xfs_extlen_t *aglen, 925 enum xfs_refc_adjust_op adj) 926 { 927 struct xfs_refcount_irec ext, tmp; 928 int error; 929 int found_rec, found_tmp; 930 xfs_fsblock_t fsbno; 931 932 /* Merging did all the work already. */ 933 if (*aglen == 0) 934 return 0; 935 936 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 937 if (error) 938 goto out_error; 939 940 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 941 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 942 if (error) 943 goto out_error; 944 if (!found_rec) { 945 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 946 ext.rc_blockcount = 0; 947 ext.rc_refcount = 0; 948 } 949 950 /* 951 * Deal with a hole in the refcount tree; if a file maps to 952 * these blocks and there's no refcountbt record, pretend that 953 * there is one with refcount == 1. 954 */ 955 if (ext.rc_startblock != *agbno) { 956 tmp.rc_startblock = *agbno; 957 tmp.rc_blockcount = min(*aglen, 958 ext.rc_startblock - *agbno); 959 tmp.rc_refcount = 1 + adj; 960 trace_xfs_refcount_modify_extent(cur->bc_mp, 961 cur->bc_ag.pag->pag_agno, &tmp); 962 963 /* 964 * Either cover the hole (increment) or 965 * delete the range (decrement). 966 */ 967 cur->bc_ag.refc.nr_ops++; 968 if (tmp.rc_refcount) { 969 error = xfs_refcount_insert(cur, &tmp, 970 &found_tmp); 971 if (error) 972 goto out_error; 973 if (XFS_IS_CORRUPT(cur->bc_mp, 974 found_tmp != 1)) { 975 error = -EFSCORRUPTED; 976 goto out_error; 977 } 978 } else { 979 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 980 cur->bc_ag.pag->pag_agno, 981 tmp.rc_startblock); 982 xfs_free_extent_later(cur->bc_tp, fsbno, 983 tmp.rc_blockcount, NULL); 984 } 985 986 (*agbno) += tmp.rc_blockcount; 987 (*aglen) -= tmp.rc_blockcount; 988 989 error = xfs_refcount_lookup_ge(cur, *agbno, 990 &found_rec); 991 if (error) 992 goto out_error; 993 } 994 995 /* Stop if there's nothing left to modify */ 996 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 997 break; 998 999 /* 1000 * Adjust the reference count and either update the tree 1001 * (incr) or free the blocks (decr). 1002 */ 1003 if (ext.rc_refcount == MAXREFCOUNT) 1004 goto skip; 1005 ext.rc_refcount += adj; 1006 trace_xfs_refcount_modify_extent(cur->bc_mp, 1007 cur->bc_ag.pag->pag_agno, &ext); 1008 cur->bc_ag.refc.nr_ops++; 1009 if (ext.rc_refcount > 1) { 1010 error = xfs_refcount_update(cur, &ext); 1011 if (error) 1012 goto out_error; 1013 } else if (ext.rc_refcount == 1) { 1014 error = xfs_refcount_delete(cur, &found_rec); 1015 if (error) 1016 goto out_error; 1017 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1018 error = -EFSCORRUPTED; 1019 goto out_error; 1020 } 1021 goto advloop; 1022 } else { 1023 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 1024 cur->bc_ag.pag->pag_agno, 1025 ext.rc_startblock); 1026 xfs_free_extent_later(cur->bc_tp, fsbno, 1027 ext.rc_blockcount, NULL); 1028 } 1029 1030 skip: 1031 error = xfs_btree_increment(cur, 0, &found_rec); 1032 if (error) 1033 goto out_error; 1034 1035 advloop: 1036 (*agbno) += ext.rc_blockcount; 1037 (*aglen) -= ext.rc_blockcount; 1038 } 1039 1040 return error; 1041 out_error: 1042 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1043 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1044 return error; 1045 } 1046 1047 /* Adjust the reference count of a range of AG blocks. */ 1048 STATIC int 1049 xfs_refcount_adjust( 1050 struct xfs_btree_cur *cur, 1051 xfs_agblock_t agbno, 1052 xfs_extlen_t aglen, 1053 xfs_agblock_t *new_agbno, 1054 xfs_extlen_t *new_aglen, 1055 enum xfs_refc_adjust_op adj) 1056 { 1057 bool shape_changed; 1058 int shape_changes = 0; 1059 int error; 1060 1061 *new_agbno = agbno; 1062 *new_aglen = aglen; 1063 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 1064 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1065 agbno, aglen); 1066 else 1067 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1068 agbno, aglen); 1069 1070 /* 1071 * Ensure that no rcextents cross the boundary of the adjustment range. 1072 */ 1073 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1074 if (error) 1075 goto out_error; 1076 if (shape_changed) 1077 shape_changes++; 1078 1079 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1080 if (error) 1081 goto out_error; 1082 if (shape_changed) 1083 shape_changes++; 1084 1085 /* 1086 * Try to merge with the left or right extents of the range. 1087 */ 1088 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 1089 XFS_FIND_RCEXT_SHARED, &shape_changed); 1090 if (error) 1091 goto out_error; 1092 if (shape_changed) 1093 shape_changes++; 1094 if (shape_changes) 1095 cur->bc_ag.refc.shape_changes++; 1096 1097 /* Now that we've taken care of the ends, adjust the middle extents */ 1098 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); 1099 if (error) 1100 goto out_error; 1101 1102 return 0; 1103 1104 out_error: 1105 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1106 error, _RET_IP_); 1107 return error; 1108 } 1109 1110 /* Clean up after calling xfs_refcount_finish_one. */ 1111 void 1112 xfs_refcount_finish_one_cleanup( 1113 struct xfs_trans *tp, 1114 struct xfs_btree_cur *rcur, 1115 int error) 1116 { 1117 struct xfs_buf *agbp; 1118 1119 if (rcur == NULL) 1120 return; 1121 agbp = rcur->bc_ag.agbp; 1122 xfs_btree_del_cursor(rcur, error); 1123 if (error) 1124 xfs_trans_brelse(tp, agbp); 1125 } 1126 1127 /* 1128 * Process one of the deferred refcount operations. We pass back the 1129 * btree cursor to maintain our lock on the btree between calls. 1130 * This saves time and eliminates a buffer deadlock between the 1131 * superblock and the AGF because we'll always grab them in the same 1132 * order. 1133 */ 1134 int 1135 xfs_refcount_finish_one( 1136 struct xfs_trans *tp, 1137 enum xfs_refcount_intent_type type, 1138 xfs_fsblock_t startblock, 1139 xfs_extlen_t blockcount, 1140 xfs_fsblock_t *new_fsb, 1141 xfs_extlen_t *new_len, 1142 struct xfs_btree_cur **pcur) 1143 { 1144 struct xfs_mount *mp = tp->t_mountp; 1145 struct xfs_btree_cur *rcur; 1146 struct xfs_buf *agbp = NULL; 1147 int error = 0; 1148 xfs_agblock_t bno; 1149 xfs_agblock_t new_agbno; 1150 unsigned long nr_ops = 0; 1151 int shape_changes = 0; 1152 struct xfs_perag *pag; 1153 1154 pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); 1155 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1156 1157 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1158 type, XFS_FSB_TO_AGBNO(mp, startblock), 1159 blockcount); 1160 1161 if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { 1162 error = -EIO; 1163 goto out_drop; 1164 } 1165 1166 /* 1167 * If we haven't gotten a cursor or the cursor AG doesn't match 1168 * the startblock, get one now. 1169 */ 1170 rcur = *pcur; 1171 if (rcur != NULL && rcur->bc_ag.pag != pag) { 1172 nr_ops = rcur->bc_ag.refc.nr_ops; 1173 shape_changes = rcur->bc_ag.refc.shape_changes; 1174 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1175 rcur = NULL; 1176 *pcur = NULL; 1177 } 1178 if (rcur == NULL) { 1179 error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING, 1180 &agbp); 1181 if (error) 1182 goto out_drop; 1183 1184 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); 1185 rcur->bc_ag.refc.nr_ops = nr_ops; 1186 rcur->bc_ag.refc.shape_changes = shape_changes; 1187 } 1188 *pcur = rcur; 1189 1190 switch (type) { 1191 case XFS_REFCOUNT_INCREASE: 1192 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1193 new_len, XFS_REFCOUNT_ADJUST_INCREASE); 1194 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); 1195 break; 1196 case XFS_REFCOUNT_DECREASE: 1197 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1198 new_len, XFS_REFCOUNT_ADJUST_DECREASE); 1199 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); 1200 break; 1201 case XFS_REFCOUNT_ALLOC_COW: 1202 *new_fsb = startblock + blockcount; 1203 *new_len = 0; 1204 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); 1205 break; 1206 case XFS_REFCOUNT_FREE_COW: 1207 *new_fsb = startblock + blockcount; 1208 *new_len = 0; 1209 error = __xfs_refcount_cow_free(rcur, bno, blockcount); 1210 break; 1211 default: 1212 ASSERT(0); 1213 error = -EFSCORRUPTED; 1214 } 1215 if (!error && *new_len > 0) 1216 trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, 1217 bno, blockcount, new_agbno, *new_len); 1218 out_drop: 1219 xfs_perag_put(pag); 1220 return error; 1221 } 1222 1223 /* 1224 * Record a refcount intent for later processing. 1225 */ 1226 static void 1227 __xfs_refcount_add( 1228 struct xfs_trans *tp, 1229 enum xfs_refcount_intent_type type, 1230 xfs_fsblock_t startblock, 1231 xfs_extlen_t blockcount) 1232 { 1233 struct xfs_refcount_intent *ri; 1234 1235 trace_xfs_refcount_defer(tp->t_mountp, 1236 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), 1237 type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 1238 blockcount); 1239 1240 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 1241 GFP_NOFS | __GFP_NOFAIL); 1242 INIT_LIST_HEAD(&ri->ri_list); 1243 ri->ri_type = type; 1244 ri->ri_startblock = startblock; 1245 ri->ri_blockcount = blockcount; 1246 1247 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1248 } 1249 1250 /* 1251 * Increase the reference count of the blocks backing a file's extent. 1252 */ 1253 void 1254 xfs_refcount_increase_extent( 1255 struct xfs_trans *tp, 1256 struct xfs_bmbt_irec *PREV) 1257 { 1258 if (!xfs_has_reflink(tp->t_mountp)) 1259 return; 1260 1261 __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, 1262 PREV->br_blockcount); 1263 } 1264 1265 /* 1266 * Decrease the reference count of the blocks backing a file's extent. 1267 */ 1268 void 1269 xfs_refcount_decrease_extent( 1270 struct xfs_trans *tp, 1271 struct xfs_bmbt_irec *PREV) 1272 { 1273 if (!xfs_has_reflink(tp->t_mountp)) 1274 return; 1275 1276 __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, 1277 PREV->br_blockcount); 1278 } 1279 1280 /* 1281 * Given an AG extent, find the lowest-numbered run of shared blocks 1282 * within that range and return the range in fbno/flen. If 1283 * find_end_of_shared is set, return the longest contiguous extent of 1284 * shared blocks; if not, just return the first extent we find. If no 1285 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1286 * and 0, respectively. 1287 */ 1288 int 1289 xfs_refcount_find_shared( 1290 struct xfs_btree_cur *cur, 1291 xfs_agblock_t agbno, 1292 xfs_extlen_t aglen, 1293 xfs_agblock_t *fbno, 1294 xfs_extlen_t *flen, 1295 bool find_end_of_shared) 1296 { 1297 struct xfs_refcount_irec tmp; 1298 int i; 1299 int have; 1300 int error; 1301 1302 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1303 agbno, aglen); 1304 1305 /* By default, skip the whole range */ 1306 *fbno = NULLAGBLOCK; 1307 *flen = 0; 1308 1309 /* Try to find a refcount extent that crosses the start */ 1310 error = xfs_refcount_lookup_le(cur, agbno, &have); 1311 if (error) 1312 goto out_error; 1313 if (!have) { 1314 /* No left extent, look at the next one */ 1315 error = xfs_btree_increment(cur, 0, &have); 1316 if (error) 1317 goto out_error; 1318 if (!have) 1319 goto done; 1320 } 1321 error = xfs_refcount_get_rec(cur, &tmp, &i); 1322 if (error) 1323 goto out_error; 1324 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1325 error = -EFSCORRUPTED; 1326 goto out_error; 1327 } 1328 1329 /* If the extent ends before the start, look at the next one */ 1330 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1331 error = xfs_btree_increment(cur, 0, &have); 1332 if (error) 1333 goto out_error; 1334 if (!have) 1335 goto done; 1336 error = xfs_refcount_get_rec(cur, &tmp, &i); 1337 if (error) 1338 goto out_error; 1339 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1340 error = -EFSCORRUPTED; 1341 goto out_error; 1342 } 1343 } 1344 1345 /* If the extent starts after the range we want, bail out */ 1346 if (tmp.rc_startblock >= agbno + aglen) 1347 goto done; 1348 1349 /* We found the start of a shared extent! */ 1350 if (tmp.rc_startblock < agbno) { 1351 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1352 tmp.rc_startblock = agbno; 1353 } 1354 1355 *fbno = tmp.rc_startblock; 1356 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1357 if (!find_end_of_shared) 1358 goto done; 1359 1360 /* Otherwise, find the end of this shared extent */ 1361 while (*fbno + *flen < agbno + aglen) { 1362 error = xfs_btree_increment(cur, 0, &have); 1363 if (error) 1364 goto out_error; 1365 if (!have) 1366 break; 1367 error = xfs_refcount_get_rec(cur, &tmp, &i); 1368 if (error) 1369 goto out_error; 1370 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1371 error = -EFSCORRUPTED; 1372 goto out_error; 1373 } 1374 if (tmp.rc_startblock >= agbno + aglen || 1375 tmp.rc_startblock != *fbno + *flen) 1376 break; 1377 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1378 } 1379 1380 done: 1381 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1382 cur->bc_ag.pag->pag_agno, *fbno, *flen); 1383 1384 out_error: 1385 if (error) 1386 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1387 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1388 return error; 1389 } 1390 1391 /* 1392 * Recovering CoW Blocks After a Crash 1393 * 1394 * Due to the way that the copy on write mechanism works, there's a window of 1395 * opportunity in which we can lose track of allocated blocks during a crash. 1396 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1397 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1398 * no longer in the free space btree but are not otherwise recorded anywhere 1399 * until the write completes and the blocks are mapped into the file. A crash 1400 * in between allocation and remapping results in the replacement blocks being 1401 * lost. This situation is exacerbated by the CoW extent size hint because 1402 * allocations can hang around for long time. 1403 * 1404 * However, there is a place where we can record these allocations before they 1405 * become mappings -- the reference count btree. The btree does not record 1406 * extents with refcount == 1, so we can record allocations with a refcount of 1407 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1408 * no conflict with shared block records. These mappings should be created 1409 * when we allocate blocks to the CoW fork and deleted when they're removed 1410 * from the CoW fork. 1411 * 1412 * Minor nit: records for in-progress CoW allocations and records for shared 1413 * extents must never be merged, to preserve the property that (except for CoW 1414 * allocations) there are no refcount btree entries with refcount == 1. The 1415 * only time this could potentially happen is when unsharing a block that's 1416 * adjacent to CoW allocations, so we must be careful to avoid this. 1417 * 1418 * At mount time we recover lost CoW allocations by searching the refcount 1419 * btree for these refcount == 1 mappings. These represent CoW allocations 1420 * that were in progress at the time the filesystem went down, so we can free 1421 * them to get the space back. 1422 * 1423 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1424 * several reasons -- first, EFIs pin the tail of the log and would have to be 1425 * periodically relogged to avoid filling up the log. Second, CoW completions 1426 * will have to file an EFD and create new EFIs for whatever remains in the 1427 * CoW fork; this partially takes care of (1) but extent-size reservations 1428 * will have to periodically relog even if there's no writeout in progress. 1429 * This can happen if the CoW extent size hint is set, which you really want. 1430 * Third, EFIs cannot currently be automatically relogged into newer 1431 * transactions to advance the log tail. Fourth, stuffing the log full of 1432 * EFIs places an upper bound on the number of CoW allocations that can be 1433 * held filesystem-wide at any given time. Recording them in the refcount 1434 * btree doesn't require us to maintain any state in memory and doesn't pin 1435 * the log. 1436 */ 1437 /* 1438 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1439 * in that they're not referenced anywhere else in the filesystem, so we 1440 * stash them in the refcount btree with a refcount of 1 until either file 1441 * remapping (or CoW cancellation) happens. 1442 */ 1443 STATIC int 1444 xfs_refcount_adjust_cow_extents( 1445 struct xfs_btree_cur *cur, 1446 xfs_agblock_t agbno, 1447 xfs_extlen_t aglen, 1448 enum xfs_refc_adjust_op adj) 1449 { 1450 struct xfs_refcount_irec ext, tmp; 1451 int error; 1452 int found_rec, found_tmp; 1453 1454 if (aglen == 0) 1455 return 0; 1456 1457 /* Find any overlapping refcount records */ 1458 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1459 if (error) 1460 goto out_error; 1461 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1462 if (error) 1463 goto out_error; 1464 if (!found_rec) { 1465 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1466 XFS_REFC_COW_START; 1467 ext.rc_blockcount = 0; 1468 ext.rc_refcount = 0; 1469 } 1470 1471 switch (adj) { 1472 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1473 /* Adding a CoW reservation, there should be nothing here. */ 1474 if (XFS_IS_CORRUPT(cur->bc_mp, 1475 agbno + aglen > ext.rc_startblock)) { 1476 error = -EFSCORRUPTED; 1477 goto out_error; 1478 } 1479 1480 tmp.rc_startblock = agbno; 1481 tmp.rc_blockcount = aglen; 1482 tmp.rc_refcount = 1; 1483 trace_xfs_refcount_modify_extent(cur->bc_mp, 1484 cur->bc_ag.pag->pag_agno, &tmp); 1485 1486 error = xfs_refcount_insert(cur, &tmp, 1487 &found_tmp); 1488 if (error) 1489 goto out_error; 1490 if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { 1491 error = -EFSCORRUPTED; 1492 goto out_error; 1493 } 1494 break; 1495 case XFS_REFCOUNT_ADJUST_COW_FREE: 1496 /* Removing a CoW reservation, there should be one extent. */ 1497 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { 1498 error = -EFSCORRUPTED; 1499 goto out_error; 1500 } 1501 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { 1502 error = -EFSCORRUPTED; 1503 goto out_error; 1504 } 1505 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { 1506 error = -EFSCORRUPTED; 1507 goto out_error; 1508 } 1509 1510 ext.rc_refcount = 0; 1511 trace_xfs_refcount_modify_extent(cur->bc_mp, 1512 cur->bc_ag.pag->pag_agno, &ext); 1513 error = xfs_refcount_delete(cur, &found_rec); 1514 if (error) 1515 goto out_error; 1516 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1517 error = -EFSCORRUPTED; 1518 goto out_error; 1519 } 1520 break; 1521 default: 1522 ASSERT(0); 1523 } 1524 1525 return error; 1526 out_error: 1527 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1528 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1529 return error; 1530 } 1531 1532 /* 1533 * Add or remove refcount btree entries for CoW reservations. 1534 */ 1535 STATIC int 1536 xfs_refcount_adjust_cow( 1537 struct xfs_btree_cur *cur, 1538 xfs_agblock_t agbno, 1539 xfs_extlen_t aglen, 1540 enum xfs_refc_adjust_op adj) 1541 { 1542 bool shape_changed; 1543 int error; 1544 1545 agbno += XFS_REFC_COW_START; 1546 1547 /* 1548 * Ensure that no rcextents cross the boundary of the adjustment range. 1549 */ 1550 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1551 if (error) 1552 goto out_error; 1553 1554 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1555 if (error) 1556 goto out_error; 1557 1558 /* 1559 * Try to merge with the left or right extents of the range. 1560 */ 1561 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1562 XFS_FIND_RCEXT_COW, &shape_changed); 1563 if (error) 1564 goto out_error; 1565 1566 /* Now that we've taken care of the ends, adjust the middle extents */ 1567 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); 1568 if (error) 1569 goto out_error; 1570 1571 return 0; 1572 1573 out_error: 1574 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1575 error, _RET_IP_); 1576 return error; 1577 } 1578 1579 /* 1580 * Record a CoW allocation in the refcount btree. 1581 */ 1582 STATIC int 1583 __xfs_refcount_cow_alloc( 1584 struct xfs_btree_cur *rcur, 1585 xfs_agblock_t agbno, 1586 xfs_extlen_t aglen) 1587 { 1588 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1589 agbno, aglen); 1590 1591 /* Add refcount btree reservation */ 1592 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1593 XFS_REFCOUNT_ADJUST_COW_ALLOC); 1594 } 1595 1596 /* 1597 * Remove a CoW allocation from the refcount btree. 1598 */ 1599 STATIC int 1600 __xfs_refcount_cow_free( 1601 struct xfs_btree_cur *rcur, 1602 xfs_agblock_t agbno, 1603 xfs_extlen_t aglen) 1604 { 1605 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1606 agbno, aglen); 1607 1608 /* Remove refcount btree reservation */ 1609 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1610 XFS_REFCOUNT_ADJUST_COW_FREE); 1611 } 1612 1613 /* Record a CoW staging extent in the refcount btree. */ 1614 void 1615 xfs_refcount_alloc_cow_extent( 1616 struct xfs_trans *tp, 1617 xfs_fsblock_t fsb, 1618 xfs_extlen_t len) 1619 { 1620 struct xfs_mount *mp = tp->t_mountp; 1621 1622 if (!xfs_has_reflink(mp)) 1623 return; 1624 1625 __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); 1626 1627 /* Add rmap entry */ 1628 xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1629 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1630 } 1631 1632 /* Forget a CoW staging event in the refcount btree. */ 1633 void 1634 xfs_refcount_free_cow_extent( 1635 struct xfs_trans *tp, 1636 xfs_fsblock_t fsb, 1637 xfs_extlen_t len) 1638 { 1639 struct xfs_mount *mp = tp->t_mountp; 1640 1641 if (!xfs_has_reflink(mp)) 1642 return; 1643 1644 /* Remove rmap entry */ 1645 xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1646 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1647 __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); 1648 } 1649 1650 struct xfs_refcount_recovery { 1651 struct list_head rr_list; 1652 struct xfs_refcount_irec rr_rrec; 1653 }; 1654 1655 /* Stuff an extent on the recovery list. */ 1656 STATIC int 1657 xfs_refcount_recover_extent( 1658 struct xfs_btree_cur *cur, 1659 const union xfs_btree_rec *rec, 1660 void *priv) 1661 { 1662 struct list_head *debris = priv; 1663 struct xfs_refcount_recovery *rr; 1664 1665 if (XFS_IS_CORRUPT(cur->bc_mp, 1666 be32_to_cpu(rec->refc.rc_refcount) != 1)) 1667 return -EFSCORRUPTED; 1668 1669 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); 1670 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1671 list_add_tail(&rr->rr_list, debris); 1672 1673 return 0; 1674 } 1675 1676 /* Find and remove leftover CoW reservations. */ 1677 int 1678 xfs_refcount_recover_cow_leftovers( 1679 struct xfs_mount *mp, 1680 struct xfs_perag *pag) 1681 { 1682 struct xfs_trans *tp; 1683 struct xfs_btree_cur *cur; 1684 struct xfs_buf *agbp; 1685 struct xfs_refcount_recovery *rr, *n; 1686 struct list_head debris; 1687 union xfs_btree_irec low; 1688 union xfs_btree_irec high; 1689 xfs_fsblock_t fsb; 1690 xfs_agblock_t agbno; 1691 int error; 1692 1693 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1694 return -EOPNOTSUPP; 1695 1696 INIT_LIST_HEAD(&debris); 1697 1698 /* 1699 * In this first part, we use an empty transaction to gather up 1700 * all the leftover CoW extents so that we can subsequently 1701 * delete them. The empty transaction is used to avoid 1702 * a buffer lock deadlock if there happens to be a loop in the 1703 * refcountbt because we're allowed to re-grab a buffer that is 1704 * already attached to our transaction. When we're done 1705 * recording the CoW debris we cancel the (empty) transaction 1706 * and everything goes away cleanly. 1707 */ 1708 error = xfs_trans_alloc_empty(mp, &tp); 1709 if (error) 1710 return error; 1711 1712 error = xfs_alloc_read_agf(pag, tp, 0, &agbp); 1713 if (error) 1714 goto out_trans; 1715 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); 1716 1717 /* Find all the leftover CoW staging extents. */ 1718 memset(&low, 0, sizeof(low)); 1719 memset(&high, 0, sizeof(high)); 1720 low.rc.rc_startblock = XFS_REFC_COW_START; 1721 high.rc.rc_startblock = -1U; 1722 error = xfs_btree_query_range(cur, &low, &high, 1723 xfs_refcount_recover_extent, &debris); 1724 xfs_btree_del_cursor(cur, error); 1725 xfs_trans_brelse(tp, agbp); 1726 xfs_trans_cancel(tp); 1727 if (error) 1728 goto out_free; 1729 1730 /* Now iterate the list to free the leftovers */ 1731 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1732 /* Set up transaction. */ 1733 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1734 if (error) 1735 goto out_free; 1736 1737 trace_xfs_refcount_recover_extent(mp, pag->pag_agno, 1738 &rr->rr_rrec); 1739 1740 /* Free the orphan record */ 1741 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1742 fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); 1743 xfs_refcount_free_cow_extent(tp, fsb, 1744 rr->rr_rrec.rc_blockcount); 1745 1746 /* Free the block. */ 1747 xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); 1748 1749 error = xfs_trans_commit(tp); 1750 if (error) 1751 goto out_free; 1752 1753 list_del(&rr->rr_list); 1754 kmem_free(rr); 1755 } 1756 1757 return error; 1758 out_trans: 1759 xfs_trans_cancel(tp); 1760 out_free: 1761 /* Free the leftover list */ 1762 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1763 list_del(&rr->rr_list); 1764 kmem_free(rr); 1765 } 1766 return error; 1767 } 1768 1769 /* Is there a record covering a given extent? */ 1770 int 1771 xfs_refcount_has_record( 1772 struct xfs_btree_cur *cur, 1773 xfs_agblock_t bno, 1774 xfs_extlen_t len, 1775 bool *exists) 1776 { 1777 union xfs_btree_irec low; 1778 union xfs_btree_irec high; 1779 1780 memset(&low, 0, sizeof(low)); 1781 low.rc.rc_startblock = bno; 1782 memset(&high, 0xFF, sizeof(high)); 1783 high.rc.rc_startblock = bno + len - 1; 1784 1785 return xfs_btree_has_record(cur, &low, &high, exists); 1786 } 1787 1788 int __init 1789 xfs_refcount_intent_init_cache(void) 1790 { 1791 xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", 1792 sizeof(struct xfs_refcount_intent), 1793 0, 0, NULL); 1794 1795 return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; 1796 } 1797 1798 void 1799 xfs_refcount_intent_destroy_cache(void) 1800 { 1801 kmem_cache_destroy(xfs_refcount_intent_cache); 1802 xfs_refcount_intent_cache = NULL; 1803 } 1804