// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
#include "xfs_refcount_item.h"
#include "xfs_rtgroup.h"
#include "xfs_rtalloc.h"
#include "xfs_rtrefcount_btree.h"

struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_EQ);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = be32_to_cpu(rec->refc.rc_startblock);
	if (start & XFS_REFC_COWFLAG) {
		start &= ~XFS_REFC_COWFLAG;
		irec->rc_domain = XFS_REFC_DOMAIN_COW;
	} else {
		irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
	}

	irec->rc_startblock = start;
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Simple checks for refcount records.
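 * Callers such as xfs_refcount_get_rec run these checks on every record
 * read off disk and reject anything that fails with -EFSCORRUPTED.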
 */
xfs_failaddr_t
xfs_refcount_check_irec(
	struct xfs_perag		*pag,
	const struct xfs_refcount_irec	*irec)
{
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX)
		return __this_address;

	if (!xfs_refcount_check_domain(irec))
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
		return __this_address;

	if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX)
		return __this_address;

	return NULL;
}

xfs_failaddr_t
xfs_rtrefcount_check_irec(
	struct xfs_rtgroup		*rtg,
	const struct xfs_refcount_irec	*irec)
{
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX)
		return __this_address;

	if (!xfs_refcount_check_domain(irec))
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_rgbext(rtg, irec->rc_startblock, irec->rc_blockcount))
		return __this_address;

	if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX)
		return __this_address;

	return NULL;
}

static inline xfs_failaddr_t
xfs_refcount_check_btrec(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec)
{
	if (xfs_btree_is_rtrefcount(cur->bc_ops))
		return xfs_rtrefcount_check_irec(to_rtg(cur->bc_group), irec);
	return xfs_refcount_check_irec(to_perag(cur->bc_group), irec);
}

static inline int
xfs_refcount_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_refcount_irec	*irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	if (xfs_btree_is_rtrefcount(cur->bc_ops)) {
		xfs_warn(mp,
 "RT Refcount BTree record corruption in rtgroup %u detected at %pS!",
			cur->bc_group->xg_gno, fa);
	} else {
		xfs_warn(mp,
 "Refcount BTree record corruption in AG %d detected at %pS!",
			cur->bc_group->xg_gno, fa);
	}
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec		*rec;
	xfs_failaddr_t			fa;
	int				error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);
	fa = xfs_refcount_check_btrec(cur, irec);
	if (fa)
		return xfs_refcount_complain_bad_rec(cur, fa, irec);

	trace_xfs_refcount_get(cur, irec);
	return 0;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	uint32_t		start;
	int			error;

	trace_xfs_refcount_update(cur, irec);

	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec.refc.rc_startblock = cpu_to_be32(start);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);

	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur, irec);

	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	cur->bc_rec.rc.rc_domain = irec->rc_domain;

	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int			found_rec;
	int			error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur, &irec);
	error = xfs_btree_delete(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
			&found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.
 * For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
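 * For example, splitting [20, 10] at agbno 25 rewrites the existing
 * record to [25, 5] and inserts a new record [20, 5]; both pieces keep
 * the original reference count.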
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_domain != domain)
		return 0;
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur, &rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur, left, center, right);

	ASSERT(left->rc_domain == center->rc_domain);
	ASSERT(right->rc_domain == center->rc_domain);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_domain,
			center->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/*
	 * Enlarge the left extent.
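	 * The left record absorbs both the center and right records;
	 * extlen was computed by the caller as the sum of all three
	 * block counts and checked against XFS_REFC_LEN_MAX.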
	 */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur, left, cleft);

	ASSERT(left->rc_domain == cleft->rc_domain);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
				cleft->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur, cright, right);

	ASSERT(right->rc_domain == cright->rc_domain);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_domain,
				cright->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/*
	 * Enlarge the right extent.
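	 * The right record absorbs cright: its start block moves down
	 * by cright's length and its block count grows by the same
	 * amount.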
	 */
	error = xfs_refcount_lookup_le(cur, right->rc_domain,
			right->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
			cleft->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
		cleft->rc_domain = domain;
	}
	trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
			cright->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
		cright->rc_domain = domain;
	}
	trace_xfs_refcount_find_right_extent(cur, cright, right,
			agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Is this extent valid?
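 * The find_left/find_right helpers above initialize rc_startblock to
 * NULLAGBLOCK, so this check doubles as a "was a record found or
 * synthesized" test.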
 */
static inline bool
xfs_refc_valid(
	const struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

static inline xfs_nlink_t
xfs_refc_merge_refcount(
	const struct xfs_refcount_irec	*irec,
	enum xfs_refc_adjust_op		adjust)
{
	/* Once a record hits XFS_REFC_REFCOUNT_MAX, it is pinned forever */
	if (irec->rc_refcount == XFS_REFC_REFCOUNT_MAX)
		return XFS_REFC_REFCOUNT_MAX;
	return irec->rc_refcount + adjust;
}

static inline bool
xfs_refc_want_merge_center(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	bool				cleft_is_cright,
	enum xfs_refc_adjust_op		adjust,
	unsigned long long		*ulenp)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * To merge with a center record, both shoulder records must be
	 * adjacent to the record we want to adjust.  This is only true if
	 * find_left and find_right made all four records valid.
	 */
	if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
	    !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
		return false;

	/* There must only be one record for the entire range. */
	if (!cleft_is_cright)
		return false;

	/* The shoulder record refcounts must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount + right->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	*ulenp = ulen;
	return true;
}

static inline bool
xfs_refc_want_merge_left(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a left merge, the left shoulder record must be adjacent to the
	 * start of the range.  If this is true, find_left made left and cleft
	 * contain valid contents.
	 */
	if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
		return false;

	/* Left shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	return true;
}

static inline bool
xfs_refc_want_merge_right(
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = right->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a right merge, the right shoulder record must be adjacent to the
	 * end of the range.  If this is true, find_right made cright and right
	 * contain valid contents.
	 */
	if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
		return false;

	/* Right shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cright, adjust);
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cright->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	return true;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adjust,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
			*agbno, *aglen);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
			*agbno, *aglen);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
				adjust, &ulen)) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return 0;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
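	 * That worst-case block count is converted to bytes and weighed
	 * against the transaction's remaining log reservation, less a
	 * fixed overhead for each refcount update already queued.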
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
			cur->bc_refc.shape_changes);
	overhead += cur->bc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/* Schedule an extent free. */
static int
xrefc_free_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*rec)
{
	unsigned int			flags = 0;

	if (xfs_btree_is_rtrefcount(cur->bc_ops))
		flags |= XFS_FREE_EXTENT_REALTIME;

	return xfs_free_extent_later(cur->bc_tp,
			xfs_gbno_to_fsb(cur->bc_group, rec->rc_startblock),
			rec->rc_blockcount, NULL, XFS_AG_RESV_NONE, flags);
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
			&found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
			ext.rc_startblock = xfs_group_max_blocks(cur->bc_group);
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
			ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;

			trace_xfs_refcount_modify_extent(cur, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
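			 * tmp.rc_refcount is 1 + adj, so an increment
			 * covers the hole with a new refcount == 2
			 * record, while a decrement drops the implied
			 * refcount of 1 to zero and frees the blocks
			 * instead.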
			 */
			cur->bc_refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					xfs_btree_mark_sick(cur);
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				error = xrefc_free_extent(cur, &tmp);
				if (error)
					goto out_error;
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			/* Stop if there's nothing left to modify */
			if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
				break;

			/* Move the cursor to the start of ext. */
			error = xfs_refcount_lookup_ge(cur,
					XFS_REFC_DOMAIN_SHARED, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/*
		 * A previous step trimmed agbno/aglen such that the end of the
		 * range would not be in the middle of the record.  If this is
		 * no longer the case, something is seriously wrong with the
		 * btree.  Make sure we never feed the synthesized record into
		 * the processing loop below.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
		    XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == XFS_REFC_REFCOUNT_MAX)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur, &ext);
		cur->bc_refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				xfs_btree_mark_sick(cur);
				error = -EFSCORRUPTED;
				goto out_error;
			}
			goto advloop;
		} else {
			error = xrefc_free_extent(cur, &ext);
			if (error)
				goto out_error;
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur, *agbno, *aglen);
	else
		trace_xfs_refcount_decrease(cur, *agbno, *aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno + *aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
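	 * A successful merge updates agbno/aglen, and a center merge can
	 * consume the entire range, in which case the adjust step below
	 * sees aglen == 0 and returns immediately.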
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
			agbno, aglen, adj, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Set up a continuation of a deferred refcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the AG.
 */
static inline int
xfs_refcount_continue_op(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_intent	*ri,
	xfs_agblock_t			new_agbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = to_perag(cur->bc_group);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
					ri->ri_blockcount))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno);

	ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
	ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));

	return 0;
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur = *pcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);

	trace_xfs_refcount_deferred(mp, ri);

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
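	 * When trading cursors, carry the running nr_ops and
	 * shape_changes totals over to the new cursor so that the
	 * transaction-space heuristic in xfs_refcount_still_have_space
	 * keeps working across the switch.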
	 */
	if (rcur != NULL && rcur->bc_group != ri->ri_group) {
		nr_ops = rcur->bc_refc.nr_ops;
		shape_changes = rcur->bc_refc.shape_changes;
		xfs_btree_del_cursor(rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		struct xfs_perag	*pag = to_perag(ri->ri_group);

		error = xfs_alloc_read_agf(pag, tp,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;

		*pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
		rcur->bc_refc.nr_ops = nr_ops;
		rcur->bc_refc.shape_changes = shape_changes;
	}

	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	case XFS_REFCOUNT_FREE_COW:
		error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	if (!error && ri->ri_blockcount > 0)
		trace_xfs_refcount_finish_one_leftover(mp, ri);
	return error;
}

/*
 * Set up a continuation of a deferred rtrefcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the
 * rtgroup.
 */
static inline int
xfs_rtrefcount_continue_op(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_intent	*ri,
	xfs_agblock_t			new_agbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_rtgroup		*rtg = to_rtg(ri->ri_group);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_rgbext(rtg, new_agbno,
					ri->ri_blockcount))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	ri->ri_startblock = xfs_rgbno_to_rtb(rtg, new_agbno);

	ASSERT(xfs_verify_rtbext(mp, ri->ri_startblock, ri->ri_blockcount));
	return 0;
}

/*
 * Process one of the deferred realtime refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 */
int
xfs_rtrefcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rtgroup		*rtg = to_rtg(ri->ri_group);
	struct xfs_btree_cur		*rcur = *pcur;
	int				error = 0;
	xfs_rgblock_t			bno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	bno = xfs_rtb_to_rgbno(mp, ri->ri_startblock);

	trace_xfs_refcount_deferred(mp, ri);

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor rtgroup doesn't match
	 * the startblock, get one now.
	 */
	if (rcur != NULL && rcur->bc_group != ri->ri_group) {
		nr_ops = rcur->bc_refc.nr_ops;
		shape_changes = rcur->bc_refc.shape_changes;
		xfs_btree_del_cursor(rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_REFCOUNT);
		xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_REFCOUNT);
		*pcur = rcur = xfs_rtrefcountbt_init_cursor(tp, rtg);

		rcur->bc_refc.nr_ops = nr_ops;
		rcur->bc_refc.shape_changes = shape_changes;
	}

	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_rtrefcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_rtrefcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	case XFS_REFCOUNT_FREE_COW:
		error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	if (!error && ri->ri_blockcount > 0)
		trace_xfs_refcount_finish_one_leftover(mp, ri);
	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	bool				isrt,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;
	ri->ri_realtime = isrt;

	xfs_refcount_defer_add(tp, ri);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	bool				isrt,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, isrt, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	bool				isrt,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, isrt, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
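 * For example, given shared-domain records [8, 4] and [12, 6] and a
 * query of agbno 10, aglen 20: the first record crosses the start of
 * the range and is trimmed, so fbno is 10 and flen is 2.  If
 * find_end_of_shared is set, the second record extends the run and
 * flen becomes 8 instead.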
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur, agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
			&have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
		goto done;

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
			goto done;
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
		    tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.
 * A crash in between allocation and remapping results in the replacement
 * blocks being lost.  This situation is exacerbated by the CoW extent size
 * hint because allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
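 * On disk, these staging records are kept apart from shared extent
 * records by the XFS_REFC_COWFLAG bit in the startblock field, which
 * xfs_refcount_btrec_to_irec translates into XFS_REFC_DOMAIN_COW.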
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
			&found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
				ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (!found_rec) {
		ext.rc_startblock = xfs_group_max_blocks(cur->bc_group);
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
		ext.rc_domain = XFS_REFC_DOMAIN_COW;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		tmp.rc_domain = XFS_REFC_DOMAIN_COW;

		trace_xfs_refcount_modify_extent(cur, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
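	 * Unlike the shared-domain path, we don't count shape changes
	 * here; each CoW reservation update below is a single record
	 * insert or delete.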
/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
			&aglen, adj, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur, agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur, agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans	*tp,
	bool			isrt,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, isrt, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans	*tp,
	bool			isrt,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, isrt, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	rr = kmalloc(sizeof(struct xfs_refcount_recovery),
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&rr->rr_list);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);

	if (xfs_refcount_check_btrec(cur, &rr->rr_rrec) != NULL ||
	    XFS_IS_CORRUPT(cur->bc_mp,
			   rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
		xfs_btree_mark_sick(cur);
		kfree(rr);
		return -EFSCORRUPTED;
	}

	list_add_tail(&rr->rr_list, debris);
	return 0;
}
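/*
 * Note that xfs_refcount_alloc_cow_extent() queues the refcount intent
 * before the rmap intent, while xfs_refcount_free_cow_extent() queues them
 * in the reverse order; both only defer work, so nothing touches the
 * btrees until the intents are finished.  A hypothetical sketch (compiled
 * out) of staging and immediately cancelling one data-device extent:
 */
#ifdef XFS_REFCOUNT_EXAMPLES
STATIC void
xfs_refcount_cow_cancel_example(
	struct xfs_trans	*tp,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	/* Queue XFS_REFCOUNT_ALLOC_COW plus an XFS_RMAP_OWN_COW rmap. */
	xfs_refcount_alloc_cow_extent(tp, false, fsb, len);

	/* Queue the reverse pair to forget the staging extent again. */
	xfs_refcount_free_cow_extent(tp, false, fsb, len);
}
#endif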
/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_group		*xg)
{
	struct xfs_mount		*mp = xg->xg_mount;
	bool				isrt = xg->xg_type == XG_TYPE_RTG;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp = NULL;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
	};
	union xfs_btree_irec		high = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
		.rc.rc_startblock	= -1U,
	};
	xfs_fsblock_t			fsb;
	int				error;

	/* reflink filesystems must not have groups larger than 2^31-1 blocks */
	BUILD_BUG_ON(XFS_MAX_RGBLOCKS >= XFS_REFC_COWFLAG);
	BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);

	if (isrt) {
		if (!xfs_has_rtgroups(mp))
			return 0;
		if (xfs_group_max_blocks(xg) >= XFS_MAX_RGBLOCKS)
			return -EOPNOTSUPP;
	} else {
		if (xfs_group_max_blocks(xg) > XFS_MAX_CRC_AG_BLOCKS)
			return -EOPNOTSUPP;
	}

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	if (isrt) {
		xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
		cur = xfs_rtrefcountbt_init_cursor(tp, to_rtg(xg));
	} else {
		error = xfs_alloc_read_agf(to_perag(xg), tp, 0, &agbp);
		if (error)
			goto out_trans;
		cur = xfs_refcountbt_init_cursor(mp, tp, agbp, to_perag(xg));
	}

	/* Find all the leftover CoW staging extents. */
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	if (agbp)
		xfs_trans_brelse(tp, agbp);
	else
		xfs_rtgroup_unlock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		/* Free the orphan record */
		fsb = xfs_gbno_to_fsb(xg, rr->rr_rrec.rc_startblock);
		xfs_refcount_free_cow_extent(tp, isrt, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		error = xfs_free_extent_later(tp, fsb,
				rr->rr_rrec.rc_blockcount, NULL,
				XFS_AG_RESV_NONE,
				isrt ? XFS_FREE_EXTENT_REALTIME : 0);
		if (error)
			goto out_trans;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kfree(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kfree(rr);
	}
	return error;
}
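/*
 * Usage sketch (hypothetical, compiled out): mount-time recovery calls the
 * function above once per allocation group or realtime group.  The
 * iteration helper xfs_group_next_example() is made up for illustration;
 * only the xfs_refcount_recover_cow_leftovers() call is real.
 */
#ifdef XFS_REFCOUNT_EXAMPLES
STATIC int
xfs_refcount_recover_all_example(
	struct xfs_mount	*mp)
{
	struct xfs_group	*xg = NULL;
	int			error;

	while ((xg = xfs_group_next_example(mp, xg)) != NULL) {
		error = xfs_refcount_recover_cow_leftovers(xg);
		if (error)
			return error;
	}
	return 0;
}
#endif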
/*
 * Scan part of the keyspace of the refcount records and tell us if the area
 * has no records, is fully mapped by records, or is partially filled.
 */
int
xfs_refcount_has_records(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	enum xbtree_recpacking	*outcome)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;
	low.rc.rc_domain = high.rc.rc_domain = domain;

	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}

struct xfs_refcount_query_range_info {
	xfs_refcount_query_range_fn	fn;
	void				*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_refcount_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_refcount_query_range_info	*query = priv;
	struct xfs_refcount_irec		irec;
	xfs_failaddr_t				fa;

	xfs_refcount_btrec_to_irec(rec, &irec);
	fa = xfs_refcount_check_btrec(cur, &irec);
	if (fa)
		return xfs_refcount_complain_bad_rec(cur, fa, &irec);

	return query->fn(cur, &irec, query->priv);
}

/* Find all refcount records between two keys. */
int
xfs_refcount_query_range(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*low_rec,
	const struct xfs_refcount_irec	*high_rec,
	xfs_refcount_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_irec		low_brec = { .rc = *low_rec };
	union xfs_btree_irec		high_brec = { .rc = *high_rec };
	struct xfs_refcount_query_range_info	query = { .priv = priv, .fn = fn };

	return xfs_btree_query_range(cur, &low_brec, &high_brec,
			xfs_refcount_query_range_helper, &query);
}

int __init
xfs_refcount_intent_init_cache(void)
{
	xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
			sizeof(struct xfs_refcount_intent),
			0, 0, NULL);

	return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_refcount_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_refcount_intent_cache);
	xfs_refcount_intent_cache = NULL;
}
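/*
 * Usage sketch (hypothetical, compiled out): counting the shared-domain
 * records that overlap part of the keyspace via xfs_refcount_query_range().
 * The callback signature matches what xfs_refcount_query_range_helper()
 * invokes above; both functions here are made up for illustration.
 */
#ifdef XFS_REFCOUNT_EXAMPLES
STATIC int
xfs_refcount_count_recs_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec,
	void				*priv)
{
	unsigned long long		*nr = priv;

	(*nr)++;
	return 0;
}

STATIC int
xfs_refcount_count_recs_example(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	unsigned long long	*nr)
{
	struct xfs_refcount_irec	low = {
		.rc_startblock	= bno,
		.rc_domain	= XFS_REFC_DOMAIN_SHARED,
	};
	struct xfs_refcount_irec	high = {
		.rc_startblock	= bno + len - 1,
		.rc_blockcount	= ~0U,
		.rc_refcount	= ~0U,
		.rc_domain	= XFS_REFC_DOMAIN_SHARED,
	};

	*nr = 0;
	return xfs_refcount_query_range(cur, &low, &high,
			xfs_refcount_count_recs_helper, nr);
}
#endif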