// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
#include "xfs_refcount_item.h"
#include "xfs_rtgroup.h"
#include "xfs_rtalloc.h"
#include "xfs_rtrefcount_btree.h"

struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_EQ);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = be32_to_cpu(rec->refc.rc_startblock);
	if (start & XFS_REFC_COWFLAG) {
		start &= ~XFS_REFC_COWFLAG;
		irec->rc_domain = XFS_REFC_DOMAIN_COW;
	} else {
		irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
	}

	irec->rc_startblock = start;
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
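
/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * on-disk startblock carries the record's domain in its top bit, which
 * is what XFS_REFC_COWFLAG marks and what the encode/decode above
 * manipulates.  A CoW-domain record encodes as (bno | (1U << 31));
 * decoding masks the flag back off, so a round trip preserves both the
 * block number and the domain.  This is also why group sizes must stay
 * below 2^31 blocks (see the BUILD_BUG_ONs in
 * xfs_refcount_recover_cow_leftovers).  All ex_* names are hypothetical.
 */
#define EX_REFC_COWFLAG		(1U << 31)	/* stands in for XFS_REFC_COWFLAG */

enum ex_refc_domain { EX_REFC_DOMAIN_SHARED, EX_REFC_DOMAIN_COW };

static unsigned int
ex_encode_startblock(unsigned int bno, enum ex_refc_domain domain)
{
	if (domain == EX_REFC_DOMAIN_COW)
		bno |= EX_REFC_COWFLAG;
	return bno;
}

static enum ex_refc_domain
ex_decode_startblock(unsigned int ondisk, unsigned int *bno)
{
	*bno = ondisk & ~EX_REFC_COWFLAG;
	return (ondisk & EX_REFC_COWFLAG) ? EX_REFC_DOMAIN_COW :
					    EX_REFC_DOMAIN_SHARED;
}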
/* Simple checks for refcount records. */
xfs_failaddr_t
xfs_refcount_check_irec(
	struct xfs_perag		*pag,
	const struct xfs_refcount_irec	*irec)
{
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX)
		return __this_address;

	if (!xfs_refcount_check_domain(irec))
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
		return __this_address;

	if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX)
		return __this_address;

	return NULL;
}

xfs_failaddr_t
xfs_rtrefcount_check_irec(
	struct xfs_rtgroup		*rtg,
	const struct xfs_refcount_irec	*irec)
{
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > XFS_REFC_LEN_MAX)
		return __this_address;

	if (!xfs_refcount_check_domain(irec))
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_rgbext(rtg, irec->rc_startblock, irec->rc_blockcount))
		return __this_address;

	if (irec->rc_refcount == 0 || irec->rc_refcount > XFS_REFC_REFCOUNT_MAX)
		return __this_address;

	return NULL;
}

static inline xfs_failaddr_t
xfs_refcount_check_btrec(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec)
{
	if (xfs_btree_is_rtrefcount(cur->bc_ops))
		return xfs_rtrefcount_check_irec(to_rtg(cur->bc_group), irec);
	return xfs_refcount_check_irec(to_perag(cur->bc_group), irec);
}

static inline int
xfs_refcount_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_refcount_irec	*irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	if (xfs_btree_is_rtrefcount(cur->bc_ops)) {
		xfs_warn(mp,
 "RT Refcount BTree record corruption in rtgroup %u detected at %pS!",
				cur->bc_group->xg_gno, fa);
	} else {
		xfs_warn(mp,
 "Refcount BTree record corruption in AG %u detected at %pS!",
				cur->bc_group->xg_gno, fa);
	}
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec		*rec;
	xfs_failaddr_t			fa;
	int				error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);
	fa = xfs_refcount_check_btrec(cur, irec);
	if (fa)
		return xfs_refcount_complain_bad_rec(cur, fa, irec);

	trace_xfs_refcount_get(cur, irec);
	return 0;
}
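
/*
 * Illustrative sketch (not part of the kernel build): the same checks as
 * xfs_refcount_check_irec, over a toy record.  The limits are parameters
 * here because the real values (XFS_REFC_LEN_MAX, XFS_REFC_REFCOUNT_MAX)
 * come from xfs_format.h, and the domain check is omitted.  The point is
 * the order: length bounds, then the extent range (phrased to avoid u32
 * overflow), then refcount bounds.  All ex_* names are hypothetical.
 */
struct ex_irec {
	unsigned int	start;
	unsigned int	len;
	unsigned int	refcount;
};

static int
ex_check_irec(const struct ex_irec *irec, unsigned int group_blocks,
		unsigned int len_max, unsigned int refcount_max)
{
	if (irec->len == 0 || irec->len > len_max)
		return 0;		/* bad length */
	if (irec->start >= group_blocks ||
	    irec->len > group_blocks - irec->start)
		return 0;		/* range escapes the group */
	if (irec->refcount == 0 || irec->refcount > refcount_max)
		return 0;		/* bad reference count */
	return 1;			/* record is plausible */
}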
/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	uint32_t		start;
	int			error;

	trace_xfs_refcount_update(cur, irec);

	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec.refc.rc_startblock = cpu_to_be32(start);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);

	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Insert a new record with the value given by [bno, len, refcount]
 * at the cursor's position.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur, irec);

	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	cur->bc_rec.rc.rc_domain = irec->rc_domain;

	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then reposition the cursor to the
 * spot where the record could be re-inserted, in case we want to increment
 * or decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur		*cur,
	int				*i)
{
	struct xfs_refcount_irec	irec;
	int				found_rec;
	int				error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur, &irec);
	error = xfs_btree_delete(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
			&found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur, error, _RET_IP_);
	return error;
}
/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  2  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * The X axis is physical block number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *       ^
 *
 * For each extent that falls within the range, figure out which extent
 * is to the left or the right of that extent.  Now we have a left,
 * current, and right extent.  If the new reference count of the center
 * extent enables us to merge left, center, and right into one record
 * covering all three, do so.  If the center extent is at the left end
 * of the range, abuts the left extent, and its new reference count
 * matches the left extent's record, then merge them.  If the center
 * extent is at the right end of the range, abuts the right extent, and
 * the reference counts match, merge those.  In the example, we can left
 * merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *           ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete the record if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks like this:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *       DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */
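
/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * adjustment walk described above, simulated over a sorted array
 * instead of a btree.  Gaps inside the range stand for refcount == 1;
 * pieces that would end up below 2 are simply not emitted (a result of
 * 0 means the blocks go back to the free space tree).  Splitting and
 * merging fall out of re-emitting records clipped to the range.
 * Feeding in the records from the diagrams with adj = +1 or -1 replays
 * the increment/decrement pictures.  All ex_* names are hypothetical.
 */
struct ex_rec {
	unsigned int	start;
	unsigned int	len;
	unsigned int	refcount;
};

/* Emit one piece, dropping refcount < 2 and merging equal neighbours. */
static int
ex_emit(struct ex_rec *out, int n, unsigned int start, unsigned int len,
		unsigned int refcount)
{
	if (len == 0 || refcount < 2)
		return n;	/* freed (0) or implied single owner (1) */
	if (n > 0 && out[n - 1].start + out[n - 1].len == start &&
	    out[n - 1].refcount == refcount) {
		out[n - 1].len += len;	/* merge with the left neighbour */
		return n;
	}
	out[n].start = start;
	out[n].len = len;
	out[n].refcount = refcount;
	return n + 1;
}

/* Adjust [bno, bno + len) by adj over recs[0..nr); result lands in out. */
static int
ex_adjust(const struct ex_rec *recs, int nr, unsigned int bno,
		unsigned int len, int adj, struct ex_rec *out)
{
	unsigned int	pos = bno, end = bno + len;
	int		i, n = 0;

	for (i = 0; i < nr; i++) {
		unsigned int	rs = recs[i].start;
		unsigned int	re = recs[i].start + recs[i].len;
		unsigned int	lo, hi;

		if (re <= bno) {	/* entirely left of the range */
			n = ex_emit(out, n, rs, re - rs, recs[i].refcount);
			continue;
		}
		if (rs >= end) {	/* entirely right of the range */
			if (pos < end) {	/* flush the implied gap */
				n = ex_emit(out, n, pos, end - pos, 1 + adj);
				pos = end;
			}
			n = ex_emit(out, n, rs, re - rs, recs[i].refcount);
			continue;
		}
		if (rs < bno)		/* split: left piece is untouched */
			n = ex_emit(out, n, rs, bno - rs, recs[i].refcount);
		if (rs > pos)		/* gap: implied refcount == 1 */
			n = ex_emit(out, n, pos, rs - pos, 1 + adj);
		lo = rs > bno ? rs : bno;
		hi = re < end ? re : end;
		n = ex_emit(out, n, lo, hi - lo, recs[i].refcount + adj);
		pos = hi;
		if (re > end)		/* split: right piece is untouched */
			n = ex_emit(out, n, end, re - end, recs[i].refcount);
	}
	if (pos < end)			/* trailing implied gap */
		n = ex_emit(out, n, pos, end - pos, 1 + adj);
	return n;
}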
/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_domain != domain)
		return 0;
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur, &rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur, error, _RET_IP_);
	return error;
}
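
/*
 * Illustrative sketch (not part of the kernel build): the split
 * arithmetic above.  A record [start, start + len) crossing agbno
 * becomes a left piece [start, agbno) and a right piece
 * [agbno, start + len), both keeping the original refcount.  The
 * function above performs this as an in-place update of the existing
 * record to the right piece followed by an insert of the left piece.
 * All ex_* names are hypothetical.
 */
struct ex_split { unsigned int start, len; };

static void
ex_split_at(unsigned int start, unsigned int len, unsigned int agbno,
		struct ex_split *left, struct ex_split *right)
{
	/* caller guarantees start < agbno < start + len */
	left->start = start;
	left->len = agbno - start;
	right->start = agbno;
	right->len = len - left->len;
}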
/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur, left, center, right);

	ASSERT(left->rc_domain == center->rc_domain);
	ASSERT(right->rc_domain == center->rc_domain);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_domain,
			center->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur, error, _RET_IP_);
	return error;
}
/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur, left, cleft);

	ASSERT(left->rc_domain == cleft->rc_domain);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
				cleft->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur, cright, right);

	ASSERT(right->rc_domain == cright->rc_domain);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_domain,
				cright->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_domain,
			right->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur, error, _RET_IP_);
	return error;
}
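
/*
 * Illustrative sketch (not part of the kernel build): the bookkeeping
 * the merge helpers perform on the remaining adjustment range.  A left
 * merge absorbs cleft into the left neighbour, so the range start moves
 * right; a right merge absorbs cright into the right neighbour, so only
 * the length shrinks; a center merge consumes the whole range.  All
 * ex_* names are hypothetical.
 */
static void
ex_after_merge_left(unsigned int *agbno, unsigned int *aglen,
		unsigned int cleft_len)
{
	*agbno += cleft_len;	/* cleft already carries its final count */
	*aglen -= cleft_len;
}

static void
ex_after_merge_right(unsigned int *aglen, unsigned int cright_len)
{
	*aglen -= cright_len;	/* range end moves left past cright */
}

static void
ex_after_merge_center(unsigned int *aglen)
{
	*aglen = 0;		/* nothing left for the adjust loop */
}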
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
			cleft->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
		cleft->rc_domain = domain;
	}
	trace_xfs_refcount_find_left_extent(cur, left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur, error, _RET_IP_);
	return error;
}
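
/*
 * Illustrative sketch (not part of the kernel build): inventing cleft
 * when the refcount tree has a gap at agbno.  Any hole inside an
 * allocated data extent stands for refcount == 1, so the synthesized
 * record starts at agbno and runs to the next real record (assumed to
 * start after agbno here) or to the end of the range, whichever comes
 * first.  All ex_* names are hypothetical.
 */
struct ex_cleft { unsigned int start, len, refcount; };

static void
ex_synth_cleft(unsigned int agbno, unsigned int aglen,
		int have_next, unsigned int next_start,
		struct ex_cleft *cleft)
{
	cleft->start = agbno;
	if (have_next && next_start < agbno + aglen)
		cleft->len = next_start - agbno;	/* up to the gap's end */
	else
		cleft->len = aglen;			/* whole range is a hole */
	cleft->refcount = 1;				/* implied single owner */
}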
/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
			cright->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
		cright->rc_domain = domain;
	}
	trace_xfs_refcount_find_right_extent(cur, cright, right,
			agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur, error, _RET_IP_);
	return error;
}
/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	const struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

static inline xfs_nlink_t
xfs_refc_merge_refcount(
	const struct xfs_refcount_irec	*irec,
	enum xfs_refc_adjust_op		adjust)
{
	/* Once a record hits XFS_REFC_REFCOUNT_MAX, it is pinned forever */
	if (irec->rc_refcount == XFS_REFC_REFCOUNT_MAX)
		return XFS_REFC_REFCOUNT_MAX;
	return irec->rc_refcount + adjust;
}

static inline bool
xfs_refc_want_merge_center(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	bool				cleft_is_cright,
	enum xfs_refc_adjust_op		adjust,
	unsigned long long		*ulenp)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * To merge with a center record, both shoulder records must be
	 * adjacent to the record we want to adjust.  This is only true if
	 * find_left and find_right made all four records valid.
	 */
	if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
	    !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
		return false;

	/* There must only be one record for the entire range. */
	if (!cleft_is_cright)
		return false;

	/* The shoulder record refcounts must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as
	 * the individual record block counts can be up to (u32 - 1) in
	 * length; hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount + right->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	*ulenp = ulen;
	return true;
}

static inline bool
xfs_refc_want_merge_left(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a left merge, the left shoulder record must be adjacent to the
	 * start of the range.  If this is true, find_left made left and
	 * cleft contain valid contents.
	 */
	if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
		return false;

	/* Left shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as
	 * the individual record block counts can be up to (u32 - 1) in
	 * length; hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	return true;
}

static inline bool
xfs_refc_want_merge_right(
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = right->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a right merge, the right shoulder record must be adjacent to
	 * the end of the range.  If this is true, find_right made cright and
	 * right contain valid contents.
	 */
	if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
		return false;

	/* Right shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cright, adjust);
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as
	 * the individual record block counts can be up to (u32 - 1) in
	 * length; hence we need to catch u32 addition overflows here.
	 */
	ulen += cright->rc_blockcount;
	if (ulen >= XFS_REFC_LEN_MAX)
		return false;

	return true;
}
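
/*
 * Illustrative sketch (not part of the kernel build): why the merge
 * predicates accumulate lengths in unsigned long long.  Each record
 * length is a u32, so summing two or three of them can wrap a u32;
 * doing the sum in 64 bits and only then comparing against the
 * record-length limit catches the wrap.  All ex_* names are
 * hypothetical; len_max stands in for XFS_REFC_LEN_MAX.
 */
static int
ex_merge_len_ok(unsigned int left_len, unsigned int center_len,
		unsigned int right_len, unsigned long long len_max)
{
	unsigned long long	ulen = left_len;

	ulen += center_len;	/* 64-bit accumulator: cannot wrap */
	ulen += right_len;
	return ulen < len_max;	/* merged record must stay addressable */
}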
/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
			*agbno, *aglen);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
			*agbno, *aglen);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
				adjust, &ulen)) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return 0;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
				cur->bc_refc.shape_changes);
	overhead += cur->bc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
			cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}
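
/*
 * Illustrative sketch (not part of the kernel build): the shape of the
 * reservation check above, with all quantities passed in.  Worst-case
 * btree-split overhead plus the per-operation refcount item overhead
 * must fit in what is left of the transaction's log reservation;
 * otherwise the caller stops and lets the deferred-op machinery roll
 * the transaction.  The ex_* name is hypothetical.
 */
static int
ex_still_have_space(unsigned long log_res, unsigned long overhead,
		unsigned long nr_ops, unsigned long item_overhead)
{
	if (nr_ops == 0)
		return 1;	/* nothing logged yet; always room */
	if (overhead > log_res)
		return 0;	/* worst case already exceeds the grant */
	return log_res - overhead > nr_ops * item_overhead;
}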
/* Schedule an extent free. */
static int
xrefc_free_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*rec)
{
	unsigned int			flags = 0;

	if (xfs_btree_is_rtrefcount(cur->bc_ops))
		flags |= XFS_FREE_EXTENT_REALTIME;

	return xfs_free_extent_later(cur->bc_tp,
			xfs_gbno_to_fsb(cur->bc_group, rec->rc_startblock),
			rec->rc_blockcount, NULL, XFS_AG_RESV_NONE, flags);
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
			&found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
			ext.rc_startblock = xfs_group_max_blocks(cur->bc_group);
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
			ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;

			trace_xfs_refcount_modify_extent(cur, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			cur->bc_refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						found_tmp != 1)) {
					xfs_btree_mark_sick(cur);
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				error = xrefc_free_extent(cur, &tmp);
				if (error)
					goto out_error;
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			/* Stop if there's nothing left to modify */
			if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
				break;

			/* Move the cursor to the start of ext. */
			error = xfs_refcount_lookup_ge(cur,
					XFS_REFC_DOMAIN_SHARED, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/*
		 * A previous step trimmed agbno/aglen such that the end of the
		 * range would not be in the middle of the record.  If this is
		 * no longer the case, something is seriously wrong with the
		 * btree.  Make sure we never feed the synthesized record into
		 * the processing loop below.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
		    XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == XFS_REFC_REFCOUNT_MAX)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur, &ext);
		cur->bc_refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				xfs_btree_mark_sick(cur);
				error = -EFSCORRUPTED;
				goto out_error;
			}
			goto advloop;
		} else {
			error = xrefc_free_extent(cur, &ext);
			if (error)
				goto out_error;
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
	return error;
}
/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur, *agbno, *aglen);
	else
		trace_xfs_refcount_decrease(cur, *agbno, *aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno + *aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
			agbno, aglen, adj, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur, error, _RET_IP_);
	return error;
}
/*
 * Set up a continuation of a deferred refcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the AG.
 */
static inline int
xfs_refcount_continue_op(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_intent	*ri,
	xfs_agblock_t			new_agbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = to_perag(cur->bc_group);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
					ri->ri_blockcount))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno);

	ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
	ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));

	return 0;
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur = *pcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);

	trace_xfs_refcount_deferred(mp, ri);

	if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	if (rcur != NULL && rcur->bc_group != ri->ri_group) {
		nr_ops = rcur->bc_refc.nr_ops;
		shape_changes = rcur->bc_refc.shape_changes;
		xfs_btree_del_cursor(rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		struct xfs_perag	*pag = to_perag(ri->ri_group);

		error = xfs_alloc_read_agf(pag, tp,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;

		*pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
		rcur->bc_refc.nr_ops = nr_ops;
		rcur->bc_refc.shape_changes = shape_changes;
	}

	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	case XFS_REFCOUNT_FREE_COW:
		error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	if (!error && ri->ri_blockcount > 0)
		trace_xfs_refcount_finish_one_leftover(mp, ri);
	return error;
}
/*
 * Set up a continuation of a deferred rtrefcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the
 * rtgroup.
 */
static inline int
xfs_rtrefcount_continue_op(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_intent	*ri,
	xfs_agblock_t			new_agbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_rtgroup		*rtg = to_rtg(ri->ri_group);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_rgbext(rtg, new_agbno,
					ri->ri_blockcount))) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	ri->ri_startblock = xfs_rgbno_to_rtb(rtg, new_agbno);

	ASSERT(xfs_verify_rtbext(mp, ri->ri_startblock, ri->ri_blockcount));
	return 0;
}

/*
 * Process one of the deferred realtime refcount operations.  We pass back
 * the btree cursor to maintain our lock on the btree between calls.
 */
int
xfs_rtrefcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rtgroup		*rtg = to_rtg(ri->ri_group);
	struct xfs_btree_cur		*rcur = *pcur;
	int				error = 0;
	xfs_rgblock_t			bno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	bno = xfs_rtb_to_rgbno(mp, ri->ri_startblock);

	trace_xfs_refcount_deferred(mp, ri);

	if (XFS_TEST_ERROR(mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor rtgroup doesn't match
	 * the startblock, get one now.
	 */
	if (rcur != NULL && rcur->bc_group != ri->ri_group) {
		nr_ops = rcur->bc_refc.nr_ops;
		shape_changes = rcur->bc_refc.shape_changes;
		xfs_btree_del_cursor(rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_REFCOUNT);
		xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_REFCOUNT);
		*pcur = rcur = xfs_rtrefcountbt_init_cursor(tp, rtg);

		rcur->bc_refc.nr_ops = nr_ops;
		rcur->bc_refc.shape_changes = shape_changes;
	}

	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_rtrefcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_rtrefcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	case XFS_REFCOUNT_FREE_COW:
		error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	if (!error && ri->ri_blockcount > 0)
		trace_xfs_refcount_finish_one_leftover(mp, ri);
	return error;
}
/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	bool				isrt,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;
	ri->ri_realtime = isrt;

	xfs_refcount_defer_add(tp, ri);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	bool				isrt,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, isrt, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	bool				isrt,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, isrt, PREV->br_startblock,
			PREV->br_blockcount);
}
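
/*
 * Illustrative userspace sketch (not part of the kernel build): the
 * defer pattern above, reduced to an array of intents.  Increasing or
 * decreasing a refcount only queues an intent; the btree is not touched
 * until the deferred-ops machinery later feeds each intent to
 * xfs_refcount_finish_one(), which may process it in pieces.  All ex_*
 * names are hypothetical, and apply_some() stands in for one bounded
 * adjustment pass that is assumed to always make progress.
 */
struct ex_intent {
	int			adjust;		/* +1 or -1 */
	unsigned long long	startblock;
	unsigned int		blockcount;
};

static void
ex_finish_intents(struct ex_intent *q, int nr,
		unsigned int (*apply_some)(const struct ex_intent *))
{
	int	i;

	for (i = 0; i < nr; i++) {
		/*
		 * Like xfs_refcount_continue_op(): when a pass stops early
		 * (say, the transaction ran out of log space), re-aim the
		 * intent at the first unprocessed block and go again.
		 */
		while (q[i].blockcount > 0) {
			unsigned int done = apply_some(&q[i]);

			q[i].startblock += done;
			q[i].blockcount -= done;
		}
	}
}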
/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur, agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
			&have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
		goto done;

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
			goto done;
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
		    tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur, error, _RET_IP_);
	return error;
}
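
/*
 * Illustrative sketch (not part of the kernel build): the find_shared
 * scan over a sorted array of shared (refcount > 1) runs.  It returns
 * the first run inside [bno, bno + len) clipped to the range and, when
 * find_end is set, extends it across contiguous records, like the btree
 * walk above.  ~0U and 0 stand in for NULLAGBLOCK and an empty result.
 * All ex_* names are hypothetical.
 */
struct ex_shared { unsigned int start, len; };

static int
ex_find_shared(const struct ex_shared *recs, int nr, unsigned int bno,
		unsigned int len, int find_end,
		unsigned int *fbno, unsigned int *flen)
{
	unsigned int	end = bno + len;
	int		i;

	*fbno = ~0U;
	*flen = 0;
	for (i = 0; i < nr; i++) {
		unsigned int	rs = recs[i].start;
		unsigned int	re = recs[i].start + recs[i].len;

		if (re <= bno)
			continue;	/* ends before the range */
		if (rs >= end)
			break;		/* starts after the range */
		if (*flen == 0) {
			*fbno = rs > bno ? rs : bno;	/* clip the start */
			*flen = (re < end ? re : end) - *fbno;
			if (!find_end)
				return 1;
		} else if (rs == *fbno + *flen) {
			/* contiguous: extend, clipped to the range end */
			*flen = (re < end ? re : end) - *fbno;
		} else {
			break;		/* gap ends the shared run */
		}
	}
	return *flen > 0;
}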
/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of the first problem, but extent-size
 * reservations will have to periodically relog even if there's no writeout in
 * progress.  This can happen if the CoW extent size hint is set, which you
 * really want.  Third, EFIs cannot currently be automatically relogged into
 * newer transactions to advance the log tail.  Fourth, stuffing the log full
 * of EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
			&found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
				ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (!found_rec) {
		ext.rc_startblock = xfs_group_max_blocks(cur->bc_group);
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
		ext.rc_domain = XFS_REFC_DOMAIN_COW;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				agbno + aglen > ext.rc_startblock)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		tmp.rc_domain = XFS_REFC_DOMAIN_COW;

		trace_xfs_refcount_modify_extent(cur, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur, error, _RET_IP_);
	return error;
}
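
/*
 * Illustrative sketch (not part of the kernel build): the two legal
 * transitions for a CoW staging record, mirroring the XFS_IS_CORRUPT
 * checks above.  An allocation must land in a hole (nothing may overlap
 * the new reservation), and a free must consume an exactly matching
 * refcount == 1 record; anything else is corruption.  All ex_* names
 * are hypothetical.
 */
struct ex_cowrec { unsigned int start, len, refcount; };

static int
ex_cow_alloc_ok(const struct ex_cowrec *next_rec, unsigned int agbno,
		unsigned int aglen)
{
	/* next_rec is the first record at/after agbno, or NULL for none */
	return !next_rec || agbno + aglen <= next_rec->start;
}

static int
ex_cow_free_ok(const struct ex_cowrec *rec, unsigned int agbno,
		unsigned int aglen)
{
	return rec && rec->start == agbno && rec->len == aglen &&
	       rec->refcount == 1;
}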
/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no refcount extents cross the boundaries of the
	 * adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
			&aglen, adj, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur, error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur, agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur, agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans	*tp,
	bool			isrt,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, isrt, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging extent in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans	*tp,
	bool			isrt,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, isrt, fsb, len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, isrt, fsb, len);
}
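/*
 * Usage sketch (assumed caller context; the transaction and extent here
 * are hypothetical): the two deferred-ops wrappers above are always used
 * as a pair. Staging queues a refcount insert plus an XFS_RMAP_OWN_COW
 * rmap entry; retiring the extent queues the inverse updates in the
 * opposite order.
 */
static inline void
xfs_refcount_cow_stage_example(
	struct xfs_trans	*tp,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	/* Queue refcount and rmap updates for a new data-device staging extent. */
	xfs_refcount_alloc_cow_extent(tp, false, fsb, len);

	/*
	 * ...writeback remaps the staged blocks into the data fork, or CoW
	 * cancellation decides they are no longer needed...
	 */

	/* Queue the updates that retire the staging extent again. */
	xfs_refcount_free_cow_extent(tp, false, fsb, len);
}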
struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	rr = kmalloc(sizeof(struct xfs_refcount_recovery),
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&rr->rr_list);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);

	if (xfs_refcount_check_btrec(cur, &rr->rr_rrec) != NULL ||
	    XFS_IS_CORRUPT(cur->bc_mp,
			   rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
		xfs_btree_mark_sick(cur);
		kfree(rr);
		return -EFSCORRUPTED;
	}

	list_add_tail(&rr->rr_list, debris);
	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_group		*xg)
{
	struct xfs_mount		*mp = xg->xg_mount;
	bool				isrt = xg->xg_type == XG_TYPE_RTG;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp = NULL;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
	};
	union xfs_btree_irec		high = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
		.rc.rc_startblock	= -1U,
	};
	xfs_fsblock_t			fsb;
	int				error;

	/* reflink filesystems must not have groups larger than 2^31-1 blocks */
	BUILD_BUG_ON(XFS_MAX_RGBLOCKS >= XFS_REFC_COWFLAG);
	BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);

	if (isrt) {
		if (!xfs_has_rtgroups(mp))
			return 0;
		if (xfs_group_max_blocks(xg) >= XFS_MAX_RGBLOCKS)
			return -EOPNOTSUPP;
	} else {
		if (xfs_group_max_blocks(xg) > XFS_MAX_CRC_AG_BLOCKS)
			return -EOPNOTSUPP;
	}

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	tp = xfs_trans_alloc_empty(mp);

	if (isrt) {
		xfs_rtgroup_lock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
		cur = xfs_rtrefcountbt_init_cursor(tp, to_rtg(xg));
	} else {
		error = xfs_alloc_read_agf(to_perag(xg), tp, 0, &agbp);
		if (error)
			goto out_trans;
		cur = xfs_refcountbt_init_cursor(mp, tp, agbp, to_perag(xg));
	}

	/* Find all the leftover CoW staging extents. */
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	if (agbp)
		xfs_trans_brelse(tp, agbp);
	else
		xfs_rtgroup_unlock(to_rtg(xg), XFS_RTGLOCK_REFCOUNT);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		/* Free the orphan record */
		fsb = xfs_gbno_to_fsb(xg, rr->rr_rrec.rc_startblock);
		xfs_refcount_free_cow_extent(tp, isrt, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		error = xfs_free_extent_later(tp, fsb,
				rr->rr_rrec.rc_blockcount, NULL,
				XFS_AG_RESV_NONE,
				isrt ? XFS_FREE_EXTENT_REALTIME : 0);
		if (error)
			goto out_trans;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kfree(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kfree(rr);
	}
	return error;
}
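/*
 * Standalone sketch of the empty-transaction pattern used above (this
 * helper is hypothetical and uncalled): because an empty transaction may
 * re-grab buffers already attached to it, a read-only btree walk cannot
 * deadlock on a buffer it already holds, and cancelling the transaction
 * afterwards releases everything since nothing was dirtied.
 */
static inline int
xfs_refcount_walk_cow_example(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag)
{
	struct xfs_trans	*tp;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agbp;
	int			error;

	tp = xfs_trans_alloc_empty(mp);

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		goto out_cancel;

	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
	/* ...query the btree, e.g. with xfs_btree_query_range()... */
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);

out_cancel:
	/* Nothing was modified, so cancelling cleans up completely. */
	xfs_trans_cancel(tp);
	return error;
}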
/*
 * Scan part of the keyspace of the refcount records and tell us if the area
 * has no records, is fully mapped by records, or is partially filled.
 */
int
xfs_refcount_has_records(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	enum xbtree_recpacking	*outcome)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;
	low.rc.rc_domain = high.rc.rc_domain = domain;

	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}

struct xfs_refcount_query_range_info {
	xfs_refcount_query_range_fn	fn;
	void				*priv;
};

/* Format btree record and pass to our callback. */
STATIC int
xfs_refcount_query_range_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct xfs_refcount_query_range_info	*query = priv;
	struct xfs_refcount_irec		irec;
	xfs_failaddr_t				fa;

	xfs_refcount_btrec_to_irec(rec, &irec);
	fa = xfs_refcount_check_btrec(cur, &irec);
	if (fa)
		return xfs_refcount_complain_bad_rec(cur, fa, &irec);

	return query->fn(cur, &irec, query->priv);
}

/* Find all refcount records between two keys. */
int
xfs_refcount_query_range(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*low_rec,
	const struct xfs_refcount_irec	*high_rec,
	xfs_refcount_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_irec		low_brec = { .rc = *low_rec };
	union xfs_btree_irec		high_brec = { .rc = *high_rec };
	struct xfs_refcount_query_range_info	query = { .priv = priv, .fn = fn };

	return xfs_btree_query_range(cur, &low_brec, &high_brec,
			xfs_refcount_query_range_helper, &query);
}

int __init
xfs_refcount_intent_init_cache(void)
{
	xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
			sizeof(struct xfs_refcount_intent),
			0, 0, NULL);

	return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_refcount_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_refcount_intent_cache);
	xfs_refcount_intent_cache = NULL;
}
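/*
 * Usage sketch for xfs_refcount_query_range() (hypothetical callback and
 * wrapper; nothing calls these): count the shared-domain records whose
 * start blocks fall inside [bno, bno + len) of one group.
 */
static int
xfs_refcount_count_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_refcount_irec	*irec,
	void				*priv)
{
	unsigned long long		*nr = priv;

	(*nr)++;
	return 0;
}

static inline int
xfs_refcount_count_example(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	unsigned long long		*nr)
{
	struct xfs_refcount_irec	low = {
		.rc_startblock	= bno,
		.rc_domain	= XFS_REFC_DOMAIN_SHARED,
	};
	struct xfs_refcount_irec	high = {
		.rc_startblock	= bno + len - 1,
		.rc_domain	= XFS_REFC_DOMAIN_SHARED,
	};

	*nr = 0;
	return xfs_refcount_query_range(cur, &low, &high,
			xfs_refcount_count_helper, nr);
}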