// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"

/* Slab cache for deferred refcount update (intent) items. */
struct kmem_cache	*xfs_refcount_intent_cache;

/*
 * Allowable refcount adjustment amounts.  Note that COW_FREE and DECREASE
 * share the value -1; the distinct enumerators document intent at call sites.
 */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

/* Forward declarations for the CoW staging-extent helpers defined below. */
STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 *
 * @cur:    refcount btree cursor
 * @domain: shared or CoW record domain to search
 * @bno:    AG block number to look up
 * @stat:   set to 1 if a record was found, 0 otherwise
 *
 * Returns 0 or a negative errno from the btree lookup.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	/* Seed the cursor's search key; blockcount is not part of the key. */
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}
66 */ 67 int 68 xfs_refcount_lookup_ge( 69 struct xfs_btree_cur *cur, 70 enum xfs_refc_domain domain, 71 xfs_agblock_t bno, 72 int *stat) 73 { 74 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 75 xfs_refcount_encode_startblock(bno, domain), 76 XFS_LOOKUP_GE); 77 cur->bc_rec.rc.rc_startblock = bno; 78 cur->bc_rec.rc.rc_blockcount = 0; 79 cur->bc_rec.rc.rc_domain = domain; 80 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 81 } 82 83 /* 84 * Look up the first record equal to [bno, len] in the btree 85 * given by cur. 86 */ 87 int 88 xfs_refcount_lookup_eq( 89 struct xfs_btree_cur *cur, 90 enum xfs_refc_domain domain, 91 xfs_agblock_t bno, 92 int *stat) 93 { 94 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 95 xfs_refcount_encode_startblock(bno, domain), 96 XFS_LOOKUP_LE); 97 cur->bc_rec.rc.rc_startblock = bno; 98 cur->bc_rec.rc.rc_blockcount = 0; 99 cur->bc_rec.rc.rc_domain = domain; 100 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 101 } 102 103 /* Convert on-disk record to in-core format. */ 104 void 105 xfs_refcount_btrec_to_irec( 106 const union xfs_btree_rec *rec, 107 struct xfs_refcount_irec *irec) 108 { 109 uint32_t start; 110 111 start = be32_to_cpu(rec->refc.rc_startblock); 112 if (start & XFS_REFC_COWFLAG) { 113 start &= ~XFS_REFC_COWFLAG; 114 irec->rc_domain = XFS_REFC_DOMAIN_COW; 115 } else { 116 irec->rc_domain = XFS_REFC_DOMAIN_SHARED; 117 } 118 119 irec->rc_startblock = start; 120 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 121 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 122 } 123 124 /* Simple checks for refcount records. 
/* Simple checks for refcount records. */
xfs_failaddr_t
xfs_refcount_check_irec(
	struct xfs_perag		*pag,
	const struct xfs_refcount_irec	*irec)
{
	/* Extent length must be nonzero and no larger than the ondisk max. */
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		return __this_address;

	/* The startblock must be consistent with the record's domain. */
	if (!xfs_refcount_check_domain(irec))
		return __this_address;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
		return __this_address;

	/* Reference count must be nonzero and no larger than the ondisk max. */
	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		return __this_address;

	return NULL;
}

/*
 * Complain about a corrupt refcount record, mark the btree sick, and
 * return -EFSCORRUPTED so the caller can bail out.
 */
static inline int
xfs_refcount_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_refcount_irec	*irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	xfs_warn(mp,
 "Refcount BTree record corruption in AG %d detected at %pS!",
				cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}

/*
 * Get the data from the pointed-to record.
 *
 * Retrieves the record under the cursor, converts it to incore form, and
 * validates it.  *stat is 1 if a record was present; a record that fails
 * validation produces -EFSCORRUPTED via xfs_refcount_complain_bad_rec.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec		*rec;
	xfs_failaddr_t			fa;
	int				error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);
	fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec);
	if (fa)
		return xfs_refcount_complain_bad_rec(cur, fa, irec);

	trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
	return 0;
}
/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	uint32_t		start;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	/*
	 * Re-encode the domain into the ondisk startblock (see
	 * xfs_refcount_btrec_to_irec for the inverse transformation).
	 */
	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec.refc.rc_startblock = cpu_to_be32(start);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);

	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 *
 * *i is the btree insert stat; exactly one record must be inserted.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	/* Seed the cursor's record area with the new record's contents. */
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	cur->bc_rec.rc.rc_domain = irec->rc_domain;

	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	/* The insert must have produced exactly one record. */
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}
260 */ 261 STATIC int 262 xfs_refcount_delete( 263 struct xfs_btree_cur *cur, 264 int *i) 265 { 266 struct xfs_refcount_irec irec; 267 int found_rec; 268 int error; 269 270 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 271 if (error) 272 goto out_error; 273 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 274 xfs_btree_mark_sick(cur); 275 error = -EFSCORRUPTED; 276 goto out_error; 277 } 278 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); 279 error = xfs_btree_delete(cur, i); 280 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 281 xfs_btree_mark_sick(cur); 282 error = -EFSCORRUPTED; 283 goto out_error; 284 } 285 if (error) 286 goto out_error; 287 error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, 288 &found_rec); 289 out_error: 290 if (error) 291 trace_xfs_refcount_delete_error(cur->bc_mp, 292 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 293 return error; 294 } 295 296 /* 297 * Adjusting the Reference Count 298 * 299 * As stated elsewhere, the reference count btree (refcbt) stores 300 * >1 reference counts for extents of physical blocks. In this 301 * operation, we're either raising or lowering the reference count of 302 * some subrange stored in the tree: 303 * 304 * <------ adjustment range ------> 305 * ----+ +---+-----+ +--+--------+--------- 306 * 2 | | 3 | 4 | |17| 55 | 10 307 * ----+ +---+-----+ +--+--------+--------- 308 * X axis is physical blocks number; 309 * reference counts are the numbers inside the rectangles 310 * 311 * The first thing we need to do is to ensure that there are no 312 * refcount extents crossing either boundary of the range to be 313 * adjusted. 
For any extent that does cross a boundary, split it into 314 * two extents so that we can increment the refcount of one of the 315 * pieces later: 316 * 317 * <------ adjustment range ------> 318 * ----+ +---+-----+ +--+--------+----+---- 319 * 2 | | 3 | 2 | |17| 55 | 10 | 10 320 * ----+ +---+-----+ +--+--------+----+---- 321 * 322 * For this next step, let's assume that all the physical blocks in 323 * the adjustment range are mapped to a file and are therefore in use 324 * at least once. Therefore, we can infer that any gap in the 325 * refcount tree within the adjustment range represents a physical 326 * extent with refcount == 1: 327 * 328 * <------ adjustment range ------> 329 * ----+---+---+-----+-+--+--------+----+---- 330 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 331 * ----+---+---+-----+-+--+--------+----+---- 332 * ^ 333 * 334 * For each extent that falls within the interval range, figure out 335 * which extent is to the left or the right of that extent. Now we 336 * have a left, current, and right extent. If the new reference count 337 * of the center extent enables us to merge left, center, and right 338 * into one record covering all three, do so. If the center extent is 339 * at the left end of the range, abuts the left extent, and its new 340 * reference count matches the left extent's record, then merge them. 341 * If the center extent is at the right end of the range, abuts the 342 * right extent, and the reference counts match, merge those. In the 343 * example, we can left merge (assuming an increment operation): 344 * 345 * <------ adjustment range ------> 346 * --------+---+-----+-+--+--------+----+---- 347 * 2 | 3 | 2 |1|17| 55 | 10 | 10 348 * --------+---+-----+-+--+--------+----+---- 349 * ^ 350 * 351 * For all other extents within the range, adjust the reference count 352 * or delete it if the refcount falls below 2. 
If we were 353 * incrementing, the end result looks like this: 354 * 355 * <------ adjustment range ------> 356 * --------+---+-----+-+--+--------+----+---- 357 * 2 | 4 | 3 |2|18| 56 | 11 | 10 358 * --------+---+-----+-+--+--------+----+---- 359 * 360 * The result of a decrement operation looks as such: 361 * 362 * <------ adjustment range ------> 363 * ----+ +---+ +--+--------+----+---- 364 * 2 | | 2 | |16| 54 | 9 | 10 365 * ----+ +---+ +--+--------+----+---- 366 * DDDD 111111DD 367 * 368 * The blocks marked "D" are freed; the blocks marked "1" are only 369 * referenced once and therefore the record is removed from the 370 * refcount btree. 371 */ 372 373 /* Next block after this extent. */ 374 static inline xfs_agblock_t 375 xfs_refc_next( 376 struct xfs_refcount_irec *rc) 377 { 378 return rc->rc_startblock + rc->rc_blockcount; 379 } 380 381 /* 382 * Split a refcount extent that crosses agbno. 383 */ 384 STATIC int 385 xfs_refcount_split_extent( 386 struct xfs_btree_cur *cur, 387 enum xfs_refc_domain domain, 388 xfs_agblock_t agbno, 389 bool *shape_changed) 390 { 391 struct xfs_refcount_irec rcext, tmp; 392 int found_rec; 393 int error; 394 395 *shape_changed = false; 396 error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); 397 if (error) 398 goto out_error; 399 if (!found_rec) 400 return 0; 401 402 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 403 if (error) 404 goto out_error; 405 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 406 xfs_btree_mark_sick(cur); 407 error = -EFSCORRUPTED; 408 goto out_error; 409 } 410 if (rcext.rc_domain != domain) 411 return 0; 412 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 413 return 0; 414 415 *shape_changed = true; 416 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 417 &rcext, agbno); 418 419 /* Establish the right extent. 
*/ 420 tmp = rcext; 421 tmp.rc_startblock = agbno; 422 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 423 error = xfs_refcount_update(cur, &tmp); 424 if (error) 425 goto out_error; 426 427 /* Insert the left extent. */ 428 tmp = rcext; 429 tmp.rc_blockcount = agbno - rcext.rc_startblock; 430 error = xfs_refcount_insert(cur, &tmp, &found_rec); 431 if (error) 432 goto out_error; 433 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 434 xfs_btree_mark_sick(cur); 435 error = -EFSCORRUPTED; 436 goto out_error; 437 } 438 return error; 439 440 out_error: 441 trace_xfs_refcount_split_extent_error(cur->bc_mp, 442 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 443 return error; 444 } 445 446 /* 447 * Merge the left, center, and right extents. 448 */ 449 STATIC int 450 xfs_refcount_merge_center_extents( 451 struct xfs_btree_cur *cur, 452 struct xfs_refcount_irec *left, 453 struct xfs_refcount_irec *center, 454 struct xfs_refcount_irec *right, 455 unsigned long long extlen, 456 xfs_extlen_t *aglen) 457 { 458 int error; 459 int found_rec; 460 461 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 462 cur->bc_ag.pag->pag_agno, left, center, right); 463 464 ASSERT(left->rc_domain == center->rc_domain); 465 ASSERT(right->rc_domain == center->rc_domain); 466 467 /* 468 * Make sure the center and right extents are not in the btree. 469 * If the center extent was synthesized, the first delete call 470 * removes the right extent and we skip the second deletion. 471 * If center and right were in the btree, then the first delete 472 * call removes the center and the second one removes the right 473 * extent. 
474 */ 475 error = xfs_refcount_lookup_ge(cur, center->rc_domain, 476 center->rc_startblock, &found_rec); 477 if (error) 478 goto out_error; 479 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 480 xfs_btree_mark_sick(cur); 481 error = -EFSCORRUPTED; 482 goto out_error; 483 } 484 485 error = xfs_refcount_delete(cur, &found_rec); 486 if (error) 487 goto out_error; 488 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 489 xfs_btree_mark_sick(cur); 490 error = -EFSCORRUPTED; 491 goto out_error; 492 } 493 494 if (center->rc_refcount > 1) { 495 error = xfs_refcount_delete(cur, &found_rec); 496 if (error) 497 goto out_error; 498 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 499 xfs_btree_mark_sick(cur); 500 error = -EFSCORRUPTED; 501 goto out_error; 502 } 503 } 504 505 /* Enlarge the left extent. */ 506 error = xfs_refcount_lookup_le(cur, left->rc_domain, 507 left->rc_startblock, &found_rec); 508 if (error) 509 goto out_error; 510 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 511 xfs_btree_mark_sick(cur); 512 error = -EFSCORRUPTED; 513 goto out_error; 514 } 515 516 left->rc_blockcount = extlen; 517 error = xfs_refcount_update(cur, left); 518 if (error) 519 goto out_error; 520 521 *aglen = 0; 522 return error; 523 524 out_error: 525 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 526 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 527 return error; 528 } 529 530 /* 531 * Merge with the left extent. 532 */ 533 STATIC int 534 xfs_refcount_merge_left_extent( 535 struct xfs_btree_cur *cur, 536 struct xfs_refcount_irec *left, 537 struct xfs_refcount_irec *cleft, 538 xfs_agblock_t *agbno, 539 xfs_extlen_t *aglen) 540 { 541 int error; 542 int found_rec; 543 544 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 545 cur->bc_ag.pag->pag_agno, left, cleft); 546 547 ASSERT(left->rc_domain == cleft->rc_domain); 548 549 /* If the extent at agbno (cleft) wasn't synthesized, remove it. 
*/ 550 if (cleft->rc_refcount > 1) { 551 error = xfs_refcount_lookup_le(cur, cleft->rc_domain, 552 cleft->rc_startblock, &found_rec); 553 if (error) 554 goto out_error; 555 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 556 xfs_btree_mark_sick(cur); 557 error = -EFSCORRUPTED; 558 goto out_error; 559 } 560 561 error = xfs_refcount_delete(cur, &found_rec); 562 if (error) 563 goto out_error; 564 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 565 xfs_btree_mark_sick(cur); 566 error = -EFSCORRUPTED; 567 goto out_error; 568 } 569 } 570 571 /* Enlarge the left extent. */ 572 error = xfs_refcount_lookup_le(cur, left->rc_domain, 573 left->rc_startblock, &found_rec); 574 if (error) 575 goto out_error; 576 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 577 xfs_btree_mark_sick(cur); 578 error = -EFSCORRUPTED; 579 goto out_error; 580 } 581 582 left->rc_blockcount += cleft->rc_blockcount; 583 error = xfs_refcount_update(cur, left); 584 if (error) 585 goto out_error; 586 587 *agbno += cleft->rc_blockcount; 588 *aglen -= cleft->rc_blockcount; 589 return error; 590 591 out_error: 592 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 593 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 594 return error; 595 } 596 597 /* 598 * Merge with the right extent. 599 */ 600 STATIC int 601 xfs_refcount_merge_right_extent( 602 struct xfs_btree_cur *cur, 603 struct xfs_refcount_irec *right, 604 struct xfs_refcount_irec *cright, 605 xfs_extlen_t *aglen) 606 { 607 int error; 608 int found_rec; 609 610 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 611 cur->bc_ag.pag->pag_agno, cright, right); 612 613 ASSERT(right->rc_domain == cright->rc_domain); 614 615 /* 616 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 617 * remove it. 
618 */ 619 if (cright->rc_refcount > 1) { 620 error = xfs_refcount_lookup_le(cur, cright->rc_domain, 621 cright->rc_startblock, &found_rec); 622 if (error) 623 goto out_error; 624 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 625 xfs_btree_mark_sick(cur); 626 error = -EFSCORRUPTED; 627 goto out_error; 628 } 629 630 error = xfs_refcount_delete(cur, &found_rec); 631 if (error) 632 goto out_error; 633 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 634 xfs_btree_mark_sick(cur); 635 error = -EFSCORRUPTED; 636 goto out_error; 637 } 638 } 639 640 /* Enlarge the right extent. */ 641 error = xfs_refcount_lookup_le(cur, right->rc_domain, 642 right->rc_startblock, &found_rec); 643 if (error) 644 goto out_error; 645 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 646 xfs_btree_mark_sick(cur); 647 error = -EFSCORRUPTED; 648 goto out_error; 649 } 650 651 right->rc_startblock -= cright->rc_blockcount; 652 right->rc_blockcount += cright->rc_blockcount; 653 error = xfs_refcount_update(cur, right); 654 if (error) 655 goto out_error; 656 657 *aglen -= cright->rc_blockcount; 658 return error; 659 660 out_error: 661 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 662 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 663 return error; 664 } 665 666 /* 667 * Find the left extent and the one after it (cleft). This function assumes 668 * that we've already split any extent crossing agbno. 
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	/* NULLAGBLOCK startblock marks a record as "not found/invalid". */
	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* The record only counts as "left" if it abuts agbno exactly. */
	if (tmp.rc_domain != domain)
		return 0;
	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* A wrong-domain record is as good as no record at all. */
		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
			cleft->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
		cleft->rc_domain = domain;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	/* NULLAGBLOCK startblock marks a record as "not found/invalid". */
	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* The record only counts as "right" if it starts at agbno + aglen. */
	if (tmp.rc_domain != domain)
		return 0;
	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* A wrong-domain record is as good as no record at all. */
		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
			cright->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
		cright->rc_domain = domain;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}
/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	const struct xfs_refcount_irec	*rc)
{
	/* find_left/find_right mark unused records with NULLAGBLOCK. */
	return rc->rc_startblock != NULLAGBLOCK;
}

/* Compute the refcount a record would have after applying adjust. */
static inline xfs_nlink_t
xfs_refc_merge_refcount(
	const struct xfs_refcount_irec	*irec,
	enum xfs_refc_adjust_op		adjust)
{
	/* Once a record hits MAXREFCOUNT, it is pinned there forever */
	if (irec->rc_refcount == MAXREFCOUNT)
		return MAXREFCOUNT;
	return irec->rc_refcount + adjust;
}

/*
 * Decide if left, cleft/cright, and right can all be merged into a single
 * record after the adjustment.  On success, *ulenp is set to the combined
 * length of the merged record.
 */
static inline bool
xfs_refc_want_merge_center(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	bool				cleft_is_cright,
	enum xfs_refc_adjust_op		adjust,
	unsigned long long		*ulenp)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * To merge with a center record, both shoulder records must be
	 * adjacent to the record we want to adjust.  This is only true if
	 * find_left and find_right made all four records valid.
	 */
	if (!xfs_refc_valid(left)  || !xfs_refc_valid(right) ||
	    !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
		return false;

	/* There must only be one record for the entire range. */
	if (!cleft_is_cright)
		return false;

	/* The shoulder record refcounts must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount + right->rc_blockcount;
	if (ulen >= MAXREFCEXTLEN)
		return false;

	*ulenp = ulen;
	return true;
}

/* Decide if cleft can be merged into the record on its left. */
static inline bool
xfs_refc_want_merge_left(
	const struct xfs_refcount_irec	*left,
	const struct xfs_refcount_irec	*cleft,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = left->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a left merge, the left shoulder record must be adjacent to the
	 * start of the range.  If this is true, find_left made left and cleft
	 * contain valid contents.
	 */
	if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
		return false;

	/* Left shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
	if (left->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cleft->rc_blockcount;
	if (ulen >= MAXREFCEXTLEN)
		return false;

	return true;
}

/* Decide if cright can be merged into the record on its right. */
static inline bool
xfs_refc_want_merge_right(
	const struct xfs_refcount_irec	*cright,
	const struct xfs_refcount_irec	*right,
	enum xfs_refc_adjust_op		adjust)
{
	unsigned long long		ulen = right->rc_blockcount;
	xfs_nlink_t			new_refcount;

	/*
	 * For a right merge, the right shoulder record must be adjacent to the
	 * end of the range.  If this is true, find_right made cright and right
	 * contain valid contents.
	 */
	if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
		return false;

	/* Right shoulder record refcount must match the new refcount. */
	new_refcount = xfs_refc_merge_refcount(cright, adjust);
	if (right->rc_refcount != new_refcount)
		return false;

	/*
	 * The new record cannot exceed the max length.  ulen is a ULL as the
	 * individual record block counts can be up to (u32 - 1) in length
	 * hence we need to catch u32 addition overflows here.
	 */
	ulen += cright->rc_blockcount;
	if (ulen >= MAXREFCEXTLEN)
		return false;

	return true;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 *
 * On return, *agbno/*aglen have been trimmed to exclude any ranges that
 * were absorbed into neighboring records, and *shape_changed reports
 * whether the tree shape was altered.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
			*agbno, *aglen);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
			*agbno, *aglen);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
				adjust, &ulen)) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return 0;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
				cur->bc_refc.shape_changes);
	overhead += cur->bc_mp->m_refc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	/* First operation is always allowed; after that, check log space. */
	if (cur->bc_refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return  cur->bc_tp->t_log_res - overhead >
		cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
			&found_rec);
	if (error)
		goto out_error;

	/* Loop until the range is consumed or the transaction fills up. */
	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
			/*
			 * No usable record: synthesize a zero-length sentinel
			 * at the end of the AG so the hole logic below covers
			 * the rest of the range.
			 */
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
			ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;

			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_ag.pag->pag_agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			cur->bc_refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					xfs_btree_mark_sick(cur);
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				/* Refcount fell to zero: free the blocks. */
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_ag.pag->pag_agno,
						tmp.rc_startblock);
				error = xfs_free_extent_later(cur->bc_tp, fsbno,
						  tmp.rc_blockcount, NULL,
						  XFS_AG_RESV_NONE, false);
				if (error)
					goto out_error;
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			/* Stop if there's nothing left to modify */
			if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
				break;

			/* Move the cursor to the start of ext. */
			error = xfs_refcount_lookup_ge(cur,
					XFS_REFC_DOMAIN_SHARED, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/*
		 * A previous step trimmed agbno/aglen such that the end of the
		 * range would not be in the middle of the record.  If this is
		 * no longer the case, something is seriously wrong with the
		 * btree.  Make sure we never feed the synthesized record into
		 * the processing loop below.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
		    XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		cur->bc_refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			/* Refcount dropped to 1: the record is implied. */
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				xfs_btree_mark_sick(cur);
				error = -EFSCORRUPTED;
				goto out_error;
			}
			/* Delete already advanced the cursor for us. */
			goto advloop;
		} else {
			/* Refcount fell to zero: free the blocks. */
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_ag.pag->pag_agno,
					ext.rc_startblock);
			error = xfs_free_extent_later(cur->bc_tp, fsbno,
					ext.rc_blockcount, NULL,
					XFS_AG_RESV_NONE, false);
			if (error)
				goto out_error;
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks.
 */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,	/* in/out: advanced by merges/adjusts */
	xfs_extlen_t		*aglen,	/* in/out: shrunk by merges/adjusts */
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, *agbno, *aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, *agbno, *aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			*agbno + *aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 * This can update *agbno/*aglen in place.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
			agbno, aglen, adj, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	/*
	 * Record that the tree changed shape so the reservation estimate
	 * in xfs_refcount_still_have_space() stays conservative.
	 */
	if (shape_changes)
		cur->bc_refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one.
*/ 1324 void 1325 xfs_refcount_finish_one_cleanup( 1326 struct xfs_trans *tp, 1327 struct xfs_btree_cur *rcur, 1328 int error) 1329 { 1330 struct xfs_buf *agbp; 1331 1332 if (rcur == NULL) 1333 return; 1334 agbp = rcur->bc_ag.agbp; 1335 xfs_btree_del_cursor(rcur, error); 1336 if (error) 1337 xfs_trans_brelse(tp, agbp); 1338 } 1339 1340 /* 1341 * Set up a continuation a deferred refcount operation by updating the intent. 1342 * Checks to make sure we're not going to run off the end of the AG. 1343 */ 1344 static inline int 1345 xfs_refcount_continue_op( 1346 struct xfs_btree_cur *cur, 1347 struct xfs_refcount_intent *ri, 1348 xfs_agblock_t new_agbno) 1349 { 1350 struct xfs_mount *mp = cur->bc_mp; 1351 struct xfs_perag *pag = cur->bc_ag.pag; 1352 1353 if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, 1354 ri->ri_blockcount))) { 1355 xfs_btree_mark_sick(cur); 1356 return -EFSCORRUPTED; 1357 } 1358 1359 ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); 1360 1361 ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); 1362 ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); 1363 1364 return 0; 1365 } 1366 1367 /* 1368 * Process one of the deferred refcount operations. We pass back the 1369 * btree cursor to maintain our lock on the btree between calls. 1370 * This saves time and eliminates a buffer deadlock between the 1371 * superblock and the AGF because we'll always grab them in the same 1372 * order. 
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri,
	struct xfs_btree_cur		**pcur)	/* in/out: cached btree cursor */
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
			ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
			ri->ri_blockcount);

	/* Error injection point for testing log recovery of intents. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
		/*
		 * Carry the update counters over to the new cursor so that
		 * the space estimate in xfs_refcount_still_have_space()
		 * keeps accounting for work already logged.
		 */
		nr_ops = rcur->bc_refc.nr_ops;
		shape_changes = rcur->bc_refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(ri->ri_pag, tp,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
		rcur->bc_refc.nr_ops = nr_ops;
		rcur->bc_refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			return error;
		/* Leftover blocks mean we must continue in a new transaction. */
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
				XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			return error;
		if (ri->ri_blockcount > 0)
			error = xfs_refcount_continue_op(rcur, ri, bno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		/* CoW staging operations always complete in one shot. */
		ri->ri_blockcount = 0;
		break;
	case XFS_REFCOUNT_FREE_COW:
		error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
		if (error)
			return error;
		ri->ri_blockcount = 0;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	if (!error && ri->ri_blockcount > 0)
		trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
				ri->ri_type, bno, ri->ri_blockcount);
	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	/* __GFP_NOFAIL: this allocation may not fail, so no NULL check. */
	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_refcount_update_get_group(tp->t_mountp, ri);
	xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans	*tp,
	struct xfs_bmbt_irec	*PREV)
{
	/* No-op unless this filesystem supports reflink. */
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans	*tp,
	struct xfs_bmbt_irec	*PREV)
{
	/* No-op unless this filesystem supports reflink. */
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,	/* out: first shared block, or NULLAGBLOCK */
	xfs_extlen_t		*flen,	/* out: length of shared run, or 0 */
	bool			find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int			i;
	int			have;
	int			error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
			&have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	/* Shared records sort before CoW records; past them means no match. */
	if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
		goto done;

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
			goto done;
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		/* Trim the record to the start of the caller's range. */
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		/* Stop at a gap, a CoW record, or the end of the range. */
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
		    tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track
of allocated blocks during a crash. 1643 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1644 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1645 * no longer in the free space btree but are not otherwise recorded anywhere 1646 * until the write completes and the blocks are mapped into the file. A crash 1647 * in between allocation and remapping results in the replacement blocks being 1648 * lost. This situation is exacerbated by the CoW extent size hint because 1649 * allocations can hang around for long time. 1650 * 1651 * However, there is a place where we can record these allocations before they 1652 * become mappings -- the reference count btree. The btree does not record 1653 * extents with refcount == 1, so we can record allocations with a refcount of 1654 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1655 * no conflict with shared block records. These mappings should be created 1656 * when we allocate blocks to the CoW fork and deleted when they're removed 1657 * from the CoW fork. 1658 * 1659 * Minor nit: records for in-progress CoW allocations and records for shared 1660 * extents must never be merged, to preserve the property that (except for CoW 1661 * allocations) there are no refcount btree entries with refcount == 1. The 1662 * only time this could potentially happen is when unsharing a block that's 1663 * adjacent to CoW allocations, so we must be careful to avoid this. 1664 * 1665 * At mount time we recover lost CoW allocations by searching the refcount 1666 * btree for these refcount == 1 mappings. These represent CoW allocations 1667 * that were in progress at the time the filesystem went down, so we can free 1668 * them to get the space back. 
1669 * 1670 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1671 * several reasons -- first, EFIs pin the tail of the log and would have to be 1672 * periodically relogged to avoid filling up the log. Second, CoW completions 1673 * will have to file an EFD and create new EFIs for whatever remains in the 1674 * CoW fork; this partially takes care of (1) but extent-size reservations 1675 * will have to periodically relog even if there's no writeout in progress. 1676 * This can happen if the CoW extent size hint is set, which you really want. 1677 * Third, EFIs cannot currently be automatically relogged into newer 1678 * transactions to advance the log tail. Fourth, stuffing the log full of 1679 * EFIs places an upper bound on the number of CoW allocations that can be 1680 * held filesystem-wide at any given time. Recording them in the refcount 1681 * btree doesn't require us to maintain any state in memory and doesn't pin 1682 * the log. 1683 */ 1684 /* 1685 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1686 * in that they're not referenced anywhere else in the filesystem, so we 1687 * stash them in the refcount btree with a refcount of 1 until either file 1688 * remapping (or CoW cancellation) happens. 
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
			&found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	/* A record past the lookup point must still be in the CoW domain. */
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
				ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (!found_rec) {
		/* Synthesize a record just past EOAG for the checks below. */
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
		ext.rc_domain = XFS_REFC_DOMAIN_COW;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* Stage the whole range as a single refcount == 1 record. */
		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		tmp.rc_domain = XFS_REFC_DOMAIN_COW;

		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
			&aglen, adj, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans	*tp,
	xfs_fsblock_t		fsb,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree.
*/ 1895 void 1896 xfs_refcount_free_cow_extent( 1897 struct xfs_trans *tp, 1898 xfs_fsblock_t fsb, 1899 xfs_extlen_t len) 1900 { 1901 struct xfs_mount *mp = tp->t_mountp; 1902 1903 if (!xfs_has_reflink(mp)) 1904 return; 1905 1906 /* Remove rmap entry */ 1907 xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1908 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1909 __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); 1910 } 1911 1912 struct xfs_refcount_recovery { 1913 struct list_head rr_list; 1914 struct xfs_refcount_irec rr_rrec; 1915 }; 1916 1917 /* Stuff an extent on the recovery list. */ 1918 STATIC int 1919 xfs_refcount_recover_extent( 1920 struct xfs_btree_cur *cur, 1921 const union xfs_btree_rec *rec, 1922 void *priv) 1923 { 1924 struct list_head *debris = priv; 1925 struct xfs_refcount_recovery *rr; 1926 1927 if (XFS_IS_CORRUPT(cur->bc_mp, 1928 be32_to_cpu(rec->refc.rc_refcount) != 1)) { 1929 xfs_btree_mark_sick(cur); 1930 return -EFSCORRUPTED; 1931 } 1932 1933 rr = kmalloc(sizeof(struct xfs_refcount_recovery), 1934 GFP_KERNEL | __GFP_NOFAIL); 1935 INIT_LIST_HEAD(&rr->rr_list); 1936 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1937 1938 if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL || 1939 XFS_IS_CORRUPT(cur->bc_mp, 1940 rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { 1941 xfs_btree_mark_sick(cur); 1942 kfree(rr); 1943 return -EFSCORRUPTED; 1944 } 1945 1946 list_add_tail(&rr->rr_list, debris); 1947 return 0; 1948 } 1949 1950 /* Find and remove leftover CoW reservations. 
 */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
	};
	union xfs_btree_irec		high = {
		.rc.rc_domain		= XFS_REFC_DOMAIN_COW,
		.rc.rc_startblock	= -1U,
	};
	xfs_fsblock_t			fsb;
	int				error;

	/* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
	BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
	if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		goto out_trans;
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);

	/* Find all the leftover CoW staging extents. */
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
				&rr->rr_rrec);

		/* Free the orphan record */
		fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
				rr->rr_rrec.rc_startblock);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		error = xfs_free_extent_later(tp, fsb,
				rr->rr_rrec.rc_blockcount, NULL,
				XFS_AG_RESV_NONE, false);
		if (error)
			goto out_trans;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kfree(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kfree(rr);
	}
	return error;
}

/*
 * Scan part of the keyspace of the refcount records and tell us if the area
 * has no records, is fully mapped by records, or is partially filled.
 */
int
xfs_refcount_has_records(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	enum xbtree_recpacking	*outcome)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	/* Build keys spanning exactly [bno, bno + len - 1] in this domain. */
	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;
	low.rc.rc_domain = high.rc.rc_domain = domain;

	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}

/* Caller-supplied callback and context for range queries. */
struct xfs_refcount_query_range_info {
	xfs_refcount_query_range_fn	fn;
	void				*priv;
};

/* Format btree record and pass to our callback.
*/ 2079 STATIC int 2080 xfs_refcount_query_range_helper( 2081 struct xfs_btree_cur *cur, 2082 const union xfs_btree_rec *rec, 2083 void *priv) 2084 { 2085 struct xfs_refcount_query_range_info *query = priv; 2086 struct xfs_refcount_irec irec; 2087 xfs_failaddr_t fa; 2088 2089 xfs_refcount_btrec_to_irec(rec, &irec); 2090 fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec); 2091 if (fa) 2092 return xfs_refcount_complain_bad_rec(cur, fa, &irec); 2093 2094 return query->fn(cur, &irec, query->priv); 2095 } 2096 2097 /* Find all refcount records between two keys. */ 2098 int 2099 xfs_refcount_query_range( 2100 struct xfs_btree_cur *cur, 2101 const struct xfs_refcount_irec *low_rec, 2102 const struct xfs_refcount_irec *high_rec, 2103 xfs_refcount_query_range_fn fn, 2104 void *priv) 2105 { 2106 union xfs_btree_irec low_brec = { .rc = *low_rec }; 2107 union xfs_btree_irec high_brec = { .rc = *high_rec }; 2108 struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn }; 2109 2110 return xfs_btree_query_range(cur, &low_brec, &high_brec, 2111 xfs_refcount_query_range_helper, &query); 2112 } 2113 2114 int __init 2115 xfs_refcount_intent_init_cache(void) 2116 { 2117 xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", 2118 sizeof(struct xfs_refcount_intent), 2119 0, 0, NULL); 2120 2121 return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; 2122 } 2123 2124 void 2125 xfs_refcount_intent_destroy_cache(void) 2126 { 2127 kmem_cache_destroy(xfs_refcount_intent_cache); 2128 xfs_refcount_intent_cache = NULL; 2129 } 2130