// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ag.h"

static struct kmem_cache	*xfs_allocbt_cur_cache;

STATIC struct xfs_btree_cur *
xfs_bnobt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
			cur->bc_ag.pag);
}

STATIC struct xfs_btree_cur *
xfs_cntbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
			cur->bc_ag.pag);
}

STATIC void
xfs_allocbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;

	ASSERT(ptr->s != 0);

	if (xfs_btree_is_bno(cur->bc_ops)) {
		agf->agf_bno_root = ptr->s;
		be32_add_cpu(&agf->agf_bno_level, inc);
		cur->bc_ag.pag->pagf_bno_level += inc;
	} else {
		agf->agf_cnt_root = ptr->s;
		be32_add_cpu(&agf->agf_cnt_level, inc);
		cur->bc_ag.pag->pagf_cnt_level += inc;
	}

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

STATIC int
xfs_allocbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp,
			cur->bc_ag.agbp, &bno, 1);
	if (error)
		return error;

	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	atomic64_inc(&cur->bc_mp->m_allocbt_blks);
	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false);

	new->s = cpu_to_be32(bno);

	*stat = 1;
	return 0;
}

STATIC int
xfs_allocbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL,
			bno, 1);
	if (error)
		return error;

	atomic64_dec(&cur->bc_mp->m_allocbt_blks);
	xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
			XFS_EXTENT_BUSY_SKIP_DISCARD);
	return 0;
}

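/*
 * Illustrative sketch, not a literal call trace: the two callbacks above are
 * reached from the generic btree code, roughly as
 *
 *	xfs_btree_insert()
 *	  -> split of a full block
 *	    -> cur->bc_ops->alloc_block()	(xfs_alloc_get_freelist() above)
 *	xfs_btree_delete()
 *	  -> join/merge empties a block
 *	    -> cur->bc_ops->free_block()	(xfs_alloc_put_freelist() above)
 *
 * so blocks of the free space btrees themselves come from, and return to,
 * the AGFL rather than from the btrees they index.
 */
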
STATIC int
xfs_allocbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_alloc_mnr[level != 0];
}

STATIC int
xfs_allocbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_alloc_mxr[level != 0];
}

STATIC void
xfs_allocbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->alloc.ar_startblock = rec->alloc.ar_startblock;
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}

STATIC void
xfs_bnobt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	__u32				x;

	x = be32_to_cpu(rec->alloc.ar_startblock);
	x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
	key->alloc.ar_startblock = cpu_to_be32(x);
	key->alloc.ar_blockcount = 0;
}

STATIC void
xfs_cntbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
	key->alloc.ar_startblock = 0;
}

STATIC void
xfs_allocbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
	rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}

STATIC void
xfs_allocbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	if (xfs_btree_is_bno(cur->bc_ops))
		ptr->s = agf->agf_bno_root;
	else
		ptr->s = agf->agf_cnt_root;
}

STATIC int64_t
xfs_bnobt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
	const struct xfs_alloc_rec	*kp = &key->alloc;

	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}

STATIC int64_t
xfs_cntbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
	const struct xfs_alloc_rec	*kp = &key->alloc;
	int64_t				diff;

	diff = (int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
	if (diff)
		return diff;

	return (int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}

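/*
 * Worked example of the two sort orders compared above (illustrative only):
 * given free extents A = {startblock 10, blockcount 5} and
 * B = {startblock 100, blockcount 2}, the by-block (bno) btree orders A
 * before B because 10 < 100, while the by-size (cnt) btree orders B before A
 * because 2 < 5; the startblock only breaks ties between equal lengths.
 */
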
STATIC int64_t
xfs_bnobt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->alloc.ar_startblock);

	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
			be32_to_cpu(k2->alloc.ar_startblock);
}

STATIC int64_t
xfs_cntbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	int64_t				diff;

	ASSERT(!mask || (mask->alloc.ar_blockcount &&
			 mask->alloc.ar_startblock));

	diff = be32_to_cpu(k1->alloc.ar_blockcount) -
		be32_to_cpu(k2->alloc.ar_blockcount);
	if (diff)
		return diff;

	return be32_to_cpu(k1->alloc.ar_startblock) -
		be32_to_cpu(k2->alloc.ar_startblock);
}

static xfs_failaddr_t
xfs_allocbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (xfs_has_crc(mp)) {
		fa = xfs_btree_agblock_v5hdr_verify(bp);
		if (fa)
			return fa;
	}

	/*
	 * The perag may not be attached during grow operations or fully
	 * initialized from the AGF during log recovery. Therefore we can only
	 * check against maximum tree depth from those contexts.
	 *
	 * Otherwise check against the per-tree limit. Peek at one of the
	 * verifier magic values to determine the type of tree we're verifying
	 * against.
	 */
	level = be16_to_cpu(block->bb_level);
	if (pag && xfs_perag_initialised_agf(pag)) {
		unsigned int	maxlevel, repair_maxlevel = 0;

		/*
		 * Online repair could be rewriting the free space btrees, so
		 * we'll validate against the larger of either tree while this
		 * is going on.
		 */
		if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) {
			maxlevel = pag->pagf_cnt_level;
#ifdef CONFIG_XFS_ONLINE_REPAIR
			repair_maxlevel = pag->pagf_repair_cnt_level;
#endif
		} else {
			maxlevel = pag->pagf_bno_level;
#ifdef CONFIG_XFS_ONLINE_REPAIR
			repair_maxlevel = pag->pagf_repair_bno_level;
#endif
		}

		if (level >= max(maxlevel, repair_maxlevel))
			return __this_address;
	} else if (level >= mp->m_alloc_maxlevels)
		return __this_address;

	return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}

static void
xfs_allocbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_agblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_allocbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_allocbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_allocbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_agblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_bnobt_buf_ops = {
	.name = "xfs_bnobt",
	.magic = { cpu_to_be32(XFS_ABTB_MAGIC),
		   cpu_to_be32(XFS_ABTB_CRC_MAGIC) },
	.verify_read = xfs_allocbt_read_verify,
	.verify_write = xfs_allocbt_write_verify,
	.verify_struct = xfs_allocbt_verify,
};

const struct xfs_buf_ops xfs_cntbt_buf_ops = {
	.name = "xfs_cntbt",
	.magic = { cpu_to_be32(XFS_ABTC_MAGIC),
		   cpu_to_be32(XFS_ABTC_CRC_MAGIC) },
	.verify_read = xfs_allocbt_read_verify,
	.verify_write = xfs_allocbt_write_verify,
	.verify_struct = xfs_allocbt_verify,
};

STATIC int
xfs_bnobt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	return be32_to_cpu(k1->alloc.ar_startblock) <
	       be32_to_cpu(k2->alloc.ar_startblock);
}

STATIC int
xfs_bnobt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	return be32_to_cpu(r1->alloc.ar_startblock) +
		be32_to_cpu(r1->alloc.ar_blockcount) <=
		be32_to_cpu(r2->alloc.ar_startblock);
}

STATIC int
xfs_cntbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	return be32_to_cpu(k1->alloc.ar_blockcount) <
		be32_to_cpu(k2->alloc.ar_blockcount) ||
		(k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
		 be32_to_cpu(k1->alloc.ar_startblock) <
		 be32_to_cpu(k2->alloc.ar_startblock));
}

STATIC int
xfs_cntbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	return be32_to_cpu(r1->alloc.ar_blockcount) <
		be32_to_cpu(r2->alloc.ar_blockcount) ||
		(r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
		 be32_to_cpu(r1->alloc.ar_startblock) <
		 be32_to_cpu(r2->alloc.ar_startblock));
}

STATIC enum xbtree_key_contig
xfs_allocbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->alloc.ar_startblock);

	return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
				 be32_to_cpu(key2->alloc.ar_startblock));
}

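/*
 * Worked example (illustrative only): xfs_bnobt_init_high_key_from_rec()
 * turns the record {startblock 10, blockcount 5} into high key 14, the last
 * block of the extent.  xfs_allocbt_keys_contiguous() then reports that high
 * key and the low key of a following record starting at block 15 as
 * contiguous, because 15 immediately follows 14; a gap or an overlap would
 * not be.
 */
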
const struct xfs_btree_ops xfs_bnobt_ops = {
	.name			= "bno",
	.type			= XFS_BTREE_TYPE_AG,

	.rec_len		= sizeof(xfs_alloc_rec_t),
	.key_len		= sizeof(xfs_alloc_key_t),
	.ptr_len		= XFS_BTREE_SHORT_PTR_LEN,

	.lru_refs		= XFS_ALLOC_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_abtb_2),
	.sick_mask		= XFS_SICK_AG_BNOBT,

	.dup_cursor		= xfs_bnobt_dup_cursor,
	.set_root		= xfs_allocbt_set_root,
	.alloc_block		= xfs_allocbt_alloc_block,
	.free_block		= xfs_allocbt_free_block,
	.get_minrecs		= xfs_allocbt_get_minrecs,
	.get_maxrecs		= xfs_allocbt_get_maxrecs,
	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_bnobt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
	.key_diff		= xfs_bnobt_key_diff,
	.buf_ops		= &xfs_bnobt_buf_ops,
	.diff_two_keys		= xfs_bnobt_diff_two_keys,
	.keys_inorder		= xfs_bnobt_keys_inorder,
	.recs_inorder		= xfs_bnobt_recs_inorder,
	.keys_contiguous	= xfs_allocbt_keys_contiguous,
};

const struct xfs_btree_ops xfs_cntbt_ops = {
	.name			= "cnt",
	.type			= XFS_BTREE_TYPE_AG,

	.rec_len		= sizeof(xfs_alloc_rec_t),
	.key_len		= sizeof(xfs_alloc_key_t),
	.ptr_len		= XFS_BTREE_SHORT_PTR_LEN,

	.lru_refs		= XFS_ALLOC_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_abtc_2),
	.sick_mask		= XFS_SICK_AG_CNTBT,

	.dup_cursor		= xfs_cntbt_dup_cursor,
	.set_root		= xfs_allocbt_set_root,
	.alloc_block		= xfs_allocbt_alloc_block,
	.free_block		= xfs_allocbt_free_block,
	.get_minrecs		= xfs_allocbt_get_minrecs,
	.get_maxrecs		= xfs_allocbt_get_maxrecs,
	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_cntbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
	.key_diff		= xfs_cntbt_key_diff,
	.buf_ops		= &xfs_cntbt_buf_ops,
	.diff_two_keys		= xfs_cntbt_diff_two_keys,
	.keys_inorder		= xfs_cntbt_keys_inorder,
	.recs_inorder		= xfs_cntbt_recs_inorder,
	.keys_contiguous	= NULL, /* not needed right now */
};

/*
 * Allocate a new bnobt cursor.
 *
 * For staging cursors tp and agbp are NULL.
 */
struct xfs_btree_cur *
xfs_bnobt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
			mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
	cur->bc_ag.pag = xfs_perag_hold(pag);
	cur->bc_ag.agbp = agbp;
	if (agbp) {
		struct xfs_agf		*agf = agbp->b_addr;

		cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level);
	}
	return cur;
}

/*
 * Allocate a new cntbt cursor.
 *
 * For staging cursors tp and agbp are NULL.
 */
struct xfs_btree_cur *
xfs_cntbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
			mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
	cur->bc_ag.pag = xfs_perag_hold(pag);
	cur->bc_ag.agbp = agbp;
	if (agbp) {
		struct xfs_agf		*agf = agbp->b_addr;

		cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level);
	}
	return cur;
}

/*
 * Install a new free space btree root.  Caller is responsible for invalidating
 * and freeing the old btree blocks.
 */
void
xfs_allocbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	if (xfs_btree_is_bno(cur->bc_ops)) {
		agf->agf_bno_root = cpu_to_be32(afake->af_root);
		agf->agf_bno_level = cpu_to_be32(afake->af_levels);
	} else {
		agf->agf_cnt_root = cpu_to_be32(afake->af_root);
		agf->agf_cnt_level = cpu_to_be32(afake->af_levels);
	}
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);

	xfs_btree_commit_afakeroot(cur, tp, agbp);
}

/* Calculate number of records in an alloc btree block. */
static inline unsigned int
xfs_allocbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(xfs_alloc_rec_t);
	return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
}

/*
 * Calculate number of records in an alloc btree block.
 */
unsigned int
xfs_allocbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_ALLOC_BLOCK_LEN(mp);
	return xfs_allocbt_block_maxrecs(blocklen, leaf);
}

/* Free space btrees are at their largest when every other block is free. */
#define XFS_MAX_FREESP_RECORDS	((XFS_MAX_AG_BLOCKS + 1) / 2)

/* Compute the max possible height for free space btrees. */
unsigned int
xfs_allocbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN,
		       XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN);

	minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2;

	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS);
}

/* Calculate the freespace btree size for some records. */
xfs_extlen_t
xfs_allocbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_alloc_mnr, len);
}

int __init
xfs_allocbt_init_cur_cache(void)
{
	xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur",
			xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_allocbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_allocbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_allocbt_cur_cache);
	xfs_allocbt_cur_cache = NULL;
}
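
/*
 * Worked example for xfs_allocbt_maxrecs() above (illustrative figures,
 * assuming a 4096-byte filesystem block on a V5 filesystem, whose short-form
 * CRC btree block header is 56 bytes): blocklen = 4096 - 56 = 4040 bytes, so
 * a leaf block holds 4040 / 8 = 505 records and an interior node holds
 * 4040 / (8 + 4) = 336 key/pointer pairs.
 */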