// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ag.h"

static struct kmem_cache	*xfs_allocbt_cur_cache;

STATIC struct xfs_btree_cur *
xfs_bnobt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
			to_perag(cur->bc_group));
}

STATIC struct xfs_btree_cur *
xfs_cntbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
			to_perag(cur->bc_group));
}

STATIC void
xfs_allocbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_perag	*pag = to_perag(cur->bc_group);
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;

	ASSERT(ptr->s != 0);

	if (xfs_btree_is_bno(cur->bc_ops)) {
		agf->agf_bno_root = ptr->s;
		be32_add_cpu(&agf->agf_bno_level, inc);
		pag->pagf_bno_level += inc;
	} else {
		agf->agf_cnt_root = ptr->s;
		be32_add_cpu(&agf->agf_cnt_level, inc);
		pag->pagf_cnt_level += inc;
	}

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

STATIC int
xfs_allocbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up. */
	error = xfs_alloc_get_freelist(to_perag(cur->bc_group), cur->bc_tp,
			cur->bc_ag.agbp, &bno, 1);
	if (error)
		return error;

	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	atomic64_inc(&cur->bc_mp->m_allocbt_blks);
	xfs_extent_busy_reuse(cur->bc_group, bno, 1, false);

	new->s = cpu_to_be32(bno);

	*stat = 1;
	return 0;
}

STATIC int
xfs_allocbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	error = xfs_alloc_put_freelist(to_perag(cur->bc_group), cur->bc_tp,
			agbp, NULL, bno, 1);
	if (error)
		return error;

	atomic64_dec(&cur->bc_mp->m_allocbt_blks);
	xfs_extent_busy_insert(cur->bc_tp, pag_group(agbp->b_pag), bno, 1,
			XFS_EXTENT_BUSY_SKIP_DISCARD);
	return 0;
}

STATIC int
xfs_allocbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_alloc_mnr[level != 0];
}

STATIC int
xfs_allocbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_alloc_mxr[level != 0];
}

STATIC void
xfs_allocbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->alloc.ar_startblock = rec->alloc.ar_startblock;
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}

STATIC void
xfs_bnobt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	__u32				x;

	x = be32_to_cpu(rec->alloc.ar_startblock);
	x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
	key->alloc.ar_startblock = cpu_to_be32(x);
	key->alloc.ar_blockcount = 0;
}

STATIC void
xfs_cntbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
	key->alloc.ar_startblock = 0;
}

STATIC void
xfs_allocbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
	rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}

STATIC void
xfs_allocbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));

	if (xfs_btree_is_bno(cur->bc_ops))
		ptr->s = agf->agf_bno_root;
	else
		ptr->s = agf->agf_cnt_root;
}

STATIC int
xfs_bnobt_cmp_key_with_cur(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
	const struct xfs_alloc_rec	*kp = &key->alloc;

	return cmp_int(be32_to_cpu(kp->ar_startblock),
			rec->ar_startblock);
}

STATIC int
xfs_cntbt_cmp_key_with_cur(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_alloc_rec_incore	*rec = &cur->bc_rec.a;
	const struct xfs_alloc_rec	*kp = &key->alloc;

	return cmp_int(be32_to_cpu(kp->ar_blockcount), rec->ar_blockcount) ?:
		cmp_int(be32_to_cpu(kp->ar_startblock), rec->ar_startblock);
}

STATIC int
xfs_bnobt_cmp_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->alloc.ar_startblock);

	return cmp_int(be32_to_cpu(k1->alloc.ar_startblock),
		       be32_to_cpu(k2->alloc.ar_startblock));
}

STATIC int
xfs_cntbt_cmp_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || (mask->alloc.ar_blockcount &&
			 mask->alloc.ar_startblock));

	return cmp_int(be32_to_cpu(k1->alloc.ar_blockcount),
		       be32_to_cpu(k2->alloc.ar_blockcount)) ?:
		cmp_int(be32_to_cpu(k1->alloc.ar_startblock),
			be32_to_cpu(k2->alloc.ar_startblock));
}

static xfs_failaddr_t
xfs_allocbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (xfs_has_crc(mp)) {
		fa = xfs_btree_agblock_v5hdr_verify(bp);
		if (fa)
			return fa;
	}

	/*
	 * The perag may not be attached during grow operations or fully
	 * initialized from the AGF during log recovery. Therefore we can only
	 * check against maximum tree depth from those contexts.
	 *
	 * Otherwise check against the per-tree limit. Peek at one of the
	 * verifier magic values to determine the type of tree we're verifying
	 * against.
	 */
	level = be16_to_cpu(block->bb_level);
	if (pag && xfs_perag_initialised_agf(pag)) {
		unsigned int	maxlevel, repair_maxlevel = 0;

		/*
		 * Online repair could be rewriting the free space btrees, so
		 * we'll validate against the larger of either tree while this
		 * is going on.
		 */
278 */ 279 if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) { 280 maxlevel = pag->pagf_cnt_level; 281 #ifdef CONFIG_XFS_ONLINE_REPAIR 282 repair_maxlevel = pag->pagf_repair_cnt_level; 283 #endif 284 } else { 285 maxlevel = pag->pagf_bno_level; 286 #ifdef CONFIG_XFS_ONLINE_REPAIR 287 repair_maxlevel = pag->pagf_repair_bno_level; 288 #endif 289 } 290 291 if (level >= max(maxlevel, repair_maxlevel)) 292 return __this_address; 293 } else if (level >= mp->m_alloc_maxlevels) 294 return __this_address; 295 296 return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]); 297 } 298 299 static void 300 xfs_allocbt_read_verify( 301 struct xfs_buf *bp) 302 { 303 xfs_failaddr_t fa; 304 305 if (!xfs_btree_agblock_verify_crc(bp)) 306 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 307 else { 308 fa = xfs_allocbt_verify(bp); 309 if (fa) 310 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 311 } 312 313 if (bp->b_error) 314 trace_xfs_btree_corrupt(bp, _RET_IP_); 315 } 316 317 static void 318 xfs_allocbt_write_verify( 319 struct xfs_buf *bp) 320 { 321 xfs_failaddr_t fa; 322 323 fa = xfs_allocbt_verify(bp); 324 if (fa) { 325 trace_xfs_btree_corrupt(bp, _RET_IP_); 326 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 327 return; 328 } 329 xfs_btree_agblock_calc_crc(bp); 330 331 } 332 333 const struct xfs_buf_ops xfs_bnobt_buf_ops = { 334 .name = "xfs_bnobt", 335 .magic = { cpu_to_be32(XFS_ABTB_MAGIC), 336 cpu_to_be32(XFS_ABTB_CRC_MAGIC) }, 337 .verify_read = xfs_allocbt_read_verify, 338 .verify_write = xfs_allocbt_write_verify, 339 .verify_struct = xfs_allocbt_verify, 340 }; 341 342 const struct xfs_buf_ops xfs_cntbt_buf_ops = { 343 .name = "xfs_cntbt", 344 .magic = { cpu_to_be32(XFS_ABTC_MAGIC), 345 cpu_to_be32(XFS_ABTC_CRC_MAGIC) }, 346 .verify_read = xfs_allocbt_read_verify, 347 .verify_write = xfs_allocbt_write_verify, 348 .verify_struct = xfs_allocbt_verify, 349 }; 350 351 STATIC int 352 xfs_bnobt_keys_inorder( 353 struct xfs_btree_cur *cur, 354 const union xfs_btree_key *k1, 355 const union xfs_btree_key *k2) 356 { 357 return be32_to_cpu(k1->alloc.ar_startblock) < 358 be32_to_cpu(k2->alloc.ar_startblock); 359 } 360 361 STATIC int 362 xfs_bnobt_recs_inorder( 363 struct xfs_btree_cur *cur, 364 const union xfs_btree_rec *r1, 365 const union xfs_btree_rec *r2) 366 { 367 return be32_to_cpu(r1->alloc.ar_startblock) + 368 be32_to_cpu(r1->alloc.ar_blockcount) <= 369 be32_to_cpu(r2->alloc.ar_startblock); 370 } 371 372 STATIC int 373 xfs_cntbt_keys_inorder( 374 struct xfs_btree_cur *cur, 375 const union xfs_btree_key *k1, 376 const union xfs_btree_key *k2) 377 { 378 return be32_to_cpu(k1->alloc.ar_blockcount) < 379 be32_to_cpu(k2->alloc.ar_blockcount) || 380 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && 381 be32_to_cpu(k1->alloc.ar_startblock) < 382 be32_to_cpu(k2->alloc.ar_startblock)); 383 } 384 385 STATIC int 386 xfs_cntbt_recs_inorder( 387 struct xfs_btree_cur *cur, 388 const union xfs_btree_rec *r1, 389 const union xfs_btree_rec *r2) 390 { 391 return be32_to_cpu(r1->alloc.ar_blockcount) < 392 be32_to_cpu(r2->alloc.ar_blockcount) || 393 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && 394 be32_to_cpu(r1->alloc.ar_startblock) < 395 be32_to_cpu(r2->alloc.ar_startblock)); 396 } 397 398 STATIC enum xbtree_key_contig 399 xfs_allocbt_keys_contiguous( 400 struct xfs_btree_cur *cur, 401 const union xfs_btree_key *key1, 402 const union xfs_btree_key *key2, 403 const union xfs_btree_key *mask) 404 { 405 ASSERT(!mask || mask->alloc.ar_startblock); 406 407 return 
	return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
				 be32_to_cpu(key2->alloc.ar_startblock));
}

const struct xfs_btree_ops xfs_bnobt_ops = {
	.name			= "bno",
	.type			= XFS_BTREE_TYPE_AG,

	.rec_len		= sizeof(xfs_alloc_rec_t),
	.key_len		= sizeof(xfs_alloc_key_t),
	.ptr_len		= XFS_BTREE_SHORT_PTR_LEN,

	.lru_refs		= XFS_ALLOC_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_abtb_2),
	.sick_mask		= XFS_SICK_AG_BNOBT,

	.dup_cursor		= xfs_bnobt_dup_cursor,
	.set_root		= xfs_allocbt_set_root,
	.alloc_block		= xfs_allocbt_alloc_block,
	.free_block		= xfs_allocbt_free_block,
	.get_minrecs		= xfs_allocbt_get_minrecs,
	.get_maxrecs		= xfs_allocbt_get_maxrecs,
	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_bnobt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_bnobt_cmp_key_with_cur,
	.buf_ops		= &xfs_bnobt_buf_ops,
	.cmp_two_keys		= xfs_bnobt_cmp_two_keys,
	.keys_inorder		= xfs_bnobt_keys_inorder,
	.recs_inorder		= xfs_bnobt_recs_inorder,
	.keys_contiguous	= xfs_allocbt_keys_contiguous,
};

const struct xfs_btree_ops xfs_cntbt_ops = {
	.name			= "cnt",
	.type			= XFS_BTREE_TYPE_AG,

	.rec_len		= sizeof(xfs_alloc_rec_t),
	.key_len		= sizeof(xfs_alloc_key_t),
	.ptr_len		= XFS_BTREE_SHORT_PTR_LEN,

	.lru_refs		= XFS_ALLOC_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_abtc_2),
	.sick_mask		= XFS_SICK_AG_CNTBT,

	.dup_cursor		= xfs_cntbt_dup_cursor,
	.set_root		= xfs_allocbt_set_root,
	.alloc_block		= xfs_allocbt_alloc_block,
	.free_block		= xfs_allocbt_free_block,
	.get_minrecs		= xfs_allocbt_get_minrecs,
	.get_maxrecs		= xfs_allocbt_get_maxrecs,
	.init_key_from_rec	= xfs_allocbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_cntbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_allocbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_allocbt_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_cntbt_cmp_key_with_cur,
	.buf_ops		= &xfs_cntbt_buf_ops,
	.cmp_two_keys		= xfs_cntbt_cmp_two_keys,
	.keys_inorder		= xfs_cntbt_keys_inorder,
	.recs_inorder		= xfs_cntbt_recs_inorder,
	.keys_contiguous	= NULL, /* not needed right now */
};

/*
 * Allocate a new bnobt cursor.
 *
 * For staging cursors tp and agbp are NULL.
 */
struct xfs_btree_cur *
xfs_bnobt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
			mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
	cur->bc_group = xfs_group_hold(pag_group(pag));
	cur->bc_ag.agbp = agbp;
	if (agbp) {
		struct xfs_agf		*agf = agbp->b_addr;

		cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level);
	}
	return cur;
}

/*
 * Allocate a new cntbt cursor.
 *
 * For staging cursors tp and agbp are NULL.
 */
501 */ 502 struct xfs_btree_cur * 503 xfs_cntbt_init_cursor( 504 struct xfs_mount *mp, 505 struct xfs_trans *tp, 506 struct xfs_buf *agbp, 507 struct xfs_perag *pag) 508 { 509 struct xfs_btree_cur *cur; 510 511 cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops, 512 mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); 513 cur->bc_group = xfs_group_hold(pag_group(pag)); 514 cur->bc_ag.agbp = agbp; 515 if (agbp) { 516 struct xfs_agf *agf = agbp->b_addr; 517 518 cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level); 519 } 520 return cur; 521 } 522 523 /* 524 * Install a new free space btree root. Caller is responsible for invalidating 525 * and freeing the old btree blocks. 526 */ 527 void 528 xfs_allocbt_commit_staged_btree( 529 struct xfs_btree_cur *cur, 530 struct xfs_trans *tp, 531 struct xfs_buf *agbp) 532 { 533 struct xfs_agf *agf = agbp->b_addr; 534 struct xbtree_afakeroot *afake = cur->bc_ag.afake; 535 536 ASSERT(cur->bc_flags & XFS_BTREE_STAGING); 537 538 if (xfs_btree_is_bno(cur->bc_ops)) { 539 agf->agf_bno_root = cpu_to_be32(afake->af_root); 540 agf->agf_bno_level = cpu_to_be32(afake->af_levels); 541 } else { 542 agf->agf_cnt_root = cpu_to_be32(afake->af_root); 543 agf->agf_cnt_level = cpu_to_be32(afake->af_levels); 544 } 545 xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 546 547 xfs_btree_commit_afakeroot(cur, tp, agbp); 548 } 549 550 /* Calculate number of records in an alloc btree block. */ 551 static inline unsigned int 552 xfs_allocbt_block_maxrecs( 553 unsigned int blocklen, 554 bool leaf) 555 { 556 if (leaf) 557 return blocklen / sizeof(xfs_alloc_rec_t); 558 return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); 559 } 560 561 /* 562 * Calculate number of records in an alloc btree block. 563 */ 564 unsigned int 565 xfs_allocbt_maxrecs( 566 struct xfs_mount *mp, 567 unsigned int blocklen, 568 bool leaf) 569 { 570 blocklen -= XFS_ALLOC_BLOCK_LEN(mp); 571 return xfs_allocbt_block_maxrecs(blocklen, leaf); 572 } 573 574 /* Free space btrees are at their largest when every other block is free. */ 575 #define XFS_MAX_FREESP_RECORDS ((XFS_MAX_AG_BLOCKS + 1) / 2) 576 577 /* Compute the max possible height for free space btrees. */ 578 unsigned int 579 xfs_allocbt_maxlevels_ondisk(void) 580 { 581 unsigned int minrecs[2]; 582 unsigned int blocklen; 583 584 blocklen = min(XFS_MIN_BLOCKSIZE - XFS_BTREE_SBLOCK_LEN, 585 XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN); 586 587 minrecs[0] = xfs_allocbt_block_maxrecs(blocklen, true) / 2; 588 minrecs[1] = xfs_allocbt_block_maxrecs(blocklen, false) / 2; 589 590 return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_FREESP_RECORDS); 591 } 592 593 /* Calculate the freespace btree size for some records. */ 594 xfs_extlen_t 595 xfs_allocbt_calc_size( 596 struct xfs_mount *mp, 597 unsigned long long len) 598 { 599 return xfs_btree_calc_size(mp->m_alloc_mnr, len); 600 } 601 602 int __init 603 xfs_allocbt_init_cur_cache(void) 604 { 605 xfs_allocbt_cur_cache = kmem_cache_create("xfs_bnobt_cur", 606 xfs_btree_cur_sizeof(xfs_allocbt_maxlevels_ondisk()), 607 0, 0, NULL); 608 609 if (!xfs_allocbt_cur_cache) 610 return -ENOMEM; 611 return 0; 612 } 613 614 void 615 xfs_allocbt_destroy_cur_cache(void) 616 { 617 kmem_cache_destroy(xfs_allocbt_cur_cache); 618 xfs_allocbt_cur_cache = NULL; 619 } 620