// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2014 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"

static struct kmem_cache	*xfs_rmapbt_cur_cache;

/*
 * Reverse map btree.
 *
 * This is a per-ag tree used to track the owner(s) of a given extent. With
 * reflink it is possible for there to be multiple owners, which is a departure
 * from classic XFS. Owner records for data extents are inserted when the
 * extent is mapped and removed when an extent is unmapped.  Owner records for
 * all other block types (i.e. metadata) are inserted when an extent is
 * allocated and removed when an extent is freed. There can only be one owner
 * of a metadata extent, usually an inode or some other metadata structure like
 * an AG btree.
 *
 * The rmap btree is part of the free space management, so blocks for the tree
 * are sourced from the agfl. Hence we need transaction reservation support for
 * this tree so that the freelist is always large enough. This also impacts on
 * the minimum space we need to leave free in the AG.
 *
 * The tree is ordered by [ag block, owner, offset]. This is a large key size,
 * but it is the only way to enforce unique keys when a block can be owned by
 * multiple files at any offset. There's no need to order/search by extent
 * size for online updating/management of the tree. It is intended that most
 * reverse lookups will be to find the owner(s) of a particular block, or to
 * try to recover tree and file data from corrupt primary metadata.
 */
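
/*
 * Worked example of the [ag block, owner, offset] key ordering (editorial
 * sketch; the block and inode numbers below are made up for illustration).
 * Two files sharing a reflinked extent at agbno 100 produce two distinct
 * records that sort adjacently:
 *
 *	[agbno 100, owner (inode) 129, offset  0, blockcount 8]
 *	[agbno 100, owner (inode) 130, offset 16, blockcount 8]
 *
 * Both records describe the same physical blocks, so only the owner (and,
 * for the same owner, the file offset) keeps the keys unique.  A reverse
 * lookup for the owners of agbno 100 walks every record whose key range
 * covers that block.
 */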

static struct xfs_btree_cur *
xfs_rmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
			cur->bc_ag.agbp, cur->bc_ag.pag);
}

STATIC void
xfs_rmapbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	int			btnum = cur->bc_btnum;

	ASSERT(ptr->s != 0);

	agf->agf_roots[btnum] = ptr->s;
	be32_add_cpu(&agf->agf_levels[btnum], inc);
	cur->bc_ag.pag->pagf_levels[btnum] += inc;

	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}

STATIC int
xfs_rmapbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	int			error;
	xfs_agblock_t		bno;

	/* Allocate the new block from the freelist. If we can't, give up.  */
	error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
				       &bno, 1);
	if (error)
		return error;

	trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
	if (bno == NULLAGBLOCK) {
		*stat = 0;
		return 0;
	}

	xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);

	new->s = cpu_to_be32(bno);
	be32_add_cpu(&agf->agf_rmap_blocks, 1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);

	xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);

	*stat = 1;
	return 0;
}

STATIC int
xfs_rmapbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	xfs_agblock_t		bno;
	int			error;

	bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
	trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
			bno, 1);
	be32_add_cpu(&agf->agf_rmap_blocks, -1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
	error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
	if (error)
		return error;

	xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
			       XFS_EXTENT_BUSY_SKIP_DISCARD);

	xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
	return 0;
}

STATIC int
xfs_rmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mnr[level != 0];
}

STATIC int
xfs_rmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_rmap_mxr[level != 0];
}

/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}

STATIC void
xfs_rmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}

/*
 * The high key for a reverse mapping record can be computed by shifting
 * the startblock and offset to the highest value that would still map
 * to that record. In practice this means that we add blockcount-1 to
 * the startblock for all records, and if the record is for a data/attr
 * fork mapping, we add blockcount-1 to the offset too.
 */
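/*
 * For example (editorial sketch with made-up numbers): a data fork mapping
 * with startblock 100, blockcount 8 and offset 16 has the low key
 * [100, owner, 16] and the high key [107, owner, 23].  A bmbt block or
 * non-inode (metadata) owner record keeps its offset unchanged, so only
 * its startblock is shifted.
 */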
STATIC void
xfs_rmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}

STATIC void
xfs_rmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

STATIC void
xfs_rmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	ptr->s = agf->agf_roots[cur->bc_btnum];
}

/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}

STATIC int64_t
xfs_rmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = offset_keymask(be64_to_cpu(kp->rm_offset));
	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

STATIC int64_t
xfs_rmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
		     be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		x = be64_to_cpu(kp1->rm_owner);
		y = be64_to_cpu(kp2->rm_owner);
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	return 0;
}

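/*
 * Note on the comparisons above (editorial sketch, numbers made up): the
 * unwritten flag is masked out of the offset before comparing, so a key
 * built from a written mapping at offset 16 and one built from an unwritten
 * mapping at offset 16 compare equal.  The attr fork and bmbt flags are not
 * masked, so attr fork and bmbt block mappings sort separately from data
 * fork mappings with the same owner and offset.
 */
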
static xfs_failaddr_t
xfs_rmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level or owner
	 * as the perag is not fully initialised and hence not attached to the
	 * buffer.  In this case, check against the maximum tree depth.
	 *
	 * Similarly, during log recovery we will have a perag structure
	 * attached, but the agf information will not yet have been initialised
	 * from the on disk AGF. Again, we can only check against maximum
	 * limits in this case.
	 */
	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_sblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (pag && xfs_perag_initialised_agf(pag)) {
		if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
			return __this_address;
	} else if (level >= mp->m_rmap_maxlevels)
		return __this_address;

	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}

static void
xfs_rmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_rmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_sblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
	.name			= "xfs_rmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
	.verify_read		= xfs_rmapbt_read_verify,
	.verify_write		= xfs_rmapbt_write_verify,
	.verify_struct		= xfs_rmapbt_verify,
};

STATIC int
xfs_rmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC int
xfs_rmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t		x;
	uint32_t		y;
	uint64_t		a;
	uint64_t		b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC enum xbtree_key_contig
xfs_rmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}

static const struct xfs_btree_ops xfs_rmapbt_ops = {
	.rec_len		= sizeof(struct xfs_rmap_rec),
	.key_len		= 2 * sizeof(struct xfs_rmap_key),

	.dup_cursor		= xfs_rmapbt_dup_cursor,
	.set_root		= xfs_rmapbt_set_root,
	.alloc_block		= xfs_rmapbt_alloc_block,
	.free_block		= xfs_rmapbt_free_block,
	.get_minrecs		= xfs_rmapbt_get_minrecs,
	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rmapbt_key_diff,
	.buf_ops		= &xfs_rmapbt_buf_ops,
	.diff_two_keys		= xfs_rmapbt_diff_two_keys,
	.keys_inorder		= xfs_rmapbt_keys_inorder,
	.recs_inorder		= xfs_rmapbt_recs_inorder,
	.keys_contiguous	= xfs_rmapbt_keys_contiguous,
};

static struct xfs_btree_cur *
xfs_rmapbt_init_common(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	/* Overlapping btree; 2 keys per pointer. */
	cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
			mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
	cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
	cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
	cur->bc_ops = &xfs_rmapbt_ops;

	cur->bc_ag.pag = xfs_perag_hold(pag);
	return cur;
}

/* Create a new reverse mapping btree cursor. */
struct xfs_btree_cur *
xfs_rmapbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, tp, pag);
	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
	cur->bc_ag.agbp = agbp;
	return cur;
}

/* Create a new reverse mapping btree cursor with a fake root for staging. */
struct xfs_btree_cur *
xfs_rmapbt_stage_cursor(
	struct xfs_mount	*mp,
	struct xbtree_afakeroot	*afake,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_rmapbt_init_common(mp, NULL, pag);
	xfs_btree_stage_afakeroot(cur, afake);
	return cur;
}

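/*
 * Typical staging flow (editorial sketch; the transaction, error handling
 * and the generic btree bulk-load details are omitted, and the variable
 * names below are hypothetical).  Btree rebuilders stage a new tree against
 * a fake root and then commit it below:
 *
 *	cur = xfs_rmapbt_stage_cursor(mp, &afake, pag);
 *	... load records through the btree staging/bulk-load code ...
 *	xfs_rmapbt_commit_staged_btree(cur, tp, agbp);
 *	xfs_btree_del_cursor(cur, 0);
 */
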
/*
 * Install a new reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
	agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
	agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
				    XFS_AGF_RMAP_BLOCKS);
	xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
}

/* Calculate number of records in a reverse mapping btree block. */
static inline unsigned int
xfs_rmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
}

/*
 * Calculate number of records in an rmap btree block.
 */
int
xfs_rmapbt_maxrecs(
	int			blocklen,
	int			leaf)
{
	blocklen -= XFS_RMAP_BLOCK_LEN;
	return xfs_rmapbt_block_maxrecs(blocklen, leaf);
}

/* Compute the max possible height for reverse mapping btrees. */
unsigned int
xfs_rmapbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;

	minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
	 *
	 * On a reflink filesystem, each AG block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records.  However, we're likely to run
	 * out of blocks in the AG long before that happens, which means that
	 * we must compute the max height based on what the btree will look
	 * like if it consumes almost all the blocks in the AG due to maximal
	 * sharing factor.
	 */
	return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
}

/* Compute the maximum height of an rmap btree. */
void
xfs_rmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	if (!xfs_has_rmapbt(mp)) {
		mp->m_rmap_maxlevels = 0;
		return;
	}

	if (xfs_has_reflink(mp)) {
		/*
		 * Compute the asymptotic maxlevels for an rmap btree on a
		 * filesystem that supports reflink.
		 *
		 * On a reflink filesystem, each AG block can have up to 2^32
		 * (per the refcount record format) owners, which means that
		 * theoretically we could face up to 2^64 rmap records.
		 * However, we're likely to run out of blocks in the AG long
		 * before that happens, which means that we must compute the
		 * max height based on what the btree will look like if it
		 * consumes almost all the blocks in the AG due to maximal
		 * sharing factor.
		 */
		mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
				mp->m_sb.sb_agblocks);
	} else {
		/*
		 * If there's no block sharing, compute the maximum rmapbt
		 * height assuming one rmap record per AG block.
		 */
		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
	}
	ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
}

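/*
 * Worked example of the rmapbt block geometry used above (editorial sketch,
 * assuming a 4096-byte block and the 56-byte short-form CRC btree header):
 * a leaf block holds (4096 - 56) / sizeof(struct xfs_rmap_rec) =
 * 4040 / 24 = 168 records, and a node block holds 4040 / (2 * 20 + 4) = 91
 * key/pointer sets, because each pointer in this overlapping btree carries
 * both a low and a high key.  Minrecs is half of each of those figures.
 */
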
/*
 * Calculate the rmap btree size for some records.
 */
xfs_extlen_t
xfs_rmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
xfs_extlen_t
xfs_rmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rmap_mxr[0] == 0)
		return 0;

	return xfs_rmapbt_calc_size(mp, agblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
int
xfs_rmapbt_calc_reserves(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_extlen_t		*ask,
	xfs_extlen_t		*used)
{
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	xfs_agblock_t		agblocks;
	xfs_extlen_t		tree_len;
	int			error;

	if (!xfs_has_rmapbt(mp))
		return 0;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		return error;

	agf = agbp->b_addr;
	agblocks = be32_to_cpu(agf->agf_length);
	tree_len = be32_to_cpu(agf->agf_rmap_blocks);
	xfs_trans_brelse(tp, agbp);

	/*
	 * The log is permanently allocated, so the space it occupies will
	 * never be available for the kinds of things that would require btree
	 * expansion.  We therefore can pretend the space isn't there.
	 */
	if (xfs_ag_contains_log(mp, pag->pag_agno))
		agblocks -= mp->m_sb.sb_logblocks;

	/* Reserve 1% of the AG or enough for 1 block per record. */
	*ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
	*used += tree_len;

	return error;
}

int __init
xfs_rmapbt_init_cur_cache(void)
{
	xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_rmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rmapbt_cur_cache);
	xfs_rmapbt_cur_cache = NULL;
}