// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_metafile.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_rtgroup.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"

/* Slab cache for rtrmap btree cursors, sized for the max ondisk height. */
static struct kmem_cache	*xfs_rtrmapbt_cur_cache;

/*
 * Realtime Reverse Map btree.
 *
 * This is a btree used to track the owner(s) of a given extent in the realtime
 * device. See the comments in xfs_rmap_btree.c for more information.
 *
 * This tree is basically the same as the regular rmap btree except that it
 * is rooted in an inode and does not live in free space.
 */

/* Duplicate this cursor: same transaction, same rtgroup. */
static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
}

/*
 * Minimum records per block at the given level. The root block lives in
 * the inode fork, so its capacity is derived from the current incore root
 * size; all other levels use the precomputed per-mount minimums.
 */
STATIC int
xfs_rtrmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0) / 2;
	}

	return cur->bc_mp->m_rtrmap_mnr[level != 0];
}

/*
 * Maximum records per block at the given level. As with get_minrecs, the
 * inode-rooted level is sized from the incore root buffer.
 */
STATIC int
xfs_rtrmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0);
	}

	return cur->bc_mp->m_rtrmap_mxr[level != 0];
}

/* Calculate number of records in the ondisk realtime rmap btree inode root. */
unsigned int
xfs_rtrmapbt_droot_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= sizeof(struct xfs_rtrmap_root);

	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	/* Node blocks carry two keys (low and high) per pointer. */
	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
			sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Get the maximum records we could store in the on-disk format.
 *
 * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
 * for the root node this checks the available space in the dinode fork
 * so that we can resize the in-memory buffer to match it. After a
 * resize to the maximum size this function returns the same value
 * as xfs_rtrmapbt_get_maxrecs for the root node, too.
 */
STATIC int
xfs_rtrmapbt_get_dmaxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level != cur->bc_nlevels - 1)
		return cur->bc_mp->m_rtrmap_mxr[level != 0];
	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
}

/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}

/* Build the low key of a record: its startblock/owner/offset. */
STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}

/*
 * Build the high key of a record: the last block covered by the mapping.
 * The file offset is only advanced for real inode-owned data mappings;
 * non-inode owners and bmbt blocks have no meaningful offset to extend.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	/* adj = blockcount - 1, the distance to the last mapped block */
	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	/* Advance the offset portion while preserving the flag bits. */
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}

/* Pack the cursor's in-memory record into ondisk (big-endian) form. */
STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

/* Inode-rooted btree: there is no ondisk root pointer, so zero it. */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}

/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}

/*
 * Compare an ondisk key against the cursor's lookup record. Ordering is
 * startblock, then owner, then (masked) offset.
 */
STATIC int
xfs_rtrmapbt_cmp_key_with_cur(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;

	return cmp_int(be32_to_cpu(kp->rm_startblock), rec->rm_startblock) ?:
	       cmp_int(be64_to_cpu(kp->rm_owner), rec->rm_owner) ?:
	       cmp_int(offset_keymask(be64_to_cpu(kp->rm_offset)),
		       offset_keymask(xfs_rmap_irec_offset_pack(rec)));
}

/*
 * Compare two ondisk keys, optionally restricting the comparison to the
 * fields selected by @mask. The physical space (startblock) part is always
 * compared.
 */
STATIC int
xfs_rtrmapbt_cmp_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int				d;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = cmp_int(be32_to_cpu(kp1->rm_startblock),
		    be32_to_cpu(kp2->rm_startblock));
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		d = cmp_int(be64_to_cpu(kp1->rm_owner),
			    be64_to_cpu(kp2->rm_owner));
		if (d)
			return d;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		d = cmp_int(offset_keymask(be64_to_cpu(kp1->rm_offset)),
			    offset_keymask(be64_to_cpu(kp2->rm_offset)));
		if (d)
			return d;
	}

	return 0;
}

/* Structural verifier for an ondisk rtrmap btree block. */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}

/* Read verifier: check the CRC first, then the block structure. */
static void
xfs_rtrmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_fsblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rtrmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

/* Write verifier: check the block structure, then stamp the CRC. */
static void
xfs_rtrmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rtrmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_fsblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};

/* Return 1 if k1 sorts at or before k2 (startblock, owner, masked offset). */
STATIC int
xfs_rtrmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

/* Return 1 if r1 sorts at or before r2, same ordering as keys_inorder. */
STATIC int
xfs_rtrmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

/* Decide whether two keys cover adjacent physical space. */
STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
				 be32_to_cpu(key2->rmap.rm_startblock));
}

/*
 * Move the node-block pointer array when the incore root block is resized,
 * because in an inode root the pointers are not butted up against the keys.
 */
static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Reallocate the incore btree root block to hold @new_numrecs records,
 * relocating the pointer area of node-level roots as needed. Returns the
 * (possibly moved) root block.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs. If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header. Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}

const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_rtrmapbt_cmp_key_with_cur,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.cmp_two_keys		= xfs_rtrmapbt_cmp_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};

/* Allocate a new rt rmap btree cursor. */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	/* Caller must hold the rmap inode locked in some mode. */
	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}

#ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Validate an in-memory realtime rmap btree block. Callers are allowed to
 * generate an in-memory btree even if the ondisk feature is not enabled.
 */
static xfs_failaddr_t
xfs_rtrmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	/* Bound the level by the ondisk maximum if the feature is absent. */
	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rtrmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rtrmapbt_maxlevels_ondisk())
			return __this_address;
	}

	maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}

/* Read/write verifier for in-memory btree blocks; structure check only. */
static void
xfs_rtrmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa = xfs_rtrmapbt_mem_verify(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}

/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
	.name			= "xfs_rtrmapbt_mem",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_mem_rw_verify,
	.verify_write		= xfs_rtrmapbt_mem_rw_verify,
	.verify_struct		= xfs_rtrmapbt_mem_verify,
};

/* Ops for the in-memory (xfbtree) flavor of the rtrmap btree. */
const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),

	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.cmp_key_with_cur	= xfs_rtrmapbt_cmp_key_with_cur,
	.buf_ops		= &xfs_rtrmapbt_mem_buf_ops,
	.cmp_two_keys		= xfs_rtrmapbt_cmp_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
};

/* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
xfs_rtrmapbt_mem_cursor(
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
	cur->bc_mem.xfbtree = xfbt;
	cur->bc_nlevels = xfbt->nlevels;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	return cur;
}

/* Create an in-memory realtime rmap btree. */
int
xfs_rtrmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_rgnumber_t		rgno)
{
	/* The in-memory tree is tagged with the rtgroup it describes. */
	xfbt->owner = rgno;
	return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
}
#endif /* CONFIG_XFS_BTREE_IN_MEM */

/*
 * Install a new rt reverse mapping btree root. Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	/* The rmap inode records its rtgroup number in the project id. */
	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}

/* Calculate number of records in a rt reverse mapping btree block. */
static inline unsigned int
xfs_rtrmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	/* Node blocks carry two keys (low and high) per pointer. */
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Calculate number of records in an rt reverse mapping btree block.
 */
unsigned int
xfs_rtrmapbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_RTRMAP_BLOCK_LEN;
	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
}

/* Compute the max possible height for realtime reverse mapping btrees. */
unsigned int
xfs_rtrmapbt_maxlevels_ondisk(void)
{
	unsigned long long	max_dblocks;
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;

	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;

	/*
	 * Compute the asymptotic maxlevels for an rtrmapbt on any rtreflink fs.
	 *
	 * On a reflink filesystem, each block in an rtgroup can have up to
	 * 2^32 (per the refcount record format) owners, which means that
	 * theoretically we could face up to 2^64 rmap records. However, we're
	 * likely to run out of blocks in the data device long before that
	 * happens, which means that we must compute the max height based on
	 * what the btree will look like if it consumes almost all the blocks
	 * in the data device due to maximal sharing factor.
	 */
	max_dblocks = -1U; /* max ag count */
	max_dblocks *= XFS_MAX_CRC_AG_BLOCKS;
	return xfs_btree_space_to_height(minrecs, max_dblocks);
}

/* Set up the rtrmap btree cursor cache; called at module init. */
int __init
xfs_rtrmapbt_init_cur_cache(void)
{
	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rtrmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}

/* Tear down the rtrmap btree cursor cache; called at module exit. */
void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}

/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 *
	 * On a reflink filesystem, each rt block can have up to 2^32 (per the
	 * refcount record format) owners, which means that theoretically we
	 * could face up to 2^64 rmap records. This makes the computation of
	 * maxlevels based on record count meaningless, so we only consider the
	 * size of the data device.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
				mp->m_sb.sb_dblocks);
	if (xfs_has_rtreflink(mp)) {
		mp->m_rtrmap_maxlevels = d_maxlevels + 1;
		return;
	}

	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
				mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}

/* Calculate the rtrmap btree size for some records. */
unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
static unsigned long long
xfs_rtrmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rtrmap_mxr[0] == 0)
		return 0;

	return xfs_rtrmapbt_calc_size(mp, rtblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}

/* Convert on-disk form of btree root to in-memory form. */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key and pointer arrays separately. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records. */
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Load a realtime reverse mapping btree root in from disk. */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/* Reject roots that are too tall or overflow the fork area. */
	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}

/* Convert in-memory form of btree root to on-disk form. */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		numrecs;
	unsigned int		maxrecs;

	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		/* Node root: copy the key and pointer arrays separately. */
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		/* Leaf root: copy the records. */
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
			xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}

/*
 * Create a realtime rmap btree inode.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}

/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	/* Only the first rtgroup carries the rt superblock. */
	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}

/*
 * Return the highest rgbno currently tracked by the rmap for this rtg.
 */
xfs_rgblock_t
xfs_rtrmap_highest_rgbno(
	struct xfs_rtgroup	*rtg)
{
	struct xfs_btree_block	*block = rtg_rmap(rtg)->i_df.if_broot;
	union xfs_btree_key	key = {};
	struct xfs_btree_cur	*cur;

	if (block->bb_numrecs == 0)
		return NULLRGBLOCK;
	cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	/* The root's high key covers the entire tree. */
	xfs_btree_get_keys(cur, block, &key);
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return be32_to_cpu(key.__rmap_bigkey[1].rm_startblock);
}