// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_metafile.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_rtgroup.h"
#include "xfs_bmap.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"

static struct kmem_cache	*xfs_rtrmapbt_cur_cache;

/*
 * Realtime Reverse Map btree.
 *
 * This is a btree used to track the owner(s) of a given extent in the realtime
 * device.  See the comments in xfs_rmap_btree.c for more information.
 *
 * This tree is basically the same as the regular rmap btree except that it
 * is rooted in an inode and does not live in free space.
 */

static struct xfs_btree_cur *
xfs_rtrmapbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
}

/*
 * The realtime rmap btree is rooted in an inode, so the root block is sized
 * by the incore fork rather than the filesystem block size.
 */
STATIC int
xfs_rtrmapbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0) / 2;
	}

	return cur->bc_mp->m_rtrmap_mnr[level != 0];
}

STATIC int
xfs_rtrmapbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level == cur->bc_nlevels - 1) {
		struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

		return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
				level == 0);
	}

	return cur->bc_mp->m_rtrmap_mxr[level != 0];
}

/* Calculate number of records in the ondisk realtime rmap btree inode root. */
unsigned int
xfs_rtrmapbt_droot_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= sizeof(struct xfs_rtrmap_root);

	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen / (2 * sizeof(struct xfs_rmap_key) +
			sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Get the maximum records we could store in the on-disk format.
 *
 * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
 * for the root node this checks the available space in the dinode fork
 * so that we can resize the in-memory buffer to match it.  After a
 * resize to the maximum size this function returns the same value
 * as xfs_rtrmapbt_get_maxrecs for the root node, too.
 */
STATIC int
xfs_rtrmapbt_get_dmaxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	if (level != cur->bc_nlevels - 1)
		return cur->bc_mp->m_rtrmap_mxr[level != 0];
	return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
}

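/*
 * Worked example (illustrative only, not used by the code): assuming
 * 4096-byte blocks, 24-byte rmap records, 20-byte rmap keys, 8-byte block
 * pointers, and the 72-byte long-format CRC block header, a full leaf
 * block holds (4096 - 72) / 24 = 167 records and a full node block holds
 * (4096 - 72) / (2 * 20 + 8) = 83 key/pointer sets.  The ondisk inode root
 * uses only the 4-byte xfs_rtrmap_root header, so a 256-byte data fork
 * could hold (256 - 4) / 24 = 10 root leaf records.
 */
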
/*
 * Convert the ondisk record's offset field into the ondisk key's offset field.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
{
	return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
}

STATIC void
xfs_rtrmapbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
}

/*
 * Compute the high key of a record: the key of the last block that the
 * record maps.  Non-inode owners and bmbt blocks have no logical offset
 * to adjust.
 */
STATIC void
xfs_rtrmapbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	uint64_t			off;
	int				adj;

	adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;

	key->rmap.rm_startblock = rec->rmap.rm_startblock;
	be32_add_cpu(&key->rmap.rm_startblock, adj);
	key->rmap.rm_owner = rec->rmap.rm_owner;
	key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
	if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
	    XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
		return;
	off = be64_to_cpu(key->rmap.rm_offset);
	off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
	key->rmap.rm_offset = cpu_to_be64(off);
}

STATIC void
xfs_rtrmapbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
	rec->rmap.rm_offset = cpu_to_be64(
			xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
}

/* The root lives in the inode fork, so there is no ondisk root pointer. */
STATIC void
xfs_rtrmapbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	ptr->l = 0;
}

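/*
 * Example of the low/high key derivation above (illustrative values): a
 * record mapping {rm_startblock = 10, rm_blockcount = 5, rm_owner = 128,
 * rm_offset = 3} produces a low key of {10, 128, 3} and a high key of
 * {14, 128, 7}, i.e. the key of its last block.  For non-inode owners or
 * bmbt blocks the offset is not adjusted, and the unwritten flag is always
 * masked off because written state is not part of the key.
 */
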
/*
 * Mask the appropriate parts of the ondisk key field for a key comparison.
 * Fork and bmbt are significant parts of the rmap record key, but written
 * status is merely a record attribute.
 */
static inline uint64_t offset_keymask(uint64_t offset)
{
	return offset & ~XFS_RMAP_OFF_UNWRITTEN;
}

STATIC int64_t
xfs_rtrmapbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	struct xfs_rmap_irec		*rec = &cur->bc_rec.r;
	const struct xfs_rmap_key	*kp = &key->rmap;
	__u64				x, y;
	int64_t				d;

	d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
	if (d)
		return d;

	x = be64_to_cpu(kp->rm_owner);
	y = rec->rm_owner;
	if (x > y)
		return 1;
	else if (y > x)
		return -1;

	x = offset_keymask(be64_to_cpu(kp->rm_offset));
	y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
	if (x > y)
		return 1;
	else if (y > x)
		return -1;
	return 0;
}

STATIC int64_t
xfs_rtrmapbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	const struct xfs_rmap_key	*kp1 = &k1->rmap;
	const struct xfs_rmap_key	*kp2 = &k2->rmap;
	int64_t				d;
	__u64				x, y;

	/* Doesn't make sense to mask off the physical space part */
	ASSERT(!mask || mask->rmap.rm_startblock);

	d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
	    be32_to_cpu(kp2->rm_startblock);
	if (d)
		return d;

	if (!mask || mask->rmap.rm_owner) {
		x = be64_to_cpu(kp1->rm_owner);
		y = be64_to_cpu(kp2->rm_owner);
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	if (!mask || mask->rmap.rm_offset) {
		/* Doesn't make sense to allow offset but not owner */
		ASSERT(!mask || mask->rmap.rm_owner);

		x = offset_keymask(be64_to_cpu(kp1->rm_offset));
		y = offset_keymask(be64_to_cpu(kp2->rm_offset));
		if (x > y)
			return 1;
		else if (y > x)
			return -1;
	}

	return 0;
}

/* Decide if this rt rmap btree block is structurally sane. */
static xfs_failaddr_t
xfs_rtrmapbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	int			level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_rmapbt(mp))
		return __this_address;
	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;
	level = be16_to_cpu(block->bb_level);
	if (level > mp->m_rtrmap_maxlevels)
		return __this_address;

	return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
}

static void
xfs_rtrmapbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_fsblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_rtrmapbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}

static void
xfs_rtrmapbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rtrmapbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_fsblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
	.name			= "xfs_rtrmapbt",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_read_verify,
	.verify_write		= xfs_rtrmapbt_write_verify,
	.verify_struct		= xfs_rtrmapbt_verify,
};

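/*
 * Note on the ops table above (explanatory, based on the generic xfs buffer
 * verifier contract): the first .magic slot is zero because the realtime
 * rmapbt only exists on V5 (CRC-enabled) filesystems.  A read through the
 * buffer cache, e.g.:
 *
 *	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, daddr, nblks,
 *			0, &bp, &xfs_rtrmapbt_buf_ops);
 *
 * runs xfs_rtrmapbt_read_verify() after the I/O completes (CRC first, then
 * structure), and writeback runs xfs_rtrmapbt_write_verify() to recheck the
 * structure and recompute the CRC just before the block goes to disk.
 */
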
/* Return 1 if k1 sorts at or before k2: startblock, then owner, then offset. */
STATIC int
xfs_rtrmapbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

/* Return 1 if r1 sorts at or before r2, using the same ordering as the keys. */
STATIC int
xfs_rtrmapbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	uint32_t			x;
	uint32_t			y;
	uint64_t			a;
	uint64_t			b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x < y)
		return 1;
	else if (x > y)
		return 0;
	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a < b)
		return 1;
	else if (a > b)
		return 0;
	a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
	b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
	if (a <= b)
		return 1;
	return 0;
}

STATIC enum xbtree_key_contig
xfs_rtrmapbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->rmap.rm_startblock);

	/*
	 * We only support checking contiguity of the physical space component.
	 * If any callers ever need more specificity than that, they'll have to
	 * implement it here.
	 */
	ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));

	return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
			be32_to_cpu(key2->rmap.rm_startblock));
}

/*
 * Move the node block pointers within the incore root when its size changes,
 * since the pointer area's offset depends on the size of the root buffer.
 */
static inline void
xfs_rtrmapbt_move_ptrs(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*broot,
	short			old_size,
	size_t			new_size,
	unsigned int		numrecs)
{
	void			*dptr;
	void			*sptr;

	sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
	dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
	memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
}

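/*
 * Illustrative layout of the incore inode root block (not to scale):
 *
 *	+---------------------+ 0
 *	| btree block header  |
 *	+---------------------+ XFS_RTRMAP_BLOCK_LEN
 *	| keys[1..maxrecs]    |
 *	+---------------------+
 *	| ptrs[1..maxrecs]    | <- offset depends on maxrecs, hence on the
 *	+---------------------+    total size of the root buffer
 *
 * Because the pointer area is addressed relative to the buffer size rather
 * than packed against the keys, growing or shrinking the root requires the
 * memmove() above to relocate the pointers.
 */
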
/*
 * Resize the incore btree root block to hold new_numrecs records, moving the
 * node pointer area when the root is not a leaf.
 */
static struct xfs_btree_block *
xfs_rtrmapbt_broot_realloc(
	struct xfs_btree_cur	*cur,
	unsigned int		new_numrecs)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
	struct xfs_btree_block	*broot;
	unsigned int		new_size;
	unsigned int		old_size = ifp->if_broot_bytes;
	const unsigned int	level = cur->bc_nlevels - 1;

	new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);

	/* Handle the nop case quietly. */
	if (new_size == old_size)
		return ifp->if_broot;

	if (new_size > old_size) {
		unsigned int	old_numrecs;

		/*
		 * If there wasn't any memory allocated before, just allocate
		 * it now and get out.
		 */
		if (old_size == 0)
			return xfs_broot_realloc(ifp, new_size);

		/*
		 * If there is already an existing if_broot, then we need to
		 * realloc it and possibly move the node block pointers because
		 * those are not butted up against the btree block header.
		 */
		old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
		broot = xfs_broot_realloc(ifp, new_size);
		if (level > 0)
			xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
					old_numrecs);
		goto out_broot;
	}

	/*
	 * We're reducing numrecs.  If we're going all the way to zero, just
	 * free the block.
	 */
	ASSERT(ifp->if_broot != NULL && old_size > 0);
	if (new_size == 0)
		return xfs_broot_realloc(ifp, 0);

	/*
	 * Shrink the btree root by possibly moving the rtrmapbt pointers,
	 * since they are not butted up against the btree block header.  Then
	 * reallocate broot.
	 */
	if (level > 0)
		xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
				new_numrecs);
	broot = xfs_broot_realloc(ifp, new_size);

out_broot:
	ASSERT(xfs_rtrmap_droot_space(broot) <=
	       xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
	return broot;
}

const struct xfs_btree_ops xfs_rtrmapbt_ops = {
	.name			= "rtrmap",
	.type			= XFS_BTREE_TYPE_INODE,
	.geom_flags		= XFS_BTGEO_OVERLAPPING |
				  XFS_BTGEO_IROOT_RECORDS,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_2),
	.sick_mask		= XFS_SICK_RG_RMAPBT,

	.dup_cursor		= xfs_rtrmapbt_dup_cursor,
	.alloc_block		= xfs_btree_alloc_metafile_block,
	.free_block		= xfs_btree_free_metafile_block,
	.get_minrecs		= xfs_rtrmapbt_get_minrecs,
	.get_maxrecs		= xfs_rtrmapbt_get_maxrecs,
	.get_dmaxrecs		= xfs_rtrmapbt_get_dmaxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_rtrmapbt_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
	.broot_realloc		= xfs_rtrmapbt_broot_realloc,
};

/* Allocate a new rt rmap btree cursor. */
struct xfs_btree_cur *
xfs_rtrmapbt_init_cursor(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_inode	*ip = rtg_rmap(rtg);
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);

	cur->bc_ino.ip = ip;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	cur->bc_ino.whichfork = XFS_DATA_FORK;
	cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
	cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);

	return cur;
}

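/*
 * Typical cursor usage (sketch; assumes the caller already holds the rmap
 * inode's ILOCK, e.g. via xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP)):
 *
 *	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
 *	error = xfs_rmap_query_all(cur, walk_fn, priv);
 *	xfs_btree_del_cursor(cur, error);
 *
 * walk_fn and priv here are caller-supplied placeholders; the generic query
 * helpers in xfs_rmap.c work unchanged because this cursor is interchangeable
 * with a regular rmapbt cursor at that layer.
 */
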
#ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Validate an in-memory realtime rmap btree block.  Callers are allowed to
 * generate an in-memory btree even if the ondisk feature is not enabled.
 */
static xfs_failaddr_t
xfs_rtrmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		fa;
	unsigned int		level;
	unsigned int		maxrecs;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (xfs_has_rmapbt(mp)) {
		if (level >= mp->m_rtrmap_maxlevels)
			return __this_address;
	} else {
		if (level >= xfs_rtrmapbt_maxlevels_ondisk())
			return __this_address;
	}

	maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
	return xfs_btree_memblock_verify(bp, maxrecs);
}

static void
xfs_rtrmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa = xfs_rtrmapbt_mem_verify(bp);

	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}

/* skip crc checks on in-memory btrees to save time */
static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
	.name			= "xfs_rtrmapbt_mem",
	.magic			= { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
	.verify_read		= xfs_rtrmapbt_mem_rw_verify,
	.verify_write		= xfs_rtrmapbt_mem_rw_verify,
	.verify_struct		= xfs_rtrmapbt_mem_verify,
};

const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
	.type			= XFS_BTREE_TYPE_MEM,
	.geom_flags		= XFS_BTGEO_OVERLAPPING,

	.rec_len		= sizeof(struct xfs_rmap_rec),
	/* Overlapping btree; 2 keys per pointer. */
	.key_len		= 2 * sizeof(struct xfs_rmap_key),
	.ptr_len		= XFS_BTREE_LONG_PTR_LEN,

	.lru_refs		= XFS_RMAP_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),

	.dup_cursor		= xfbtree_dup_cursor,
	.set_root		= xfbtree_set_root,
	.alloc_block		= xfbtree_alloc_block,
	.free_block		= xfbtree_free_block,
	.get_minrecs		= xfbtree_get_minrecs,
	.get_maxrecs		= xfbtree_get_maxrecs,
	.init_key_from_rec	= xfs_rtrmapbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_rtrmapbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_rtrmapbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfbtree_init_ptr_from_cur,
	.key_diff		= xfs_rtrmapbt_key_diff,
	.buf_ops		= &xfs_rtrmapbt_mem_buf_ops,
	.diff_two_keys		= xfs_rtrmapbt_diff_two_keys,
	.keys_inorder		= xfs_rtrmapbt_keys_inorder,
	.recs_inorder		= xfs_rtrmapbt_recs_inorder,
	.keys_contiguous	= xfs_rtrmapbt_keys_contiguous,
};

/* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
xfs_rtrmapbt_mem_cursor(
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
			mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
	cur->bc_mem.xfbtree = xfbt;
	cur->bc_nlevels = xfbt->nlevels;
	cur->bc_group = xfs_group_hold(rtg_group(rtg));
	return cur;
}

/* Create an in-memory realtime rmap btree. */
int
xfs_rtrmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_rgnumber_t		rgno)
{
	xfbt->owner = rgno;
	return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
}
#endif /* CONFIG_XFS_BTREE_IN_MEM */

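/*
 * Sketch of the in-memory btree lifecycle (as used by online repair; the
 * xmbuf_alloc and xfbtree_destroy helpers are assumptions drawn from the
 * generic xfbtree code, not from this file):
 *
 *	struct xfs_buftarg	*btp;
 *	struct xfbtree		xfbt;
 *
 *	error = xmbuf_alloc(mp, "rtrmap repair", &btp);
 *	error = xfs_rtrmapbt_mem_init(mp, &xfbt, btp, rtg_rgno(rtg));
 *	cur = xfs_rtrmapbt_mem_cursor(rtg, tp, &xfbt);
 *	... stage records into the in-memory tree ...
 *	xfs_btree_del_cursor(cur, error);
 *	xfbtree_destroy(&xfbt);
 */
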
/*
 * Install a new rt reverse mapping btree root.  Caller is responsible for
 * invalidating and freeing the old btree blocks.
 */
void
xfs_rtrmapbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp)
{
	struct xbtree_ifakeroot	*ifake = cur->bc_ino.ifake;
	struct xfs_ifork	*ifp;
	int			flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
	ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);

	/*
	 * Free any resources hanging off the real fork, then shallow-copy the
	 * staging fork's contents into the real fork to transfer everything
	 * we just built.
	 */
	ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
	xfs_idestroy_fork(ifp);
	memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));

	cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
	xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
	xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
}

/* Calculate number of records in an rt reverse mapping btree block. */
static inline unsigned int
xfs_rtrmapbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_rmap_rec);
	return blocklen /
		(2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
}

/*
 * Calculate number of records in an rt reverse mapping btree block,
 * accounting for the block header.
 */
unsigned int
xfs_rtrmapbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_RTRMAP_BLOCK_LEN;
	return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
}

/* Compute the max possible height for realtime reverse mapping btrees. */
unsigned int
xfs_rtrmapbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;

	minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;

	/* We need at most one record for every block in an rt group. */
	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
}

int __init
xfs_rtrmapbt_init_cur_cache(void)
{
	xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
			xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_rtrmapbt_cur_cache)
		return -ENOMEM;
	return 0;
}

void
xfs_rtrmapbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
	xfs_rtrmapbt_cur_cache = NULL;
}

/* Compute the maximum height of an rt reverse mapping btree. */
void
xfs_rtrmapbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	unsigned int		d_maxlevels, r_maxlevels;

	if (!xfs_has_rtrmapbt(mp)) {
		mp->m_rtrmap_maxlevels = 0;
		return;
	}

	/*
	 * The realtime rmapbt lives on the data device, which means that its
	 * maximum height is constrained by the size of the data device and
	 * the height required to store one rmap record for each block in an
	 * rt group.
	 */
	d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
			mp->m_sb.sb_dblocks);
	r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
			mp->m_groups[XG_TYPE_RTG].blocks);

	/* Add one level to handle the inode root level. */
	mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
}

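/*
 * Rough height math (illustrative): with 4096-byte blocks, minrecs is about
 * {83, 41}, i.e. half of the full-block maxrecs worked out earlier, so a
 * tree of height h is guaranteed to hold at least 83 * 41^(h-1) records;
 * height 4 already covers 83 * 41^3, roughly 5.7 million rmap records.
 */
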
/* Calculate the rtrmap btree size for some records. */
unsigned long long
xfs_rtrmapbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
}

/*
 * Calculate the maximum rmap btree size.
 */
static unsigned long long
xfs_rtrmapbt_max_size(
	struct xfs_mount	*mp,
	xfs_rtblock_t		rtblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_rtrmap_mxr[0] == 0)
		return 0;

	return xfs_rtrmapbt_calc_size(mp, rtblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
xfs_filblks_t
xfs_rtrmapbt_calc_reserves(
	struct xfs_mount	*mp)
{
	uint32_t		blocks = mp->m_groups[XG_TYPE_RTG].blocks;

	if (!xfs_has_rtrmapbt(mp))
		return 0;

	/* Reserve 1% of the rtgroup or enough for 1 block per record. */
	return max_t(xfs_filblks_t, blocks / 100,
			xfs_rtrmapbt_max_size(mp, blocks));
}

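/*
 * Reservation example (illustrative, assuming the 4096-byte-block minrecs of
 * {83, 41} from above): for a 1,000,000-block rtgroup, 1% is 10,000 blocks,
 * while a worst-case tree of one record per block needs about 1000000/83 =
 * 12049 leaves plus 294 + 8 + 1 node blocks above them, roughly 12,400
 * blocks total, so the per-record bound wins and is what gets reserved.
 */
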
/* Convert on-disk form of btree root to in-memory form. */
STATIC void
xfs_rtrmapbt_from_disk(
	struct xfs_inode	*ip,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen,
	struct xfs_btree_block	*rblock)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		rblocklen = xfs_rtrmap_broot_space(mp, dblock);
	unsigned int		numrecs;
	unsigned int		maxrecs;

	xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);

	rblock->bb_level = dblock->bb_level;
	rblock->bb_numrecs = dblock->bb_numrecs;
	numrecs = be16_to_cpu(dblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		tkp = xfs_rtrmap_key_addr(rblock, 1);
		fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		trp = xfs_rtrmap_rec_addr(rblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Load a realtime reverse mapping btree root in from disk. */
int
xfs_iformat_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
	struct xfs_btree_block	*broot;
	unsigned int		numrecs;
	unsigned int		level;
	int			dsize;

	/*
	 * growfs must create the rtrmap inodes before adding a realtime volume
	 * to the filesystem, so we cannot use the rtrmapbt predicate here.
	 */
	if (!xfs_has_rmapbt(ip->i_mount)) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
	numrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	if (level > mp->m_rtrmap_maxlevels ||
	    xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
		return -EFSCORRUPTED;
	}

	broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
			xfs_rtrmap_broot_space_calc(mp, level, numrecs));
	if (broot)
		xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
	return 0;
}

/* Convert in-memory form of btree root to on-disk form. */
void
xfs_rtrmapbt_to_disk(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*rblock,
	unsigned int		rblocklen,
	struct xfs_rtrmap_root	*dblock,
	unsigned int		dblocklen)
{
	struct xfs_rmap_key	*fkp;
	__be64			*fpp;
	struct xfs_rmap_key	*tkp;
	__be64			*tpp;
	struct xfs_rmap_rec	*frp;
	struct xfs_rmap_rec	*trp;
	unsigned int		numrecs;
	unsigned int		maxrecs;

	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
	ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
	ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));

	dblock->bb_level = rblock->bb_level;
	dblock->bb_numrecs = rblock->bb_numrecs;
	numrecs = be16_to_cpu(rblock->bb_numrecs);

	if (be16_to_cpu(rblock->bb_level) > 0) {
		maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
		fkp = xfs_rtrmap_key_addr(rblock, 1);
		tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
		fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
		tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
		memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
		memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
	} else {
		frp = xfs_rtrmap_rec_addr(rblock, 1);
		trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
		memcpy(trp, frp, sizeof(*frp) * numrecs);
	}
}

/* Flush a realtime reverse mapping btree root out to disk. */
void
xfs_iflush_rtrmap(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_rtrmap_root	*dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);

	ASSERT(ifp->if_broot != NULL);
	ASSERT(ifp->if_broot_bytes > 0);
	ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
	       xfs_inode_fork_size(ip, XFS_DATA_FORK));
	xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
			dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
}

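/*
 * Note on the two root formats handled above (summary of the copy loops):
 * the ondisk root (struct xfs_rtrmap_root) is just a small level/numrecs
 * header followed by records or keys/pointers sized for the inode fork,
 * while the incore root carries a full long-format btree block header.
 * That size difference is why xfs_rtrmapbt_from_disk() and
 * xfs_rtrmapbt_to_disk() recompute maxrecs for each side instead of doing
 * a flat copy.
 */
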
/*
 * Create a realtime rmap btree inode.
 */
int
xfs_rtrmapbt_create(
	struct xfs_rtgroup	*rtg,
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	bool			init)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_btree_block	*broot;

	ifp->if_format = XFS_DINODE_FMT_META_BTREE;
	ASSERT(ifp->if_broot_bytes == 0);
	ASSERT(ifp->if_bytes == 0);

	/* Initialize the empty incore btree root. */
	broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
	if (broot)
		xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
				ip->i_ino);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);

	return 0;
}

/*
 * Initialize an rmap for a realtime superblock using the potentially updated
 * rt geometry in the provided @mp.
 */
int
xfs_rtrmapbt_init_rtsb(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	struct xfs_trans	*tp)
{
	struct xfs_rmap_irec	rmap = {
		.rm_blockcount	= mp->m_sb.sb_rextsize,
		.rm_owner	= XFS_RMAP_OWN_FS,
	};
	struct xfs_btree_cur	*cur;
	int			error;

	ASSERT(xfs_has_rtsb(mp));
	ASSERT(rtg_rgno(rtg) == 0);

	cur = xfs_rtrmapbt_init_cursor(tp, rtg);
	error = xfs_rmap_map_raw(cur, &rmap);
	xfs_btree_del_cursor(cur, error);
	return error;
}
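
/*
 * Putting it together (a sketch of the callers assumed for this file):
 * mkfs/growfs create the metadata inode, call xfs_rtrmapbt_create(), and
 * then xfs_rtrmapbt_init_rtsb() for group 0, which records the rt
 * superblock as {rm_startblock = 0, rm_blockcount = sb_rextsize,
 * rm_owner = XFS_RMAP_OWN_FS}.  At mount time xfs_iformat_rtrmap() reads
 * the root back in, and xfs_iflush_rtrmap() writes it out whenever the
 * inode is flushed.
 */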