// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_health.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_buf_item.h"
#include "xfs_rtgroup.h"
#include "xfs_rtbitmap.h"
#include "xfs_metafile.h"
#include "xfs_metadir.h"

/* Allocate the incore object for one realtime group and insert it. */
int
xfs_rtgroup_alloc(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno,
	xfs_rgnumber_t		rgcount,
	xfs_rtbxlen_t		rextents)
{
	struct xfs_rtgroup	*rtg;
	int			error;

	rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL);
	if (!rtg)
		return -ENOMEM;

	error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
	if (error)
		goto out_free_rtg;
	return 0;

out_free_rtg:
	kfree(rtg);
	return error;
}

/* Free the incore object for one realtime group. */
void
xfs_rtgroup_free(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL);
}

/* Free a range of incore rtgroup objects. */
void
xfs_free_rtgroups(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		first_rgno,
	xfs_rgnumber_t		end_rgno)
{
	xfs_rgnumber_t		rgno;

	for (rgno = first_rgno; rgno < end_rgno; rgno++)
		xfs_rtgroup_free(mp, rgno);
}

/* Initialize some range of incore rtgroup objects. */
int
xfs_initialize_rtgroups(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		first_rgno,
	xfs_rgnumber_t		end_rgno,
	xfs_rtbxlen_t		rextents)
{
	xfs_rgnumber_t		index;
	int			error;

	if (first_rgno >= end_rgno)
		return 0;

	for (index = first_rgno; index < end_rgno; index++) {
		error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents);
		if (error)
			goto out_unwind_new_rtgs;
	}

	return 0;

out_unwind_new_rtgs:
	xfs_free_rtgroups(mp, first_rgno, index);
	return error;
}

/* Compute the number of rt extents in this realtime group. */
xfs_rtxnum_t
__xfs_rtgroup_extents(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno,
	xfs_rgnumber_t		rgcount,
	xfs_rtbxlen_t		rextents)
{
	ASSERT(rgno < rgcount);
	if (rgno == rgcount - 1)
		return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents);

	ASSERT(xfs_has_rtgroups(mp));
	return mp->m_sb.sb_rgextents;
}

/* Compute the number of rt extents in this rtgroup from the incore geometry. */
xfs_rtxnum_t
xfs_rtgroup_extents(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount,
			mp->m_sb.sb_rextents);
}
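/*
 * Worked example of the tail-group computation above (illustrative numbers,
 * not taken from any particular filesystem): with sb_rgextents = 1000,
 * rextents = 2500, and rgcount = 3, groups 0 and 1 each hold 1000 rt
 * extents, and the last group holds the remainder, 2500 - (2 * 1000) = 500.
 */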
/*
 * Update the rt extent count of the previous tail rtgroup if it changed during
 * recovery (i.e. recovery of a growfs).
 */
int
xfs_update_last_rtgroup_size(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		prev_rgcount)
{
	struct xfs_rtgroup	*rtg;

	ASSERT(prev_rgcount > 0);

	rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1);
	if (!rtg)
		return -EFSCORRUPTED;
	rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
			mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
	xfs_rtgroup_rele(rtg);
	return 0;
}

/* Lock metadata inodes associated with this rt group. */
void
xfs_rtgroup_lock(
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		/*
		 * Lock both realtime free space metadata inodes for a
		 * freespace update.
		 */
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
	}
}

/* Unlock metadata inodes associated with this rt group. */
void
xfs_rtgroup_unlock(
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
	}
}

/*
 * Join realtime group metadata inodes to the transaction. The ILOCKs will be
 * released on transaction commit.
 */
void
xfs_rtgroup_trans_join(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
				XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
				XFS_ILOCK_EXCL);
	}
}
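/*
 * Hypothetical caller sketch, for illustration only: a free space update
 * typically locks the bitmap and summary inodes, joins them to the
 * transaction, and relies on commit to drop the ILOCKs:
 *
 *	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
 *	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP);
 *	... modify rt free space metadata ...
 *	error = xfs_trans_commit(tp);
 */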
/* Retrieve rt group geometry. */
int
xfs_rtgroup_get_geometry(
	struct xfs_rtgroup	*rtg,
	struct xfs_rtgroup_geometry *rgeo)
{
	/* Fill out form. */
	memset(rgeo, 0, sizeof(*rgeo));
	rgeo->rg_number = rtg_rgno(rtg);
	rgeo->rg_length = rtg->rtg_extents * rtg_mount(rtg)->m_sb.sb_rextsize;
	xfs_rtgroup_geom_health(rtg, rgeo);
	return 0;
}

#ifdef CONFIG_PROVE_LOCKING
static struct lock_class_key xfs_rtginode_lock_class;

static int
xfs_rtginode_ilock_cmp_fn(
	const struct lockdep_map	*m1,
	const struct lockdep_map	*m2)
{
	const struct xfs_inode		*ip1 =
		container_of(m1, struct xfs_inode, i_lock.dep_map);
	const struct xfs_inode		*ip2 =
		container_of(m2, struct xfs_inode, i_lock.dep_map);

	if (ip1->i_projid < ip2->i_projid)
		return -1;
	if (ip1->i_projid > ip2->i_projid)
		return 1;
	return 0;
}

static inline void
xfs_rtginode_ilock_print_fn(
	const struct lockdep_map	*m)
{
	const struct xfs_inode		*ip =
		container_of(m, struct xfs_inode, i_lock.dep_map);

	printk(KERN_CONT " rgno=%u", ip->i_projid);
}

/*
 * Most of the time, each of the RTG inode locks is taken only one at a time.
 * But when committing deferred ops, more than one of a kind can be taken.
 * However, deferred rt ops will be committed in rgno order, so there is no
 * potential for deadlocks. The code here is needed to tell lockdep about
 * this order.
 */
static inline void
xfs_rtginode_lockdep_setup(
	struct xfs_inode	*ip,
	xfs_rgnumber_t		rgno,
	enum xfs_rtg_inodes	type)
{
	lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class,
			type);
	lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn,
			xfs_rtginode_ilock_print_fn);
}
#else
#define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
#endif /* CONFIG_PROVE_LOCKING */

struct xfs_rtginode_ops {
	const char		*name;	/* short name */

	enum xfs_metafile_type	metafile_type;

	unsigned int		sick;	/* rtgroup sickness flag */

	/* Does the fs have this feature? */
	bool			(*enabled)(struct xfs_mount *mp);

	/* Create this rtgroup metadata inode and initialize it. */
	int			(*create)(struct xfs_rtgroup *rtg,
					  struct xfs_inode *ip,
					  struct xfs_trans *tp,
					  bool init);
};

static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
	[XFS_RTGI_BITMAP] = {
		.name		= "bitmap",
		.metafile_type	= XFS_METAFILE_RTBITMAP,
		.sick		= XFS_SICK_RG_BITMAP,
		.create		= xfs_rtbitmap_create,
	},
	[XFS_RTGI_SUMMARY] = {
		.name		= "summary",
		.metafile_type	= XFS_METAFILE_RTSUMMARY,
		.sick		= XFS_SICK_RG_SUMMARY,
		.create		= xfs_rtsummary_create,
	},
};

/* Return the shortname of this rtgroup inode. */
const char *
xfs_rtginode_name(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].name;
}

/* Return the metafile type of this rtgroup inode. */
enum xfs_metafile_type
xfs_rtginode_metafile_type(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].metafile_type;
}
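/*
 * Illustrative lookups, with values coming straight out of the
 * xfs_rtginode_ops table above: xfs_rtginode_name(XFS_RTGI_BITMAP) returns
 * "bitmap", and xfs_rtginode_metafile_type(XFS_RTGI_SUMMARY) returns
 * XFS_METAFILE_RTSUMMARY.
 */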
/* Should this rtgroup inode be present? */
bool
xfs_rtginode_enabled(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type)
{
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];

	if (!ops->enabled)
		return true;
	return ops->enabled(rtg_mount(rtg));
}

/* Mark an rtgroup inode sick. */
void
xfs_rtginode_mark_sick(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type)
{
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];

	xfs_group_mark_sick(rtg_group(rtg), ops->sick);
}

/* Load an existing rtgroup inode into the rtgroup structure. */
int
xfs_rtginode_load(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type,
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_inode	*ip;
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
	int			error;

	if (!xfs_rtginode_enabled(rtg, type))
		return 0;

	if (!xfs_has_rtgroups(mp)) {
		xfs_ino_t	ino;

		switch (type) {
		case XFS_RTGI_BITMAP:
			ino = mp->m_sb.sb_rbmino;
			break;
		case XFS_RTGI_SUMMARY:
			ino = mp->m_sb.sb_rsumino;
			break;
		default:
			/* None of the other types exist on !rtgroups */
			return 0;
		}

		error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type,
				&ip);
	} else {
		const char	*path;

		if (!mp->m_rtdirip) {
			xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
			return -EFSCORRUPTED;
		}

		path = xfs_rtginode_path(rtg_rgno(rtg), type);
		if (!path)
			return -ENOMEM;
		error = xfs_metadir_load(tp, mp->m_rtdirip, path,
				ops->metafile_type, &ip);
		kfree(path);
	}

	if (error) {
		if (xfs_metadata_is_sick(error))
			xfs_rtginode_mark_sick(rtg, type);
		return error;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
			   ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
		xfs_irele(ip);
		xfs_rtginode_mark_sick(rtg, type);
		return -EFSCORRUPTED;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
		xfs_irele(ip);
		xfs_rtginode_mark_sick(rtg, type);
		return -EFSCORRUPTED;
	}

	xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type);
	rtg->rtg_inodes[type] = ip;
	return 0;
}

/* Release an rtgroup metadata inode. */
void
xfs_rtginode_irele(
	struct xfs_inode	**ipp)
{
	if (*ipp)
		xfs_irele(*ipp);
	*ipp = NULL;
}
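/*
 * Sketch of the expected pairing (hypothetical caller, for illustration
 * only): a successful xfs_rtginode_load() stashes the inode in
 * rtg->rtg_inodes[type], and that reference is dropped later with
 * xfs_rtginode_irele(&rtg->rtg_inodes[type]), which also clears the pointer.
 */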
/* Create an rtgroup metadata inode, initialize it, and attach it to the rtgroup. */
int
xfs_rtginode_create(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type,
	bool			init)
{
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
	struct xfs_mount	*mp = rtg_mount(rtg);
	struct xfs_metadir_update upd = {
		.dp		= mp->m_rtdirip,
		.metafile_type	= ops->metafile_type,
	};
	int			error;

	if (!xfs_rtginode_enabled(rtg, type))
		return 0;

	if (!mp->m_rtdirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
	if (!upd.path)
		return -ENOMEM;

	error = xfs_metadir_start_create(&upd);
	if (error)
		goto out_path;

	error = xfs_metadir_create(&upd, S_IFREG);
	if (error)
		goto out_cancel;

	xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);

	upd.ip->i_projid = rtg_rgno(rtg);
	error = ops->create(rtg, upd.ip, upd.tp, init);
	if (error)
		goto out_cancel;

	error = xfs_metadir_commit(&upd);
	if (error)
		goto out_path;

	kfree(upd.path);
	xfs_finish_inode_setup(upd.ip);
	rtg->rtg_inodes[type] = upd.ip;
	return 0;

out_cancel:
	xfs_metadir_cancel(&upd, error);
	/* Have to finish setting up the inode to ensure it's deleted. */
	if (upd.ip) {
		xfs_finish_inode_setup(upd.ip);
		xfs_irele(upd.ip);
	}
out_path:
	kfree(upd.path);
	return error;
}

/* Create the parent directory for all rtgroup inodes and load it. */
int
xfs_rtginode_mkdir_parent(
	struct xfs_mount	*mp)
{
	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
}

/* Load the parent directory of all rtgroup inodes. */
int
xfs_rtginode_load_parent(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
			XFS_METAFILE_DIR, &mp->m_rtdirip);
}

/* Check superblock fields for a read or a write. */
static xfs_failaddr_t
xfs_rtsb_verify_common(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;

	if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
		return __this_address;
	if (rsb->rsb_pad)
		return __this_address;

	/* Everything to the end of the fs block must be zero */
	if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
		return __this_address;

	return NULL;
}
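/*
 * Note on the split between the two verifiers: xfs_rtsb_verify_common()
 * performs only self-contained structural checks, so it can run in both the
 * read and write paths; xfs_rtsb_verify_all() below additionally compares
 * the label and UUIDs against the incore superblock, which presupposes valid
 * incore state and is therefore reserved for reads and revalidation.
 */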
/* Check superblock fields for a read or revalidation. */
static inline xfs_failaddr_t
xfs_rtsb_verify_all(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;
	struct xfs_mount	*mp = bp->b_mount;
	xfs_failaddr_t		fa;

	fa = xfs_rtsb_verify_common(bp);
	if (fa)
		return fa;

	if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;

	return NULL;
}

static void
xfs_rtsb_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
		return;
	}

	fa = xfs_rtsb_verify_all(bp);
	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}

static void
xfs_rtsb_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rtsb_verify_common(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
}

const struct xfs_buf_ops xfs_rtsb_buf_ops = {
	.name = "xfs_rtsb",
	.magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
	.verify_read = xfs_rtsb_read_verify,
	.verify_write = xfs_rtsb_write_verify,
	.verify_struct = xfs_rtsb_verify_all,
};

/* Update a realtime superblock from the primary fs super. */
void
xfs_update_rtsb(
	struct xfs_buf		*rtsb_bp,
	const struct xfs_buf	*sb_bp)
{
	const struct xfs_dsb	*dsb = sb_bp->b_addr;
	struct xfs_rtsb		*rsb = rtsb_bp->b_addr;
	const uuid_t		*meta_uuid;

	rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);

	rsb->rsb_pad = 0;
	memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);

	memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));

	/*
	 * The metadata uuid is the fs uuid if the metauuid feature is not
	 * enabled.
	 */
	if (dsb->sb_features_incompat &
	    cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
		meta_uuid = &dsb->sb_meta_uuid;
	else
		meta_uuid = &dsb->sb_uuid;
	memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
}

/*
 * Update the realtime superblock from a filesystem superblock and log it to
 * the given transaction.
 */
struct xfs_buf *
xfs_log_rtsb(
	struct xfs_trans	*tp,
	const struct xfs_buf	*sb_bp)
{
	struct xfs_buf		*rtsb_bp;

	if (!xfs_has_rtsb(tp->t_mountp))
		return NULL;

	rtsb_bp = xfs_trans_getrtsb(tp);
	if (!rtsb_bp) {
		/*
		 * The rtgroups feature can be enabled while no incore rt
		 * superblock buffer exists, if the rt geometry was specified
		 * at mkfs time but the rt section has not yet been attached.
		 * In this case, rblocks must be zero.
		 */
		ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
		return NULL;
	}

	xfs_update_rtsb(rtsb_bp, sb_bp);
	xfs_trans_ordered_buf(tp, rtsb_bp);
	return rtsb_bp;
}
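/*
 * Hypothetical caller sketch, for illustration only: after updating and
 * logging the primary superblock buffer in a transaction, mirror the change
 * into the rt superblock, which is attached as an ordered buffer:
 *
 *	struct xfs_buf	*rtsb_bp = xfs_log_rtsb(tp, sb_bp);
 *	error = xfs_trans_commit(tp);
 */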