1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2018 Red Hat, Inc. 5 * All rights reserved. 6 */ 7 8 #include "xfs_platform.h" 9 #include "xfs_fs.h" 10 #include "xfs_shared.h" 11 #include "xfs_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_sb.h" 15 #include "xfs_mount.h" 16 #include "xfs_btree.h" 17 #include "xfs_alloc_btree.h" 18 #include "xfs_rmap_btree.h" 19 #include "xfs_alloc.h" 20 #include "xfs_ialloc.h" 21 #include "xfs_rmap.h" 22 #include "xfs_ag.h" 23 #include "xfs_ag_resv.h" 24 #include "xfs_health.h" 25 #include "xfs_error.h" 26 #include "xfs_bmap.h" 27 #include "xfs_defer.h" 28 #include "xfs_log_format.h" 29 #include "xfs_trans.h" 30 #include "xfs_trace.h" 31 #include "xfs_inode.h" 32 #include "xfs_icache.h" 33 #include "xfs_group.h" 34 35 /* 36 * xfs_initialize_perag_data 37 * 38 * Read in each per-ag structure so we can count up the number of 39 * allocated inodes, free inodes and used filesystem blocks as this 40 * information is no longer persistent in the superblock. Once we have 41 * this information, write it into the in-core superblock structure. 42 */ 43 int 44 xfs_initialize_perag_data( 45 struct xfs_mount *mp, 46 xfs_agnumber_t agcount) 47 { 48 xfs_agnumber_t index; 49 struct xfs_perag *pag; 50 struct xfs_sb *sbp = &mp->m_sb; 51 uint64_t ifree = 0; 52 uint64_t ialloc = 0; 53 uint64_t bfree = 0; 54 uint64_t bfreelst = 0; 55 uint64_t btree = 0; 56 uint64_t fdblocks; 57 int error = 0; 58 59 for (index = 0; index < agcount; index++) { 60 /* 61 * Read the AGF and AGI buffers to populate the per-ag 62 * structures for us. 
63 */ 64 pag = xfs_perag_get(mp, index); 65 error = xfs_alloc_read_agf(pag, NULL, 0, NULL); 66 if (!error) 67 error = xfs_ialloc_read_agi(pag, NULL, 0, NULL); 68 if (error) { 69 xfs_perag_put(pag); 70 return error; 71 } 72 73 ifree += pag->pagi_freecount; 74 ialloc += pag->pagi_count; 75 bfree += pag->pagf_freeblks; 76 bfreelst += pag->pagf_flcount; 77 btree += pag->pagf_btreeblks; 78 xfs_perag_put(pag); 79 } 80 fdblocks = bfree + bfreelst + btree; 81 82 /* 83 * If the new summary counts are obviously incorrect, fail the 84 * mount operation because that implies the AGFs are also corrupt. 85 * Clear FS_COUNTERS so that we don't unmount with a dirty log, which 86 * will prevent xfs_repair from fixing anything. 87 */ 88 if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { 89 xfs_alert(mp, "AGF corruption. Please run xfs_repair."); 90 xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); 91 error = -EFSCORRUPTED; 92 goto out; 93 } 94 95 /* Overwrite incore superblock counters with just-read data */ 96 spin_lock(&mp->m_sb_lock); 97 sbp->sb_ifree = ifree; 98 sbp->sb_icount = ialloc; 99 sbp->sb_fdblocks = fdblocks; 100 spin_unlock(&mp->m_sb_lock); 101 102 xfs_reinit_percpu_counters(mp); 103 out: 104 xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS); 105 return error; 106 } 107 108 static void 109 xfs_perag_uninit( 110 struct xfs_group *xg) 111 { 112 #ifdef __KERNEL__ 113 cancel_delayed_work_sync(&to_perag(xg)->pag_blockgc_work); 114 #endif 115 } 116 117 /* 118 * Free up the per-ag resources within the specified AG range. 119 */ 120 void 121 xfs_free_perag_range( 122 struct xfs_mount *mp, 123 xfs_agnumber_t first_agno, 124 xfs_agnumber_t end_agno) 125 126 { 127 xfs_agnumber_t agno; 128 129 for (agno = first_agno; agno < end_agno; agno++) 130 xfs_group_free(mp, agno, XG_TYPE_AG, xfs_perag_uninit); 131 } 132 133 /* Find the size of the AG, in blocks. 
*/ 134 static xfs_agblock_t 135 __xfs_ag_block_count( 136 struct xfs_mount *mp, 137 xfs_agnumber_t agno, 138 xfs_agnumber_t agcount, 139 xfs_rfsblock_t dblocks) 140 { 141 ASSERT(agno < agcount); 142 143 if (agno < agcount - 1) 144 return mp->m_sb.sb_agblocks; 145 return dblocks - (agno * mp->m_sb.sb_agblocks); 146 } 147 148 xfs_agblock_t 149 xfs_ag_block_count( 150 struct xfs_mount *mp, 151 xfs_agnumber_t agno) 152 { 153 return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount, 154 mp->m_sb.sb_dblocks); 155 } 156 157 /* Calculate the first and last possible inode number in an AG. */ 158 static void 159 __xfs_agino_range( 160 struct xfs_mount *mp, 161 xfs_agblock_t eoag, 162 xfs_agino_t *first, 163 xfs_agino_t *last) 164 { 165 xfs_agblock_t bno; 166 167 /* 168 * Calculate the first inode, which will be in the first 169 * cluster-aligned block after the AGFL. 170 */ 171 bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); 172 *first = XFS_AGB_TO_AGINO(mp, bno); 173 174 /* 175 * Calculate the last inode, which will be at the end of the 176 * last (aligned) cluster that can be allocated in the AG. 177 */ 178 bno = round_down(eoag, M_IGEO(mp)->cluster_align); 179 *last = XFS_AGB_TO_AGINO(mp, bno) - 1; 180 } 181 182 void 183 xfs_agino_range( 184 struct xfs_mount *mp, 185 xfs_agnumber_t agno, 186 xfs_agino_t *first, 187 xfs_agino_t *last) 188 { 189 return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); 190 } 191 192 /* 193 * Update the perag of the previous tail AG if it has been changed during 194 * recovery (i.e. recovery of a growfs). 
*/
int
xfs_update_last_ag_size(
	struct xfs_mount	*mp,
	xfs_agnumber_t		prev_agcount)
{
	struct xfs_perag	*pag = xfs_perag_grab(mp, prev_agcount - 1);

	/* The old tail AG could not be grabbed. */
	if (!pag)
		return -EFSCORRUPTED;

	/* Recompute the AG size against the current superblock geometry. */
	pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp,
			prev_agcount - 1, mp->m_sb.sb_agcount,
			mp->m_sb.sb_dblocks);

	/* The inode number range is derived from the AG size; refresh it. */
	__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
			&pag->agino_max);
	xfs_perag_rele(pag);
	return 0;
}

/*
 * Allocate the perag for AG @index, precompute its geometry for a filesystem
 * of @agcount AGs and @dblocks data blocks, and hand it to
 * xfs_group_insert().
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
 * xfs_group_insert(), in which case the perag is freed again.
 */
static int
xfs_perag_alloc(
	struct xfs_mount	*mp,
	xfs_agnumber_t		index,
	xfs_agnumber_t		agcount,
	xfs_rfsblock_t		dblocks)
{
	struct xfs_perag	*pag;
	int			error;

	pag = kzalloc_obj(*pag);
	if (!pag)
		return -ENOMEM;

#ifdef __KERNEL__
	/* Place kernel structure only init below this point. */
	spin_lock_init(&pag->pag_ici_lock);
	INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
	INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
#endif /* __KERNEL__ */

	/*
	 * Pre-calculated geometry
	 */
	pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount,
				dblocks);
	pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1;
	__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
			&pag->agino_max);

	error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
	if (error)
		goto out_free_perag;

	return 0;

out_free_perag:
	kfree(pag);
	return error;
}

/*
 * Set up the per-ag structures for the AGs in [orig_agcount, new_agcount).
 *
 * @dblocks is the data block count used to size the AGs.  On success
 * *maxagi is set from xfs_set_inode_alloc() and m_ag_prealloc_blocks is
 * recomputed.  When there are no new AGs (orig_agcount >= new_agcount) this
 * returns 0 without touching *maxagi.  On failure every perag created by
 * this call is freed again and the error is returned.
 */
int
xfs_initialize_perag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		orig_agcount,
	xfs_agnumber_t		new_agcount,
	xfs_rfsblock_t		dblocks,
	xfs_agnumber_t		*maxagi)
{
	xfs_agnumber_t		index;
	int			error;

	if (orig_agcount >= new_agcount)
		return 0;

	for (index = orig_agcount; index < new_agcount; index++) {
		error = xfs_perag_alloc(mp, index, new_agcount, dblocks);
		if (error)
			goto out_unwind_new_pags;
	}

	*maxagi = xfs_set_inode_alloc(mp, new_agcount);
	mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
	return 0;

out_unwind_new_pags:
	/* @index is the AG that failed, so only the AGs before it exist. */
	xfs_free_perag_range(mp, orig_agcount, index);
	return error;
}

/*
 * Get an uncached buffer of @numblks on the data device, point it at disk
 * address @blkno and attach @ops as its buffer ops.  The buffer is returned
 * through @bpp.  Returns 0 or the error from xfs_buf_get_uncached().
 */
static int
xfs_get_aghdr_buf(
	struct xfs_mount	*mp,
	xfs_daddr_t		blkno,
	size_t			numblks,
	struct xfs_buf		**bpp,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, &bp);
	if (error)
		return error;

	bp->b_maps[0].bm_bn = blkno;
	bp->b_ops = ops;

	*bpp = bp;
	return 0;
}

/*
 * Generic btree root block init function
 *
 * Initialises @bp with the btree ops and AG number carried in @id.
 */
static void
xfs_btroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
}

/*
 * Finish initializing a free space btree.
 *
 * Fills in the free space record(s) of the root block in @bp for the AG
 * described by @id and bumps bb_numrecs once per record written.  If the AG
 * contains the internal log, the log blocks are left out of the free space.
 */
static void
xfs_freesp_init_recs(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_alloc_rec	*arec;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* The first record starts right after the preallocated AG blocks. */
	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);

	if (xfs_ag_contains_log(mp, id->agno)) {
		struct xfs_alloc_rec	*nrec;
		xfs_agblock_t		start = XFS_FSB_TO_AGBNO(mp,
							mp->m_sb.sb_logstart);

		ASSERT(start >= mp->m_ag_prealloc_blocks);
		if (start != mp->m_ag_prealloc_blocks) {
			/*
			 * Modify first record to pad stripe align of log and
			 * bump the record count.
			 */
			arec->ar_blockcount = cpu_to_be32(start -
						mp->m_ag_prealloc_blocks);
			be16_add_cpu(&block->bb_numrecs, 1);
			nrec = arec + 1;

			/*
			 * Insert second record at start of internal log
			 * which then gets trimmed.
			 */
			nrec->ar_startblock = cpu_to_be32(
					be32_to_cpu(arec->ar_startblock) +
					be32_to_cpu(arec->ar_blockcount));
			arec = nrec;
		}
		/*
		 * Change record start to after the internal log
		 */
		be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
	}

	/*
	 * Calculate the block count of this record; if it is nonzero,
	 * increment the record count.
	 */
	arec->ar_blockcount = cpu_to_be32(id->agsize -
					  be32_to_cpu(arec->ar_startblock));
	if (arec->ar_blockcount)
		be16_add_cpu(&block->bb_numrecs, 1);
}

/*
 * bnobt/cntbt btree root block init functions
 *
 * Initialise the root block and then add the initial free space records.
 */
static void
xfs_bnoroot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
	xfs_freesp_init_recs(mp, bp, id);
}

/*
 * Reverse map root block init
 *
 * Writes the four fixed owner records and then the optional refcount btree
 * and internal log records.
 */
static void
xfs_rmaproot_init(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	struct aghdr_init_data	*id)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_rmap_rec	*rrec;

	/*
	 * NOTE(review): the 4 passed here presumably presets the record count
	 * for the four fixed records below; the optional records bump
	 * bb_numrecs themselves.  Confirm against xfs_btree_init_buf().
	 */
	xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno);

	/*
	 * mark the AG header regions as static metadata.  The BNO
	 * btree block is the first block after the headers, so
	 * its location defines the size of region the static
	 * metadata consumes.
	 *
	 * Note: unlike mkfs, we never have to account for log
	 * space when growing the data regions
	 */
	rrec = XFS_RMAP_REC_ADDR(block, 1);
	rrec->rm_startblock = 0;
	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
	rrec->rm_offset = 0;

	/* account freespace btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 2);
	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(2);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account inode btree root blocks */
	rrec = XFS_RMAP_REC_ADDR(block, 3);
	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
					  XFS_IBT_BLOCK(mp));
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
	rrec->rm_offset = 0;

	/* account for rmap btree root */
	rrec = XFS_RMAP_REC_ADDR(block, 4);
	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
	rrec->rm_blockcount = cpu_to_be32(1);
	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
	rrec->rm_offset = 0;

	/* account for refc btree root */
	if (xfs_has_reflink(mp)) {
		rrec = XFS_RMAP_REC_ADDR(block, 5);
		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
		rrec->rm_blockcount = cpu_to_be32(1);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}

	/* account for the log space */
	if (xfs_ag_contains_log(mp, id->agno)) {
		/* Goes after the last record written so far. */
		rrec = XFS_RMAP_REC_ADDR(block,
				be16_to_cpu(block->bb_numrecs) + 1);
		rrec->rm_startblock = cpu_to_be32(
				XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
		rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
		rrec->rm_offset = 0;
		be16_add_cpu(&block->bb_numrecs, 1);
	}
}

/*
 * Initialise new secondary superblocks with the pre-grow
geometry, but mark 461 * them as "in progress" so we know they haven't yet been activated. This will 462 * get cleared when the update with the new geometry information is done after 463 * changes to the primary are committed. This isn't strictly necessary, but we 464 * get it for free with the delayed buffer write lists and it means we can tell 465 * if a grow operation didn't complete properly after the fact. 466 */ 467 static void 468 xfs_sbblock_init( 469 struct xfs_mount *mp, 470 struct xfs_buf *bp, 471 struct aghdr_init_data *id) 472 { 473 struct xfs_dsb *dsb = bp->b_addr; 474 475 xfs_sb_to_disk(dsb, &mp->m_sb); 476 dsb->sb_inprogress = 1; 477 } 478 479 static void 480 xfs_agfblock_init( 481 struct xfs_mount *mp, 482 struct xfs_buf *bp, 483 struct aghdr_init_data *id) 484 { 485 struct xfs_agf *agf = bp->b_addr; 486 xfs_extlen_t tmpsize; 487 488 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); 489 agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); 490 agf->agf_seqno = cpu_to_be32(id->agno); 491 agf->agf_length = cpu_to_be32(id->agsize); 492 agf->agf_bno_root = cpu_to_be32(XFS_BNO_BLOCK(mp)); 493 agf->agf_cnt_root = cpu_to_be32(XFS_CNT_BLOCK(mp)); 494 agf->agf_bno_level = cpu_to_be32(1); 495 agf->agf_cnt_level = cpu_to_be32(1); 496 if (xfs_has_rmapbt(mp)) { 497 agf->agf_rmap_root = cpu_to_be32(XFS_RMAP_BLOCK(mp)); 498 agf->agf_rmap_level = cpu_to_be32(1); 499 agf->agf_rmap_blocks = cpu_to_be32(1); 500 } 501 502 agf->agf_flfirst = cpu_to_be32(1); 503 agf->agf_fllast = 0; 504 agf->agf_flcount = 0; 505 tmpsize = id->agsize - mp->m_ag_prealloc_blocks; 506 agf->agf_freeblks = cpu_to_be32(tmpsize); 507 agf->agf_longest = cpu_to_be32(tmpsize); 508 if (xfs_has_crc(mp)) 509 uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); 510 if (xfs_has_reflink(mp)) { 511 agf->agf_refcount_root = cpu_to_be32( 512 xfs_refc_block(mp)); 513 agf->agf_refcount_level = cpu_to_be32(1); 514 agf->agf_refcount_blocks = cpu_to_be32(1); 515 } 516 517 if (xfs_ag_contains_log(mp, id->agno)) { 518 
int64_t logblocks = mp->m_sb.sb_logblocks; 519 520 be32_add_cpu(&agf->agf_freeblks, -logblocks); 521 agf->agf_longest = cpu_to_be32(id->agsize - 522 XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); 523 } 524 } 525 526 static void 527 xfs_agflblock_init( 528 struct xfs_mount *mp, 529 struct xfs_buf *bp, 530 struct aghdr_init_data *id) 531 { 532 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 533 __be32 *agfl_bno; 534 int bucket; 535 536 if (xfs_has_crc(mp)) { 537 agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); 538 agfl->agfl_seqno = cpu_to_be32(id->agno); 539 uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); 540 } 541 542 agfl_bno = xfs_buf_to_agfl_bno(bp); 543 for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) 544 agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); 545 } 546 547 static void 548 xfs_agiblock_init( 549 struct xfs_mount *mp, 550 struct xfs_buf *bp, 551 struct aghdr_init_data *id) 552 { 553 struct xfs_agi *agi = bp->b_addr; 554 int bucket; 555 556 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); 557 agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); 558 agi->agi_seqno = cpu_to_be32(id->agno); 559 agi->agi_length = cpu_to_be32(id->agsize); 560 agi->agi_count = 0; 561 agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); 562 agi->agi_level = cpu_to_be32(1); 563 agi->agi_freecount = 0; 564 agi->agi_newino = cpu_to_be32(NULLAGINO); 565 agi->agi_dirino = cpu_to_be32(NULLAGINO); 566 if (xfs_has_crc(mp)) 567 uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); 568 if (xfs_has_finobt(mp)) { 569 agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); 570 agi->agi_free_level = cpu_to_be32(1); 571 } 572 for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) 573 agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 574 if (xfs_has_inobtcounts(mp)) { 575 agi->agi_iblocks = cpu_to_be32(1); 576 if (xfs_has_finobt(mp)) 577 agi->agi_fblocks = cpu_to_be32(1); 578 } 579 } 580 581 typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, 582 struct 
aghdr_init_data *id); 583 static int 584 xfs_ag_init_hdr( 585 struct xfs_mount *mp, 586 struct aghdr_init_data *id, 587 aghdr_init_work_f work, 588 const struct xfs_buf_ops *ops) 589 { 590 struct xfs_buf *bp; 591 int error; 592 593 error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops); 594 if (error) 595 return error; 596 597 (*work)(mp, bp, id); 598 599 xfs_buf_delwri_queue(bp, &id->buffer_list); 600 xfs_buf_relse(bp); 601 return 0; 602 } 603 604 struct xfs_aghdr_grow_data { 605 xfs_daddr_t daddr; 606 size_t numblks; 607 const struct xfs_buf_ops *ops; 608 aghdr_init_work_f work; 609 const struct xfs_btree_ops *bc_ops; 610 bool need_init; 611 }; 612 613 /* 614 * Prepare new AG headers to be written to disk. We use uncached buffers here, 615 * as it is assumed these new AG headers are currently beyond the currently 616 * valid filesystem address space. Using cached buffers would trip over EOFS 617 * corruption detection alogrithms in the buffer cache lookup routines. 618 * 619 * This is a non-transactional function, but the prepared buffers are added to a 620 * delayed write buffer list supplied by the caller so they can submit them to 621 * disk and wait on them as required. 
*/
int
xfs_ag_init_headers(
	struct xfs_mount	*mp,
	struct aghdr_init_data	*id)

{
	/*
	 * One entry per header of the new AG.  Entries whose feature is not
	 * enabled have need_init == false and are skipped by the loop below.
	 */
	struct xfs_aghdr_grow_data aghdr_data[] = {
	{ /* SB */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_sb_buf_ops,
		.work = &xfs_sbblock_init,
		.need_init = true
	},
	{ /* AGF */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agf_buf_ops,
		.work = &xfs_agfblock_init,
		.need_init = true
	},
	{ /* AGFL */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agfl_buf_ops,
		.work = &xfs_agflblock_init,
		.need_init = true
	},
	{ /* AGI */
		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
		.numblks = XFS_FSS_TO_BB(mp, 1),
		.ops = &xfs_agi_buf_ops,
		.work = &xfs_agiblock_init,
		.need_init = true
	},
	{ /* BNO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_bnobt_buf_ops,
		.work = &xfs_bnoroot_init,
		.bc_ops = &xfs_bnobt_ops,
		.need_init = true
	},
	{ /* CNT root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_cntbt_buf_ops,
		.work = &xfs_bnoroot_init,
		.bc_ops = &xfs_cntbt_ops,
		.need_init = true
	},
	{ /* INO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_inobt_buf_ops,
		.work = &xfs_btroot_init,
		.bc_ops = &xfs_inobt_ops,
		.need_init = true
	},
	{ /* FINO root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_finobt_buf_ops,
		.work = &xfs_btroot_init,
		.bc_ops = &xfs_finobt_ops,
		.need_init =  xfs_has_finobt(mp)
	},
	{ /* RMAP root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_rmapbt_buf_ops,
		.work = &xfs_rmaproot_init,
		.bc_ops = &xfs_rmapbt_ops,
		.need_init = xfs_has_rmapbt(mp)
	},
	{ /* REFC root block */
		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
		.numblks = BTOBB(mp->m_sb.sb_blocksize),
		.ops = &xfs_refcountbt_buf_ops,
		.work = &xfs_btroot_init,
		.bc_ops = &xfs_refcountbt_ops,
		.need_init = xfs_has_reflink(mp)
	},
	{ /* NULL terminating block */
		.daddr = XFS_BUF_DADDR_NULL,
	}
	};
	struct  xfs_aghdr_grow_data *dp;
	int			error = 0;

	/* Account for AG free space in new AG */
	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
		if (!dp->need_init)
			continue;

		/* Pass the per-header parameters to the callback through @id. */
		id->daddr = dp->daddr;
		id->numblks = dp->numblks;
		id->bc_ops = dp->bc_ops;
		error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
		if (error)
			break;
	}
	/* 0, or the error of the first header that could not be set up. */
	return error;
}

/*
 * Shrink the AG described by @pag by @delta blocks taken from its end.
 *
 * @tpp points to the caller's transaction pointer; it is passed to
 * xfs_trans_roll() and xfs_defer_finish() on some failure paths, so callers
 * must use *tpp afterwards.  The per-AG reservation is dropped before the
 * allocation and re-established before returning; if that fails, the
 * filesystem is shut down.
 *
 * Returns 0 on success, -EFSCORRUPTED if the AGF and AGI lengths disagree,
 * -EINVAL if @delta is not smaller than the AG, -ENOSPC if the tail blocks
 * could not be allocated or the reservation no longer fits, or another
 * negative errno from the helpers called here.
 */
int
xfs_ag_shrink_space(
	struct xfs_perag	*pag,
	struct xfs_trans	**tpp,
	xfs_extlen_t		delta)
{
	struct xfs_mount	*mp = pag_mount(pag);
	/* Ask for exactly @delta blocks, without updating the rmap. */
	struct xfs_alloc_arg	args = {
		.tp	= *tpp,
		.mp	= mp,
		.pag	= pag,
		.minlen = delta,
		.maxlen = delta,
		.oinfo	= XFS_RMAP_OINFO_SKIP_UPDATE,
		.resv	= XFS_AG_RESV_NONE,
		.prod	= 1
	};
	struct xfs_buf		*agibp, *agfbp;
	struct xfs_agi		*agi;
	struct xfs_agf		*agf;
	xfs_agblock_t		aglen;
	int			error, err2;

	ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
	error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp);
	if (error)
		return error;

	agi = agibp->b_addr;

	error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	aglen = be32_to_cpu(agi->agi_length);
	/* some extra paranoid checks before we shrink the ag */
	if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
		return -EFSCORRUPTED;
	}
	if (delta >= aglen)
		return -EINVAL;

	/*
	 * Make sure that the last inode cluster cannot overlap with the new
	 * end of the AG, even if it's sparse.
	 */
	error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta);
	if (error)
		return error;

	/*
	 * Disable perag reservations so it doesn't cause the allocation request
	 * to fail. We'll reestablish reservation before we return.
	 */
	xfs_ag_resv_free(pag);

	/* internal log shouldn't also show up in the free space btrees */
	error = xfs_alloc_vextent_exact_bno(&args,
			xfs_agbno_to_fsb(pag, aglen - delta));
	/* No error, but no extent either: the tail is not entirely free. */
	if (!error && args.agbno == NULLAGBLOCK)
		error = -ENOSPC;

	if (error) {
		/*
		 * If extent allocation fails, need to roll the transaction to
		 * ensure that the AGFL fixup has been committed anyway.
		 *
		 * We need to hold the AGF across the roll to ensure nothing can
		 * access the AG for allocation until the shrink is fully
		 * cleaned up. And due to the resetting of the AG block
		 * reservation space needing to lock the AGI, we also have to
		 * hold that so we don't get AGI/AGF lock order inversions in
		 * the error handling path.
		 */
		xfs_trans_bhold(*tpp, agfbp);
		xfs_trans_bhold(*tpp, agibp);
		err2 = xfs_trans_roll(tpp);
		if (err2)
			return err2;
		xfs_trans_bjoin(*tpp, agfbp);
		xfs_trans_bjoin(*tpp, agibp);
		goto resv_init_out;
	}

	/*
	 * if successfully deleted from freespace btrees, need to confirm
	 * per-AG reservation works as expected.
	 */
	be32_add_cpu(&agi->agi_length, -delta);
	be32_add_cpu(&agf->agf_length, -delta);

	err2 = xfs_ag_resv_init(pag, *tpp);
	if (err2) {
		/* Undo the length change made above. */
		be32_add_cpu(&agi->agi_length, delta);
		be32_add_cpu(&agf->agf_length, delta);
		if (err2 != -ENOSPC)
			goto resv_err;

		/* Give the blocks allocated above back to the free space. */
		err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
				XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD);
		if (err2)
			goto resv_err;

		/*
		 * Roll the transaction before trying to re-init the per-ag
		 * reservation. The new transaction is clean so it will cancel
		 * without any side effects.
		 */
		error = xfs_defer_finish(tpp);
		if (error)
			return error;

		error = -ENOSPC;
		goto resv_init_out;
	}

	/* Update perag geometry */
	pag_group(pag)->xg_block_count -= delta;
	__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
			&pag->agino_max);

	xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
	xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
	return 0;

resv_init_out:
	/* Failure path: put the reservation back and report @error. */
	err2 = xfs_ag_resv_init(pag, *tpp);
	if (!err2)
		return error;
resv_err:
	xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return err2;
}

/*
 * Return the agcount for the new file system size passed in *nb and adjust *nb
 * when it has to be reduced because of maximum AG count or because it would
 * create a below minimum size AG.
870 */ 871 xfs_agnumber_t 872 xfs_growfs_compute_agcount( 873 struct xfs_mount *mp, 874 xfs_rfsblock_t *nb) 875 { 876 uint64_t agcount; /* 64-bits wide to catch overflows */ 877 xfs_extlen_t remainder; 878 879 agcount = div_u64_rem(*nb, mp->m_sb.sb_agblocks, &remainder); 880 if (agcount >= XFS_MAX_AGNUMBER + 1) { 881 agcount = XFS_MAX_AGNUMBER + 1; 882 remainder = 0; 883 } 884 *nb = (xfs_rfsblock_t)agcount * mp->m_sb.sb_agblocks; 885 if (remainder >= XFS_MIN_AG_BLOCKS) { 886 *nb += remainder; 887 agcount++; 888 } 889 return agcount; 890 } 891 892 /* 893 * Extent the AG indicated by the @id by the length passed in 894 */ 895 int 896 xfs_ag_extend_space( 897 struct xfs_perag *pag, 898 struct xfs_trans *tp, 899 xfs_extlen_t len) 900 { 901 struct xfs_mount *mp = pag_mount(pag); 902 struct xfs_buf *bp; 903 struct xfs_agi *agi; 904 struct xfs_agf *agf; 905 int error; 906 907 ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1); 908 909 error = xfs_ialloc_read_agi(pag, tp, 0, &bp); 910 if (error) 911 return error; 912 913 agi = bp->b_addr; 914 be32_add_cpu(&agi->agi_length, len); 915 xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); 916 917 /* 918 * Change agf length. 919 */ 920 error = xfs_alloc_read_agf(pag, tp, 0, &bp); 921 if (error) 922 return error; 923 924 agf = bp->b_addr; 925 be32_add_cpu(&agf->agf_length, len); 926 ASSERT(agf->agf_length == agi->agi_length); 927 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 928 929 /* 930 * Free the new space. 931 * 932 * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that 933 * this doesn't actually exist in the rmap btree. 
934 */ 935 error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len, 936 len, &XFS_RMAP_OINFO_SKIP_UPDATE); 937 if (error) 938 return error; 939 940 error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, 941 len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); 942 if (error) 943 return error; 944 945 /* Update perag geometry */ 946 pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length); 947 __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, 948 &pag->agino_max); 949 return 0; 950 } 951 952 /* Retrieve AG geometry. */ 953 int 954 xfs_ag_get_geometry( 955 struct xfs_perag *pag, 956 struct xfs_ag_geometry *ageo) 957 { 958 struct xfs_buf *agi_bp; 959 struct xfs_buf *agf_bp; 960 struct xfs_agi *agi; 961 struct xfs_agf *agf; 962 unsigned int freeblks; 963 int error; 964 965 /* Lock the AG headers. */ 966 error = xfs_ialloc_read_agi(pag, NULL, 0, &agi_bp); 967 if (error) 968 return error; 969 error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp); 970 if (error) 971 goto out_agi; 972 973 /* Fill out form. */ 974 memset(ageo, 0, sizeof(*ageo)); 975 ageo->ag_number = pag_agno(pag); 976 977 agi = agi_bp->b_addr; 978 ageo->ag_icount = be32_to_cpu(agi->agi_count); 979 ageo->ag_ifree = be32_to_cpu(agi->agi_freecount); 980 981 agf = agf_bp->b_addr; 982 ageo->ag_length = be32_to_cpu(agf->agf_length); 983 freeblks = pag->pagf_freeblks + 984 pag->pagf_flcount + 985 pag->pagf_btreeblks - 986 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE); 987 ageo->ag_freeblks = freeblks; 988 xfs_ag_geom_health(pag, ageo); 989 990 /* Release resources. */ 991 xfs_buf_relse(agf_bp); 992 out_agi: 993 xfs_buf_relse(agi_bp); 994 return error; 995 } 996