/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2018 Red Hat, Inc.
 * All rights reserved.
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_health.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_group.h"

/*
 * xfs_initialize_perag_data
 *
 * Read in each per-ag structure so we can count up the number of
 * allocated inodes, free inodes and used filesystem blocks as this
 * information is no longer persistent in the superblock. Once we have
 * this information, write it into the in-core superblock structure.
 */
int
xfs_initialize_perag_data(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agcount)
{
        xfs_agnumber_t          index;
        struct xfs_perag        *pag;
        struct xfs_sb           *sbp = &mp->m_sb;
        uint64_t                ifree = 0;
        uint64_t                ialloc = 0;
        uint64_t                bfree = 0;
        uint64_t                bfreelst = 0;
        uint64_t                btree = 0;
        uint64_t                fdblocks;
        int                     error = 0;

        for (index = 0; index < agcount; index++) {
                /*
                 * Read the AGF and AGI buffers to populate the per-ag
                 * structures for us.
                 */
                pag = xfs_perag_get(mp, index);
                error = xfs_alloc_read_agf(pag, NULL, 0, NULL);
                if (!error)
                        error = xfs_ialloc_read_agi(pag, NULL, 0, NULL);
                if (error) {
                        xfs_perag_put(pag);
                        return error;
                }

                ifree += pag->pagi_freecount;
                ialloc += pag->pagi_count;
                bfree += pag->pagf_freeblks;
                bfreelst += pag->pagf_flcount;
                btree += pag->pagf_btreeblks;
                xfs_perag_put(pag);
        }
        fdblocks = bfree + bfreelst + btree;

        /*
         * If the new summary counts are obviously incorrect, fail the
         * mount operation because that implies the AGFs are also corrupt.
         * Clear FS_COUNTERS so that we don't unmount with a dirty log, which
         * will prevent xfs_repair from fixing anything.
         */
        if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
                xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
                xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
                error = -EFSCORRUPTED;
                goto out;
        }

        /* Overwrite incore superblock counters with just-read data */
        spin_lock(&mp->m_sb_lock);
        sbp->sb_ifree = ifree;
        sbp->sb_icount = ialloc;
        sbp->sb_fdblocks = fdblocks;
        spin_unlock(&mp->m_sb_lock);

        xfs_reinit_percpu_counters(mp);
out:
        xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
        return error;
}
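
/*
 * For illustration only: a mount-path caller that does not trust the
 * superblock summary counters might rebuild them like this (hypothetical
 * sketch; the "counters_need_rebuild" condition is made up):
 *
 *	if (counters_need_rebuild) {
 *		error = xfs_initialize_perag_data(mp, mp->m_sb.sb_agcount);
 *		if (error)
 *			return error;
 *	}
 */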

static void
xfs_perag_uninit(
        struct xfs_group        *xg)
{
#ifdef __KERNEL__
        struct xfs_perag        *pag = to_perag(xg);

        cancel_delayed_work_sync(&pag->pag_blockgc_work);
        xfs_buf_cache_destroy(&pag->pag_bcache);
#endif
}

/*
 * Free up the per-ag resources within the specified AG range.
 */
void
xfs_free_perag_range(
        struct xfs_mount        *mp,
        xfs_agnumber_t          first_agno,
        xfs_agnumber_t          end_agno)
{
        xfs_agnumber_t          agno;

        for (agno = first_agno; agno < end_agno; agno++)
                xfs_group_free(mp, agno, XG_TYPE_AG, xfs_perag_uninit);
}

/* Find the size of the AG, in blocks. */
static xfs_agblock_t
__xfs_ag_block_count(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        xfs_agnumber_t          agcount,
        xfs_rfsblock_t          dblocks)
{
        ASSERT(agno < agcount);

        if (agno < agcount - 1)
                return mp->m_sb.sb_agblocks;
        return dblocks - (agno * mp->m_sb.sb_agblocks);
}

xfs_agblock_t
xfs_ag_block_count(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno)
{
        return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount,
                        mp->m_sb.sb_dblocks);
}
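
/*
 * Worked example with made-up geometry: if sb_agblocks = 1000,
 * sb_agcount = 5 and sb_dblocks = 4600, then AGs 0-3 are each 1000
 * blocks long and the last AG covers only the remainder:
 *
 *	__xfs_ag_block_count(mp, 4, 5, 4600) = 4600 - 4 * 1000 = 600
 */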

/* Calculate the first and last possible inode number in an AG. */
static void
__xfs_agino_range(
        struct xfs_mount        *mp,
        xfs_agblock_t           eoag,
        xfs_agino_t             *first,
        xfs_agino_t             *last)
{
        xfs_agblock_t           bno;

        /*
         * Calculate the first inode, which will be in the first
         * cluster-aligned block after the AGFL.
         */
        bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
        *first = XFS_AGB_TO_AGINO(mp, bno);

        /*
         * Calculate the last inode, which will be at the end of the
         * last (aligned) cluster that can be allocated in the AG.
         */
        bno = round_down(eoag, M_IGEO(mp)->cluster_align);
        *last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}

void
xfs_agino_range(
        struct xfs_mount        *mp,
        xfs_agnumber_t          agno,
        xfs_agino_t             *first,
        xfs_agino_t             *last)
{
        __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}
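
/*
 * Worked example with made-up geometry: assume XFS_AGFL_BLOCK() is 3,
 * cluster_align is 4, XFS_AGB_TO_AGINO() multiplies by 16 inodes per
 * block, and eoag is 1000 blocks:
 *
 *	first block = round_up(3 + 1, 4)  = 4    -> *first = 4 * 16 = 64
 *	last block  = round_down(1000, 4) = 1000 -> *last = 1000 * 16 - 1 = 15999
 */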
359 */ 360 nrec->ar_startblock = cpu_to_be32( 361 be32_to_cpu(arec->ar_startblock) + 362 be32_to_cpu(arec->ar_blockcount)); 363 arec = nrec; 364 } 365 /* 366 * Change record start to after the internal log 367 */ 368 be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); 369 } 370 371 /* 372 * Calculate the block count of this record; if it is nonzero, 373 * increment the record count. 374 */ 375 arec->ar_blockcount = cpu_to_be32(id->agsize - 376 be32_to_cpu(arec->ar_startblock)); 377 if (arec->ar_blockcount) 378 be16_add_cpu(&block->bb_numrecs, 1); 379 } 380 381 /* 382 * bnobt/cntbt btree root block init functions 383 */ 384 static void 385 xfs_bnoroot_init( 386 struct xfs_mount *mp, 387 struct xfs_buf *bp, 388 struct aghdr_init_data *id) 389 { 390 xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno); 391 xfs_freesp_init_recs(mp, bp, id); 392 } 393 394 /* 395 * Reverse map root block init 396 */ 397 static void 398 xfs_rmaproot_init( 399 struct xfs_mount *mp, 400 struct xfs_buf *bp, 401 struct aghdr_init_data *id) 402 { 403 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 404 struct xfs_rmap_rec *rrec; 405 406 xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno); 407 408 /* 409 * mark the AG header regions as static metadata The BNO 410 * btree block is the first block after the headers, so 411 * it's location defines the size of region the static 412 * metadata consumes. 413 * 414 * Note: unlike mkfs, we never have to account for log 415 * space when growing the data regions 416 */ 417 rrec = XFS_RMAP_REC_ADDR(block, 1); 418 rrec->rm_startblock = 0; 419 rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); 420 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); 421 rrec->rm_offset = 0; 422 423 /* account freespace btree root blocks */ 424 rrec = XFS_RMAP_REC_ADDR(block, 2); 425 rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); 426 rrec->rm_blockcount = cpu_to_be32(2); 427 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); 428 rrec->rm_offset = 0; 429 430 /* account inode btree root blocks */ 431 rrec = XFS_RMAP_REC_ADDR(block, 3); 432 rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); 433 rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - 434 XFS_IBT_BLOCK(mp)); 435 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); 436 rrec->rm_offset = 0; 437 438 /* account for rmap btree root */ 439 rrec = XFS_RMAP_REC_ADDR(block, 4); 440 rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); 441 rrec->rm_blockcount = cpu_to_be32(1); 442 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); 443 rrec->rm_offset = 0; 444 445 /* account for refc btree root */ 446 if (xfs_has_reflink(mp)) { 447 rrec = XFS_RMAP_REC_ADDR(block, 5); 448 rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); 449 rrec->rm_blockcount = cpu_to_be32(1); 450 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); 451 rrec->rm_offset = 0; 452 be16_add_cpu(&block->bb_numrecs, 1); 453 } 454 455 /* account for the log space */ 456 if (xfs_ag_contains_log(mp, id->agno)) { 457 rrec = XFS_RMAP_REC_ADDR(block, 458 be16_to_cpu(block->bb_numrecs) + 1); 459 rrec->rm_startblock = cpu_to_be32( 460 XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); 461 rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); 462 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); 463 rrec->rm_offset = 0; 464 be16_add_cpu(&block->bb_numrecs, 1); 465 } 466 } 467 468 /* 469 * Initialise new secondary superblocks with the pre-grow geometry, but mark 470 * them as "in progress" so we know they haven't yet been activated. 

static int
xfs_get_aghdr_buf(
        struct xfs_mount        *mp,
        xfs_daddr_t             blkno,
        size_t                  numblks,
        struct xfs_buf          **bpp,
        const struct xfs_buf_ops *ops)
{
        struct xfs_buf          *bp;
        int                     error;

        error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
        if (error)
                return error;

        bp->b_maps[0].bm_bn = blkno;
        bp->b_ops = ops;

        *bpp = bp;
        return 0;
}

/*
 * Generic btree root block init function
 */
static void
xfs_btroot_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
}

/* Finish initializing a free space btree. */
static void
xfs_freesp_init_recs(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_alloc_rec    *arec;
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);

        arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
        arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);

        if (xfs_ag_contains_log(mp, id->agno)) {
                struct xfs_alloc_rec    *nrec;
                xfs_agblock_t           start = XFS_FSB_TO_AGBNO(mp,
                                                        mp->m_sb.sb_logstart);

                ASSERT(start >= mp->m_ag_prealloc_blocks);
                if (start != mp->m_ag_prealloc_blocks) {
                        /*
                         * Modify the first record to pad out the stripe
                         * alignment of the log, and bump the record count.
                         */
                        arec->ar_blockcount = cpu_to_be32(start -
                                        mp->m_ag_prealloc_blocks);
                        be16_add_cpu(&block->bb_numrecs, 1);
                        nrec = arec + 1;

                        /*
                         * Insert a second record at the start of the internal
                         * log, which then gets trimmed below.
                         */
                        nrec->ar_startblock = cpu_to_be32(
                                        be32_to_cpu(arec->ar_startblock) +
                                        be32_to_cpu(arec->ar_blockcount));
                        arec = nrec;
                }
                /*
                 * Move the record start to just past the internal log.
                 */
                be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
        }

        /*
         * Calculate the block count of this record; if it is nonzero,
         * increment the record count.
         */
        arec->ar_blockcount = cpu_to_be32(id->agsize -
                        be32_to_cpu(arec->ar_startblock));
        if (arec->ar_blockcount)
                be16_add_cpu(&block->bb_numrecs, 1);
}
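
/*
 * Worked example with made-up geometry: suppose this AG contains the
 * internal log, m_ag_prealloc_blocks is 16, the stripe-aligned log
 * starts at block 64 and runs for 128 blocks, and agsize is 1000.
 * xfs_freesp_init_recs() then emits two free space records:
 *
 *	record 1: startblock 16, blockcount 48   (pad up to the log)
 *	record 2: startblock 192, blockcount 808 (everything after it)
 */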

/*
 * bnobt/cntbt btree root block init functions
 */
static void
xfs_bnoroot_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
        xfs_freesp_init_recs(mp, bp, id);
}

/*
 * Reverse map root block init
 */
static void
xfs_rmaproot_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
        struct xfs_rmap_rec     *rrec;

        xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno);

        /*
         * Mark the AG header regions as static metadata.  The BNO
         * btree block is the first block after the headers, so its
         * location defines the size of the region the static metadata
         * consumes.
         *
         * Note: unlike mkfs, we never have to account for log space
         * when growing the data regions.
         */
        rrec = XFS_RMAP_REC_ADDR(block, 1);
        rrec->rm_startblock = 0;
        rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
        rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
        rrec->rm_offset = 0;

        /* account freespace btree root blocks */
        rrec = XFS_RMAP_REC_ADDR(block, 2);
        rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
        rrec->rm_blockcount = cpu_to_be32(2);
        rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
        rrec->rm_offset = 0;

        /* account inode btree root blocks */
        rrec = XFS_RMAP_REC_ADDR(block, 3);
        rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
        rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
                                          XFS_IBT_BLOCK(mp));
        rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
        rrec->rm_offset = 0;

        /* account for rmap btree root */
        rrec = XFS_RMAP_REC_ADDR(block, 4);
        rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
        rrec->rm_blockcount = cpu_to_be32(1);
        rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
        rrec->rm_offset = 0;

        /* account for refc btree root */
        if (xfs_has_reflink(mp)) {
                rrec = XFS_RMAP_REC_ADDR(block, 5);
                rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
                rrec->rm_blockcount = cpu_to_be32(1);
                rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
                rrec->rm_offset = 0;
                be16_add_cpu(&block->bb_numrecs, 1);
        }

        /* account for the log space */
        if (xfs_ag_contains_log(mp, id->agno)) {
                rrec = XFS_RMAP_REC_ADDR(block,
                                be16_to_cpu(block->bb_numrecs) + 1);
                rrec->rm_startblock = cpu_to_be32(
                                XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
                rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
                rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
                rrec->rm_offset = 0;
                be16_add_cpu(&block->bb_numrecs, 1);
        }
}

/*
 * Initialise new secondary superblocks with the pre-grow geometry, but mark
 * them as "in progress" so we know they haven't yet been activated. This will
 * get cleared when the update with the new geometry information is done after
 * changes to the primary are committed. This isn't strictly necessary, but we
 * get it for free with the delayed buffer write lists and it means we can tell
 * if a grow operation didn't complete properly after the fact.
 */
static void
xfs_sbblock_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_dsb          *dsb = bp->b_addr;

        xfs_sb_to_disk(dsb, &mp->m_sb);
        dsb->sb_inprogress = 1;
}

static void
xfs_agfblock_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_agf          *agf = bp->b_addr;
        xfs_extlen_t            tmpsize;

        agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
        agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
        agf->agf_seqno = cpu_to_be32(id->agno);
        agf->agf_length = cpu_to_be32(id->agsize);
        agf->agf_bno_root = cpu_to_be32(XFS_BNO_BLOCK(mp));
        agf->agf_cnt_root = cpu_to_be32(XFS_CNT_BLOCK(mp));
        agf->agf_bno_level = cpu_to_be32(1);
        agf->agf_cnt_level = cpu_to_be32(1);
        if (xfs_has_rmapbt(mp)) {
                agf->agf_rmap_root = cpu_to_be32(XFS_RMAP_BLOCK(mp));
                agf->agf_rmap_level = cpu_to_be32(1);
                agf->agf_rmap_blocks = cpu_to_be32(1);
        }

        agf->agf_flfirst = cpu_to_be32(1);
        agf->agf_fllast = 0;
        agf->agf_flcount = 0;
        tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
        agf->agf_freeblks = cpu_to_be32(tmpsize);
        agf->agf_longest = cpu_to_be32(tmpsize);
        if (xfs_has_crc(mp))
                uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
        if (xfs_has_reflink(mp)) {
                agf->agf_refcount_root = cpu_to_be32(xfs_refc_block(mp));
                agf->agf_refcount_level = cpu_to_be32(1);
                agf->agf_refcount_blocks = cpu_to_be32(1);
        }

        if (xfs_ag_contains_log(mp, id->agno)) {
                int64_t logblocks = mp->m_sb.sb_logblocks;

                be32_add_cpu(&agf->agf_freeblks, -logblocks);
                agf->agf_longest = cpu_to_be32(id->agsize -
                        XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
        }
}

static void
xfs_agflblock_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_agfl         *agfl = XFS_BUF_TO_AGFL(bp);
        __be32                  *agfl_bno;
        int                     bucket;

        if (xfs_has_crc(mp)) {
                agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
                agfl->agfl_seqno = cpu_to_be32(id->agno);
                uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
        }

        agfl_bno = xfs_buf_to_agfl_bno(bp);
        for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
                agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
}

static void
xfs_agiblock_init(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp,
        struct aghdr_init_data  *id)
{
        struct xfs_agi          *agi = bp->b_addr;
        int                     bucket;

        agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
        agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
        agi->agi_seqno = cpu_to_be32(id->agno);
        agi->agi_length = cpu_to_be32(id->agsize);
        agi->agi_count = 0;
        agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
        agi->agi_level = cpu_to_be32(1);
        agi->agi_freecount = 0;
        agi->agi_newino = cpu_to_be32(NULLAGINO);
        agi->agi_dirino = cpu_to_be32(NULLAGINO);
        if (xfs_has_crc(mp))
                uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
        if (xfs_has_finobt(mp)) {
                agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
                agi->agi_free_level = cpu_to_be32(1);
        }
        for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
                agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
        if (xfs_has_inobtcounts(mp)) {
                agi->agi_iblocks = cpu_to_be32(1);
                if (xfs_has_finobt(mp))
                        agi->agi_fblocks = cpu_to_be32(1);
        }
}

typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
                                  struct aghdr_init_data *id);
static int
xfs_ag_init_hdr(
        struct xfs_mount        *mp,
        struct aghdr_init_data  *id,
        aghdr_init_work_f       work,
        const struct xfs_buf_ops *ops)
{
        struct xfs_buf          *bp;
        int                     error;

        error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
        if (error)
                return error;

        (*work)(mp, bp, id);

        xfs_buf_delwri_queue(bp, &id->buffer_list);
        xfs_buf_relse(bp);
        return 0;
}

struct xfs_aghdr_grow_data {
        xfs_daddr_t             daddr;
        size_t                  numblks;
        const struct xfs_buf_ops *ops;
        aghdr_init_work_f       work;
        const struct xfs_btree_ops *bc_ops;
        bool                    need_init;
};

/*
 * Prepare new AG headers to be written to disk. We use uncached buffers here,
 * as it is assumed these new AG headers are beyond the currently valid
 * filesystem address space. Using cached buffers would trip over EOFS
 * corruption detection algorithms in the buffer cache lookup routines.
 *
 * This is a non-transactional function, but the prepared buffers are added to
 * a delayed write buffer list supplied by the caller so they can submit them
 * to disk and wait on them as required.
 */
int
xfs_ag_init_headers(
        struct xfs_mount        *mp,
        struct aghdr_init_data  *id)
{
        struct xfs_aghdr_grow_data aghdr_data[] = {
        { /* SB */
                .daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
                .numblks = XFS_FSS_TO_BB(mp, 1),
                .ops = &xfs_sb_buf_ops,
                .work = &xfs_sbblock_init,
                .need_init = true
        },
        { /* AGF */
                .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
                .numblks = XFS_FSS_TO_BB(mp, 1),
                .ops = &xfs_agf_buf_ops,
                .work = &xfs_agfblock_init,
                .need_init = true
        },
        { /* AGFL */
                .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
                .numblks = XFS_FSS_TO_BB(mp, 1),
                .ops = &xfs_agfl_buf_ops,
                .work = &xfs_agflblock_init,
                .need_init = true
        },
        { /* AGI */
                .daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
                .numblks = XFS_FSS_TO_BB(mp, 1),
                .ops = &xfs_agi_buf_ops,
                .work = &xfs_agiblock_init,
                .need_init = true
        },
        { /* BNO root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_bnobt_buf_ops,
                .work = &xfs_bnoroot_init,
                .bc_ops = &xfs_bnobt_ops,
                .need_init = true
        },
        { /* CNT root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_cntbt_buf_ops,
                .work = &xfs_bnoroot_init,
                .bc_ops = &xfs_cntbt_ops,
                .need_init = true
        },
        { /* INO root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_inobt_buf_ops,
                .work = &xfs_btroot_init,
                .bc_ops = &xfs_inobt_ops,
                .need_init = true
        },
        { /* FINO root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_finobt_buf_ops,
                .work = &xfs_btroot_init,
                .bc_ops = &xfs_finobt_ops,
                .need_init = xfs_has_finobt(mp)
        },
        { /* RMAP root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_rmapbt_buf_ops,
                .work = &xfs_rmaproot_init,
                .bc_ops = &xfs_rmapbt_ops,
                .need_init = xfs_has_rmapbt(mp)
        },
        { /* REFC root block */
                .daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
                .numblks = BTOBB(mp->m_sb.sb_blocksize),
                .ops = &xfs_refcountbt_buf_ops,
                .work = &xfs_btroot_init,
                .bc_ops = &xfs_refcountbt_ops,
                .need_init = xfs_has_reflink(mp)
        },
        { /* NULL terminating block */
                .daddr = XFS_BUF_DADDR_NULL,
        }
        };
        struct xfs_aghdr_grow_data *dp;
        int                     error = 0;

        /* Account for AG free space in new AG */
        id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
        for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
                if (!dp->need_init)
                        continue;

                id->daddr = dp->daddr;
                id->numblks = dp->numblks;
                id->bc_ops = dp->bc_ops;
                error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
                if (error)
                        break;
        }
        return error;
}
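
/*
 * For illustration only: a grow-style caller drives xfs_ag_init_headers()
 * through the delayed write list embedded in the init data (hypothetical
 * sketch, error handling elided):
 *
 *	struct aghdr_init_data	id = {};
 *	int			error;
 *
 *	INIT_LIST_HEAD(&id.buffer_list);
 *	id.agno = agno;
 *	id.agsize = agsize;
 *	error = xfs_ag_init_headers(mp, &id);
 *	if (!error)
 *		error = xfs_buf_delwri_submit(&id.buffer_list);
 */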

int
xfs_ag_shrink_space(
        struct xfs_perag        *pag,
        struct xfs_trans        **tpp,
        xfs_extlen_t            delta)
{
        struct xfs_mount        *mp = pag_mount(pag);
        struct xfs_alloc_arg    args = {
                .tp     = *tpp,
                .mp     = mp,
                .pag    = pag,
                .minlen = delta,
                .maxlen = delta,
                .oinfo  = XFS_RMAP_OINFO_SKIP_UPDATE,
                .resv   = XFS_AG_RESV_NONE,
                .prod   = 1
        };
        struct xfs_buf          *agibp, *agfbp;
        struct xfs_agi          *agi;
        struct xfs_agf          *agf;
        xfs_agblock_t           aglen;
        int                     error, err2;

        ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
        error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp);
        if (error)
                return error;

        agi = agibp->b_addr;

        error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp);
        if (error)
                return error;

        agf = agfbp->b_addr;
        aglen = be32_to_cpu(agi->agi_length);
        /* Some extra paranoid checks before we shrink the ag */
        if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) {
                xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
                return -EFSCORRUPTED;
        }
        if (delta >= aglen)
                return -EINVAL;

        /*
         * Make sure that the last inode cluster cannot overlap with the new
         * end of the AG, even if it's sparse.
         */
        error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta);
        if (error)
                return error;

        /*
         * Disable perag reservations so they don't cause the allocation
         * request to fail. We'll reestablish the reservation before we
         * return.
         */
        xfs_ag_resv_free(pag);

        /* The internal log shouldn't also show up in the free space btrees */
        error = xfs_alloc_vextent_exact_bno(&args,
                        xfs_agbno_to_fsb(pag, aglen - delta));
        if (!error && args.agbno == NULLAGBLOCK)
                error = -ENOSPC;

        if (error) {
                /*
                 * If the extent allocation fails, we need to roll the
                 * transaction to ensure that any AGFL fixups have been
                 * committed anyway.
                 *
                 * We need to hold the AGF across the roll to ensure nothing
                 * can access the AG for allocation until the shrink is fully
                 * cleaned up. And because resetting the AG block reservation
                 * requires locking the AGI, we also have to hold that so we
                 * don't get AGI/AGF lock order inversions in the error
                 * handling path.
                 */
                xfs_trans_bhold(*tpp, agfbp);
                xfs_trans_bhold(*tpp, agibp);
                err2 = xfs_trans_roll(tpp);
                if (err2)
                        return err2;
                xfs_trans_bjoin(*tpp, agfbp);
                xfs_trans_bjoin(*tpp, agibp);
                goto resv_init_out;
        }

        /*
         * If the extent was successfully removed from the free space btrees,
         * we need to confirm that the per-AG reservation still works as
         * expected.
         */
        be32_add_cpu(&agi->agi_length, -delta);
        be32_add_cpu(&agf->agf_length, -delta);

        err2 = xfs_ag_resv_init(pag, *tpp);
        if (err2) {
                be32_add_cpu(&agi->agi_length, delta);
                be32_add_cpu(&agf->agf_length, delta);
                if (err2 != -ENOSPC)
                        goto resv_err;

                err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
                                XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD);
                if (err2)
                        goto resv_err;

                /*
                 * Roll the transaction before trying to re-init the per-ag
                 * reservation. The new transaction is clean so it will cancel
                 * without any side effects.
                 */
                error = xfs_defer_finish(tpp);
                if (error)
                        return error;

                error = -ENOSPC;
                goto resv_init_out;
        }

        /* Update perag geometry */
        pag_group(pag)->xg_block_count -= delta;
        __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
                        &pag->agino_max);

        xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
        xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
        return 0;

resv_init_out:
        err2 = xfs_ag_resv_init(pag, *tpp);
        if (!err2)
                return error;
resv_err:
        xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2);
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
        return err2;
}
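
/*
 * For illustration only: a shrink-style caller removes delta blocks from
 * the tail AG inside an existing transaction (hypothetical sketch;
 * transaction setup and commit elided):
 *
 *	struct xfs_perag	*pag = xfs_perag_get(mp, agno);
 *
 *	error = xfs_ag_shrink_space(pag, &tp, delta);
 *	xfs_perag_put(pag);
 */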
813 */ 814 xfs_trans_bhold(*tpp, agfbp); 815 xfs_trans_bhold(*tpp, agibp); 816 err2 = xfs_trans_roll(tpp); 817 if (err2) 818 return err2; 819 xfs_trans_bjoin(*tpp, agfbp); 820 xfs_trans_bjoin(*tpp, agibp); 821 goto resv_init_out; 822 } 823 824 /* 825 * if successfully deleted from freespace btrees, need to confirm 826 * per-AG reservation works as expected. 827 */ 828 be32_add_cpu(&agi->agi_length, -delta); 829 be32_add_cpu(&agf->agf_length, -delta); 830 831 err2 = xfs_ag_resv_init(pag, *tpp); 832 if (err2) { 833 be32_add_cpu(&agi->agi_length, delta); 834 be32_add_cpu(&agf->agf_length, delta); 835 if (err2 != -ENOSPC) 836 goto resv_err; 837 838 err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, 839 XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD); 840 if (err2) 841 goto resv_err; 842 843 /* 844 * Roll the transaction before trying to re-init the per-ag 845 * reservation. The new transaction is clean so it will cancel 846 * without any side effects. 847 */ 848 error = xfs_defer_finish(tpp); 849 if (error) 850 return error; 851 852 error = -ENOSPC; 853 goto resv_init_out; 854 } 855 856 /* Update perag geometry */ 857 pag_group(pag)->xg_block_count -= delta; 858 __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, 859 &pag->agino_max); 860 861 xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); 862 xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); 863 return 0; 864 865 resv_init_out: 866 err2 = xfs_ag_resv_init(pag, *tpp); 867 if (!err2) 868 return error; 869 resv_err: 870 xfs_warn(mp, "Error %d reserving per-AG metadata reserve pool.", err2); 871 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 872 return err2; 873 } 874 875 /* 876 * Extent the AG indicated by the @id by the length passed in 877 */ 878 int 879 xfs_ag_extend_space( 880 struct xfs_perag *pag, 881 struct xfs_trans *tp, 882 xfs_extlen_t len) 883 { 884 struct xfs_mount *mp = pag_mount(pag); 885 struct xfs_buf *bp; 886 struct xfs_agi *agi; 887 struct xfs_agf *agf; 888 int error; 889 890 ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1); 891 892 error = xfs_ialloc_read_agi(pag, tp, 0, &bp); 893 if (error) 894 return error; 895 896 agi = bp->b_addr; 897 be32_add_cpu(&agi->agi_length, len); 898 xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); 899 900 /* 901 * Change agf length. 902 */ 903 error = xfs_alloc_read_agf(pag, tp, 0, &bp); 904 if (error) 905 return error; 906 907 agf = bp->b_addr; 908 be32_add_cpu(&agf->agf_length, len); 909 ASSERT(agf->agf_length == agi->agi_length); 910 xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); 911 912 /* 913 * Free the new space. 914 * 915 * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that 916 * this doesn't actually exist in the rmap btree. 917 */ 918 error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len, 919 len, &XFS_RMAP_OINFO_SKIP_UPDATE); 920 if (error) 921 return error; 922 923 error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, 924 len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); 925 if (error) 926 return error; 927 928 /* Update perag geometry */ 929 pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length); 930 __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min, 931 &pag->agino_max); 932 return 0; 933 } 934 935 /* Retrieve AG geometry. 

/* Retrieve AG geometry. */
int
xfs_ag_get_geometry(
        struct xfs_perag        *pag,
        struct xfs_ag_geometry  *ageo)
{
        struct xfs_buf          *agi_bp;
        struct xfs_buf          *agf_bp;
        struct xfs_agi          *agi;
        struct xfs_agf          *agf;
        unsigned int            freeblks;
        int                     error;

        /* Lock the AG headers. */
        error = xfs_ialloc_read_agi(pag, NULL, 0, &agi_bp);
        if (error)
                return error;
        error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp);
        if (error)
                goto out_agi;

        /* Fill out form. */
        memset(ageo, 0, sizeof(*ageo));
        ageo->ag_number = pag_agno(pag);

        agi = agi_bp->b_addr;
        ageo->ag_icount = be32_to_cpu(agi->agi_count);
        ageo->ag_ifree = be32_to_cpu(agi->agi_freecount);

        agf = agf_bp->b_addr;
        ageo->ag_length = be32_to_cpu(agf->agf_length);
        freeblks = pag->pagf_freeblks +
                   pag->pagf_flcount +
                   pag->pagf_btreeblks -
                   xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE);
        ageo->ag_freeblks = freeblks;
        xfs_ag_geom_health(pag, ageo);

        /* Release resources. */
        xfs_buf_relse(agf_bp);
out_agi:
        xfs_buf_relse(agi_bp);
        return error;
}
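
/*
 * For illustration only: an ioctl-style caller fetches the geometry of a
 * single AG like this (hypothetical sketch; the real XFS_IOC_AG_GEOMETRY
 * handler also copies the result out to userspace):
 *
 *	struct xfs_perag	*pag = xfs_perag_get(mp, agno);
 *	struct xfs_ag_geometry	ageo;
 *
 *	if (!pag)
 *		return -EINVAL;
 *	error = xfs_ag_get_geometry(pag, &ageo);
 *	xfs_perag_put(pag);
 */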