/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"


#ifdef HAVE_PERCPU_SB
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
						int);
STATIC void	xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
						int);
STATIC void	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
#else

#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
#define xfs_icsb_balance_counter_locked(mp, a, b)	do { } while (0)
#endif

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
static uuid_t *xfs_uuid_table;

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
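 *
 * (For example, mounting a block-level copy of an already mounted filesystem
 * will fail this check, since both carry the same sb_uuid; the nouuid mount
 * option, which sets XFS_MOUNT_NOUUID, skips the check entirely.)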
 */
STATIC int
xfs_uuid_mount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			hole, i;

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return 0;

	if (uuid_is_nil(uuid)) {
		xfs_warn(mp, "Filesystem has nil UUID - can't mount");
		return -EINVAL;
	}

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) {
		if (uuid_is_nil(&xfs_uuid_table[i])) {
			hole = i;
			continue;
		}
		if (uuid_equal(uuid, &xfs_uuid_table[i]))
			goto out_duplicate;
	}

	if (hole < 0) {
		xfs_uuid_table = kmem_realloc(xfs_uuid_table,
			(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
			xfs_uuid_table_size * sizeof(*xfs_uuid_table),
			KM_SLEEP);
		hole = xfs_uuid_table_size++;
	}
	xfs_uuid_table[hole] = *uuid;
	mutex_unlock(&xfs_uuid_table_mutex);

	return 0;

 out_duplicate:
	mutex_unlock(&xfs_uuid_table_mutex);
	xfs_warn(mp, "Filesystem has duplicate UUID %pU - can't mount", uuid);
	return -EINVAL;
}

STATIC void
xfs_uuid_unmount(
	struct xfs_mount	*mp)
{
	uuid_t			*uuid = &mp->m_sb.sb_uuid;
	int			i;

	if (mp->m_flags & XFS_MOUNT_NOUUID)
		return;

	mutex_lock(&xfs_uuid_table_mutex);
	for (i = 0; i < xfs_uuid_table_size; i++) {
		if (uuid_is_nil(&xfs_uuid_table[i]))
			continue;
		if (!uuid_equal(uuid, &xfs_uuid_table[i]))
			continue;
		memset(&xfs_uuid_table[i], 0, sizeof(uuid_t));
		break;
	}
	ASSERT(i < xfs_uuid_table_size);
	mutex_unlock(&xfs_uuid_table_mutex);
}


STATIC void
__xfs_free_perag(
	struct rcu_head	*head)
{
	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);

	ASSERT(atomic_read(&pag->pag_ref) == 0);
	kmem_free(pag);
}

/*
 * Free up the per-ag resources associated with the mount structure.
 */
STATIC void
xfs_free_perag(
	xfs_mount_t	*mp)
{
	xfs_agnumber_t	agno;
	struct xfs_perag *pag;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		spin_lock(&mp->m_perag_lock);
		pag = radix_tree_delete(&mp->m_perag_tree, agno);
		spin_unlock(&mp->m_perag_lock);
		ASSERT(pag);
		ASSERT(atomic_read(&pag->pag_ref) == 0);
		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	__uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return -EFBIG;
	return 0;
}

int
xfs_initialize_perag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount,
	xfs_agnumber_t	*maxagi)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	first_initialised = 0;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	xfs_sb_t	*sbp = &mp->m_sb;
	int		error = -ENOMEM;

	/*
	 * Walk the current per-ag tree so we don't try to initialise AGs
	 * that already exist (growfs case). Allocate and insert all the
	 * AGs we don't find ready for initialisation.
	 */
	for (index = 0; index < agcount; index++) {
		pag = xfs_perag_get(mp, index);
		if (pag) {
			xfs_perag_put(pag);
			continue;
		}
		if (!first_initialised)
			first_initialised = index;

		pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
		if (!pag)
			goto out_unwind;
		pag->pag_agno = index;
		pag->pag_mount = mp;
		spin_lock_init(&pag->pag_ici_lock);
		mutex_init(&pag->pag_ici_reclaim_lock);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		spin_lock_init(&pag->pag_buf_lock);
		pag->pag_buf_tree = RB_ROOT;

		if (radix_tree_preload(GFP_NOFS))
			goto out_unwind;

		spin_lock(&mp->m_perag_lock);
		if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
			BUG();
			spin_unlock(&mp->m_perag_lock);
			radix_tree_preload_end();
			error = -EEXIST;
			goto out_unwind;
		}
		spin_unlock(&mp->m_perag_lock);
		radix_tree_preload_end();
	}

	/*
	 * If we mount with the inode64 option, or no inode overflows
	 * the legacy 32-bit address space, clear the inode32 option.
	 */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	else
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;

	if (mp->m_flags & XFS_MOUNT_32BITINODES)
		index = xfs_set_inode32(mp, agcount);
	else
		index = xfs_set_inode64(mp, agcount);

	if (maxagi)
		*maxagi = index;
	return 0;

out_unwind:
	kmem_free(pag);
	for (; index > first_initialised; index--) {
		pag = radix_tree_delete(&mp->m_perag_tree, index);
		kmem_free(pag);
	}
	return error;
}

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(
	struct xfs_mount *mp,
	int		flags)
{
	unsigned int	sector_size;
	struct xfs_buf	*bp;
	struct xfs_sb	*sbp = &mp->m_sb;
	int		error;
	int		loud = !(flags & XFS_MFSI_QUIET);
	const struct xfs_buf_ops *buf_ops;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * For the initial read, we must guess at the sector
	 * size based on the block device.  It's enough to
	 * get the sb_sectsize out of the superblock and
	 * then reread with the proper length.
	 * We don't verify it yet, because it may not be complete.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	buf_ops = NULL;

	/*
	 * Allocate a (locked) buffer to hold the superblock.
	 * This will be kept around at all times to optimize
	 * access to the superblock.
	 */
reread:
	error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
				      BTOBB(sector_size), 0, &bp, buf_ops);
	if (error) {
		if (loud)
			xfs_warn(mp, "SB validate failed with error %d.", error);
		/* bad CRC means corrupted metadata */
		if (error == -EFSBADCRC)
			error = -EFSCORRUPTED;
		return error;
	}

	/*
	 * Initialize the mount structure from the superblock.
	 */
	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));

	/*
	 * If we haven't validated the superblock, do so now before we try
	 * to check the sector size and reread the superblock appropriately.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		if (loud)
			xfs_warn(mp, "Invalid superblock magic number");
		error = -EINVAL;
		goto release_buf;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
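	 * For example, a filesystem made with 512 byte sectors cannot be
	 * mounted on a device that can only perform 4096 byte sector I/O,
	 * since we could never issue a correctly sized superblock write.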
	 */
	if (sector_size > sbp->sb_sectsize) {
		if (loud)
			xfs_warn(mp, "device supports %u byte sectors (not %u)",
				sector_size, sbp->sb_sectsize);
		error = -ENOSYS;
		goto release_buf;
	}

	if (buf_ops == NULL) {
		/*
		 * Re-read the superblock so the buffer is correctly sized,
		 * and properly verified.
		 */
		xfs_buf_relse(bp);
		sector_size = sbp->sb_sectsize;
		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
		goto reread;
	}

	/* Initialize per-cpu counters */
	xfs_icsb_reinit_counters(mp);

	/* no need to be quiet anymore, so reset the buf ops */
	bp->b_ops = &xfs_sb_buf_ops;

	mp->m_sb_bp = bp;
	xfs_buf_unlock(bp);
	return 0;

release_buf:
	xfs_buf_relse(bp);
	return error;
}

/*
 * Update alignment values based on mount options and sb values
 */
STATIC int
xfs_update_alignment(xfs_mount_t *mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);

	if (mp->m_dalign) {
		/*
		 * If stripe unit and stripe width are not multiples
		 * of the fs blocksize turn off alignment.
		 */
		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
			xfs_warn(mp,
		"alignment check failed: sunit/swidth vs. blocksize(%d)",
				sbp->sb_blocksize);
			return -EINVAL;
		} else {
			/*
			 * Convert the stripe unit and width to FSBs.
			 */
			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				xfs_warn(mp,
			"alignment check failed: sunit/swidth vs. agsize(%d)",
					sbp->sb_agblocks);
				return -EINVAL;
			} else if (mp->m_dalign) {
				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
			} else {
				xfs_warn(mp,
		"alignment check failed: sunit(%d) less than bsize(%d)",
					mp->m_dalign, sbp->sb_blocksize);
				return -EINVAL;
			}
		}

		/*
		 * Update superblock with new values
		 * and log changes
		 */
		if (xfs_sb_version_hasdalign(sbp)) {
			if (sbp->sb_unit != mp->m_dalign) {
				sbp->sb_unit = mp->m_dalign;
				mp->m_update_flags |= XFS_SB_UNIT;
			}
			if (sbp->sb_width != mp->m_swidth) {
				sbp->sb_width = mp->m_swidth;
				mp->m_update_flags |= XFS_SB_WIDTH;
			}
		} else {
			xfs_warn(mp,
	"cannot change alignment: superblock does not support data alignment");
			return -EINVAL;
		}
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    xfs_sb_version_hasdalign(&mp->m_sb)) {
			mp->m_dalign = sbp->sb_unit;
			mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * Set the maximum inode count for this filesystem
 */
STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	__uint64_t	icount;

	if (sbp->sb_imax_pct) {
		/*
		 * Make sure the maximum inode count is a multiple
		 * of the units we allocate inodes in.
		 */
		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, mp->m_ialloc_blks);
		mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				   sbp->sb_inopblog;
	} else {
		mp->m_maxicount = 0;
	}
}

/*
 * Set the default minimum read and write sizes unless
 * already specified in a mount option.
 * We use smaller I/O sizes when the file system
 * is being used for NFS service (wsync mount option).
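 *
 * The *_LOG values are log2 of the preferred I/O size in bytes; e.g. a
 * 2^16 byte (64k) preferred size on a filesystem with 4k blocks
 * (sb_blocklog == 12) gives m_readio_blocks = 1 << (16 - 12) = 16 blocks.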
 */
STATIC void
xfs_set_rw_sizes(xfs_mount_t *mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	int		readio_log, writeio_log;

	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
		if (mp->m_flags & XFS_MOUNT_WSYNC) {
			readio_log = XFS_WSYNC_READIO_LOG;
			writeio_log = XFS_WSYNC_WRITEIO_LOG;
		} else {
			readio_log = XFS_READIO_LOG_LARGE;
			writeio_log = XFS_WRITEIO_LOG_LARGE;
		}
	} else {
		readio_log = mp->m_readio_log;
		writeio_log = mp->m_writeio_log;
	}

	if (sbp->sb_blocklog > readio_log) {
		mp->m_readio_log = sbp->sb_blocklog;
	} else {
		mp->m_readio_log = readio_log;
	}
	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
	if (sbp->sb_blocklog > writeio_log) {
		mp->m_writeio_log = sbp->sb_blocklog;
	} else {
		mp->m_writeio_log = writeio_log;
	}
	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
}

/*
 * precalculate the low space thresholds for dynamic speculative preallocation.
 */
void
xfs_set_low_space_thresholds(
	struct xfs_mount	*mp)
{
	int i;

	for (i = 0; i < XFS_LOWSP_MAX; i++) {
		__uint64_t space = mp->m_sb.sb_dblocks;

		do_div(space, 100);
		mp->m_low_space[i] = space * (i + 1);
	}
}


/*
 * Set whether we're using inode alignment.
 */
STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
	if (xfs_sb_version_hasalign(&mp->m_sb) &&
	    mp->m_sb.sb_inoalignmt >=
	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
	else
		mp->m_inoalign_mask = 0;
	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && mp->m_inoalign_mask &&
	    !(mp->m_dalign & mp->m_inoalign_mask))
		mp->m_sinoalign = mp->m_dalign;
	else
		mp->m_sinoalign = 0;
}

/*
 * Check that the data (and log if separate) is an ok size.
 */
STATIC int
xfs_check_sizes(
	struct xfs_mount *mp)
{
	struct xfs_buf	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		xfs_warn(mp, "filesystem size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
					d - XFS_FSS_TO_BB(mp, 1),
					XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "last sector read failed");
		return error;
	}
	xfs_buf_relse(bp);

	if (mp->m_logdev_targp == mp->m_ddev_targp)
		return 0;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
		xfs_warn(mp, "log size mismatch detected");
		return -EFBIG;
	}
	error = xfs_buf_read_uncached(mp->m_logdev_targp,
					d - XFS_FSB_TO_BB(mp, 1),
					XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
	if (error) {
		xfs_warn(mp, "log device read failed");
		return error;
	}
	xfs_buf_relse(bp);
	return 0;
}

/*
 * Clear the quotaflags in memory and in the superblock.
 */
int
xfs_mount_reset_sbqflags(
	struct xfs_mount	*mp)
{
	int			error;
	struct xfs_trans	*tp;

	mp->m_qflags = 0;

	/*
	 * It is OK to look at sb_qflags here in mount path,
	 * without m_sb_lock.
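	 * Nothing else can be modifying sb_qflags this early in the mount
	 * path, so the unlocked read is safe; the update just below still
	 * takes the lock before changing it.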
	 */
	if (mp->m_sb.sb_qflags == 0)
		return 0;
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_qflags = 0;
	spin_unlock(&mp->m_sb_lock);

	/*
	 * If the fs is readonly, let the incore superblock run
	 * with quotas off but don't flush the update out to disk
	 */
	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return 0;

	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		xfs_alert(mp, "%s: Superblock update failed!", __func__);
		return error;
	}

	xfs_mod_sb(tp, XFS_SB_QFLAGS);
	return xfs_trans_commit(tp, 0);
}

__uint64_t
xfs_default_resblks(xfs_mount_t *mp)
{
	__uint64_t resblks;

	/*
	 * We default to 5% or 8192 fsbs of space reserved, whichever is
	 * smaller.  This is intended to cover concurrent allocation
	 * transactions when we initially hit enospc. These each require a 4
	 * block reservation. Hence by default we cover roughly 2000 concurrent
	 * allocation reservations.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(__uint64_t, resblks, 8192);
	return resblks;
}

/*
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	xfs_mount_t	*mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	xfs_inode_t	*rip;
	__uint64_t	resblks;
	uint		quotamount = 0;
	uint		quotaflags = 0;
	int		error = 0;

	xfs_sb_mount_common(mp, sbp);

	/*
	 * Check for mismatched features2 values.  Older kernels
	 * read & wrote into the wrong sb offset for sb_features2
	 * on some platforms due to xfs_sb_t not being 64bit size aligned
	 * when sb_features2 was added, which made older superblock
	 * reading/writing routines swap it as a 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the
	 * existing features2 field in case it has already been modified; we
	 * don't want to lose any features.  We then update the bad location
	 * with the ORed value so that older kernels will see any features2
	 * flags, and mark the two fields as needing updates once the
	 * transaction subsystem is online.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		xfs_warn(mp, "correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		sbp->sb_bad_features2 = sbp->sb_features2;
		mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;

		/*
		 * Re-check for ATTR2 in case it was found in bad_features2
		 * slot.
		 */
		if (xfs_sb_version_hasattr2(&mp->m_sb) &&
		    !(mp->m_flags & XFS_MOUNT_NOATTR2))
			mp->m_flags |= XFS_MOUNT_ATTR2;
	}

	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_sb_version_removeattr2(&mp->m_sb);
		mp->m_update_flags |= XFS_SB_FEATURES2;

		/* update sb_versionnum for the clearing of the morebits */
		if (!sbp->sb_features2)
			mp->m_update_flags |= XFS_SB_VERSIONNUM;
	}

	/* always use v2 inodes by default now */
	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) {
		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT;
		mp->m_update_flags |= XFS_SB_VERSIONNUM;
	}

	/*
	 * Check if sb_agblocks is aligned at stripe boundary.
	 * If sb_agblocks is NOT aligned turn off m_dalign since
	 * allocator alignment is within an ag, therefore ag has
	 * to be aligned at stripe boundary.
	 */
	error = xfs_update_alignment(mp);
	if (error)
		goto out;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_compute_maxlevels(mp);

	xfs_set_maxicount(mp);

	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
	if (error)
		goto out;

	error = xfs_uuid_mount(mp);
	if (error)
		goto out_remove_sysfs;

	/*
	 * Set the minimum read and write sizes
	 */
	xfs_set_rw_sizes(mp);

	/* set the low space thresholds for dynamic preallocation */
	xfs_set_low_space_thresholds(mp);

	/*
	 * Set the inode cluster size.
	 * This may still be overridden by the file system
	 * block size if it is larger than the chosen cluster size.
	 *
	 * For v5 filesystems, scale the cluster size with the inode size to
	 * keep a constant ratio of inode per cluster buffer, but only if mkfs
	 * has set the inode alignment value appropriately for larger cluster
	 * sizes.
	 */
	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		int	new_size = mp->m_inode_cluster_size;

		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
			mp->m_inode_cluster_size = new_size;
	}

	/*
	 * Set inode alignment fields
	 */
	xfs_set_inoalignment(mp);

	/*
	 * Check that the data (and log if separate) is an ok size.
	 */
	error = xfs_check_sizes(mp);
	if (error)
		goto out_remove_uuid;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		xfs_warn(mp, "RT mount failed");
		goto out_remove_uuid;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);

	mp->m_dmevmask = 0;	/* not persistent; set after each mount */

	error = xfs_da_mount(mp);
	if (error) {
		xfs_warn(mp, "Failed dir/attr init: %d", error);
		goto out_remove_uuid;
	}

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	spin_lock_init(&mp->m_perag_lock);
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
	if (error) {
		xfs_warn(mp, "Failed per-ag init: %d", error);
		goto out_free_dir;
	}

	if (!sbp->sb_logblocks) {
		xfs_warn(mp, "no log defined");
		XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
		error = -EFSCORRUPTED;
		goto out_free_perag;
	}

	/*
	 * log's mount-time initialization. Perform 1st part recovery if needed
	 */
	error = xfs_log_mount(mp, mp->m_logdev_targp,
			      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
			      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
	if (error) {
		xfs_warn(mp, "log mount failed");
		goto out_fail_wait;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was, the first phase of recovery has completed and we
	 * have consistent AG blocks on disk. We have not recovered EFIs yet,
	 * but they are recovered transactionally in the second recovery phase
	 * later.
	 *
	 * Hence we can safely re-initialise incore superblock counters from
	 * the per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we need to wait for recovery to finish before
	 * doing this.
	 *
	 * If the filesystem was cleanly unmounted, then we can trust the
	 * values in the superblock to be correct and we don't need to do
	 * anything here.
	 *
	 * If we are currently making the filesystem, the initialisation will
	 * fail as the perag data is in an undefined state.
	 */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	     !mp->m_sb.sb_inprogress) {
		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
		if (error)
			goto out_log_dealloc;
	}

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
	if (error) {
		xfs_warn(mp, "failed to read root inode");
		goto out_log_dealloc;
	}

	ASSERT(rip != NULL);

	if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
		xfs_warn(mp, "corrupted root inode %llu: not a directory",
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				 mp);
		error = -EFSCORRUPTED;
		goto out_rele_rip;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		xfs_warn(mp, "failed to read RT inodes");
		goto out_rele_rip;
	}

	/*
	 * If this is a read-only mount defer the superblock updates until
	 * the next remount into writeable mode.  Otherwise we would never
	 * perform the update e.g. for the root filesystem.
	 */
	if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		error = xfs_mount_log_sb(mp, mp->m_update_flags);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			goto out_rtunmount;
		}
	}

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if (XFS_IS_QUOTA_RUNNING(mp)) {
		error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
		if (error)
			goto out_rtunmount;
	} else {
		ASSERT(!XFS_IS_QUOTA_ON(mp));

		/*
		 * If a file system had quotas running earlier, but decided to
		 * mount without -o uquota/pquota/gquota options, revoke the
		 * quotachecked license.
		 */
		if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
			xfs_notice(mp, "resetting quota flags");
			error = xfs_mount_reset_sbqflags(mp);
			if (error)
				goto out_rtunmount;
		}
	}

	/*
	 * Finish recovering the file system.  This part needed to be
	 * delayed until after the root and real-time bitmap inodes
	 * were consistently read in.
	 */
	error = xfs_log_mount_finish(mp);
	if (error) {
		xfs_warn(mp, "log mount finish failed");
		goto out_rtunmount;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if (quotamount) {
		ASSERT(mp->m_qflags == 0);
		mp->m_qflags = quotaflags;

		xfs_qm_mount_quotas(mp);
	}

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * This may drive us straight to ENOSPC on mount, but that implies
	 * we were already there on the last unmount. Warn if this occurs.
	 */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
		resblks = xfs_default_resblks(mp);
		error = xfs_reserve_blocks(mp, &resblks, NULL);
		if (error)
			xfs_warn(mp,
	"Unable to allocate reserve blocks. Continuing without reserve pool.");
	}

	return 0;

 out_rtunmount:
	xfs_rtunmount_inodes(mp);
 out_rele_rip:
	IRELE(rip);
 out_log_dealloc:
	xfs_log_unmount(mp);
 out_fail_wait:
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
 out_free_perag:
	xfs_free_perag(mp);
 out_free_dir:
	xfs_da_unmount(mp);
 out_remove_uuid:
	xfs_uuid_unmount(mp);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);
 out:
	return error;
}

/*
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
void
xfs_unmountfs(
	struct xfs_mount	*mp)
{
	__uint64_t		resblks;
	int			error;

	cancel_delayed_work_sync(&mp->m_eofblocks_work);

	xfs_qm_unmount_quotas(mp);
	xfs_rtunmount_inodes(mp);
	IRELE(mp->m_rootip);

	/*
	 * We can potentially deadlock here if we have an inode cluster
	 * that has been freed and has its buffer still pinned in memory
	 * because the transaction is still sitting in an iclog. The stale
	 * inodes on that buffer will have their flush locks held until the
	 * transaction hits the disk and the callbacks run.
	 * The inode flush takes the flush lock unconditionally and, with
	 * nothing to push out the iclog, we will never get that unlocked.
	 * Hence we need to force the log first.
	 */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * Flush all pending changes from the AIL.
	 */
	xfs_ail_push_all_sync(mp->m_ail);

	/*
	 * And reclaim all inodes.  At this point there should be no dirty
	 * inodes and none should be pinned or locked, but use synchronous
	 * reclaim just to be sure. We can stop background inode reclaim
	 * here as well if it is still running.
	 */
	cancel_delayed_work_sync(&mp->m_reclaim_work);
	xfs_reclaim_inodes(mp, SYNC_WAIT);

	xfs_qm_unmount(mp);

	/*
	 * Unreserve any blocks we have so that when we unmount we don't account
	 * the reserved free space as used. This is really only necessary for
	 * lazy superblock counting because it trusts the incore superblock
	 * counters to be absolutely correct on clean unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct the
	 * correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	error = xfs_reserve_blocks(mp, &resblks, NULL);
	if (error)
		xfs_warn(mp, "Unable to free reserved block pool. "
				"Freespace may not be correct on next mount.");

	error = xfs_log_sbcount(mp);
	if (error)
		xfs_warn(mp, "Unable to update superblock counters. "
				"Freespace may not be correct on next mount.");

	xfs_log_unmount(mp);
	xfs_da_unmount(mp);
	xfs_uuid_unmount(mp);

#if defined(DEBUG)
	xfs_errortag_clearall(mp, 0);
#endif
	xfs_free_perag(mp);

	xfs_sysfs_del(&mp->m_kobj);
}

/*
 * Determine whether modifications can proceed. The caller specifies the minimum
 * freeze level for which modifications should not be allowed. This allows
 * certain operations to proceed while the freeze sequence is in progress, if
 * necessary.
 */
bool
xfs_fs_writable(
	struct xfs_mount	*mp,
	int			level)
{
	ASSERT(level > SB_UNFROZEN);
	if ((mp->m_super->s_writers.frozen >= level) ||
	    XFS_FORCED_SHUTDOWN(mp) || (mp->m_flags & XFS_MOUNT_RDONLY))
		return false;

	return true;
}

/*
 * xfs_log_sbcount
 *
 * Sync the superblock counters to disk.
 *
 * Note this code can be called during the process of freezing, so we use the
 * transaction allocator that does not block when the transaction subsystem is
 * in its frozen state.
 */
int
xfs_log_sbcount(xfs_mount_t *mp)
{
	xfs_trans_t	*tp;
	int		error;

	/* allow this to proceed during the freeze sequence... */
	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
		return 0;

	xfs_icsb_sync_counters(mp, 0);

	/*
	 * we don't need to do this if we are updating the superblock
	 * counters on every modification.
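	 * Filesystems without the lazysbcount feature log the counters as
	 * part of every transaction, so their on-disk values are already
	 * up to date and there is nothing extra to sync here.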
	 */
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return 0;

	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);
	return error;
}

/*
 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
 * a delta to a specified field in the in-core superblock.  Simply
 * switch on the field indicated and apply the delta to that field.
 * Fields are not allowed to dip below zero, so if the delta would
 * do this do not apply it and return EINVAL.
 *
 * The m_sb_lock must be held when this routine is called.
 */
STATIC int
xfs_mod_incore_sb_unlocked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	int		scounter;	/* short counter for 32 bit fields */
	long long	lcounter;	/* long counter for 64 bit fields */
	long long	res_used, rem;

	/*
	 * With the in-core superblock spin lock held, switch
	 * on the indicated field.  Apply the delta to the
	 * proper field.  If the field's value would dip below
	 * 0, then do not apply the delta and return EINVAL.
	 */
	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = (long long)mp->m_sb.sb_icount;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return -EINVAL;
		}
		mp->m_sb.sb_icount = lcounter;
		return 0;
	case XFS_SBS_IFREE:
		lcounter = (long long)mp->m_sb.sb_ifree;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return -EINVAL;
		}
		mp->m_sb.sb_ifree = lcounter;
		return 0;
	case XFS_SBS_FDBLOCKS:
		lcounter = (long long)
			mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (delta > 0) {		/* Putting blocks back */
			if (res_used > delta) {
				mp->m_resblks_avail += delta;
			} else {
				rem = delta - res_used;
				mp->m_resblks_avail = mp->m_resblks;
				lcounter += rem;
			}
		} else {			/* Taking blocks away */
			lcounter += delta;
			if (lcounter >= 0) {
				mp->m_sb.sb_fdblocks = lcounter +
							XFS_ALLOC_SET_ASIDE(mp);
				return 0;
			}

			/*
			 * We are out of blocks, use any available reserved
			 * blocks if we're allowed to.
			 */
			if (!rsvd)
				return -ENOSPC;

			lcounter = (long long)mp->m_resblks_avail + delta;
			if (lcounter >= 0) {
				mp->m_resblks_avail = lcounter;
				return 0;
			}
			printk_once(KERN_WARNING
				"Filesystem \"%s\": reserve blocks depleted! "
" 1215 "Consider increasing reserve pool size.", 1216 mp->m_fsname); 1217 return -ENOSPC; 1218 } 1219 1220 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1221 return 0; 1222 case XFS_SBS_FREXTENTS: 1223 lcounter = (long long)mp->m_sb.sb_frextents; 1224 lcounter += delta; 1225 if (lcounter < 0) { 1226 return -ENOSPC; 1227 } 1228 mp->m_sb.sb_frextents = lcounter; 1229 return 0; 1230 case XFS_SBS_DBLOCKS: 1231 lcounter = (long long)mp->m_sb.sb_dblocks; 1232 lcounter += delta; 1233 if (lcounter < 0) { 1234 ASSERT(0); 1235 return -EINVAL; 1236 } 1237 mp->m_sb.sb_dblocks = lcounter; 1238 return 0; 1239 case XFS_SBS_AGCOUNT: 1240 scounter = mp->m_sb.sb_agcount; 1241 scounter += delta; 1242 if (scounter < 0) { 1243 ASSERT(0); 1244 return -EINVAL; 1245 } 1246 mp->m_sb.sb_agcount = scounter; 1247 return 0; 1248 case XFS_SBS_IMAX_PCT: 1249 scounter = mp->m_sb.sb_imax_pct; 1250 scounter += delta; 1251 if (scounter < 0) { 1252 ASSERT(0); 1253 return -EINVAL; 1254 } 1255 mp->m_sb.sb_imax_pct = scounter; 1256 return 0; 1257 case XFS_SBS_REXTSIZE: 1258 scounter = mp->m_sb.sb_rextsize; 1259 scounter += delta; 1260 if (scounter < 0) { 1261 ASSERT(0); 1262 return -EINVAL; 1263 } 1264 mp->m_sb.sb_rextsize = scounter; 1265 return 0; 1266 case XFS_SBS_RBMBLOCKS: 1267 scounter = mp->m_sb.sb_rbmblocks; 1268 scounter += delta; 1269 if (scounter < 0) { 1270 ASSERT(0); 1271 return -EINVAL; 1272 } 1273 mp->m_sb.sb_rbmblocks = scounter; 1274 return 0; 1275 case XFS_SBS_RBLOCKS: 1276 lcounter = (long long)mp->m_sb.sb_rblocks; 1277 lcounter += delta; 1278 if (lcounter < 0) { 1279 ASSERT(0); 1280 return -EINVAL; 1281 } 1282 mp->m_sb.sb_rblocks = lcounter; 1283 return 0; 1284 case XFS_SBS_REXTENTS: 1285 lcounter = (long long)mp->m_sb.sb_rextents; 1286 lcounter += delta; 1287 if (lcounter < 0) { 1288 ASSERT(0); 1289 return -EINVAL; 1290 } 1291 mp->m_sb.sb_rextents = lcounter; 1292 return 0; 1293 case XFS_SBS_REXTSLOG: 1294 scounter = mp->m_sb.sb_rextslog; 1295 scounter += delta; 1296 if (scounter < 0) { 1297 ASSERT(0); 1298 return -EINVAL; 1299 } 1300 mp->m_sb.sb_rextslog = scounter; 1301 return 0; 1302 default: 1303 ASSERT(0); 1304 return -EINVAL; 1305 } 1306 } 1307 1308 /* 1309 * xfs_mod_incore_sb() is used to change a field in the in-core 1310 * superblock structure by the specified delta. This modification 1311 * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked() 1312 * routine to do the work. 1313 */ 1314 int 1315 xfs_mod_incore_sb( 1316 struct xfs_mount *mp, 1317 xfs_sb_field_t field, 1318 int64_t delta, 1319 int rsvd) 1320 { 1321 int status; 1322 1323 #ifdef HAVE_PERCPU_SB 1324 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); 1325 #endif 1326 spin_lock(&mp->m_sb_lock); 1327 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1328 spin_unlock(&mp->m_sb_lock); 1329 1330 return status; 1331 } 1332 1333 /* 1334 * Change more than one field in the in-core superblock structure at a time. 1335 * 1336 * The fields and changes to those fields are specified in the array of 1337 * xfs_mod_sb structures passed in. Either all of the specified deltas 1338 * will be applied or none of them will. If any modified field dips below 0, 1339 * then all modifications will be backed out and EINVAL will be returned. 1340 * 1341 * Note that this function may not be used for the superblock values that 1342 * are tracked with the in-memory per-cpu counters - a direct call to 1343 * xfs_icsb_modify_counters is required for these. 
 */
int
xfs_mod_incore_sb_batch(
	struct xfs_mount	*mp,
	xfs_mod_sb_t		*msb,
	uint			nmsb,
	int			rsvd)
{
	xfs_mod_sb_t		*msbp;
	int			error = 0;

	/*
	 * Loop through the array of mod structures and apply each individually.
	 * If any fail, then back out all those which have already been applied.
	 * Do all of this within the scope of the m_sb_lock so that all of the
	 * changes will be atomic.
	 */
	spin_lock(&mp->m_sb_lock);
	for (msbp = msb; msbp < (msb + nmsb); msbp++) {
		ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
		       msbp->msb_field > XFS_SBS_FDBLOCKS);

		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
						   msbp->msb_delta, rsvd);
		if (error)
			goto unwind;
	}
	spin_unlock(&mp->m_sb_lock);
	return 0;

unwind:
	while (--msbp >= msb) {
		error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
						   -msbp->msb_delta, rsvd);
		ASSERT(error == 0);
	}
	spin_unlock(&mp->m_sb_lock);
	return error;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_buf_relse().
 *
 * If the flags parameter is XBF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
struct xfs_buf *
xfs_getsb(
	struct xfs_mount	*mp,
	int			flags)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	if (!xfs_buf_trylock(bp)) {
		if (flags & XBF_TRYLOCK)
			return NULL;
		xfs_buf_lock(bp);
	}

	xfs_buf_hold(bp);
	ASSERT(XFS_BUF_ISDONE(bp));
	return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	struct xfs_mount	*mp)
{
	struct xfs_buf		*bp = mp->m_sb_bp;

	xfs_buf_lock(bp);
	mp->m_sb_bp = NULL;
	xfs_buf_relse(bp);
}

/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options, as well as any potential sb_features2
 * fixup. Only the first superblock is updated.
 */
int
xfs_mount_log_sb(
	xfs_mount_t	*mp,
	__int64_t	fields)
{
	xfs_trans_t	*tp;
	int		error;

	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
			 XFS_SB_VERSIONNUM));

	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}
	xfs_mod_sb(tp, fields);
	error = xfs_trans_commit(tp, 0);
	return error;
}

/*
 * If the underlying (data/log/rt) device is readonly, there are some
 * operations that cannot proceed.
 */
int
xfs_dev_is_read_only(
	struct xfs_mount	*mp,
	char			*message)
{
	if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
	    xfs_readonly_buftarg(mp->m_logdev_targp) ||
	    (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		xfs_notice(mp, "%s required on read-only device.", message);
		xfs_notice(mp, "write access unavailable, cannot proceed.");
		return -EROFS;
	}
	return 0;
}

#ifdef HAVE_PERCPU_SB
/*
 * Per-cpu incore superblock counters
 *
 * Simple concept, difficult implementation
 *
 * Basically, replace the incore superblock counters with a distributed per cpu
 * counter for contended fields (e.g. free block count).
 *
 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
 * hence needs to be accurately read when we are running low on space.  Hence
 * there is a method to enable and disable the per-cpu counters based on how
 * much "stuff" is available in them.
 *
 * Basically, a counter is enabled if there is enough free resource to justify
 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
 * ENOSPC), then we disable the counters to synchronise all callers and
 * re-distribute the available resources.
 *
 * If, once we redistributed the available resources, we still get a failure,
 * we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 *	1. m_sb_lock before picking up per-cpu locks
 *	2. per-cpu locks always picked up via for_each_online_cpu() order
 *	3. accurate counter sync requires m_sb_lock + per cpu locks
 *	4. modifying per-cpu counters requires holding per-cpu lock
 *	5. modifying global counters requires holding m_sb_lock
 *	6. enabling or disabling a counter requires holding the m_sb_lock
 *	   and _none_ of the per-cpu locks.
 *
 * Disabled counters are only ever re-enabled by a balance operation
 * that results in more free resources per CPU than a given threshold.
 * To ensure counters don't remain disabled, they are rebalanced when
 * the global resource goes above a higher threshold (i.e. some hysteresis
 * is present to prevent thrashing).
 */

#ifdef CONFIG_HOTPLUG_CPU
/*
 * hot-plug CPU notifier support.
 *
 * We need a notifier per filesystem as we need to be able to identify
 * the filesystem to balance the counters out. This is achieved by
 * having a notifier block embedded in the xfs_mount_t and doing pointer
 * magic to get the mount pointer from the notifier block address.
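 * (i.e. container_of(nfb, xfs_mount_t, m_icsb_notifier) in the callback
 * below recovers the owning mount from the embedded notifier block.)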
 */
STATIC int
xfs_icsb_cpu_notify(
	struct notifier_block *nfb,
	unsigned long action,
	void *hcpu)
{
	xfs_icsb_cnts_t *cntp;
	xfs_mount_t	*mp;

	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
	cntp = (xfs_icsb_cnts_t *)
			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		/* Easy Case - initialize the area and locks, and
		 * then rebalance when online does everything else for us. */
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		xfs_icsb_lock(mp);
		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
		xfs_icsb_unlock(mp);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Disable all the counters, then fold the dead cpu's
		 * count into the total on the global superblock and
		 * re-enable the counters. */
		xfs_icsb_lock(mp);
		spin_lock(&mp->m_sb_lock);
		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);

		mp->m_sb.sb_icount += cntp->icsb_icount;
		mp->m_sb.sb_ifree += cntp->icsb_ifree;
		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;

		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));

		xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
		xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
		xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
		spin_unlock(&mp->m_sb_lock);
		xfs_icsb_unlock(mp);
		break;
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

int
xfs_icsb_init_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
	if (mp->m_sb_cnts == NULL)
		return -ENOMEM;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
	}

	mutex_init(&mp->m_icsb_mutex);

	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;

#ifdef CONFIG_HOTPLUG_CPU
	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
	mp->m_icsb_notifier.priority = 0;
	register_hotcpu_notifier(&mp->m_icsb_notifier);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}

void
xfs_icsb_reinit_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_lock(mp);
	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
	xfs_icsb_unlock(mp);
}

void
xfs_icsb_destroy_counters(
	xfs_mount_t	*mp)
{
	if (mp->m_sb_cnts) {
		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
		free_percpu(mp->m_sb_cnts);
	}
	mutex_destroy(&mp->m_icsb_mutex);
}

STATIC void
xfs_icsb_lock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
		ndelay(1000);
	}
}

STATIC void
xfs_icsb_unlock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
}


STATIC void
xfs_icsb_lock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_lock_cntr(cntp);
	}
}

STATIC void
xfs_icsb_unlock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_unlock_cntr(cntp);
	}
}

STATIC void
xfs_icsb_count(
	xfs_mount_t	*mp,
	xfs_icsb_cnts_t	*cnt,
	int		flags)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_lock_all_counters(mp);

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		cnt->icsb_icount += cntp->icsb_icount;
		cnt->icsb_ifree += cntp->icsb_ifree;
		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
	}

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_unlock_all_counters(mp);
}

STATIC int
xfs_icsb_counter_disabled(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
	return test_bit(field, &mp->m_icsb_counters);
}

STATIC void
xfs_icsb_disable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	xfs_icsb_cnts_t	cnt;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	/*
	 * If we are already disabled, then there is nothing to do
	 * here. We check before locking all the counters to avoid
	 * the expensive lock operation when being called in the
	 * slow path and the counter is already disabled. This is
	 * safe because the only time we set or clear this state is under
	 * the m_icsb_mutex.
	 */
	if (xfs_icsb_counter_disabled(mp, field))
		return;

	xfs_icsb_lock_all_counters(mp);
	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
		/* drain back to superblock */

		xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
		switch(field) {
		case XFS_SBS_ICOUNT:
			mp->m_sb.sb_icount = cnt.icsb_icount;
			break;
		case XFS_SBS_IFREE:
			mp->m_sb.sb_ifree = cnt.icsb_ifree;
			break;
		case XFS_SBS_FDBLOCKS:
			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
			break;
		default:
			BUG();
		}
	}

	xfs_icsb_unlock_all_counters(mp);
}

STATIC void
xfs_icsb_enable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	uint64_t	count,
	uint64_t	resid)
{
	xfs_icsb_cnts_t	*cntp;
	int		i;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	xfs_icsb_lock_all_counters(mp);
	for_each_online_cpu(i) {
		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
		switch (field) {
		case XFS_SBS_ICOUNT:
			cntp->icsb_icount = count + resid;
			break;
		case XFS_SBS_IFREE:
			cntp->icsb_ifree = count + resid;
			break;
		case XFS_SBS_FDBLOCKS:
			cntp->icsb_fdblocks = count + resid;
			break;
		default:
			BUG();
			break;
		}
		resid = 0;
	}
	clear_bit(field, &mp->m_icsb_counters);
	xfs_icsb_unlock_all_counters(mp);
}

void
xfs_icsb_sync_counters_locked(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_icsb_cnts_t	cnt;

	xfs_icsb_count(mp, &cnt, flags);

	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
		mp->m_sb.sb_icount = cnt.icsb_icount;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
		mp->m_sb.sb_ifree = cnt.icsb_ifree;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
}

/*
 * Accurate update of per-cpu counters to incore superblock
 */
void
xfs_icsb_sync_counters(
	xfs_mount_t	*mp,
	int		flags)
{
	spin_lock(&mp->m_sb_lock);
	xfs_icsb_sync_counters_locked(mp, flags);
	spin_unlock(&mp->m_sb_lock);
}

/*
 * Balance and enable/disable counters as necessary.
 *
 * Thresholds for re-enabling counters are somewhat magic.  inode counts are
 * chosen to be the same number as a single on-disk allocation chunk per CPU,
 * and free blocks is set far enough from zero that we aren't going to thrash
 * when we get near ENOSPC. We also need to supply a minimum we require per cpu
 * to prevent looping endlessly when xfs_alloc_space asks for more than will
 * be distributed to a single CPU but each CPU has enough blocks to be
 * reenabled.
 *
 * Note that we can be called when counters are already disabled.
 * xfs_icsb_disable_counter() optimises the counter locking in this case to
 * prevent locking every per-cpu counter needlessly.
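 *
 * The actual thresholds are XFS_ICSB_INO_CNTR_REENABLE (64 inodes per CPU)
 * and XFS_ICSB_FDBLK_CNTR_REENABLE() (512 blocks plus the allocator
 * set-aside, per CPU), defined just below.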
 */

#define XFS_ICSB_INO_CNTR_REENABLE	(uint64_t)64
#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
		(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
xfs_icsb_balance_counter_locked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		min_per_cpu)
{
	uint64_t	count, resid;
	int		weight = num_online_cpus();
	uint64_t	min = (uint64_t)min_per_cpu;

	/* disable counter and sync counter */
	xfs_icsb_disable_counter(mp, field);

	/* update counters - first CPU gets residual */
	switch (field) {
	case XFS_SBS_ICOUNT:
		count = mp->m_sb.sb_icount;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			return;
		break;
	case XFS_SBS_IFREE:
		count = mp->m_sb.sb_ifree;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			return;
		break;
	case XFS_SBS_FDBLOCKS:
		count = mp->m_sb.sb_fdblocks;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
			return;
		break;
	default:
		BUG();
		count = resid = 0;	/* quiet, gcc */
		break;
	}

	xfs_icsb_enable_counter(mp, field, count, resid);
}

STATIC void
xfs_icsb_balance_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t  fields,
	int		min_per_cpu)
{
	spin_lock(&mp->m_sb_lock);
	xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
	spin_unlock(&mp->m_sb_lock);
}

int
xfs_icsb_modify_counters(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	xfs_icsb_cnts_t	*icsbp;
	long long	lcounter;	/* long counter for 64 bit fields */
	int		ret = 0;

	might_sleep();
again:
	preempt_disable();
	icsbp = this_cpu_ptr(mp->m_sb_cnts);

	/*
	 * if the counter is disabled, go to slow path
	 */
	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
		goto slow_path;
	xfs_icsb_lock_cntr(icsbp);
	if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock_cntr(icsbp);
		goto slow_path;
	}

	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = icsbp->icsb_icount;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_icount = lcounter;
		break;

	case XFS_SBS_IFREE:
		lcounter = icsbp->icsb_ifree;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_ifree = lcounter;
		break;

	case XFS_SBS_FDBLOCKS:
		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);

		lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
		break;
	default:
		BUG();
		break;
	}
	xfs_icsb_unlock_cntr(icsbp);
	preempt_enable();
	return 0;

slow_path:
	preempt_enable();

	/*
	 * serialise with a mutex so we don't burn lots of cpu on
	 * the superblock lock. We still need to hold the superblock
	 * lock, however, when we modify the global structures.
	 */
	xfs_icsb_lock(mp);

	/*
	 * Now running atomically.
	 *
	 * If the counter is enabled, someone has beaten us to rebalancing.
	 * Drop the lock and try again in the fast path....
	 */
	if (!(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock(mp);
		goto again;
	}

	/*
	 * The counter is currently disabled. Because we are
	 * running atomically here, we know a rebalance cannot
	 * be in progress. Hence we can go straight to operating
	 * on the global superblock. We do not call xfs_mod_incore_sb()
	 * here even though we need to get the m_sb_lock. Doing so
	 * will cause us to re-enter this function and deadlock.
	 * Hence we get the m_sb_lock ourselves and then call
	 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
	 * directly on the global counters.
	 */
	spin_lock(&mp->m_sb_lock);
	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
	spin_unlock(&mp->m_sb_lock);

	/*
	 * Now that we've modified the global superblock, we
	 * may be able to re-enable the distributed counters
	 * (e.g. lots of space just got freed). After that
	 * we are done.
	 */
	if (ret != -ENOSPC)
		xfs_icsb_balance_counter(mp, field, 0);
	xfs_icsb_unlock(mp);
	return ret;

balance_counter:
	xfs_icsb_unlock_cntr(icsbp);
	preempt_enable();

	/*
	 * We may have multiple threads here if multiple per-cpu
	 * counters run dry at the same time. This will mean we can
	 * do more balances than strictly necessary but it is not
	 * the common slowpath case.
	 */
	xfs_icsb_lock(mp);

	/*
	 * running atomically.
	 *
	 * This will leave the counter in the correct state for future
	 * accesses. After the rebalance, we simply try again and our retry
	 * will either succeed through the fast path or slow path without
	 * another balance operation being required.
	 */
	xfs_icsb_balance_counter(mp, field, delta);
	xfs_icsb_unlock(mp);
	goto again;
}

#endif