/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"

STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
STATIC int	xfs_uuid_mount(xfs_mount_t *);
STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void	xfs_unmountfs_wait(xfs_mount_t *);


#ifdef HAVE_PERCPU_SB
STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
						int, int);
STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
						int, int);
STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);

#else

#define xfs_icsb_destroy_counters(mp)			do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
#define xfs_icsb_sync_counters(mp)			do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
#define xfs_icsb_modify_counters_locked(mp, a, b, c)	do { } while (0)

#endif

static const struct {
	short offset;
	short type;	/* 0 = integer
			 * 1 = binary / string (no translation)
			 */
} xfs_sb_info[] = {
	{ offsetof(xfs_sb_t, sb_magicnum),	0 },
	{ offsetof(xfs_sb_t, sb_blocksize),	0 },
	{ offsetof(xfs_sb_t, sb_dblocks),	0 },
	{ offsetof(xfs_sb_t, sb_rblocks),	0 },
	{ offsetof(xfs_sb_t, sb_rextents),	0 },
	{ offsetof(xfs_sb_t, sb_uuid),		1 },
	{ offsetof(xfs_sb_t, sb_logstart),	0 },
	{ offsetof(xfs_sb_t, sb_rootino),	0 },
	{ offsetof(xfs_sb_t, sb_rbmino),	0 },
	{ offsetof(xfs_sb_t, sb_rsumino),	0 },
	{ offsetof(xfs_sb_t, sb_rextsize),	0 },
	{ offsetof(xfs_sb_t, sb_agblocks),	0 },
	{ offsetof(xfs_sb_t, sb_agcount),	0 },
	{ offsetof(xfs_sb_t, sb_rbmblocks),	0 },
	{ offsetof(xfs_sb_t, sb_logblocks),	0 },
	{ offsetof(xfs_sb_t, sb_versionnum),	0 },
	{ offsetof(xfs_sb_t, sb_sectsize),	0 },
	{ offsetof(xfs_sb_t, sb_inodesize),	0 },
	{ offsetof(xfs_sb_t, sb_inopblock),	0 },
	{ offsetof(xfs_sb_t, sb_fname[0]),	1 },
	{ offsetof(xfs_sb_t, sb_blocklog),	0 },
	{ offsetof(xfs_sb_t, sb_sectlog),	0 },
	{ offsetof(xfs_sb_t, sb_inodelog),	0 },
	{ offsetof(xfs_sb_t, sb_inopblog),	0 },
	{ offsetof(xfs_sb_t, sb_agblklog),	0 },
	{ offsetof(xfs_sb_t, sb_rextslog),	0 },
	{ offsetof(xfs_sb_t, sb_inprogress),	0 },
	{ offsetof(xfs_sb_t, sb_imax_pct),	0 },
	{ offsetof(xfs_sb_t, sb_icount),	0 },
	{ offsetof(xfs_sb_t, sb_ifree),		0 },
	{ offsetof(xfs_sb_t, sb_fdblocks),	0 },
	{ offsetof(xfs_sb_t, sb_frextents),	0 },
	{ offsetof(xfs_sb_t, sb_uquotino),	0 },
	{ offsetof(xfs_sb_t, sb_gquotino),	0 },
	{ offsetof(xfs_sb_t, sb_qflags),	0 },
	{ offsetof(xfs_sb_t, sb_flags),		0 },
	{ offsetof(xfs_sb_t, sb_shared_vn),	0 },
	{ offsetof(xfs_sb_t, sb_inoalignmt),	0 },
	{ offsetof(xfs_sb_t, sb_unit),		0 },
	{ offsetof(xfs_sb_t, sb_width),		0 },
	{ offsetof(xfs_sb_t, sb_dirblklog),	0 },
	{ offsetof(xfs_sb_t, sb_logsectlog),	0 },
	{ offsetof(xfs_sb_t, sb_logsectsize),	0 },
	{ offsetof(xfs_sb_t, sb_logsunit),	0 },
	{ offsetof(xfs_sb_t, sb_features2),	0 },
	{ sizeof(xfs_sb_t),			0 }
};
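/*
 * Illustrative note (not in the original source): xfs_xlatesb() below
 * derives each field's size from the difference between consecutive
 * offsets in this table, which is why the table is terminated with a
 * sizeof(xfs_sb_t) sentinel entry:
 *
 *	first = xfs_sb_info[f].offset;
 *	size  = xfs_sb_info[f + 1].offset - first;
 *
 * The entry order here must therefore exactly match the field order
 * in xfs_sb_t.
 */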
/*
 * Return a pointer to an initialized xfs_mount structure.
 */
xfs_mount_t *
xfs_mount_init(void)
{
	xfs_mount_t	*mp;

	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);

	if (xfs_icsb_init_counters(mp)) {
		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
	}

	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
	spinlock_init(&mp->m_sb_lock, "xfs_sb");
	mutex_init(&mp->m_ilock);
	initnsema(&mp->m_growlock, 1, "xfs_grow");
	/*
	 * Initialize the AIL.
	 */
	xfs_trans_ail_init(mp);

	atomic_set(&mp->m_active_trans, 0);

	return mp;
}

/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
void
xfs_mount_free(
	xfs_mount_t	*mp,
	int		remove_bhv)
{
	if (mp->m_ihash)
		xfs_ihash_free(mp);
	if (mp->m_chash)
		xfs_chash_free(mp);

	if (mp->m_perag) {
		int	agno;

		for (agno = 0; agno < mp->m_maxagi; agno++)
			if (mp->m_perag[agno].pagb_list)
				kmem_free(mp->m_perag[agno].pagb_list,
					  sizeof(xfs_perag_busy_t) *
						XFS_PAGB_NUM_SLOTS);
		kmem_free(mp->m_perag,
			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
	}

	AIL_LOCK_DESTROY(&mp->m_ail_lock);
	spinlock_destroy(&mp->m_sb_lock);
	mutex_destroy(&mp->m_ilock);
	freesema(&mp->m_growlock);
	if (mp->m_quotainfo)
		XFS_QM_DONE(mp);

	if (mp->m_fsname != NULL)
		kmem_free(mp->m_fsname, mp->m_fsname_len);
	if (mp->m_rtname != NULL)
		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
	if (mp->m_logname != NULL)
		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);

	if (remove_bhv) {
		struct vfs	*vfsp = XFS_MTOVFS(mp);

		bhv_remove_all_vfsops(vfsp, 0);
		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
	}

	xfs_icsb_destroy_counters(mp);
	kmem_free(mp, sizeof(xfs_mount_t));
}


/*
 * Check the validity of the SB found.
 */
STATIC int
xfs_mount_validate_sb(
	xfs_mount_t	*mp,
	xfs_sb_t	*sbp,
	int		flags)
{
	/*
	 * If the log device and data device have the
	 * same device number, the log is internal.
	 * Consequently, the sb_logstart should be non-zero.  If
	 * we have a zero sb_logstart in this case, we may be trying to mount
	 * a volume filesystem in a non-volume manner.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		xfs_fs_mount_cmn_err(flags, "bad magic number");
		return XFS_ERROR(EWRONGFS);
	}

	if (!XFS_SB_GOOD_VERSION(sbp)) {
		xfs_fs_mount_cmn_err(flags, "bad version");
		return XFS_ERROR(EWRONGFS);
	}

	if (unlikely(
	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an external log; "
			"specify logdev on the\nmount command line.");
		return XFS_ERROR(EINVAL);
	}

	if (unlikely(
	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an internal log; "
			"do not specify logdev on\nthe mount command line.");
		return XFS_ERROR(EINVAL);
	}

	/*
	 * More sanity checking. These were stolen directly from
	 * xfs_repair.
	 */
	if (unlikely(
	    sbp->sb_agcount <= 0					||
	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Sanity check AG count, size fields against data size field
	 */
	if (unlikely(
	    sbp->sb_dblocks == 0 ||
	    sbp->sb_dblocks >
	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

#if XFS_BIG_BLKNOS	/* Limited by ULONG_MAX of page cache index */
	if (unlikely(
	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
#else			/* Limited by UINT_MAX of sectors */
	if (unlikely(
	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
#endif
		xfs_fs_mount_cmn_err(flags,
			"file system too large to be mounted on this system.");
		return XFS_ERROR(E2BIG);
	}

	if (unlikely(sbp->sb_inprogress)) {
		xfs_fs_mount_cmn_err(flags, "file system busy");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Version 1 directory format has never worked on Linux.
	 */
	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
		xfs_fs_mount_cmn_err(flags,
			"file system using version 1 directory format");
		return XFS_ERROR(ENOSYS);
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
		xfs_fs_mount_cmn_err(flags,
			"file system with blocksize %d bytes",
			sbp->sb_blocksize);
		xfs_fs_mount_cmn_err(flags,
			"only pagesize (%ld) or less will currently work.",
			PAGE_SIZE);
		return XFS_ERROR(ENOSYS);
	}

	return 0;
}
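/*
 * Worked example for the sanity checks above (illustrative numbers):
 * a filesystem with 4096-byte blocks (sb_blocklog == 12) and 256-byte
 * inodes (sb_inodelog == 8) must have sb_inopblog == 4, since
 *
 *	sb_blocklog - sb_inodelog == sb_inopblog
 *
 * i.e. 1 << 4 == 16 inodes per block, matching sb_inopblock.  A
 * superblock where these fields disagree fails "SB sanity check 1"
 * as corrupt.
 */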
xfs_agnumber_t
xfs_initialize_perag(
	struct vfs	*vfs,
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index, max_metadata;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;

	/* Check to see if the filesystem can overflow 32 bit inodes */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/* Set the mount flag only if 32 bit inodes were requested and
	 * some inode number on this filesystem could overflow 32 bits;
	 * clear it otherwise.
	 */
	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	} else {
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
	}

	/* If we can overflow then setup the ag headers accordingly */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		/* Calculate how much should be reserved for inodes to
		 * meet the max inode percentage.
		 */
		if (mp->m_maxicount) {
			__uint64_t	icount;

			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
			do_div(icount, 100);
			icount += sbp->sb_agblocks - 1;
			do_div(icount, sbp->sb_agblocks);
			max_metadata = icount;
		} else {
			max_metadata = agcount;
		}
		for (index = 0; index < agcount; index++) {
			ino = XFS_AGINO_TO_INO(mp, index, agino);
			if (ino > max_inum) {
				index++;
				break;
			}

			/* This ag is preferred for inodes */
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			if (index < max_metadata)
				pag->pagf_metadata = 1;
		}
	} else {
		/* Setup default behavior for smaller filesystems */
		for (index = 0; index < agcount; index++) {
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
		}
	}
	return index;
}
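/*
 * Illustrative sketch (restating the logic above): the largest inode
 * number this filesystem can create lives at the last block/offset of
 * the last AG, so
 *
 *	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
 *	ino   = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
 *
 * and if that exceeds XFS_MAXINUMBER_32 while the user asked for
 * VFS_32BITINODES, only the low AGs whose inode numbers still fit in
 * 32 bits are marked pagi_inodeok above.
 */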
/*
 * xfs_xlatesb
 *
 *	data	- on disk version of sb
 *	sb	- a superblock
 *	dir	- conversion direction: <0 - convert sb to buf
 *					>0 - convert buf to sb
 *	fields	- which fields to copy (bitmask)
 */
void
xfs_xlatesb(
	void		*data,
	xfs_sb_t	*sb,
	int		dir,
	__int64_t	fields)
{
	xfs_caddr_t	buf_ptr;
	xfs_caddr_t	mem_ptr;
	xfs_sb_field_t	f;
	int		first;
	int		size;

	ASSERT(dir);
	ASSERT(fields);

	if (!fields)
		return;

	buf_ptr = (xfs_caddr_t)data;
	mem_ptr = (xfs_caddr_t)sb;

	while (fields) {
		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
		first = xfs_sb_info[f].offset;
		size = xfs_sb_info[f + 1].offset - first;

		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);

		if (size == 1 || xfs_sb_info[f].type == 1) {
			if (dir > 0) {
				memcpy(mem_ptr + first, buf_ptr + first, size);
			} else {
				memcpy(buf_ptr + first, mem_ptr + first, size);
			}
		} else {
			switch (size) {
			case 2:
				INT_XLATE(*(__uint16_t*)(buf_ptr+first),
					  *(__uint16_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			case 4:
				INT_XLATE(*(__uint32_t*)(buf_ptr+first),
					  *(__uint32_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			case 8:
				INT_XLATE(*(__uint64_t*)(buf_ptr+first),
					  *(__uint64_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			default:
				ASSERT(0);
			}
		}

		fields &= ~(1LL << f);
	}
}
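/*
 * Usage note (taken from the callers in this file): xfs_readsb()
 * pulls the whole on-disk superblock into the in-core copy with
 *
 *	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
 *
 * while xfs_mod_sb() runs the conversion the other way (dir == -1),
 * copying only the modified fields back into the buffer for logging.
 */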
/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(xfs_mount_t *mp, int flags)
{
	unsigned int	sector_size;
	unsigned int	extra_flags;
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp;
	int		error;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * Allocate a (locked) buffer to hold the superblock.
	 * This will be kept around at all times to optimize
	 * access to the superblock.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;

	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				BTOBB(sector_size), extra_flags);
	if (!bp || XFS_BUF_ISERROR(bp)) {
		xfs_fs_mount_cmn_err(flags, "SB read failed");
		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
		goto fail;
	}
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);

	/*
	 * Initialize the mount structure from the superblock.
	 * But first do some basic consistency checking.
	 */
	sbp = XFS_BUF_TO_SBP(bp);
	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);

	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
	if (error) {
		xfs_fs_mount_cmn_err(flags, "SB validate failed");
		goto fail;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > mp->m_sb.sb_sectsize) {
		xfs_fs_mount_cmn_err(flags,
			"device supports only %u byte sectors (not %u)",
			sector_size, mp->m_sb.sb_sectsize);
		error = ENOSYS;
		goto fail;
	}

	/*
	 * If device sector size is smaller than the superblock size,
	 * re-read the superblock so the buffer is correctly sized.
	 */
	if (sector_size < mp->m_sb.sb_sectsize) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
		sector_size = mp->m_sb.sb_sectsize;
		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
					BTOBB(sector_size), extra_flags);
		if (!bp || XFS_BUF_ISERROR(bp)) {
			xfs_fs_mount_cmn_err(flags, "SB re-read failed");
			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
			goto fail;
		}
		ASSERT(XFS_BUF_ISBUSY(bp));
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	}

	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);

	mp->m_sb_bp = bp;
	xfs_buf_relse(bp);
	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
	return 0;

 fail:
	if (bp) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
	}
	return error;
}


/*
 * xfs_mount_common
 *
 * Mount initialization code establishing various mount
 * fields from the superblock associated with the given
 * mount structure
 */
STATIC void
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
{
	int	i;

	mp->m_agfrotor = mp->m_agirotor = 0;
	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
	mp->m_maxagi = mp->m_sb.sb_agcount;
	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
	mp->m_litino = sbp->sb_inodesize -
		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
	mp->m_blockmask = sbp->sb_blocksize - 1;
	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
	mp->m_blockwmask = mp->m_blockwsize - 1;
	INIT_LIST_HEAD(&mp->m_del_inodes);

	/*
	 * Setup for attributes, in case they get created.
	 * This value is for inodes getting attributes for the first time,
	 * the per-inode value is for old attribute values.
	 */
	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
	switch (sbp->sb_inodesize) {
	case 256:
		mp->m_attroffset = XFS_LITINO(mp) -
				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
		break;
	case 512:
	case 1024:
	case 2048:
		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
		break;
	default:
		ASSERT(0);
	}
	ASSERT(mp->m_attroffset < XFS_LITINO(mp));

	for (i = 0; i < 2; i++) {
		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
	}

	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
					sbp->sb_inopblock);
	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
}
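/*
 * Worked example (illustrative numbers): with 4096-byte blocks
 * (sb_blocklog == 12) and 512-byte sectors (sb_sectlog == 9), the
 * derived fields above become
 *
 *	m_blkbit_log = 12 + XFS_NBBYLOG = 15	bits per block
 *	m_blkbb_log  = 12 - BBSHIFT     = 3	basic blocks per fs block
 *	m_sectbb_log = 9  - BBSHIFT     = 0	basic blocks per sector
 *
 * and m_blockmask == 0xfff, so conversions between byte offsets,
 * basic blocks and fs blocks reduce to shifts and masks.
 */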
/*
 * xfs_mountfs
 *
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *	  so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	vfs_t		*vfsp,
	xfs_mount_t	*mp,
	int		mfsi_flags)
{
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp = &(mp->m_sb);
	xfs_inode_t	*rip;
	vnode_t		*rvp = NULL;
	int		readio_log, writeio_log;
	xfs_daddr_t	d;
	__uint64_t	ret64;
	__int64_t	update_flags;
	uint		quotamount, quotaflags;
	int		agno;
	int		uuid_mounted = 0;
	int		error = 0;

	if (mp->m_sb_bp == NULL) {
		if ((error = xfs_readsb(mp, mfsi_flags))) {
			return error;
		}
	}
	xfs_mount_common(mp, sbp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary
	 * If sb_agblocks is NOT aligned turn off m_dalign since
	 * allocator alignment is within an ag, therefore ag has
	 * to be aligned at stripe boundary.
	 */
	update_flags = 0LL;
	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
		/*
		 * If stripe unit and stripe width are not multiples
		 * of the fs blocksize turn off alignment.
		 */
		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
			if (mp->m_flags & XFS_MOUNT_RETERR) {
				cmn_err(CE_WARN,
					"XFS: alignment check 1 failed");
				error = XFS_ERROR(EINVAL);
				goto error1;
			}
			mp->m_dalign = mp->m_swidth = 0;
		} else {
			/*
			 * Convert the stripe unit and width to FSBs.
			 */
			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
					mp->m_dalign, mp->m_swidth,
					sbp->sb_agblocks);

				mp->m_dalign = 0;
				mp->m_swidth = 0;
			} else if (mp->m_dalign) {
				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
			} else {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d) less than bsize(%d)",
						mp->m_dalign,
						mp->m_blockmask + 1);
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				mp->m_swidth = 0;
			}
		}

		/*
		 * Update superblock with new values
		 * and log changes
		 */
		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
			if (sbp->sb_unit != mp->m_dalign) {
				sbp->sb_unit = mp->m_dalign;
				update_flags |= XFS_SB_UNIT;
			}
			if (sbp->sb_width != mp->m_swidth) {
				sbp->sb_width = mp->m_swidth;
				update_flags |= XFS_SB_WIDTH;
			}
		}
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
			mp->m_dalign = sbp->sb_unit;
			mp->m_swidth = sbp->sb_width;
	}

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_compute_maxlevels(mp);

	if (sbp->sb_imax_pct) {
		__uint64_t	icount;

		/* Make sure the maximum inode count is a multiple of the
		 * units we allocate inodes in.
		 */

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, mp->m_ialloc_blks);
		mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				   sbp->sb_inopblog;
	} else
		mp->m_maxicount = 0;

	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);

	/*
	 * XFS uses the uuid from the superblock as the unique
	 * identifier for fsid.  We can not use the uuid from the volume
	 * since a single partition filesystem is identical to a single
	 * partition volume/filesystem.
	 */
	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
		if (xfs_uuid_mount(mp)) {
			error = XFS_ERROR(EINVAL);
			goto error1;
		}
		uuid_mounted = 1;
		ret64 = uuid_hash64(&sbp->sb_uuid);
		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
	}

	/*
	 * Set the default minimum read and write sizes unless
	 * already specified in a mount option.
	 * We use smaller I/O sizes when the file system
	 * is being used for NFS service (wsync mount option).
	 */
	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
		if (mp->m_flags & XFS_MOUNT_WSYNC) {
			readio_log = XFS_WSYNC_READIO_LOG;
			writeio_log = XFS_WSYNC_WRITEIO_LOG;
		} else {
			readio_log = XFS_READIO_LOG_LARGE;
			writeio_log = XFS_WRITEIO_LOG_LARGE;
		}
	} else {
		readio_log = mp->m_readio_log;
		writeio_log = mp->m_writeio_log;
	}

	/*
	 * Set the number of readahead buffers to use based on
	 * physical memory size.
	 */
	if (xfs_physmem <= 4096)		/* <= 16MB */
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
	else if (xfs_physmem <= 8192)		/* <= 32MB */
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
	else
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
	if (sbp->sb_blocklog > readio_log) {
		mp->m_readio_log = sbp->sb_blocklog;
	} else {
		mp->m_readio_log = readio_log;
	}
	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
	if (sbp->sb_blocklog > writeio_log) {
		mp->m_writeio_log = sbp->sb_blocklog;
	} else {
		mp->m_writeio_log = writeio_log;
	}
	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);

	/*
	 * Set the inode cluster size based on the physical memory
	 * size.  This may still be overridden by the file system
	 * block size if it is larger than the chosen cluster size.
	 */
	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
		mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
	} else {
		mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
	}
	/*
	 * Set whether we're using inode alignment.
	 */
	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
	    mp->m_sb.sb_inoalignmt >=
	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
	else
		mp->m_inoalign_mask = 0;
	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && mp->m_inoalign_mask &&
	    !(mp->m_dalign & mp->m_inoalign_mask))
		mp->m_sinoalign = mp->m_dalign;
	else
		mp->m_sinoalign = 0;
	/*
	 * Check that the data (and log if separate) are an ok size.
	 */
	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		cmn_err(CE_WARN, "XFS: size check 1 failed");
		error = XFS_ERROR(E2BIG);
		goto error1;
	}
	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     d - XFS_FSS_TO_BB(mp, 1),
			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (!error) {
		xfs_buf_relse(bp);
	} else {
		cmn_err(CE_WARN, "XFS: size check 2 failed");
		if (error == ENOSPC) {
			error = XFS_ERROR(E2BIG);
		}
		goto error1;
	}

	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
	    mp->m_logdev_targp != mp->m_ddev_targp) {
		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			error = XFS_ERROR(E2BIG);
			goto error1;
		}
		error = xfs_read_buf(mp, mp->m_logdev_targp,
				     d - XFS_FSB_TO_BB(mp, 1),
				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
		if (!error) {
			xfs_buf_relse(bp);
		} else {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			if (error == ENOSPC) {
				error = XFS_ERROR(E2BIG);
			}
			goto error1;
		}
	}

	/*
	 * Initialize realtime fields in the mount structure
	 */
	if ((error = xfs_rtmount_init(mp))) {
		cmn_err(CE_WARN, "XFS: RT mount failed");
		goto error1;
	}

	/*
	 * For client case we are done now
	 */
	if (mfsi_flags & XFS_MFSI_CLIENT) {
		return 0;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);

	/*
	 * The vfs structure needs to have a file system independent
	 * way of checking for the invariant file system ID.  Since it
	 * can't look at mount structures it has a pointer to the data
	 * in the mount structure.
	 *
	 * File systems that don't support user level file handles (i.e.
	 * all of them except for XFS) will leave vfs_altfsid as NULL.
	 */
	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
	mp->m_dmevmask = 0;	/* not persistent; set after each mount */

	/*
	 * Select the right directory manager.
	 */
	mp->m_dirops =
		XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
			xfsv2_dirops :
			xfsv1_dirops;

	/*
	 * Initialize directory manager's entries.
	 */
	XFS_DIR_MOUNT(mp);

	/*
	 * Initialize the attribute manager's entries.
	 */
	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the inode hash table for this
	 * file system.
	 */
	xfs_ihash_init(mp);
	xfs_chash_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	init_rwsem(&mp->m_peraglock);
	mp->m_perag =
		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);

	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);

	/*
	 * log's mount-time initialization. Perform 1st part recovery if needed
	 */
	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
		error = xfs_log_mount(mp, mp->m_logdev_targp,
				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
		if (error) {
			cmn_err(CE_WARN, "XFS: log mount failed");
			goto error2;
		}
	} else {	/* No log has been defined */
		cmn_err(CE_WARN, "XFS: no log defined");
		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error2;
	}

	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
	if (error) {
		cmn_err(CE_WARN, "XFS: failed to read root inode");
		goto error3;
	}

	ASSERT(rip != NULL);
	rvp = XFS_ITOV(rip);

	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
		cmn_err(CE_WARN, "XFS: corrupted root inode");
		prdev("Root inode %llu is not a directory",
		      mp->m_ddev_targp, (unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				 mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error4;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	if ((error = xfs_rtmount_inodes(mp))) {
		/*
		 * Free up the root inode.
		 */
		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
		goto error4;
	}

	/*
	 * If fs is not mounted readonly, then update the superblock
	 * unit and width changes.
	 */
	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
		xfs_mount_log_sbunit(mp, update_flags);

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
		goto error4;

	/*
	 * Finish recovering the file system.  This part needed to be
	 * delayed until after the root and real-time bitmap inodes
	 * were consistently read in.
	 */
	error = xfs_log_mount_finish(mp, mfsi_flags);
	if (error) {
		cmn_err(CE_WARN, "XFS: log mount finish failed");
		goto error4;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
		goto error4;

	return 0;

 error4:
	/*
	 * Free up the root inode.
	 */
	VN_RELE(rvp);
 error3:
	xfs_log_unmount_dealloc(mp);
 error2:
	xfs_ihash_free(mp);
	xfs_chash_free(mp);
	for (agno = 0; agno < sbp->sb_agcount; agno++)
		if (mp->m_perag[agno].pagb_list)
			kmem_free(mp->m_perag[agno].pagb_list,
				  sizeof(xfs_perag_busy_t) *
					XFS_PAGB_NUM_SLOTS);
	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
	mp->m_perag = NULL;
	/* FALLTHROUGH */
 error1:
	if (uuid_mounted)
		xfs_uuid_unmount(mp);
	xfs_freesb(mp);
	return error;
}

/*
 * xfs_unmountfs
 *
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
int
xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
{
	struct vfs	*vfsp = XFS_MTOVFS(mp);
#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	int64_t		fsid;
#endif

	xfs_iflush_all(mp);

	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

	/*
	 * Flush out the log synchronously so that we know for sure
	 * that nothing is pinned.  This is important because bflush()
	 * will skip pinned buffers.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);

	xfs_binval(mp->m_ddev_targp);
	if (mp->m_rtdev_targp) {
		xfs_binval(mp->m_rtdev_targp);
	}

	xfs_unmountfs_writesb(mp);

	xfs_unmountfs_wait(mp);			/* wait for async bufs */

	xfs_log_unmount(mp);			/* Done! No more fs ops. */

	xfs_freesb(mp);

	/*
	 * All inodes from this mount point should be freed.
	 */
	ASSERT(mp->m_inodes == NULL);

	xfs_unmountfs_close(mp, cr);
	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
		xfs_uuid_unmount(mp);

#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	/*
	 * clear all error tags on this filesystem
	 */
	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
#endif
	XFS_IODONE(vfsp);
	xfs_mount_free(mp, 1);
	return 0;
}

void
xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp, 1);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp, 1);
	xfs_free_buftarg(mp->m_ddev_targp, 0);
}

STATIC void
xfs_unmountfs_wait(xfs_mount_t *mp)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_wait_buftarg(mp->m_rtdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
}

int
xfs_unmountfs_writesb(xfs_mount_t *mp)
{
	xfs_buf_t	*sbp;
	xfs_sb_t	*sb;
	int		error = 0;

	/*
	 * skip superblock write if fs is read-only, or
	 * if we are doing a forced umount.
	 */
	sbp = xfs_getsb(mp, 0);
	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
		XFS_FORCED_SHUTDOWN(mp))) {

		xfs_icsb_sync_counters(mp);

		/*
		 * mark shared-readonly if desired
		 */
		sb = XFS_BUF_TO_SBP(sbp);
		if (mp->m_mk_sharedro) {
			if (!(sb->sb_flags & XFS_SBF_READONLY))
				sb->sb_flags |= XFS_SBF_READONLY;
			if (!XFS_SB_VERSION_HASSHARED(sb))
				XFS_SB_VERSION_ADDSHARED(sb);
			xfs_fs_cmn_err(CE_NOTE, mp,
				"Unmounting, marking shared read-only");
		}
		XFS_BUF_UNDONE(sbp);
		XFS_BUF_UNREAD(sbp);
		XFS_BUF_UNDELAYWRITE(sbp);
		XFS_BUF_WRITE(sbp);
		XFS_BUF_UNASYNC(sbp);
		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
		xfsbdstrat(mp, sbp);
		/* Never mind errors we might get here. */
		error = xfs_iowait(sbp);
		if (error)
			xfs_ioerror_alert("xfs_unmountfs_writesb",
					  mp, sbp, XFS_BUF_ADDR(sbp));
		if (error && mp->m_mk_sharedro)
			xfs_fs_cmn_err(CE_ALERT, mp,
				"Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
	}
	xfs_buf_relse(sbp);
	return error;
}

/*
 * xfs_mod_sb() can be used to copy arbitrary changes to the
 * in-core superblock into the superblock buffer to be logged.
 * It does not provide the higher level of locking that is
 * needed to protect the in-core superblock from concurrent
 * access.
 */
void
xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
{
	xfs_buf_t	*bp;
	int		first;
	int		last;
	xfs_mount_t	*mp;
	xfs_sb_t	*sbp;
	xfs_sb_field_t	f;

	ASSERT(fields);
	if (!fields)
		return;
	mp = tp->t_mountp;
	bp = xfs_trans_getsb(tp, mp, 0);
	sbp = XFS_BUF_TO_SBP(bp);
	first = sizeof(xfs_sb_t);
	last = 0;

	/* translate/copy */

	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);

	/* find modified range */

	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	first = xfs_sb_info[f].offset;

	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	last = xfs_sb_info[f + 1].offset - 1;

	xfs_trans_log_buf(tp, bp, first, last);
}
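/*
 * Worked example (illustrative): for fields == (XFS_SB_UNIT|XFS_SB_WIDTH)
 * the lowest set bit selects sb_unit and the highest selects sb_width,
 * giving
 *
 *	first = offsetof(xfs_sb_t, sb_unit);
 *	last  = offsetof(xfs_sb_t, sb_dirblklog) - 1;
 *
 * so exactly the contiguous byte range covering both fields is logged.
 * This is another reason xfs_sb_info[] must list fields in on-disk order.
 */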
/*
 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
 * a delta to a specified field in the in-core superblock.  Simply
 * switch on the field indicated and apply the delta to that field.
 * Fields are not allowed to dip below zero, so if the delta would
 * do this do not apply it and return EINVAL.
 *
 * The SB_LOCK must be held when this routine is called.
 */
int
xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
			int delta, int rsvd)
{
	int		scounter;	/* short counter for 32 bit fields */
	long long	lcounter;	/* long counter for 64 bit fields */
	long long	res_used, rem;

	/*
	 * With the in-core superblock spin lock held, switch
	 * on the indicated field.  Apply the delta to the
	 * proper field.  If the field's value would dip below
	 * 0, then do not apply the delta and return EINVAL.
	 */
	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = (long long)mp->m_sb.sb_icount;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_icount = lcounter;
		return 0;
	case XFS_SBS_IFREE:
		lcounter = (long long)mp->m_sb.sb_ifree;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_ifree = lcounter;
		return 0;
	case XFS_SBS_FDBLOCKS:

		lcounter = (long long)mp->m_sb.sb_fdblocks;
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (delta > 0) {		/* Putting blocks back */
			if (res_used > delta) {
				mp->m_resblks_avail += delta;
			} else {
				rem = delta - res_used;
				mp->m_resblks_avail = mp->m_resblks;
				lcounter += rem;
			}
		} else {			/* Taking blocks away */

			lcounter += delta;

			/*
			 * If we're out of blocks, use any available
			 * reserved blocks if we're allowed to.
			 */

			if (lcounter < 0) {
				if (rsvd) {
					lcounter = (long long)mp->m_resblks_avail + delta;
					if (lcounter < 0) {
						return XFS_ERROR(ENOSPC);
					}
					mp->m_resblks_avail = lcounter;
					return 0;
				} else {	/* not reserved */
					return XFS_ERROR(ENOSPC);
				}
			}
		}

		mp->m_sb.sb_fdblocks = lcounter;
		return 0;
	case XFS_SBS_FREXTENTS:
		lcounter = (long long)mp->m_sb.sb_frextents;
		lcounter += delta;
		if (lcounter < 0) {
			return XFS_ERROR(ENOSPC);
		}
		mp->m_sb.sb_frextents = lcounter;
		return 0;
	case XFS_SBS_DBLOCKS:
		lcounter = (long long)mp->m_sb.sb_dblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_dblocks = lcounter;
		return 0;
	case XFS_SBS_AGCOUNT:
		scounter = mp->m_sb.sb_agcount;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_agcount = scounter;
		return 0;
	case XFS_SBS_IMAX_PCT:
		scounter = mp->m_sb.sb_imax_pct;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_imax_pct = scounter;
		return 0;
	case XFS_SBS_REXTSIZE:
		scounter = mp->m_sb.sb_rextsize;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextsize = scounter;
		return 0;
	case XFS_SBS_RBMBLOCKS:
		scounter = mp->m_sb.sb_rbmblocks;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rbmblocks = scounter;
		return 0;
	case XFS_SBS_RBLOCKS:
		lcounter = (long long)mp->m_sb.sb_rblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rblocks = lcounter;
		return 0;
	case XFS_SBS_REXTENTS:
		lcounter = (long long)mp->m_sb.sb_rextents;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextents = lcounter;
		return 0;
	case XFS_SBS_REXTSLOG:
		scounter = mp->m_sb.sb_rextslog;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextslog = scounter;
		return 0;
	default:
		ASSERT(0);
		return XFS_ERROR(EINVAL);
	}
}
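/*
 * Illustrative scenario for the XFS_SBS_FDBLOCKS case above (assumed
 * numbers): with m_resblks == 1024 and m_resblks_avail == 1000, a
 * caller returning delta == +100 blocks sees res_used == 24, so 24
 * blocks top the reserve pool back up to 1024 and the remaining 76
 * are added to sb_fdblocks.  Conversely, a negative delta that would
 * push sb_fdblocks below zero is satisfied from m_resblks_avail
 * instead when rsvd is set, and fails with ENOSPC otherwise.
 */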
/*
 * xfs_mod_incore_sb() is used to change a field in the in-core
 * superblock structure by the specified delta.  This modification
 * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
 * routine to do the work.
 */
int
xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
{
	unsigned long	s;
	int		status;

	/* check for per-cpu counters */
	switch (field) {
#ifdef HAVE_PERCPU_SB
	case XFS_SBS_ICOUNT:
	case XFS_SBS_IFREE:
	case XFS_SBS_FDBLOCKS:
		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
			status = xfs_icsb_modify_counters(mp, field,
							delta, rsvd);
			break;
		}
		/* FALLTHROUGH */
#endif
	default:
		s = XFS_SB_LOCK(mp);
		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
		XFS_SB_UNLOCK(mp, s);
		break;
	}

	return status;
}

/*
 * xfs_mod_incore_sb_batch() is used to change more than one field
 * in the in-core superblock structure at a time.  This modification
 * is protected by a lock internal to this module.  The fields and
 * changes to those fields are specified in the array of xfs_mod_sb
 * structures passed in.
 *
 * Either all of the specified deltas will be applied or none of
 * them will.  If any modified field dips below 0, then all modifications
 * will be backed out and EINVAL will be returned.
 */
int
xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
{
	unsigned long	s;
	int		status = 0;
	xfs_mod_sb_t	*msbp;

	/*
	 * Loop through the array of mod structures and apply each
	 * individually.  If any fail, then back out all those
	 * which have already been applied.  Do all of this within
	 * the scope of the SB_LOCK so that all of the changes will
	 * be atomic.
	 */
	s = XFS_SB_LOCK(mp);
	for (msbp = &msb[0]; msbp < (msb + nmsb); msbp++) {
		/*
		 * Apply the delta at index n.  If it fails, break
		 * from the loop so we'll fall into the undo loop
		 * below.
		 */
		switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
		case XFS_SBS_ICOUNT:
		case XFS_SBS_IFREE:
		case XFS_SBS_FDBLOCKS:
			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				status = xfs_icsb_modify_counters_locked(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
				break;
			}
			/* FALLTHROUGH */
#endif
		default:
			status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
			break;
		}

		if (status != 0) {
			break;
		}
	}

	/*
	 * If we didn't complete the loop above, then back out
	 * any changes made to the superblock.  If you add code
	 * between the loop above and here, make sure that you
	 * preserve the value of status. Loop back until
	 * we step below the beginning of the array.  Make sure
	 * we don't touch anything back there.
	 */
	if (status != 0) {
		msbp--;
		while (msbp >= msb) {
			switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
			case XFS_SBS_ICOUNT:
			case XFS_SBS_IFREE:
			case XFS_SBS_FDBLOCKS:
				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
					status =
					    xfs_icsb_modify_counters_locked(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
					break;
				}
				/* FALLTHROUGH */
#endif
			default:
				status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
				break;
			}
			ASSERT(status == 0);
			msbp--;
		}
	}
	XFS_SB_UNLOCK(mp, s);
	return status;
}
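/*
 * Minimal usage sketch (hypothetical values, not from this file):
 * callers build an array of xfs_mod_sb_t entries and apply them
 * atomically:
 *
 *	xfs_mod_sb_t	msb[2];
 *
 *	msb[0].msb_field = XFS_SBS_FDBLOCKS;
 *	msb[0].msb_delta = -16;
 *	msb[1].msb_field = XFS_SBS_FREXTENTS;
 *	msb[1].msb_delta = -1;
 *	error = xfs_mod_incore_sb_batch(mp, msb, 2, rsvd);
 *
 * Either both deltas are applied, or neither is.
 */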
/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_buf_relse().
 *
 * If the flags parameter is BUF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
xfs_buf_t *
xfs_getsb(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_buf_t	*bp;

	ASSERT(mp->m_sb_bp != NULL);
	bp = mp->m_sb_bp;
	if (flags & XFS_BUF_TRYLOCK) {
		if (!XFS_BUF_CPSEMA(bp)) {
			return NULL;
		}
	} else {
		XFS_BUF_PSEMA(bp, PRIBIO);
	}
	XFS_BUF_HOLD(bp);
	ASSERT(XFS_BUF_ISDONE(bp));
	return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	xfs_mount_t	*mp)
{
	xfs_buf_t	*bp;

	/*
	 * Use xfs_getsb() so that the buffer will be locked
	 * when we call xfs_buf_relse().
	 */
	bp = xfs_getsb(mp, 0);
	XFS_BUF_UNMANAGE(bp);
	xfs_buf_relse(bp);
	mp->m_sb_bp = NULL;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	xfs_mount_t	*mp)
{
	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has nil UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has duplicate UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	return 0;
}

/*
 * Remove filesystem from the UUID table.
 */
STATIC void
xfs_uuid_unmount(
	xfs_mount_t	*mp)
{
	uuid_table_remove(&mp->m_sb.sb_uuid);
}

/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options. Only the first superblock is updated.
 */
STATIC void
xfs_mount_log_sbunit(
	xfs_mount_t	*mp,
	__int64_t	fields)
{
	xfs_trans_t	*tp;

	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));

	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				XFS_DEFAULT_LOG_COUNT)) {
		xfs_trans_cancel(tp, 0);
		return;
	}
	xfs_mod_sb(tp, fields);
	xfs_trans_commit(tp, 0, NULL);
}


#ifdef HAVE_PERCPU_SB
/*
 * Per-cpu incore superblock counters
 *
 * Simple concept, difficult implementation
 *
 * Basically, replace the incore superblock counters with a distributed per cpu
 * counter for contended fields (e.g. free block count).
 *
 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
 * hence needs to be accurately read when we are running low on space. Hence
 * there is a method to enable and disable the per-cpu counters based on how
 * much "stuff" is available in them.
 *
 * Basically, a counter is enabled if there is enough free resource to justify
 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
 * ENOSPC), then we disable the counters to synchronise all callers and
 * re-distribute the available resources.
 *
 * If, once we redistributed the available resources, we still get a failure,
 * we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 *	1. XFS_SB_LOCK() before picking up per-cpu locks
 *	2. per-cpu locks always picked up via for_each_online_cpu() order
 *	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
 *	4. modifying per-cpu counters requires holding per-cpu lock
 *	5. modifying global counters requires holding XFS_SB_LOCK
 *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
 *	   and _none_ of the per-cpu locks.
 *
 * Disabled counters are only ever re-enabled by a balance operation
 * that results in more free resources per CPU than a given threshold.
 * To ensure counters don't remain disabled, they are rebalanced when
 * the global resource goes above a higher threshold (i.e. some hysteresis
 * is present to prevent thrashing).
 */
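/*
 * Illustrative numbers for the scheme above (not from the original
 * source): with sb_fdblocks == 2048 and 4 online CPUs, a balance hands
 * each per-cpu counter 512 blocks (any residual goes to the first CPU)
 * and re-enables the counter, since 512 is not below the re-enable
 * threshold.  Were the total only 100 blocks, 25 per CPU would fall
 * below XFS_ICSB_FDBLK_CNTR_REENABLE, the counter would stay disabled,
 * and all callers would funnel through the locked slow path.
 */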
/*
 * hot-plug CPU notifier support.
 *
 * We cannot use the hotcpu_register() function because it does
 * not allow notifier instances. We need a notifier per filesystem
 * as we need to be able to identify the filesystem to balance
 * the counters out. This is achieved by having a notifier block
 * embedded in the xfs_mount_t and doing pointer magic to get the
 * mount pointer from the notifier block address.
 */
STATIC int
xfs_icsb_cpu_notify(
	struct notifier_block	*nfb,
	unsigned long		action,
	void			*hcpu)
{
	xfs_icsb_cnts_t *cntp;
	xfs_mount_t	*mp;
	int		s;

	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
	cntp = (xfs_icsb_cnts_t *)
			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
	switch (action) {
	case CPU_UP_PREPARE:
		/* Easy Case - initialize the area and locks, and
		 * then rebalance when online does everything else for us. */
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
		break;
	case CPU_ONLINE:
		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
		break;
	case CPU_DEAD:
		/* Disable all the counters, then fold the dead cpu's
		 * count into the total on the global superblock and
		 * re-enable the counters. */
		s = XFS_SB_LOCK(mp);
		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);

		mp->m_sb.sb_icount += cntp->icsb_icount;
		mp->m_sb.sb_ifree += cntp->icsb_ifree;
		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;

		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));

		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
		XFS_SB_UNLOCK(mp, s);
		break;
	}

	return NOTIFY_OK;
}

int
xfs_icsb_init_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
	if (mp->m_sb_cnts == NULL)
		return -ENOMEM;

	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
	mp->m_icsb_notifier.priority = 0;
	register_cpu_notifier(&mp->m_icsb_notifier);

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
	}
	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	return 0;
}

STATIC void
xfs_icsb_destroy_counters(
	xfs_mount_t	*mp)
{
	if (mp->m_sb_cnts) {
		unregister_cpu_notifier(&mp->m_icsb_notifier);
		free_percpu(mp->m_sb_cnts);
	}
}

STATIC inline void
xfs_icsb_lock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
		ndelay(1000);
	}
}

STATIC inline void
xfs_icsb_unlock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
}


STATIC inline void
xfs_icsb_lock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_lock_cntr(cntp);
	}
}

STATIC inline void
xfs_icsb_unlock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_unlock_cntr(cntp);
	}
}

STATIC void
xfs_icsb_count(
	xfs_mount_t	*mp,
	xfs_icsb_cnts_t	*cnt,
	int		flags)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_lock_all_counters(mp);

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		cnt->icsb_icount += cntp->icsb_icount;
		cnt->icsb_ifree += cntp->icsb_ifree;
		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
	}

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_unlock_all_counters(mp);
}

STATIC int
xfs_icsb_counter_disabled(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
	return test_bit(field, &mp->m_icsb_counters);
}

STATIC int
xfs_icsb_disable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	xfs_icsb_cnts_t	cnt;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	xfs_icsb_lock_all_counters(mp);
	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
		/* drain back to superblock */

		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
		switch (field) {
		case XFS_SBS_ICOUNT:
			mp->m_sb.sb_icount = cnt.icsb_icount;
			break;
		case XFS_SBS_IFREE:
			mp->m_sb.sb_ifree = cnt.icsb_ifree;
			break;
		case XFS_SBS_FDBLOCKS:
			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
			break;
		default:
			BUG();
		}
	}

	xfs_icsb_unlock_all_counters(mp);

	return 0;
}

STATIC void
xfs_icsb_enable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	uint64_t	count,
	uint64_t	resid)
{
	xfs_icsb_cnts_t	*cntp;
	int		i;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	xfs_icsb_lock_all_counters(mp);
	for_each_online_cpu(i) {
		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
		switch (field) {
		case XFS_SBS_ICOUNT:
			cntp->icsb_icount = count + resid;
			break;
		case XFS_SBS_IFREE:
			cntp->icsb_ifree = count + resid;
			break;
		case XFS_SBS_FDBLOCKS:
			cntp->icsb_fdblocks = count + resid;
			break;
		default:
			BUG();
			break;
		}
		resid = 0;
	}
	clear_bit(field, &mp->m_icsb_counters);
	xfs_icsb_unlock_all_counters(mp);
}

STATIC void
xfs_icsb_sync_counters_int(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_icsb_cnts_t	cnt;
	int		s;

	/* take the superblock lock unless the caller already holds it */
	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		s = XFS_SB_LOCK(mp);

	xfs_icsb_count(mp, &cnt, flags);

	/* fold the summed per-cpu counters back into mp->m_sb fields */
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
		mp->m_sb.sb_icount = cnt.icsb_icount;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
		mp->m_sb.sb_ifree = cnt.icsb_ifree;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;

	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		XFS_SB_UNLOCK(mp, s);
}

/*
 * Accurate update of per-cpu counters to incore superblock
 */
STATIC void
xfs_icsb_sync_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_sync_counters_int(mp, 0);
}

/*
 * lazy addition used for things like df, background sb syncs, etc
 */
void
xfs_icsb_sync_counters_lazy(
	xfs_mount_t	*mp)
{
	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
}
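/*
 * Usage note (derived from this file): xfs_unmountfs_writesb() uses the
 * accurate xfs_icsb_sync_counters() before writing the superblock, while
 * statfs-style readers can call xfs_icsb_sync_counters_lazy(mp) and
 * tolerate a slightly stale answer, since XFS_ICSB_LAZY_COUNT skips
 * taking the per-cpu counter locks.
 */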
/*
 * Balance and enable/disable counters as necessary.
 *
 * Thresholds for re-enabling counters are somewhat magic.
 * inode counts are chosen to be the same number as a single
 * on disk allocation chunk per CPU, and free blocks is
 * something far enough from zero that we aren't going to thrash
 * when we get near ENOSPC.
 */
#define XFS_ICSB_INO_CNTR_REENABLE	64
#define XFS_ICSB_FDBLK_CNTR_REENABLE	512
STATIC void
xfs_icsb_balance_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		flags)
{
	uint64_t	count, resid = 0;
	int		weight = num_online_cpus();
	int		s;

	if (!(flags & XFS_ICSB_SB_LOCKED))
		s = XFS_SB_LOCK(mp);

	/* disable counter and sync counter */
	xfs_icsb_disable_counter(mp, field);

	/* update counters - first CPU gets residual */
	switch (field) {
	case XFS_SBS_ICOUNT:
		count = mp->m_sb.sb_icount;
		resid = do_div(count, weight);
		if (count < XFS_ICSB_INO_CNTR_REENABLE)
			goto out;
		break;
	case XFS_SBS_IFREE:
		count = mp->m_sb.sb_ifree;
		resid = do_div(count, weight);
		if (count < XFS_ICSB_INO_CNTR_REENABLE)
			goto out;
		break;
	case XFS_SBS_FDBLOCKS:
		count = mp->m_sb.sb_fdblocks;
		resid = do_div(count, weight);
		if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
			goto out;
		break;
	default:
		BUG();
		break;
	}

	xfs_icsb_enable_counter(mp, field, count, resid);
out:
	if (!(flags & XFS_ICSB_SB_LOCKED))
		XFS_SB_UNLOCK(mp, s);
}

STATIC int
xfs_icsb_modify_counters_int(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		delta,
	int		rsvd,
	int		flags)
{
	xfs_icsb_cnts_t	*icsbp;
	long long	lcounter;	/* long counter for 64 bit fields */
	int		cpu, s, locked = 0;
	int		ret = 0, balance_done = 0;

again:
	cpu = get_cpu();
	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
	xfs_icsb_lock_cntr(icsbp);
	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
		goto slow_path;

	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = icsbp->icsb_icount;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto slow_path;
		icsbp->icsb_icount = lcounter;
		break;

	case XFS_SBS_IFREE:
		lcounter = icsbp->icsb_ifree;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto slow_path;
		icsbp->icsb_ifree = lcounter;
		break;

	case XFS_SBS_FDBLOCKS:
		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);

		lcounter = icsbp->icsb_fdblocks;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto slow_path;
		icsbp->icsb_fdblocks = lcounter;
		break;
	default:
		BUG();
		break;
	}
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();
	if (locked)
		XFS_SB_UNLOCK(mp, s);
	return 0;

	/*
	 * The slow path needs to be run with the SB_LOCK
	 * held so that we prevent other threads from
	 * attempting to run this path at the same time.
	 * This provides exclusion for the balancing code,
	 * and exclusive fallback if the balance does not
	 * provide enough resources to continue in an unlocked
	 * manner.
	 */
slow_path:
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();

	/* need to hold superblock in case we need
	 * to disable a counter */
	if (!(flags & XFS_ICSB_SB_LOCKED)) {
		s = XFS_SB_LOCK(mp);
		locked = 1;
		flags |= XFS_ICSB_SB_LOCKED;
	}
	if (!balance_done) {
		xfs_icsb_balance_counter(mp, field, flags);
		balance_done = 1;
		goto again;
	} else {
		/*
		 * we might not have enough on this local
		 * cpu to allocate for a bulk request.
		 * We need to drain this field from all CPUs
		 * and disable the counter fastpath
		 */
		xfs_icsb_disable_counter(mp, field);
	}

	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);

	if (locked)
		XFS_SB_UNLOCK(mp, s);
	return ret;
}

STATIC int
xfs_icsb_modify_counters(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		delta,
	int		rsvd)
{
	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
}

/*
 * Called when superblock is already locked
 */
STATIC int
xfs_icsb_modify_counters_locked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		delta,
	int		rsvd)
{
	return xfs_icsb_modify_counters_int(mp, field, delta,
						rsvd, XFS_ICSB_SB_LOCKED);
}
#endif
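/*
 * Caller-side sketch (hypothetical, for illustration only): allocating
 * `len' data blocks ultimately funnels through the code above as
 *
 *	error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -((int)len), rsvd);
 *	if (error == ENOSPC)
 *		(fail or retry the allocation)
 *
 * which takes the per-cpu fast path while the counter is enabled and
 * falls back to the XFS_SB_LOCK-protected slow path otherwise.
 */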