1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_alloc.h" 17 #include "xfs_error.h" 18 #include "xfs_trans.h" 19 #include "xfs_buf_item.h" 20 #include "xfs_bmap_btree.h" 21 #include "xfs_alloc_btree.h" 22 #include "xfs_log.h" 23 #include "xfs_rmap_btree.h" 24 #include "xfs_refcount_btree.h" 25 #include "xfs_da_format.h" 26 #include "xfs_health.h" 27 #include "xfs_ag.h" 28 #include "xfs_rtbitmap.h" 29 #include "xfs_exchrange.h" 30 31 /* 32 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 33 */ 34 35 /* 36 * Check that all the V4 feature bits that the V5 filesystem format requires are 37 * correctly set. 38 */ 39 static bool 40 xfs_sb_validate_v5_features( 41 struct xfs_sb *sbp) 42 { 43 /* We must not have any unknown V4 feature bits set */ 44 if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) 45 return false; 46 47 /* 48 * The CRC bit is considered an invalid V4 flag, so we have to add it 49 * manually to the OKBITS mask. 50 */ 51 if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS | 52 XFS_SB_VERSION2_CRCBIT)) 53 return false; 54 55 /* Now check all the required V4 feature flags are set. 
*/ 56 57 #define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \ 58 XFS_SB_VERSION_ALIGNBIT | \ 59 XFS_SB_VERSION_LOGV2BIT | \ 60 XFS_SB_VERSION_EXTFLGBIT | \ 61 XFS_SB_VERSION_DIRV2BIT | \ 62 XFS_SB_VERSION_MOREBITSBIT) 63 64 #define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ 65 XFS_SB_VERSION2_ATTR2BIT | \ 66 XFS_SB_VERSION2_PROJID32BIT | \ 67 XFS_SB_VERSION2_CRCBIT) 68 69 if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS) 70 return false; 71 if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS) 72 return false; 73 return true; 74 } 75 76 /* 77 * We current support XFS v5 formats with known features and v4 superblocks with 78 * at least V2 directories. 79 */ 80 bool 81 xfs_sb_good_version( 82 struct xfs_sb *sbp) 83 { 84 /* 85 * All v5 filesystems are supported, but we must check that all the 86 * required v4 feature flags are enabled correctly as the code checks 87 * those flags and not for v5 support. 88 */ 89 if (xfs_sb_is_v5(sbp)) 90 return xfs_sb_validate_v5_features(sbp); 91 92 /* versions prior to v4 are not supported */ 93 if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4) 94 return false; 95 96 /* We must not have any unknown v4 feature bits set */ 97 if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || 98 ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && 99 (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) 100 return false; 101 102 /* V4 filesystems need v2 directories and unwritten extents */ 103 if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) 104 return false; 105 if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) 106 return false; 107 108 /* It's a supported v4 filesystem */ 109 return true; 110 } 111 112 uint64_t 113 xfs_sb_version_to_features( 114 struct xfs_sb *sbp) 115 { 116 uint64_t features = 0; 117 118 /* optional V4 features */ 119 if (sbp->sb_rblocks > 0) 120 features |= XFS_FEAT_REALTIME; 121 if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT) 122 features |= XFS_FEAT_NLINK; 123 if (sbp->sb_versionnum & 
XFS_SB_VERSION_ATTRBIT) 124 features |= XFS_FEAT_ATTR; 125 if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) 126 features |= XFS_FEAT_QUOTA; 127 if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT) 128 features |= XFS_FEAT_ALIGN; 129 if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT) 130 features |= XFS_FEAT_LOGV2; 131 if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT) 132 features |= XFS_FEAT_DALIGN; 133 if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT) 134 features |= XFS_FEAT_EXTFLG; 135 if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) 136 features |= XFS_FEAT_SECTOR; 137 if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) 138 features |= XFS_FEAT_ASCIICI; 139 if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) { 140 if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT) 141 features |= XFS_FEAT_LAZYSBCOUNT; 142 if (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT) 143 features |= XFS_FEAT_ATTR2; 144 if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) 145 features |= XFS_FEAT_PROJID32; 146 if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE) 147 features |= XFS_FEAT_FTYPE; 148 } 149 150 if (!xfs_sb_is_v5(sbp)) 151 return features; 152 153 /* Always on V5 features */ 154 features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG | 155 XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_ATTR2 | XFS_FEAT_PROJID32 | 156 XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO; 157 158 /* Optional V5 features */ 159 if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) 160 features |= XFS_FEAT_FINOBT; 161 if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) 162 features |= XFS_FEAT_RMAPBT; 163 if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK) 164 features |= XFS_FEAT_REFLINK; 165 if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) 166 features |= XFS_FEAT_INOBTCNT; 167 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) 168 features |= XFS_FEAT_FTYPE; 169 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) 170 features |= 
XFS_FEAT_SPINODES; 171 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) 172 features |= XFS_FEAT_META_UUID; 173 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME) 174 features |= XFS_FEAT_BIGTIME; 175 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) 176 features |= XFS_FEAT_NEEDSREPAIR; 177 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64) 178 features |= XFS_FEAT_NREXT64; 179 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE) 180 features |= XFS_FEAT_EXCHANGE_RANGE; 181 182 return features; 183 } 184 185 /* Check all the superblock fields we care about when reading one in. */ 186 STATIC int 187 xfs_validate_sb_read( 188 struct xfs_mount *mp, 189 struct xfs_sb *sbp) 190 { 191 if (!xfs_sb_is_v5(sbp)) 192 return 0; 193 194 /* 195 * Version 5 superblock feature mask validation. Reject combinations 196 * the kernel cannot support up front before checking anything else. 197 */ 198 if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { 199 xfs_warn(mp, 200 "Superblock has unknown compatible features (0x%x) enabled.", 201 (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); 202 xfs_warn(mp, 203 "Using a more recent kernel is recommended."); 204 } 205 206 if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 207 xfs_alert(mp, 208 "Superblock has unknown read-only compatible features (0x%x) enabled.", 209 (sbp->sb_features_ro_compat & 210 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 211 if (!xfs_is_readonly(mp)) { 212 xfs_warn(mp, 213 "Attempted to mount read-only compatible filesystem read-write."); 214 xfs_warn(mp, 215 "Filesystem can only be safely mounted read only."); 216 217 return -EINVAL; 218 } 219 } 220 if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { 221 xfs_warn(mp, 222 "Superblock has unknown incompatible features (0x%x) enabled.", 223 (sbp->sb_features_incompat & 224 XFS_SB_FEAT_INCOMPAT_UNKNOWN)); 225 xfs_warn(mp, 226 "Filesystem cannot be safely 
mounted by this kernel."); 227 return -EINVAL; 228 } 229 230 return 0; 231 } 232 233 /* Check all the superblock fields we care about when writing one out. */ 234 STATIC int 235 xfs_validate_sb_write( 236 struct xfs_mount *mp, 237 struct xfs_buf *bp, 238 struct xfs_sb *sbp) 239 { 240 /* 241 * Carry out additional sb summary counter sanity checks when we write 242 * the superblock. We skip this in the read validator because there 243 * could be newer superblocks in the log and if the values are garbage 244 * even after replay we'll recalculate them at the end of log mount. 245 * 246 * mkfs has traditionally written zeroed counters to inprogress and 247 * secondary superblocks, so allow this usage to continue because 248 * we never read counters from such superblocks. 249 */ 250 if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && 251 (sbp->sb_fdblocks > sbp->sb_dblocks || 252 !xfs_verify_icount(mp, sbp->sb_icount) || 253 sbp->sb_ifree > sbp->sb_icount)) { 254 xfs_warn(mp, "SB summary counter sanity check failed"); 255 return -EFSCORRUPTED; 256 } 257 258 if (!xfs_sb_is_v5(sbp)) 259 return 0; 260 261 /* 262 * Version 5 superblock feature mask validation. Reject combinations 263 * the kernel cannot support since we checked for unsupported bits in 264 * the read verifier, which means that memory is corrupt. 
265 */ 266 if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { 267 xfs_warn(mp, 268 "Corruption detected in superblock compatible features (0x%x)!", 269 (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); 270 return -EFSCORRUPTED; 271 } 272 273 if (!xfs_is_readonly(mp) && 274 xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 275 xfs_alert(mp, 276 "Corruption detected in superblock read-only compatible features (0x%x)!", 277 (sbp->sb_features_ro_compat & 278 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 279 return -EFSCORRUPTED; 280 } 281 if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { 282 xfs_warn(mp, 283 "Corruption detected in superblock incompatible features (0x%x)!", 284 (sbp->sb_features_incompat & 285 XFS_SB_FEAT_INCOMPAT_UNKNOWN)); 286 return -EFSCORRUPTED; 287 } 288 if (xfs_sb_has_incompat_log_feature(sbp, 289 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { 290 xfs_warn(mp, 291 "Corruption detected in superblock incompatible log features (0x%x)!", 292 (sbp->sb_features_log_incompat & 293 XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); 294 return -EFSCORRUPTED; 295 } 296 297 /* 298 * We can't read verify the sb LSN because the read verifier is called 299 * before the log is allocated and processed. We know the log is set up 300 * before write verifier calls, so check it here. 301 */ 302 if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) 303 return -EFSCORRUPTED; 304 305 return 0; 306 } 307 308 /* Check the validity of the SB. */ 309 STATIC int 310 xfs_validate_sb_common( 311 struct xfs_mount *mp, 312 struct xfs_buf *bp, 313 struct xfs_sb *sbp) 314 { 315 struct xfs_dsb *dsb = bp->b_addr; 316 uint32_t agcount = 0; 317 uint32_t rem; 318 bool has_dalign; 319 320 if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { 321 xfs_warn(mp, 322 "Superblock has bad magic number 0x%x. 
Not an XFS filesystem?", 323 be32_to_cpu(dsb->sb_magicnum)); 324 return -EWRONGFS; 325 } 326 327 if (!xfs_sb_good_version(sbp)) { 328 xfs_warn(mp, 329 "Superblock has unknown features enabled or corrupted feature masks."); 330 return -EWRONGFS; 331 } 332 333 /* 334 * Validate feature flags and state 335 */ 336 if (xfs_sb_is_v5(sbp)) { 337 if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { 338 xfs_notice(mp, 339 "Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)", 340 sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE); 341 return -EFSCORRUPTED; 342 } 343 344 /* V5 has a separate project quota inode */ 345 if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { 346 xfs_notice(mp, 347 "Version 5 of Super block has XFS_OQUOTA bits."); 348 return -EFSCORRUPTED; 349 } 350 351 /* 352 * Full inode chunks must be aligned to inode chunk size when 353 * sparse inodes are enabled to support the sparse chunk 354 * allocation algorithm and prevent overlapping inode records. 355 */ 356 if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) { 357 uint32_t align; 358 359 align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize 360 >> sbp->sb_blocklog; 361 if (sbp->sb_inoalignmt != align) { 362 xfs_warn(mp, 363 "Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", 364 sbp->sb_inoalignmt, align); 365 return -EINVAL; 366 } 367 } 368 } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | 369 XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { 370 xfs_notice(mp, 371 "Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); 372 return -EFSCORRUPTED; 373 } 374 375 if (unlikely( 376 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 377 xfs_warn(mp, 378 "filesystem is marked as having an external log; " 379 "specify logdev on the mount command line."); 380 return -EINVAL; 381 } 382 383 if (unlikely( 384 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 385 xfs_warn(mp, 386 "filesystem is marked as 
having an internal log; " 387 "do not specify logdev on the mount command line."); 388 return -EINVAL; 389 } 390 391 /* Compute agcount for this number of dblocks and agblocks */ 392 if (sbp->sb_agblocks) { 393 agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); 394 if (rem) 395 agcount++; 396 } 397 398 /* 399 * More sanity checking. Most of these were stolen directly from 400 * xfs_repair. 401 */ 402 if (unlikely( 403 sbp->sb_agcount <= 0 || 404 sbp->sb_sectsize < XFS_MIN_SECTORSIZE || 405 sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 406 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 407 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 408 sbp->sb_sectsize != (1 << sbp->sb_sectlog) || 409 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 410 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 411 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 412 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 413 sbp->sb_blocksize != (1 << sbp->sb_blocklog) || 414 sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 415 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 416 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 417 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 418 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 419 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 420 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || 421 XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || 422 XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || 423 sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || 424 agcount == 0 || agcount != sbp->sb_agcount || 425 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 426 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 427 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 428 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || 429 sbp->sb_dblocks == 0 || 430 sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || 431 sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || 432 sbp->sb_shared_vn != 0)) { 433 xfs_notice(mp, "SB sanity check 
failed"); 434 return -EFSCORRUPTED; 435 } 436 437 /* 438 * Logs that are too large are not supported at all. Reject them 439 * outright. Logs that are too small are tolerated on v4 filesystems, 440 * but we can only check that when mounting the log. Hence we skip 441 * those checks here. 442 */ 443 if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { 444 xfs_notice(mp, 445 "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", 446 sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); 447 return -EFSCORRUPTED; 448 } 449 450 if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { 451 xfs_warn(mp, 452 "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", 453 XFS_FSB_TO_B(mp, sbp->sb_logblocks), 454 XFS_MAX_LOG_BYTES); 455 return -EFSCORRUPTED; 456 } 457 458 /* 459 * Do not allow filesystems with corrupted log sector or stripe units to 460 * be mounted. We cannot safely size the iclogs or write to the log if 461 * the log stripe unit is not valid. 462 */ 463 if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { 464 if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { 465 xfs_notice(mp, 466 "log sector size in bytes/log2 (0x%x/0x%x) must match", 467 sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); 468 return -EFSCORRUPTED; 469 } 470 } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { 471 xfs_notice(mp, 472 "log sector size in bytes/log2 (0x%x/0x%x) are not zero", 473 sbp->sb_logsectsize, sbp->sb_logsectlog); 474 return -EFSCORRUPTED; 475 } 476 477 if (sbp->sb_logsunit > 1) { 478 if (sbp->sb_logsunit % sbp->sb_blocksize) { 479 xfs_notice(mp, 480 "log stripe unit 0x%x bytes must be a multiple of block size", 481 sbp->sb_logsunit); 482 return -EFSCORRUPTED; 483 } 484 if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { 485 xfs_notice(mp, 486 "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", 487 sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); 488 return -EFSCORRUPTED; 489 } 490 } 491 492 /* Validate the realtime geometry; stolen from xfs_repair */ 493 if 
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || 494 sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { 495 xfs_notice(mp, 496 "realtime extent sanity check failed"); 497 return -EFSCORRUPTED; 498 } 499 500 if (sbp->sb_rblocks == 0) { 501 if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || 502 sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) { 503 xfs_notice(mp, 504 "realtime zeroed geometry check failed"); 505 return -EFSCORRUPTED; 506 } 507 } else { 508 uint64_t rexts; 509 uint64_t rbmblocks; 510 511 rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize); 512 rbmblocks = howmany_64(sbp->sb_rextents, 513 NBBY * sbp->sb_blocksize); 514 515 if (!xfs_validate_rtextents(rexts) || 516 sbp->sb_rextents != rexts || 517 sbp->sb_rextslog != xfs_compute_rextslog(rexts) || 518 sbp->sb_rbmblocks != rbmblocks) { 519 xfs_notice(mp, 520 "realtime geometry sanity check failed"); 521 return -EFSCORRUPTED; 522 } 523 } 524 525 /* 526 * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) 527 * would imply the image is corrupted. 528 */ 529 has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT; 530 if (!!sbp->sb_unit ^ has_dalign) { 531 xfs_notice(mp, "SB stripe alignment sanity check failed"); 532 return -EFSCORRUPTED; 533 } 534 535 if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), 536 XFS_FSB_TO_B(mp, sbp->sb_width), 0, 537 xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) 538 return -EFSCORRUPTED; 539 540 /* 541 * Currently only very few inode sizes are supported. 542 */ 543 switch (sbp->sb_inodesize) { 544 case 256: 545 case 512: 546 case 1024: 547 case 2048: 548 break; 549 default: 550 xfs_warn(mp, "inode size of %d bytes not supported", 551 sbp->sb_inodesize); 552 return -ENOSYS; 553 } 554 555 return 0; 556 } 557 558 void 559 xfs_sb_quota_from_disk(struct xfs_sb *sbp) 560 { 561 /* 562 * older mkfs doesn't initialize quota inodes to NULLFSINO. 
This 563 * leads to in-core values having two different values for a quota 564 * inode to be invalid: 0 and NULLFSINO. Change it to a single value 565 * NULLFSINO. 566 * 567 * Note that this change affect only the in-core values. These 568 * values are not written back to disk unless any quota information 569 * is written to the disk. Even in that case, sb_pquotino field is 570 * not written to disk unless the superblock supports pquotino. 571 */ 572 if (sbp->sb_uquotino == 0) 573 sbp->sb_uquotino = NULLFSINO; 574 if (sbp->sb_gquotino == 0) 575 sbp->sb_gquotino = NULLFSINO; 576 if (sbp->sb_pquotino == 0) 577 sbp->sb_pquotino = NULLFSINO; 578 579 /* 580 * We need to do these manipilations only if we are working 581 * with an older version of on-disk superblock. 582 */ 583 if (xfs_sb_is_v5(sbp)) 584 return; 585 586 if (sbp->sb_qflags & XFS_OQUOTA_ENFD) 587 sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? 588 XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; 589 if (sbp->sb_qflags & XFS_OQUOTA_CHKD) 590 sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? 591 XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; 592 sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); 593 594 if (sbp->sb_qflags & XFS_PQUOTA_ACCT && 595 sbp->sb_gquotino != NULLFSINO) { 596 /* 597 * In older version of superblock, on-disk superblock only 598 * has sb_gquotino, and in-core superblock has both sb_gquotino 599 * and sb_pquotino. But, only one of them is supported at any 600 * point of time. So, if PQUOTA is set in disk superblock, 601 * copy over sb_gquotino to sb_pquotino. The NULLFSINO test 602 * above is to make sure we don't do this twice and wipe them 603 * both out! 
604 */ 605 sbp->sb_pquotino = sbp->sb_gquotino; 606 sbp->sb_gquotino = NULLFSINO; 607 } 608 } 609 610 static void 611 __xfs_sb_from_disk( 612 struct xfs_sb *to, 613 struct xfs_dsb *from, 614 bool convert_xquota) 615 { 616 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 617 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 618 to->sb_dblocks = be64_to_cpu(from->sb_dblocks); 619 to->sb_rblocks = be64_to_cpu(from->sb_rblocks); 620 to->sb_rextents = be64_to_cpu(from->sb_rextents); 621 memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); 622 to->sb_logstart = be64_to_cpu(from->sb_logstart); 623 to->sb_rootino = be64_to_cpu(from->sb_rootino); 624 to->sb_rbmino = be64_to_cpu(from->sb_rbmino); 625 to->sb_rsumino = be64_to_cpu(from->sb_rsumino); 626 to->sb_rextsize = be32_to_cpu(from->sb_rextsize); 627 to->sb_agblocks = be32_to_cpu(from->sb_agblocks); 628 to->sb_agcount = be32_to_cpu(from->sb_agcount); 629 to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); 630 to->sb_logblocks = be32_to_cpu(from->sb_logblocks); 631 to->sb_versionnum = be16_to_cpu(from->sb_versionnum); 632 to->sb_sectsize = be16_to_cpu(from->sb_sectsize); 633 to->sb_inodesize = be16_to_cpu(from->sb_inodesize); 634 to->sb_inopblock = be16_to_cpu(from->sb_inopblock); 635 memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); 636 to->sb_blocklog = from->sb_blocklog; 637 to->sb_sectlog = from->sb_sectlog; 638 to->sb_inodelog = from->sb_inodelog; 639 to->sb_inopblog = from->sb_inopblog; 640 to->sb_agblklog = from->sb_agblklog; 641 to->sb_rextslog = from->sb_rextslog; 642 to->sb_inprogress = from->sb_inprogress; 643 to->sb_imax_pct = from->sb_imax_pct; 644 to->sb_icount = be64_to_cpu(from->sb_icount); 645 to->sb_ifree = be64_to_cpu(from->sb_ifree); 646 to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); 647 to->sb_frextents = be64_to_cpu(from->sb_frextents); 648 to->sb_uquotino = be64_to_cpu(from->sb_uquotino); 649 to->sb_gquotino = be64_to_cpu(from->sb_gquotino); 650 to->sb_qflags = 
be16_to_cpu(from->sb_qflags); 651 to->sb_flags = from->sb_flags; 652 to->sb_shared_vn = from->sb_shared_vn; 653 to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); 654 to->sb_unit = be32_to_cpu(from->sb_unit); 655 to->sb_width = be32_to_cpu(from->sb_width); 656 to->sb_dirblklog = from->sb_dirblklog; 657 to->sb_logsectlog = from->sb_logsectlog; 658 to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); 659 to->sb_logsunit = be32_to_cpu(from->sb_logsunit); 660 to->sb_features2 = be32_to_cpu(from->sb_features2); 661 to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); 662 to->sb_features_compat = be32_to_cpu(from->sb_features_compat); 663 to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat); 664 to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); 665 to->sb_features_log_incompat = 666 be32_to_cpu(from->sb_features_log_incompat); 667 /* crc is only used on disk, not in memory; just init to 0 here. */ 668 to->sb_crc = 0; 669 to->sb_spino_align = be32_to_cpu(from->sb_spino_align); 670 to->sb_pquotino = be64_to_cpu(from->sb_pquotino); 671 to->sb_lsn = be64_to_cpu(from->sb_lsn); 672 /* 673 * sb_meta_uuid is only on disk if it differs from sb_uuid and the 674 * feature flag is set; if not set we keep it only in memory. 675 */ 676 if (xfs_sb_is_v5(to) && 677 (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)) 678 uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); 679 else 680 uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); 681 /* Convert on-disk flags to in-memory flags? 
*/ 682 if (convert_xquota) 683 xfs_sb_quota_from_disk(to); 684 } 685 686 void 687 xfs_sb_from_disk( 688 struct xfs_sb *to, 689 struct xfs_dsb *from) 690 { 691 __xfs_sb_from_disk(to, from, true); 692 } 693 694 static void 695 xfs_sb_quota_to_disk( 696 struct xfs_dsb *to, 697 struct xfs_sb *from) 698 { 699 uint16_t qflags = from->sb_qflags; 700 701 to->sb_uquotino = cpu_to_be64(from->sb_uquotino); 702 703 /* 704 * The in-memory superblock quota state matches the v5 on-disk format so 705 * just write them out and return 706 */ 707 if (xfs_sb_is_v5(from)) { 708 to->sb_qflags = cpu_to_be16(from->sb_qflags); 709 to->sb_gquotino = cpu_to_be64(from->sb_gquotino); 710 to->sb_pquotino = cpu_to_be64(from->sb_pquotino); 711 return; 712 } 713 714 /* 715 * For older superblocks (v4), the in-core version of sb_qflags do not 716 * have XFS_OQUOTA_* flags, whereas the on-disk version does. So, 717 * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. 718 */ 719 qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | 720 XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); 721 722 if (from->sb_qflags & 723 (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) 724 qflags |= XFS_OQUOTA_ENFD; 725 if (from->sb_qflags & 726 (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) 727 qflags |= XFS_OQUOTA_CHKD; 728 to->sb_qflags = cpu_to_be16(qflags); 729 730 /* 731 * GQUOTINO and PQUOTINO cannot be used together in versions 732 * of superblock that do not have pquotino. from->sb_flags 733 * tells us which quota is active and should be copied to 734 * disk. If neither are active, we should NULL the inode. 735 * 736 * In all cases, the separate pquotino must remain 0 because it 737 * is beyond the "end" of the valid non-pquotino superblock. 
738 */ 739 if (from->sb_qflags & XFS_GQUOTA_ACCT) 740 to->sb_gquotino = cpu_to_be64(from->sb_gquotino); 741 else if (from->sb_qflags & XFS_PQUOTA_ACCT) 742 to->sb_gquotino = cpu_to_be64(from->sb_pquotino); 743 else { 744 /* 745 * We can't rely on just the fields being logged to tell us 746 * that it is safe to write NULLFSINO - we should only do that 747 * if quotas are not actually enabled. Hence only write 748 * NULLFSINO if both in-core quota inodes are NULL. 749 */ 750 if (from->sb_gquotino == NULLFSINO && 751 from->sb_pquotino == NULLFSINO) 752 to->sb_gquotino = cpu_to_be64(NULLFSINO); 753 } 754 755 to->sb_pquotino = 0; 756 } 757 758 void 759 xfs_sb_to_disk( 760 struct xfs_dsb *to, 761 struct xfs_sb *from) 762 { 763 xfs_sb_quota_to_disk(to, from); 764 765 to->sb_magicnum = cpu_to_be32(from->sb_magicnum); 766 to->sb_blocksize = cpu_to_be32(from->sb_blocksize); 767 to->sb_dblocks = cpu_to_be64(from->sb_dblocks); 768 to->sb_rblocks = cpu_to_be64(from->sb_rblocks); 769 to->sb_rextents = cpu_to_be64(from->sb_rextents); 770 memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); 771 to->sb_logstart = cpu_to_be64(from->sb_logstart); 772 to->sb_rootino = cpu_to_be64(from->sb_rootino); 773 to->sb_rbmino = cpu_to_be64(from->sb_rbmino); 774 to->sb_rsumino = cpu_to_be64(from->sb_rsumino); 775 to->sb_rextsize = cpu_to_be32(from->sb_rextsize); 776 to->sb_agblocks = cpu_to_be32(from->sb_agblocks); 777 to->sb_agcount = cpu_to_be32(from->sb_agcount); 778 to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); 779 to->sb_logblocks = cpu_to_be32(from->sb_logblocks); 780 to->sb_versionnum = cpu_to_be16(from->sb_versionnum); 781 to->sb_sectsize = cpu_to_be16(from->sb_sectsize); 782 to->sb_inodesize = cpu_to_be16(from->sb_inodesize); 783 to->sb_inopblock = cpu_to_be16(from->sb_inopblock); 784 memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); 785 to->sb_blocklog = from->sb_blocklog; 786 to->sb_sectlog = from->sb_sectlog; 787 to->sb_inodelog = from->sb_inodelog; 788 
to->sb_inopblog = from->sb_inopblog; 789 to->sb_agblklog = from->sb_agblklog; 790 to->sb_rextslog = from->sb_rextslog; 791 to->sb_inprogress = from->sb_inprogress; 792 to->sb_imax_pct = from->sb_imax_pct; 793 to->sb_icount = cpu_to_be64(from->sb_icount); 794 to->sb_ifree = cpu_to_be64(from->sb_ifree); 795 to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); 796 to->sb_frextents = cpu_to_be64(from->sb_frextents); 797 798 to->sb_flags = from->sb_flags; 799 to->sb_shared_vn = from->sb_shared_vn; 800 to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); 801 to->sb_unit = cpu_to_be32(from->sb_unit); 802 to->sb_width = cpu_to_be32(from->sb_width); 803 to->sb_dirblklog = from->sb_dirblklog; 804 to->sb_logsectlog = from->sb_logsectlog; 805 to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); 806 to->sb_logsunit = cpu_to_be32(from->sb_logsunit); 807 808 /* 809 * We need to ensure that bad_features2 always matches features2. 810 * Hence we enforce that here rather than having to remember to do it 811 * everywhere else that updates features2. 812 */ 813 from->sb_bad_features2 = from->sb_features2; 814 to->sb_features2 = cpu_to_be32(from->sb_features2); 815 to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); 816 817 if (!xfs_sb_is_v5(from)) 818 return; 819 820 to->sb_features_compat = cpu_to_be32(from->sb_features_compat); 821 to->sb_features_ro_compat = 822 cpu_to_be32(from->sb_features_ro_compat); 823 to->sb_features_incompat = 824 cpu_to_be32(from->sb_features_incompat); 825 to->sb_features_log_incompat = 826 cpu_to_be32(from->sb_features_log_incompat); 827 to->sb_spino_align = cpu_to_be32(from->sb_spino_align); 828 to->sb_lsn = cpu_to_be64(from->sb_lsn); 829 if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) 830 uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); 831 } 832 833 /* 834 * If the superblock has the CRC feature bit set or the CRC field is non-null, 835 * check that the CRC is valid. 
We check the CRC field is non-null because a 836 * single bit error could clear the feature bit and unused parts of the 837 * superblock are supposed to be zero. Hence a non-null crc field indicates that 838 * we've potentially lost a feature bit and we should check it anyway. 839 * 840 * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the 841 * last field in V4 secondary superblocks. So for secondary superblocks, 842 * we are more forgiving, and ignore CRC failures if the primary doesn't 843 * indicate that the fs version is V5. 844 */ 845 static void 846 xfs_sb_read_verify( 847 struct xfs_buf *bp) 848 { 849 struct xfs_sb sb; 850 struct xfs_mount *mp = bp->b_mount; 851 struct xfs_dsb *dsb = bp->b_addr; 852 int error; 853 854 /* 855 * open code the version check to avoid needing to convert the entire 856 * superblock from disk order just to check the version number 857 */ 858 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) && 859 (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) == 860 XFS_SB_VERSION_5) || 861 dsb->sb_crc != 0)) { 862 863 if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { 864 /* Only fail bad secondaries on a known V5 filesystem */ 865 if (xfs_buf_daddr(bp) == XFS_SB_DADDR || 866 xfs_has_crc(mp)) { 867 error = -EFSBADCRC; 868 goto out_error; 869 } 870 } 871 } 872 873 /* 874 * Check all the superblock fields. Don't byteswap the xquota flags 875 * because _verify_common checks the on-disk values. 876 */ 877 __xfs_sb_from_disk(&sb, dsb, false); 878 error = xfs_validate_sb_common(mp, bp, &sb); 879 if (error) 880 goto out_error; 881 error = xfs_validate_sb_read(mp, &sb); 882 883 out_error: 884 if (error == -EFSCORRUPTED || error == -EFSBADCRC) 885 xfs_verifier_error(bp, error, __this_address); 886 else if (error) 887 xfs_buf_ioerror(bp, error); 888 } 889 890 /* 891 * We may be probed for a filesystem match, so we may not want to emit 892 * messages when the superblock buffer is not actually an XFS superblock. 
893 * If we find an XFS superblock, then run a normal, noisy mount because we are 894 * really going to mount it and want to know about errors. 895 */ 896 static void 897 xfs_sb_quiet_read_verify( 898 struct xfs_buf *bp) 899 { 900 struct xfs_dsb *dsb = bp->b_addr; 901 902 if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { 903 /* XFS filesystem, verify noisily! */ 904 xfs_sb_read_verify(bp); 905 return; 906 } 907 /* quietly fail */ 908 xfs_buf_ioerror(bp, -EWRONGFS); 909 } 910 911 static void 912 xfs_sb_write_verify( 913 struct xfs_buf *bp) 914 { 915 struct xfs_sb sb; 916 struct xfs_mount *mp = bp->b_mount; 917 struct xfs_buf_log_item *bip = bp->b_log_item; 918 struct xfs_dsb *dsb = bp->b_addr; 919 int error; 920 921 /* 922 * Check all the superblock fields. Don't byteswap the xquota flags 923 * because _verify_common checks the on-disk values. 924 */ 925 __xfs_sb_from_disk(&sb, dsb, false); 926 error = xfs_validate_sb_common(mp, bp, &sb); 927 if (error) 928 goto out_error; 929 error = xfs_validate_sb_write(mp, bp, &sb); 930 if (error) 931 goto out_error; 932 933 if (!xfs_sb_is_v5(&sb)) 934 return; 935 936 if (bip) 937 dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 938 939 xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); 940 return; 941 942 out_error: 943 xfs_verifier_error(bp, error, __this_address); 944 } 945 946 const struct xfs_buf_ops xfs_sb_buf_ops = { 947 .name = "xfs_sb", 948 .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, 949 .verify_read = xfs_sb_read_verify, 950 .verify_write = xfs_sb_write_verify, 951 }; 952 953 const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { 954 .name = "xfs_sb_quiet", 955 .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, 956 .verify_read = xfs_sb_quiet_read_verify, 957 .verify_write = xfs_sb_write_verify, 958 }; 959 960 /* 961 * xfs_mount_common 962 * 963 * Mount initialization code establishing various mount 964 * fields from the superblock associated with the given 965 * mount structure. 
966 * 967 * Inode geometry are calculated in xfs_ialloc_setup_geometry. 968 */ 969 void 970 xfs_sb_mount_common( 971 struct xfs_mount *mp, 972 struct xfs_sb *sbp) 973 { 974 mp->m_agfrotor = 0; 975 atomic_set(&mp->m_agirotor, 0); 976 mp->m_maxagi = mp->m_sb.sb_agcount; 977 mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; 978 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 979 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 980 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 981 mp->m_blockmask = sbp->sb_blocksize - 1; 982 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 983 mp->m_blockwmask = mp->m_blockwsize - 1; 984 mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); 985 mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); 986 987 mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); 988 mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); 989 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; 990 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; 991 992 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); 993 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); 994 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; 995 mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; 996 997 mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1); 998 mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0); 999 mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; 1000 mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; 1001 1002 mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true); 1003 mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false); 1004 mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; 1005 mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; 1006 1007 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 1008 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 1009 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); 1010 } 1011 1012 /* 1013 * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock 1014 * into the 
superblock buffer to be logged. It does not provide the higher
 * level of locking that is needed to protect the in-core superblock from
 * concurrent access.
 */
void
xfs_log_sb(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*bp = xfs_trans_getsb(tp);

	/*
	 * Lazy sb counters don't update the in-core superblock so do that now.
	 * If this is at unmount, the counters will be exactly correct, but at
	 * any other time they will only be ballpark correct because of
	 * reservations that have been taken out of the percpu counters. If we
	 * have an unclean shutdown, this will be corrected by log recovery
	 * rebuilding the counters from the AGF block counts.
	 *
	 * The percpu sums are signed and the superblock fields are unsigned.
	 * Use percpu_counter_sum_positive() so that a negative sum is clamped
	 * to zero instead of being stored as a huge unsigned value.
	 *
	 * Do not update sb_frextents here because it is not part of the lazy
	 * sb counters, despite having a percpu counter. It is always kept
	 * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas()
	 * and hence we don't need to update it here.
	 */
	if (xfs_has_lazysbcount(mp)) {
		mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
		/* The free inode count can never exceed the inode count. */
		mp->m_sb.sb_ifree = min_t(uint64_t,
				percpu_counter_sum_positive(&mp->m_ifree),
				mp->m_sb.sb_icount);
		mp->m_sb.sb_fdblocks =
				percpu_counter_sum_positive(&mp->m_fdblocks);
	}

	/* Format the in-core superblock into the buffer and log all of it. */
	xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
	xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
}

/*
 * xfs_sync_sb
 *
 * Sync the superblock to disk.
 *
 * Note that the caller is responsible for checking the frozen state of the
 * filesystem. This procedure uses the non-blocking transaction allocator and
 * thus will allow modifications to a frozen fs. This is required because this
 * code can be called during the process of freezing where use of the high-level
 * allocator would deadlock.
1061 */ 1062 int 1063 xfs_sync_sb( 1064 struct xfs_mount *mp, 1065 bool wait) 1066 { 1067 struct xfs_trans *tp; 1068 int error; 1069 1070 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 1071 XFS_TRANS_NO_WRITECOUNT, &tp); 1072 if (error) 1073 return error; 1074 1075 xfs_log_sb(tp); 1076 if (wait) 1077 xfs_trans_set_sync(tp); 1078 return xfs_trans_commit(tp); 1079 } 1080 1081 /* 1082 * Update all the secondary superblocks to match the new state of the primary. 1083 * Because we are completely overwriting all the existing fields in the 1084 * secondary superblock buffers, there is no need to read them in from disk. 1085 * Just get a new buffer, stamp it and write it. 1086 * 1087 * The sb buffers need to be cached here so that we serialise against other 1088 * operations that access the secondary superblocks, but we don't want to keep 1089 * them in memory once it is written so we mark it as a one-shot buffer. 1090 */ 1091 int 1092 xfs_update_secondary_sbs( 1093 struct xfs_mount *mp) 1094 { 1095 struct xfs_perag *pag; 1096 xfs_agnumber_t agno = 1; 1097 int saved_error = 0; 1098 int error = 0; 1099 LIST_HEAD (buffer_list); 1100 1101 /* update secondary superblocks. */ 1102 for_each_perag_from(mp, agno, pag) { 1103 struct xfs_buf *bp; 1104 1105 error = xfs_buf_get(mp->m_ddev_targp, 1106 XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR), 1107 XFS_FSS_TO_BB(mp, 1), &bp); 1108 /* 1109 * If we get an error reading or writing alternate superblocks, 1110 * continue. xfs_repair chooses the "best" superblock based 1111 * on most matches; if we break early, we'll leave more 1112 * superblocks un-updated than updated, and xfs_repair may 1113 * pick them over the properly-updated primary. 
1114 */ 1115 if (error) { 1116 xfs_warn(mp, 1117 "error allocating secondary superblock for ag %d", 1118 pag->pag_agno); 1119 if (!saved_error) 1120 saved_error = error; 1121 continue; 1122 } 1123 1124 bp->b_ops = &xfs_sb_buf_ops; 1125 xfs_buf_oneshot(bp); 1126 xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 1127 xfs_sb_to_disk(bp->b_addr, &mp->m_sb); 1128 xfs_buf_delwri_queue(bp, &buffer_list); 1129 xfs_buf_relse(bp); 1130 1131 /* don't hold too many buffers at once */ 1132 if (agno % 16) 1133 continue; 1134 1135 error = xfs_buf_delwri_submit(&buffer_list); 1136 if (error) { 1137 xfs_warn(mp, 1138 "write error %d updating a secondary superblock near ag %d", 1139 error, pag->pag_agno); 1140 if (!saved_error) 1141 saved_error = error; 1142 continue; 1143 } 1144 } 1145 error = xfs_buf_delwri_submit(&buffer_list); 1146 if (error) { 1147 xfs_warn(mp, 1148 "write error %d updating a secondary superblock near ag %d", 1149 error, agno); 1150 } 1151 1152 return saved_error ? saved_error : error; 1153 } 1154 1155 /* 1156 * Same behavior as xfs_sync_sb, except that it is always synchronous and it 1157 * also writes the superblock buffer to disk sector 0 immediately. 
1158 */ 1159 int 1160 xfs_sync_sb_buf( 1161 struct xfs_mount *mp) 1162 { 1163 struct xfs_trans *tp; 1164 struct xfs_buf *bp; 1165 int error; 1166 1167 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp); 1168 if (error) 1169 return error; 1170 1171 bp = xfs_trans_getsb(tp); 1172 xfs_log_sb(tp); 1173 xfs_trans_bhold(tp, bp); 1174 xfs_trans_set_sync(tp); 1175 error = xfs_trans_commit(tp); 1176 if (error) 1177 goto out; 1178 /* 1179 * write out the sb buffer to get the changes to disk 1180 */ 1181 error = xfs_bwrite(bp); 1182 out: 1183 xfs_buf_relse(bp); 1184 return error; 1185 } 1186 1187 void 1188 xfs_fs_geometry( 1189 struct xfs_mount *mp, 1190 struct xfs_fsop_geom *geo, 1191 int struct_version) 1192 { 1193 struct xfs_sb *sbp = &mp->m_sb; 1194 1195 memset(geo, 0, sizeof(struct xfs_fsop_geom)); 1196 1197 geo->blocksize = sbp->sb_blocksize; 1198 geo->rtextsize = sbp->sb_rextsize; 1199 geo->agblocks = sbp->sb_agblocks; 1200 geo->agcount = sbp->sb_agcount; 1201 geo->logblocks = sbp->sb_logblocks; 1202 geo->sectsize = sbp->sb_sectsize; 1203 geo->inodesize = sbp->sb_inodesize; 1204 geo->imaxpct = sbp->sb_imax_pct; 1205 geo->datablocks = sbp->sb_dblocks; 1206 geo->rtblocks = sbp->sb_rblocks; 1207 geo->rtextents = sbp->sb_rextents; 1208 geo->logstart = sbp->sb_logstart; 1209 BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); 1210 memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); 1211 1212 if (struct_version < 2) 1213 return; 1214 1215 geo->sunit = sbp->sb_unit; 1216 geo->swidth = sbp->sb_width; 1217 1218 if (struct_version < 3) 1219 return; 1220 1221 geo->version = XFS_FSOP_GEOM_VERSION; 1222 geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | 1223 XFS_FSOP_GEOM_FLAGS_DIRV2 | 1224 XFS_FSOP_GEOM_FLAGS_EXTFLG; 1225 if (xfs_has_attr(mp)) 1226 geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; 1227 if (xfs_has_quota(mp)) 1228 geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; 1229 if (xfs_has_align(mp)) 1230 geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; 1231 if (xfs_has_dalign(mp)) 1232 
geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; 1233 if (xfs_has_asciici(mp)) 1234 geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; 1235 if (xfs_has_lazysbcount(mp)) 1236 geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; 1237 if (xfs_has_attr2(mp)) 1238 geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR2; 1239 if (xfs_has_projid32(mp)) 1240 geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; 1241 if (xfs_has_crc(mp)) 1242 geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; 1243 if (xfs_has_ftype(mp)) 1244 geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; 1245 if (xfs_has_finobt(mp)) 1246 geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; 1247 if (xfs_has_sparseinodes(mp)) 1248 geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; 1249 if (xfs_has_rmapbt(mp)) 1250 geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; 1251 if (xfs_has_reflink(mp)) 1252 geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; 1253 if (xfs_has_bigtime(mp)) 1254 geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; 1255 if (xfs_has_inobtcounts(mp)) 1256 geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; 1257 if (xfs_has_sector(mp)) { 1258 geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; 1259 geo->logsectsize = sbp->sb_logsectsize; 1260 } else { 1261 geo->logsectsize = BBSIZE; 1262 } 1263 if (xfs_has_large_extent_counts(mp)) 1264 geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; 1265 if (xfs_has_exchange_range(mp)) 1266 geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; 1267 geo->rtsectsize = sbp->sb_blocksize; 1268 geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); 1269 1270 if (struct_version < 4) 1271 return; 1272 1273 if (xfs_has_logv2(mp)) 1274 geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; 1275 1276 geo->logsunit = sbp->sb_logsunit; 1277 1278 if (struct_version < 5) 1279 return; 1280 1281 geo->version = XFS_FSOP_GEOM_VERSION_V5; 1282 } 1283 1284 /* Read a secondary superblock. 
*/ 1285 int 1286 xfs_sb_read_secondary( 1287 struct xfs_mount *mp, 1288 struct xfs_trans *tp, 1289 xfs_agnumber_t agno, 1290 struct xfs_buf **bpp) 1291 { 1292 struct xfs_buf *bp; 1293 int error; 1294 1295 ASSERT(agno != 0 && agno != NULLAGNUMBER); 1296 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 1297 XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 1298 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); 1299 if (xfs_metadata_is_sick(error)) 1300 xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB); 1301 if (error) 1302 return error; 1303 xfs_buf_set_ref(bp, XFS_SSB_REF); 1304 *bpp = bp; 1305 return 0; 1306 } 1307 1308 /* Get an uninitialised secondary superblock buffer. */ 1309 int 1310 xfs_sb_get_secondary( 1311 struct xfs_mount *mp, 1312 struct xfs_trans *tp, 1313 xfs_agnumber_t agno, 1314 struct xfs_buf **bpp) 1315 { 1316 struct xfs_buf *bp; 1317 int error; 1318 1319 ASSERT(agno != 0 && agno != NULLAGNUMBER); 1320 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, 1321 XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 1322 XFS_FSS_TO_BB(mp, 1), 0, &bp); 1323 if (error) 1324 return error; 1325 bp->b_ops = &xfs_sb_buf_ops; 1326 xfs_buf_oneshot(bp); 1327 *bpp = bp; 1328 return 0; 1329 } 1330 1331 /* 1332 * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users 1333 * won't be confused by values in error messages. This function returns false 1334 * if the stripe geometry is invalid and the caller is unable to repair the 1335 * stripe configuration later in the mount process. 
1336 */ 1337 bool 1338 xfs_validate_stripe_geometry( 1339 struct xfs_mount *mp, 1340 __s64 sunit, 1341 __s64 swidth, 1342 int sectorsize, 1343 bool may_repair, 1344 bool silent) 1345 { 1346 if (swidth > INT_MAX) { 1347 if (!silent) 1348 xfs_notice(mp, 1349 "stripe width (%lld) is too large", swidth); 1350 goto check_override; 1351 } 1352 1353 if (sunit > swidth) { 1354 if (!silent) 1355 xfs_notice(mp, 1356 "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); 1357 goto check_override; 1358 } 1359 1360 if (sectorsize && (int)sunit % sectorsize) { 1361 if (!silent) 1362 xfs_notice(mp, 1363 "stripe unit (%lld) must be a multiple of the sector size (%d)", 1364 sunit, sectorsize); 1365 goto check_override; 1366 } 1367 1368 if (sunit && !swidth) { 1369 if (!silent) 1370 xfs_notice(mp, 1371 "invalid stripe unit (%lld) and stripe width of 0", sunit); 1372 goto check_override; 1373 } 1374 1375 if (!sunit && swidth) { 1376 if (!silent) 1377 xfs_notice(mp, 1378 "invalid stripe width (%lld) and stripe unit of 0", swidth); 1379 goto check_override; 1380 } 1381 1382 if (sunit && (int)swidth % (int)sunit) { 1383 if (!silent) 1384 xfs_notice(mp, 1385 "stripe width (%lld) must be a multiple of the stripe unit (%lld)", 1386 swidth, sunit); 1387 goto check_override; 1388 } 1389 return true; 1390 1391 check_override: 1392 if (!may_repair) 1393 return false; 1394 /* 1395 * During mount, mp->m_dalign will not be set unless the sunit mount 1396 * option was set. If it was set, ignore the bad stripe alignment values 1397 * and allow the validation and overwrite later in the mount process to 1398 * attempt to overwrite the bad stripe alignment values with the values 1399 * supplied by mount options. 
1400 */ 1401 if (!mp->m_dalign) 1402 return false; 1403 if (!silent) 1404 xfs_notice(mp, 1405 "Will try to correct with specified mount options sunit (%d) and swidth (%d)", 1406 BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); 1407 return true; 1408 } 1409 1410 /* 1411 * Compute the maximum level number of the realtime summary file, as defined by 1412 * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct 1413 * use of rt volumes with more than 2^32 extents. 1414 */ 1415 uint8_t 1416 xfs_compute_rextslog( 1417 xfs_rtbxlen_t rtextents) 1418 { 1419 if (!rtextents) 1420 return 0; 1421 return xfs_highbit64(rtextents); 1422 } 1423