// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs_platform.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
#include "xfs_exchmaps_item.h"
#include "xfs_parent.h"
#include "xfs_rtalloc.h"
#include "xfs_zone_alloc.h"
#include "xfs_healthmon.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fserror.h>

static const struct super_operations xfs_super_operations;

static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

/* Were quota mount options provided?  Must use the upper 16 bits of qflags. */
#define XFS_QFLAGS_MNTOPTS	(1U << 31)

static void
xfs_mount_set_dax_mode(
	struct xfs_mount *mp,
	enum xfs_dax_mode mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_features |= XFS_FEAT_DAX_NEVER;
		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
		break;
	}
}

static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};
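/*
 * Example (illustrative): "-o dax=never" resolves through dax_param_enums
 * to XFS_DAX_NEVER, so xfs_mount_set_dax_mode() sets XFS_FEAT_DAX_NEVER
 * and clears XFS_FEAT_DAX_ALWAYS; a bare "-o dax" is parsed as Opt_dax
 * and behaves like "dax=always".
 */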
/*
 * Table driven mount option parser.
 */
enum {
	Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
	Opt_largeio, Opt_nolargeio,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
	Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, Opt_errortag,
};

#define fsparam_dead(NAME) \
	__fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL)

static const struct fs_parameter_spec xfs_fs_parameters[] = {
	/*
	 * These mount options were supposed to be deprecated in September 2025
	 * but the deprecation warning was buggy, so not all users were
	 * notified.  The deprecation is now obnoxiously loud and postponed to
	 * September 2030.
	 */
	fsparam_dead("attr2"),
	fsparam_dead("noattr2"),
	fsparam_dead("ikeep"),
	fsparam_dead("noikeep"),

	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	fsparam_u32("max_open_zones",	Opt_max_open_zones),
	fsparam_flag("lifetime",	Opt_lifetime),
	fsparam_flag("nolifetime",	Opt_nolifetime),
	fsparam_string("max_atomic_write",	Opt_max_atomic_write),
	fsparam_string("errortag",	Opt_errortag),
	{}
};

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};
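/*
 * Sample /proc/mounts fragment (sketch): a mount with wsync and discard
 * enabled and no quota accounting would show something like
 * ",wsync,discard,inode64,noquota" from the table walk and seq_printf()
 * calls in xfs_fs_show_options() below.
 */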
",norecovery" }, 198 { XFS_FEAT_FILESTREAMS, ",filestreams" }, 199 { XFS_FEAT_GRPID, ",grpid" }, 200 { XFS_FEAT_DISCARD, ",discard" }, 201 { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, 202 { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, 203 { XFS_FEAT_DAX_NEVER, ",dax=never" }, 204 { XFS_FEAT_NOLIFETIME, ",nolifetime" }, 205 { 0, NULL } 206 }; 207 struct xfs_mount *mp = XFS_M(root->d_sb); 208 struct proc_xfs_info *xfs_infop; 209 210 for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { 211 if (mp->m_features & xfs_infop->flag) 212 seq_puts(m, xfs_infop->str); 213 } 214 215 seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); 216 217 if (xfs_has_allocsize(mp)) 218 seq_printf(m, ",allocsize=%dk", 219 (1 << mp->m_allocsize_log) >> 10); 220 221 if (mp->m_logbufs > 0) 222 seq_printf(m, ",logbufs=%d", mp->m_logbufs); 223 if (mp->m_logbsize > 0) 224 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); 225 226 if (mp->m_logname) 227 seq_show_option(m, "logdev", mp->m_logname); 228 if (mp->m_rtname) 229 seq_show_option(m, "rtdev", mp->m_rtname); 230 231 if (mp->m_dalign > 0) 232 seq_printf(m, ",sunit=%d", 233 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 234 if (mp->m_swidth > 0) 235 seq_printf(m, ",swidth=%d", 236 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 237 238 if (mp->m_qflags & XFS_UQUOTA_ENFD) 239 seq_puts(m, ",usrquota"); 240 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 241 seq_puts(m, ",uqnoenforce"); 242 243 if (mp->m_qflags & XFS_PQUOTA_ENFD) 244 seq_puts(m, ",prjquota"); 245 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 246 seq_puts(m, ",pqnoenforce"); 247 248 if (mp->m_qflags & XFS_GQUOTA_ENFD) 249 seq_puts(m, ",grpquota"); 250 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 251 seq_puts(m, ",gqnoenforce"); 252 253 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 254 seq_puts(m, ",noquota"); 255 256 if (mp->m_max_open_zones) 257 seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones); 258 if (mp->m_awu_max_bytes) 259 seq_printf(m, ",max_atomic_write=%lluk", 260 mp->m_awu_max_bytes >> 10); 261 262 return 0; 263 } 264 265 static bool 266 xfs_set_inode_alloc_perag( 267 struct xfs_perag *pag, 268 xfs_ino_t ino, 269 xfs_agnumber_t max_metadata) 270 { 271 if (!xfs_is_inode32(pag_mount(pag))) { 272 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 273 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 274 return false; 275 } 276 277 if (ino > XFS_MAXINUMBER_32) { 278 clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 279 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 280 return false; 281 } 282 283 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 284 if (pag_agno(pag) < max_metadata) 285 set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 286 else 287 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 288 return true; 289 } 290 291 /* 292 * Set parameters for inode allocation heuristics, taking into account 293 * filesystem size and inode32/inode64 mount options; i.e. specifically 294 * whether or not XFS_FEAT_SMALL_INUMS is set. 295 * 296 * Inode allocation patterns are altered only if inode32 is requested 297 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. 298 * If altered, XFS_OPSTATE_INODE32 is set as well. 299 * 300 * An agcount independent of that in the mount structure is provided 301 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated 302 * to the potentially higher ag count. 303 * 304 * Returns the maximum AG index which may contain inodes. 
/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t agcount)
{
	xfs_agnumber_t index;
	xfs_agnumber_t maxagi = 0;
	xfs_sb_t *sbp = &mp->m_sb;
	xfs_agnumber_t max_metadata;
	xfs_agino_t agino;
	xfs_ino_t ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
	 * the allocator to accommodate the request.
	 */
	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
		xfs_set_inode32(mp);
	else
		xfs_clear_inode32(mp);

	for (index = 0; index < agcount; index++) {
		struct xfs_perag *pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);
		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
			maxagi++;
		xfs_perag_put(pag);
	}

	return xfs_is_inode32(mp) ? maxagi : agcount;
}

static int
xfs_setup_dax_always(
	struct xfs_mount *mp)
{
	if (!mp->m_ddev_targp->bt_daxdev &&
	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
		xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
		goto disable_dax;
	}

	if (mp->m_super->s_blocksize != PAGE_SIZE) {
		xfs_alert(mp,
			"DAX not supported for blocksize. Turning off DAX.");
		goto disable_dax;
	}

	if (xfs_has_reflink(mp) &&
	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
		xfs_alert(mp,
			"DAX and reflink cannot work with multi-partitions!");
		return -EINVAL;
	}

	return 0;

disable_dax:
	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
	return 0;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t *mp,
	const char *name,
	struct file **bdev_filep)
{
	int error = 0;
	blk_mode_t mode;

	mode = sb_open_mode(mp->m_super->s_flags);
	*bdev_filep = bdev_file_open_by_path(name, mode,
			mp->m_super, &fs_holder_ops);
	if (IS_ERR(*bdev_filep)) {
		error = PTR_ERR(*bdev_filep);
		*bdev_filep = NULL;
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}
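/*
 * Note (assumption about sb_open_mode() semantics): a read-only mount
 * opens the log and realtime devices in xfs_blkdev_get() above without
 * write access, so a read-only logdev/rtdev only matters again at a
 * later ro->rw transition (see the xfs_readonly_buftarg() checks in
 * xfs_remount_rw()).
 */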
STATIC void
xfs_shutdown_devices(
	struct xfs_mount *mp)
{
	/*
	 * Udev is triggered whenever anyone closes a block device or unmounts
	 * a file system on a block device.
	 * The default udev rules invoke blkid to read the fs super and create
	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
	 * reads through the page cache.
	 *
	 * xfs_db also uses buffered reads to examine metadata.  There is no
	 * coordination between xfs_db and udev, which means that they can run
	 * concurrently.  Note there is no coordination between the kernel and
	 * blkid either.
	 *
	 * On a system with 64k pages, the page cache can cache the superblock
	 * and the root inode (and hence the root directory) with the same 64k
	 * page.  If udev spawns blkid after the mkfs and the system is busy
	 * enough that it is still running when xfs_db starts up, they'll both
	 * read from the same page in the pagecache.
	 *
	 * The unmount writes updated inode metadata to disk directly.  The XFS
	 * buffer cache does not use the bdev pagecache, so it needs to
	 * invalidate that pagecache on unmount.  If the above scenario occurs,
	 * the pagecache no longer reflects what's on disk, xfs_db reads the
	 * stale metadata, and fails to find /a.  Most of the time this
	 * succeeds because closing a bdev invalidates the page cache, but when
	 * processes race, everyone loses.
	 */
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
	}
	if (mp->m_rtdev_targp) {
		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
	}
	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}
/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount *mp)
{
	struct super_block *sb = mp->m_super;
	struct block_device *ddev = sb->s_bdev;
	struct file *logdev_file = NULL, *rtdev_file = NULL;
	int error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
		if (error)
			return error;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
		if (error)
			goto out_close_logdev;

		if (file_bdev(rtdev_file) == ddev ||
		    (logdev_file &&
		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
	if (IS_ERR(mp->m_ddev_targp)) {
		error = PTR_ERR(mp->m_ddev_targp);
		mp->m_ddev_targp = NULL;
		goto out_close_rtdev;
	}

	if (rtdev_file) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
		if (IS_ERR(mp->m_rtdev_targp)) {
			error = PTR_ERR(mp->m_rtdev_targp);
			mp->m_rtdev_targp = NULL;
			goto out_free_ddev_targ;
		}
	}

	if (logdev_file && file_bdev(logdev_file) != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
		if (IS_ERR(mp->m_logdev_targp)) {
			error = PTR_ERR(mp->m_logdev_targp);
			mp->m_logdev_targp = NULL;
			goto out_free_rtdev_targ;
		}
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
		/* Handle won't be used, drop it */
		if (logdev_file)
			bdev_fput(logdev_file);
	}

	return 0;

out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
	if (rtdev_file)
		bdev_fput(rtdev_file);
out_close_logdev:
	if (logdev_file)
		bdev_fput(logdev_file);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount *mp)
{
	int error;

	error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize,
			mp->m_sb.sb_dblocks);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int log_sector_size = BBSIZE;

		if (xfs_has_sector(mp))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_configure_buftarg(mp->m_logdev_targp,
				log_sector_size, mp->m_sb.sb_logblocks);
		if (error)
			return error;
	}

	if (mp->m_sb.sb_rtstart) {
		if (mp->m_rtdev_targp) {
			xfs_warn(mp,
		"can't use internal and external rtdev at the same time");
			return -EINVAL;
		}
		mp->m_rtdev_targp = mp->m_ddev_targp;
	} else if (mp->m_rtname) {
		error = xfs_configure_buftarg(mp->m_rtdev_targp,
				mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks);
		if (error)
			return error;
	}

	return 0;
}
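/*
 * Note on the unwind pattern below (sketch): each workqueue is destroyed
 * in reverse order of allocation on failure, and the max_active value of
 * 1 for xfs-buf and xfs-inodegc limits those queues to one concurrent
 * work item (per CPU for WQ_PERCPU queues).
 */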
STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount *mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
			1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
			0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
			0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_unwritten;

	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_blockgc_wq)
		goto out_destroy_reclaim;

	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU),
			1, mp->m_super->s_id);
	if (!mp->m_inodegc_wq)
		goto out_destroy_blockgc;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0,
			mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_inodegc;

	return 0;

out_destroy_inodegc:
	destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
	destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount *mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_blockgc_wq);
	destroy_workqueue(mp->m_inodegc_wq);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
	struct work_struct *work)
{
	struct xfs_mount *mp = container_of(work, struct xfs_mount,
					    m_flush_inodes_work);
	struct super_block *sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to disk.  Must not be called while holding an XFS_ILOCK
 * or a page lock.  We use sync_inodes_sb() here to ensure we block while
 * waiting for IO to complete so that we effectively throttle multiple callers
 * to the rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount *mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}
/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block *sb)
{
	BUG();
	return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode *inode)
{
	struct xfs_inode *ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);
	xfs_inode_mark_reclaimable(ip);
}

/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab.  This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void *inode)
{
	struct xfs_inode *ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);
	init_rwsem(&ip->i_lock);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit().  Hence a lookup that might clear this flag will not
 * be racing with us, and it avoids needing to grab a spinlock here for every
 * inode we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode *inode)
{
	struct xfs_inode *ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
		return 0;
	}

	return inode_generic_drop(inode);
}

STATIC void
xfs_fs_evict_inode(
	struct inode *inode)
{
	if (IS_DAX(inode))
		dax_break_layout_final(inode);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	if (IS_ENABLED(CONFIG_XFS_RT) &&
	    S_ISREG(inode->i_mode) && inode->i_private) {
		xfs_open_zone_put(inode->i_private);
		inode->i_private = NULL;
	}
}

static void
xfs_mount_free(
	struct xfs_mount *mp)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
	if (mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_ddev_targp);

	debugfs_remove(mp->m_debugfs);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
#ifdef DEBUG
	kfree(mp->m_errortag);
#endif
	kfree(mp);
}
STATIC int
xfs_fs_sync_fs(
	struct super_block *sb,
	int wait)
{
	struct xfs_mount *mp = XFS_M(sb);
	int error;

	trace_xfs_fs_sync_fs(mp, __return_address);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;

	/*
	 * If we are called with page faults frozen out, it means we are about
	 * to freeze the transaction subsystem.  Take the opportunity to shut
	 * down inodegc because once SB_FREEZE_FS is set it's too late to
	 * prevent inactivation races with freeze.  The fs doesn't get called
	 * again by the freezing process until after SB_FREEZE_FS has been set,
	 * so it's now or never.  Same logic applies to speculative allocation
	 * garbage collection.
	 *
	 * We don't care if this is a normal syncfs call that does this or
	 * freeze that does this - we can run this multiple times without issue
	 * and we won't race with a restart because a restart can only occur
	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
	 */
	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
		xfs_inodegc_stop(mp);
		xfs_blockgc_stop(mp);
		xfs_zone_gc_stop(mp);
	}

	return 0;
}

static xfs_extlen_t
xfs_internal_log_size(
	struct xfs_mount *mp)
{
	if (!mp->m_sb.sb_logstart)
		return 0;
	return mp->m_sb.sb_logblocks;
}

static void
xfs_statfs_data(
	struct xfs_mount *mp,
	struct kstatfs *st)
{
	int64_t fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS);

	/* make sure st->f_bfree does not underflow */
	st->f_bfree = max(0LL,
		fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));

	/*
	 * sb_dblocks can change during growfs, but nothing cares whether the
	 * old or the new value gets reported while that is happening.
	 */
	st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
}

/*
 * When stat(v)fs is called on a file with the realtime bit set or a directory
 * with the rtinherit bit, report freespace information for the RT device
 * instead of the main data device.
 */
static void
xfs_statfs_rt(
	struct xfs_mount *mp,
	struct kstatfs *st)
{
	st->f_bfree = xfs_rtbxlen_to_blen(mp,
			xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
			mp->m_free[XC_FREE_RTEXTENTS].res_total);
}

static void
xfs_statfs_inodes(
	struct xfs_mount *mp,
	struct kstatfs *st)
{
	uint64_t icount = percpu_counter_sum(&mp->m_icount);
	uint64_t ifree = percpu_counter_sum(&mp->m_ifree);
	uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree);

	st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		st->f_files = min_t(typeof(st->f_files), st->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	st->f_files = max_t(typeof(st->f_files), st->f_files,
			mp->m_sb.sb_icount);

	/* Make sure st->f_ffree does not underflow */
	st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree));
}
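/*
 * Example for xfs_statfs_inodes() above (illustrative numbers): free
 * data blocks are counted as potential new inodes, so with f_bfree =
 * 1000 blocks and 16 inodes per block, fakeinos adds 16000 to f_files
 * before the clamps against maxicount, XFS_MAXINUMBER and sb_icount.
 */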
STATIC int
xfs_fs_statfs(
	struct dentry *dentry,
	struct kstatfs *st)
{
	struct xfs_mount *mp = XFS_M(dentry->d_sb);
	struct xfs_inode *ip = XFS_I(d_inode(dentry));

	/*
	 * Expedite background inodegc but don't wait.  We do not want to block
	 * here waiting hours for a billion extent file to be truncated.
	 */
	xfs_inodegc_push(mp);

	st->f_type = XFS_SUPER_MAGIC;
	st->f_namelen = MAXNAMELEN - 1;
	st->f_bsize = mp->m_sb.sb_blocksize;
	st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev));

	xfs_statfs_data(mp, st);
	xfs_statfs_inodes(mp, st);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
		xfs_statfs_rt(mp, st);

	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, st);

	/*
	 * XFS does not distinguish between blocks available to privileged and
	 * unprivileged users.
	 */
	st->f_bavail = st->f_bfree;
	return 0;
}

STATIC void
xfs_save_resvblks(
	struct xfs_mount *mp)
{
	enum xfs_free_counter i;

	for (i = 0; i < XC_FREE_NR; i++) {
		mp->m_free[i].res_saved = mp->m_free[i].res_total;
		xfs_reserve_blocks(mp, i, 0);
	}
}

STATIC void
xfs_restore_resvblks(
	struct xfs_mount *mp)
{
	uint64_t resblks;
	enum xfs_free_counter i;

	for (i = 0; i < XC_FREE_NR; i++) {
		if (mp->m_free[i].res_saved) {
			resblks = mp->m_free[i].res_saved;
			mp->m_free[i].res_saved = 0;
		} else
			resblks = xfs_default_resblks(mp, i);
		xfs_reserve_blocks(mp, i, resblks);
	}
}

/*
 * Second stage of a freeze.  The data is already frozen so we only
 * need to take care of the metadata.  Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen.  This ensures that
 * we will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block *sb)
{
	struct xfs_mount *mp = XFS_M(sb);
	unsigned int flags;
	int ret;

	/*
	 * The filesystem is now frozen far enough that memory reclaim
	 * cannot safely operate on the filesystem.  Hence we need to
	 * set a GFP_NOFS context here to avoid recursion deadlocks.
	 */
	flags = memalloc_nofs_save();
	xfs_save_resvblks(mp);
	ret = xfs_log_quiesce(mp);
	memalloc_nofs_restore(flags);

	/*
	 * For read-write filesystems, we need to restart the inodegc on error
	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
	 * here, so we can restart safely without racing with a stop in
	 * xfs_fs_sync_fs().
	 */
	if (ret && !xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
		xfs_zone_gc_start(mp);
	}

	return ret;
}

STATIC int
xfs_fs_unfreeze(
	struct super_block *sb)
{
	struct xfs_mount *mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/*
	 * Don't reactivate the inodegc worker on a readonly filesystem because
	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
	 * worker because there are no speculative preallocations on a readonly
	 * filesystem.
	 */
	if (!xfs_is_readonly(mp)) {
		xfs_zone_gc_start(mp);
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return 0;
}
/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount *mp)
{
	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_has_logv2(mp)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
	    !xfs_has_pquotino(mp)) {
		xfs_warn(mp,
			"Super block does not support project and group quota together");
		return -EINVAL;
	}

	if (!xfs_has_zoned(mp)) {
		if (mp->m_max_open_zones) {
			xfs_warn(mp,
		"max_open_zones mount option only supported on zoned file systems.");
			return -EINVAL;
		}
		if (mp->m_features & XFS_FEAT_NOLIFETIME) {
			xfs_warn(mp,
		"nolifetime mount option only supported on zoned file systems.");
			return -EINVAL;
		}
	}

	return 0;
}

static int
xfs_init_percpu_counters(
	struct xfs_mount *mp)
{
	int error;
	int i;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
	if (error)
		goto free_delalloc;

	for (i = 0; i < XC_FREE_NR; i++) {
		error = percpu_counter_init(&mp->m_free[i].count, 0,
				GFP_KERNEL);
		if (error)
			goto free_freecounters;
	}

	return 0;

free_freecounters:
	while (--i >= 0)
		percpu_counter_destroy(&mp->m_free[i].count);
	percpu_counter_destroy(&mp->m_delalloc_rtextents);
free_delalloc:
	percpu_counter_destroy(&mp->m_delalloc_blks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

void
xfs_reinit_percpu_counters(
	struct xfs_mount *mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
	if (!xfs_has_zoned(mp))
		xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
				mp->m_sb.sb_frextents);
}

static void
xfs_destroy_percpu_counters(
	struct xfs_mount *mp)
{
	enum xfs_free_counter i;

	for (i = 0; i < XC_FREE_NR; i++)
		percpu_counter_destroy(&mp->m_free[i].count);
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
	percpu_counter_destroy(&mp->m_delalloc_rtextents);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
}
static int
xfs_inodegc_init_percpu(
	struct xfs_mount *mp)
{
	struct xfs_inodegc *gc;
	int cpu;

	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
	if (!mp->m_inodegc)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		gc->cpu = cpu;
		gc->mp = mp;
		init_llist_head(&gc->list);
		gc->items = 0;
		gc->error = 0;
		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
	}
	return 0;
}

static void
xfs_inodegc_free_percpu(
	struct xfs_mount *mp)
{
	if (!mp->m_inodegc)
		return;
	free_percpu(mp->m_inodegc);
}

static void
xfs_fs_put_super(
	struct super_block *sb)
{
	struct xfs_mount *mp = XFS_M(sb);

	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_rtmount_freesb(mp);
	xfs_freesb(mp);
	xchk_mount_stats_free(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_shutdown_devices(mp);
}

static long
xfs_fs_nr_cached_objects(
	struct super_block *sb,
	struct shrink_control *sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
	struct super_block *sb,
	struct shrink_control *sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static void
xfs_fs_shutdown(
	struct super_block *sb)
{
	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
}

static int
xfs_fs_show_stats(
	struct seq_file *m,
	struct dentry *root)
{
	struct xfs_mount *mp = XFS_M(root->d_sb);

	if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT))
		xfs_zoned_show_stats(m, mp);
	return 0;
}

static void
xfs_fs_report_error(
	const struct fserror_event *event)
{
	/* healthmon already knows about non-inode and metadata errors */
	if (event->inode && event->type != FSERR_METADATA)
		xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event);
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.drop_inode		= xfs_fs_drop_inode,
	.evict_inode		= xfs_fs_evict_inode,
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
	.shutdown		= xfs_fs_shutdown,
	.show_stats		= xfs_fs_show_stats,
	.report_error		= xfs_fs_report_error,
};

static int
suffix_kstrtoint(
	const char *s,
	unsigned int base,
	int *res)
{
	int last, shift_left_factor = 0, _res;
	char *value;
	int ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	kfree(value);
	*res = _res << shift_left_factor;
	return ret;
}
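/*
 * Example (illustrative): suffix_kstrtoint("64k", 10, &v) strips the
 * trailing 'k', parses 64 and shifts by 10, leaving v = 65536; "8m"
 * yields 8 << 20.  The result is not range checked here; callers such
 * as xfs_fs_validate_params() do that.  suffix_kstrtoull() below is the
 * same logic for 64-bit values.
 */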
static int
suffix_kstrtoull(
	const char *s,
	unsigned int base,
	unsigned long long *res)
{
	int last, shift_left_factor = 0;
	unsigned long long _res;
	char *value;
	int ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	if (kstrtoull(value, base, &_res))
		ret = -EINVAL;
	kfree(value);
	*res = _res << shift_left_factor;
	return ret;
}

static inline void
xfs_fs_warn_deprecated(
	struct fs_context *fc,
	struct fs_parameter *param)
{
	/*
	 * Always warn about someone passing in a deprecated mount option.
	 * Previously we wouldn't print the warning if we were reconfiguring
	 * and the current mount point already had the flag set, but that was
	 * not the right thing to do.
	 *
	 * Many distributions mount the root filesystem with no options in the
	 * initramfs and rely on mount -a to remount the root fs with the
	 * options in fstab.  However, the old behavior meant that there would
	 * never be a warning about deprecated mount options for the root fs
	 * in /etc/fstab.  On a single-fs system, that means no warning at
	 * all.
	 *
	 * Compounding this problem are distribution scripts that copy
	 * /proc/mounts to fstab, which means that we can't remove mount
	 * options unless we're 100% sure they have only ever been advertised
	 * in /proc/mounts in response to explicitly provided mount options.
	 */
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}
/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
	struct fs_context *fc,
	struct fs_parameter *param)
{
	struct xfs_mount *parsing_mp = fc->s_fs_info;
	struct fs_parse_result result;
	int size = 0;
	int opt;

	BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);

	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Op_deprecated:
		xfs_fs_warn_deprecated(fc, param);
		return 0;
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_features |= XFS_FEAT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_features |= XFS_FEAT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_features |= XFS_FEAT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_pqnoenforce:
		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_gqnoenforce:
		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
		return 0;
	case Opt_discard:
		parsing_mp->m_features |= XFS_FEAT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	case Opt_max_open_zones:
		parsing_mp->m_max_open_zones = result.uint_32;
		return 0;
	case Opt_lifetime:
		parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
		return 0;
	case Opt_nolifetime:
		parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
		return 0;
	case Opt_max_atomic_write:
		if (suffix_kstrtoull(param->string, 10,
				     &parsing_mp->m_awu_max_bytes)) {
			xfs_warn(parsing_mp,
				"max atomic write size must be positive integer");
			return -EINVAL;
		}
		return 0;
	case Opt_errortag:
		return xfs_errortag_add_name(parsing_mp, param->string);
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}

static int
xfs_fs_validate_params(
	struct xfs_mount *mp)
{
	/* No recovery flag requires a read-only mount */
	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
	    (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize != 0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if (xfs_has_allocsize(mp) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}
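/*
 * Example for xfs_fs_validate_params() above (illustrative):
 * "sunit=128,swidth=512" passes (both set, 512 % 128 == 0), while
 * "sunit=128" alone fails with "sunit and swidth must be specified
 * together", and "logbsize=48k" fails the power-of-two check.
 */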
struct dentry *
xfs_debugfs_mkdir(
	const char *name,
	struct dentry *parent)
{
	struct dentry *child;

	/* Apparently we're expected to ignore error returns?? */
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}

static int
xfs_fs_fill_super(
	struct super_block *sb,
	struct fs_context *fc)
{
	struct xfs_mount *mp = sb->s_fs_info;
	struct inode *root;
	int flags = 0, error;

	mp->m_super = sb;

	/*
	 * Copy VFS mount flags from the context now that all parameter parsing
	 * is guaranteed to have been completed by either the old mount API or
	 * the newer fsopen/fsconfig API.
	 */
	if (fc->sb_flags & SB_RDONLY)
		xfs_set_readonly(mp);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	error = xfs_fs_validate_params(mp);
	if (error)
		return error;

	if (!sb_min_blocksize(sb, BBSIZE)) {
		xfs_err(mp, "unable to set blocksize");
		return -EINVAL;
	}
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set.  This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		return error;

	if (xfs_debugfs) {
		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
						  xfs_debugfs);
	} else {
		mp->m_debugfs = NULL;
	}

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_shutdown_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xchk_mount_stats_alloc(mp);
	if (error)
		goto out_free_stats;

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_scrub_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/*
	 * V4 support is undergoing deprecation.
	 *
	 * Note: this has to use an open coded m_features check as xfs_has_crc
	 * always returns false for !CONFIG_XFS_SUPPORT_V4.
	 */
	if (!(mp->m_features & XFS_FEAT_CRC)) {
		if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
			xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
			error = -EINVAL;
			goto out_free_sb;
		}
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
	}
	/* ASCII case insensitivity is undergoing deprecation. */
	if (xfs_has_asciici(mp)) {
#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
		xfs_warn_once(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/*
	 * Filesystem claims it needs repair, so refuse the mount unless
	 * norecovery is also specified, in which case the filesystem can
	 * be mounted with no risk of further damage.
	 */
	if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		size_t max_folio_size = mapping_max_folio_size_supported();

		if (!xfs_has_crc(mp)) {
			xfs_warn(mp,
"V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
			error = -ENOSYS;
			goto out_free_sb;
		}

		if (mp->m_sb.sb_blocksize > max_folio_size) {
			xfs_warn(mp,
"block size (%u bytes) not supported; Only block size (%zu) or less is supported",
				mp->m_sb.sb_blocksize, max_folio_size);
			error = -ENOSYS;
			goto out_free_sb;
		}
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
			"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_rtmount_readsb(mp);
	if (error)
		goto out_free_sb;

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_rtsb;
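	/*
	 * Example (illustrative): with 4k blocks, s_blocksize_bits below
	 * becomes ffs(4096) - 1 = 12; bigtime filesystems extend the
	 * timestamp range beyond the classic 32-bit 1901..2038 window.
	 */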
	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_zoned(mp)) {
		if (!xfs_has_metadir(mp)) {
			xfs_alert(mp,
		"metadir feature required for zoned realtime devices.");
			error = -EINVAL;
			goto out_filestream_unmount;
		}
		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
	}

	if (xfs_has_reflink(mp)) {
		if (xfs_has_realtime(mp) &&
		    !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
			xfs_alert(mp,
	"reflink not compatible with realtime extent size %u!",
				mp->m_sb.sb_rextsize);
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_has_zoned(mp)) {
			xfs_alert(mp,
				"reflink not compatible with zoned RT device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}
	/*
	 * If no quota mount options were provided, maybe we'll try to pick
	 * up the quota accounting and enforcement flags from the ondisk sb.
	 */
	if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
		xfs_set_resuming_quotaon(mp);
	mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

out_filestream_unmount:
	xfs_filestream_unmount(mp);
out_free_rtsb:
	xfs_rtmount_freesb(mp);
out_free_sb:
	xfs_freesb(mp);
out_free_scrub_stats:
	xchk_mount_stats_free(mp);
out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
out_destroy_inodegc:
	xfs_inodegc_free_percpu(mp);
out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
out_shutdown_devices:
	xfs_shutdown_devices(mp);
	return error;

out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_rtsb;
}

static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}

static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int			error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
	    xfs_readonly_buftarg(mp->m_logdev_targp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited by read-only logdev");
		return -EACCES;
	}

	if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited by read-only rtdev");
		return -EACCES;
	}

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	xfs_clear_readonly(mp);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	/* Restart zone reclaim */
	xfs_zone_gc_start(mp);

	return 0;
}
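/*
 * Illustrative trigger for the read-only buftarg checks above, assuming an
 * external log device (device and mount point hypothetical):
 *
 *	# blockdev --setro /dev/sdX1
 *	# mount -o remount,rw /mnt/scratch
 *
 * The remount then fails with EACCES and "ro->rw transition prohibited by
 * read-only logdev" appears in the kernel log.
 */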
static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount. We must process every
	 * cached inode, so this requires a synchronous cache scan.
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker. xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation. In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Stop zone reclaim */
	xfs_zone_gc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	xfs_fs_unreserve_ag_blocks(mp);

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	xfs_log_clean(mp);
	xfs_set_readonly(mp);

	return 0;
}
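/*
 * For reference, a reconfiguration request reaches xfs_fs_reconfigure()
 * below through the VFS, either from the classic path:
 *
 *	mount(NULL, "/mnt/scratch", NULL, MS_REMOUNT | MS_RDONLY, NULL);
 *
 * or from the new mount API (an illustrative sketch with error handling
 * omitted; the mount point is hypothetical):
 *
 *	int fd = fspick(AT_FDCWD, "/mnt/scratch", FSPICK_CLOEXEC);
 *
 *	fsconfig(fd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
 *	fsconfig(fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
 *
 * Both end up with fc->sb_flags carrying SB_RDONLY for the rw->ro case
 * handled below.
 */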
/*
 * Logically we would return an error here to prevent users from believing
 * they might have changed mount options using remount which can't be changed.
 *
 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
 * arguments in some cases so we can't blindly reject options, but have to
 * check for each specified option if it actually differs from the currently
 * set option and only reject it if that's the case.
 *
 * Until that is implemented we return success for every remount request, and
 * silently ignore all options that we can't actually change.
 */
static int
xfs_fs_reconfigure(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
	struct xfs_mount	*new_mp = fc->s_fs_info;
	int			flags = fc->sb_flags;
	int			error;

	new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;

	/* version 5 superblocks always support version counters. */
	if (xfs_has_crc(mp))
		fc->sb_flags |= SB_I_VERSION;

	error = xfs_fs_validate_params(new_mp);
	if (error)
		return error;

	xfs_errortag_copy(mp, new_mp);

	/* Validate new max_atomic_write option before making other changes */
	if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
		error = xfs_set_max_atomic_write_opt(mp,
				new_mp->m_awu_max_bytes);
		if (error)
			return error;
	}

	/* inode32 -> inode64 */
	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
	}

	/* inode64 -> inode32 */
	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
		mp->m_features |= XFS_FEAT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
	}

	/*
	 * Now that mp has been modified according to the remount options, we
	 * do a final option validation with xfs_finish_flags() just like it is
	 * done during mount. We cannot use xfs_finish_flags() on new_mp as it
	 * contains only the user given options.
	 */
	error = xfs_finish_flags(mp);
	if (error)
		return error;

	/* ro -> rw */
	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
		error = xfs_remount_rw(mp);
		if (error)
			return error;
	}

	/* rw -> ro */
	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
		error = xfs_remount_ro(mp);
		if (error)
			return error;
	}

	return 0;
}

static void
xfs_fs_free(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = fc->s_fs_info;

	/*
	 * mp is stored in the fs_context when it is initialized.
	 * mp is transferred to the superblock on a successful mount,
	 * but if an error occurs before the transfer we have to free
	 * it here.
	 */
	if (mp)
		xfs_mount_free(mp);
}

static const struct fs_context_operations xfs_context_ops = {
	.parse_param = xfs_fs_parse_param,
	.get_tree    = xfs_fs_get_tree,
	.reconfigure = xfs_fs_reconfigure,
	.free        = xfs_fs_free,
};
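/*
 * For reference, the operations above are driven in this order by the new
 * mount API (an illustrative userspace sketch; error handling omitted,
 * mount point and option values hypothetical):
 *
 *	int fsfd = fsopen("xfs", FSOPEN_CLOEXEC);
 *			// -> xfs_init_fs_context(), before any parameters
 *	fsconfig(fsfd, FSCONFIG_SET_STRING, "logbufs", "8", 0);
 *			// -> .parse_param, once per option
 *	fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *			// -> .get_tree
 *	int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
 *	move_mount(mfd, "", AT_FDCWD, "/mnt/scratch",
 *			MOVE_MOUNT_F_EMPTY_PATH);
 */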
/*
 * WARNING: do not initialise any parameters in this function that depend on
 * mount option parsing having already been performed as this can be called
 * from fsopen() before any parameters have been set.
 */
static int
xfs_init_fs_context(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp;
	int			i;

	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
	if (!mp)
		return -ENOMEM;
#ifdef DEBUG
	mp->m_errortag = kcalloc(XFS_ERRTAG_MAX, sizeof(*mp->m_errortag),
			GFP_KERNEL);
	if (!mp->m_errortag) {
		kfree(mp);
		return -ENOMEM;
	}
#endif

	spin_lock_init(&mp->m_sb_lock);
	for (i = 0; i < XG_TYPE_MAX; i++)
		xa_init(&mp->m_groups[i].xa);
	mutex_init(&mp->m_growlock);
	mutex_init(&mp->m_metafile_resv_lock);
	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
	mp->m_allocsize_log = 16; /* 64k */

	xfs_hooks_init(&mp->m_dir_update_hooks);

	fc->s_fs_info = mp;
	fc->ops = &xfs_context_ops;

	return 0;
}

static void
xfs_kill_sb(
	struct super_block	*sb)
{
	kill_block_super(sb);
	xfs_mount_free(XFS_M(sb));
}

static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.init_fs_context	= xfs_init_fs_context,
	.parameters		= xfs_fs_parameters,
	.kill_sb		= xfs_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
				  FS_LBS,
};
MODULE_ALIAS_FS("xfs");
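/*
 * Usage sketch for the registration above (illustrative; device and mount
 * point are hypothetical):
 *
 *	# mount -t xfs -o logbufs=8,logbsize=256k /dev/sdX /mnt/scratch
 *
 * mount(8) resolves the "xfs" type to this file_system_type, and the
 * MODULE_ALIAS_FS() entry lets the kernel autoload the module the first
 * time the type is requested.
 */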
STATIC int __init
xfs_init_caches(void)
{
	int		error;

	xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
					  SLAB_HWCACHE_ALIGN |
					  SLAB_RECLAIM_ACCOUNT,
					  NULL);
	if (!xfs_buf_cache)
		goto out;

	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
						sizeof(struct xlog_ticket),
						0, 0, NULL);
	if (!xfs_log_ticket_cache)
		goto out_destroy_buf_cache;

	error = xfs_btree_init_cur_caches();
	if (error)
		goto out_destroy_log_ticket_cache;

	error = rcbagbt_init_cur_cache();
	if (error)
		goto out_destroy_btree_cur_cache;

	error = xfs_defer_init_item_caches();
	if (error)
		goto out_destroy_rcbagbt_cur_cache;

	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
					       sizeof(struct xfs_da_state),
					       0, 0, NULL);
	if (!xfs_da_state_cache)
		goto out_destroy_defer_item_cache;

	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
					    sizeof(struct xfs_ifork),
					    0, 0, NULL);
	if (!xfs_ifork_cache)
		goto out_destroy_da_state_cache;

	xfs_trans_cache = kmem_cache_create("xfs_trans",
					    sizeof(struct xfs_trans),
					    0, 0, NULL);
	if (!xfs_trans_cache)
		goto out_destroy_ifork_cache;

	/*
	 * The size of the cache-allocated buf log item is the maximum
	 * size possible under XFS. This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
					       sizeof(struct xfs_buf_log_item),
					       0, 0, NULL);
	if (!xfs_buf_item_cache)
		goto out_destroy_trans_cache;

	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
			xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_efd_cache)
		goto out_destroy_buf_item_cache;

	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
			xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_efi_cache)
		goto out_destroy_efd_cache;

	xfs_inode_cache = kmem_cache_create("xfs_inode",
					    sizeof(struct xfs_inode), 0,
					    (SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_ACCOUNT),
					    xfs_fs_inode_init_once);
	if (!xfs_inode_cache)
		goto out_destroy_efi_cache;

	xfs_ili_cache = kmem_cache_create("xfs_ili",
					  sizeof(struct xfs_inode_log_item), 0,
					  SLAB_RECLAIM_ACCOUNT,
					  NULL);
	if (!xfs_ili_cache)
		goto out_destroy_inode_cache;

	xfs_icreate_cache = kmem_cache_create("xfs_icr",
					      sizeof(struct xfs_icreate_item),
					      0, 0, NULL);
	if (!xfs_icreate_cache)
		goto out_destroy_ili_cache;

	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
					  sizeof(struct xfs_rud_log_item),
					  0, 0, NULL);
	if (!xfs_rud_cache)
		goto out_destroy_icreate_cache;

	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_rui_cache)
		goto out_destroy_rud_cache;

	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
					  sizeof(struct xfs_cud_log_item),
					  0, 0, NULL);
	if (!xfs_cud_cache)
		goto out_destroy_rui_cache;

	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_cui_cache)
		goto out_destroy_cud_cache;

	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
					  sizeof(struct xfs_bud_log_item),
					  0, 0, NULL);
	if (!xfs_bud_cache)
		goto out_destroy_cui_cache;

	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_bui_cache)
		goto out_destroy_bud_cache;

	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
					    sizeof(struct xfs_attrd_log_item),
					    0, 0, NULL);
	if (!xfs_attrd_cache)
		goto out_destroy_bui_cache;

	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
					    sizeof(struct xfs_attri_log_item),
					    0, 0, NULL);
	if (!xfs_attri_cache)
		goto out_destroy_attrd_cache;

	xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
					      sizeof(struct xfs_iunlink_item),
					      0, 0, NULL);
	if (!xfs_iunlink_cache)
		goto out_destroy_attri_cache;

	xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
					  sizeof(struct xfs_xmd_log_item),
					  0, 0, NULL);
	if (!xfs_xmd_cache)
		goto out_destroy_iul_cache;

	xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
					  sizeof(struct xfs_xmi_log_item),
					  0, 0, NULL);
	if (!xfs_xmi_cache)
		goto out_destroy_xmd_cache;

	xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
						  sizeof(struct xfs_parent_args),
						  0, 0, NULL);
	if (!xfs_parent_args_cache)
		goto out_destroy_xmi_cache;

	return 0;

out_destroy_xmi_cache:
	kmem_cache_destroy(xfs_xmi_cache);
out_destroy_xmd_cache:
	kmem_cache_destroy(xfs_xmd_cache);
out_destroy_iul_cache:
	kmem_cache_destroy(xfs_iunlink_cache);
out_destroy_attri_cache:
	kmem_cache_destroy(xfs_attri_cache);
out_destroy_attrd_cache:
	kmem_cache_destroy(xfs_attrd_cache);
out_destroy_bui_cache:
	kmem_cache_destroy(xfs_bui_cache);
out_destroy_bud_cache:
	kmem_cache_destroy(xfs_bud_cache);
out_destroy_cui_cache:
	kmem_cache_destroy(xfs_cui_cache);
out_destroy_cud_cache:
	kmem_cache_destroy(xfs_cud_cache);
out_destroy_rui_cache:
	kmem_cache_destroy(xfs_rui_cache);
out_destroy_rud_cache:
	kmem_cache_destroy(xfs_rud_cache);
out_destroy_icreate_cache:
	kmem_cache_destroy(xfs_icreate_cache);
out_destroy_ili_cache:
	kmem_cache_destroy(xfs_ili_cache);
out_destroy_inode_cache:
	kmem_cache_destroy(xfs_inode_cache);
out_destroy_efi_cache:
	kmem_cache_destroy(xfs_efi_cache);
out_destroy_efd_cache:
	kmem_cache_destroy(xfs_efd_cache);
out_destroy_buf_item_cache:
	kmem_cache_destroy(xfs_buf_item_cache);
out_destroy_trans_cache:
	kmem_cache_destroy(xfs_trans_cache);
out_destroy_ifork_cache:
	kmem_cache_destroy(xfs_ifork_cache);
out_destroy_da_state_cache:
	kmem_cache_destroy(xfs_da_state_cache);
out_destroy_defer_item_cache:
	xfs_defer_destroy_item_caches();
out_destroy_rcbagbt_cur_cache:
	rcbagbt_destroy_cur_cache();
out_destroy_btree_cur_cache:
	xfs_btree_destroy_cur_caches();
out_destroy_log_ticket_cache:
	kmem_cache_destroy(xfs_log_ticket_cache);
out_destroy_buf_cache:
	kmem_cache_destroy(xfs_buf_cache);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu frees are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_parent_args_cache);
	kmem_cache_destroy(xfs_xmd_cache);
	kmem_cache_destroy(xfs_xmi_cache);
	kmem_cache_destroy(xfs_iunlink_cache);
	kmem_cache_destroy(xfs_attri_cache);
	kmem_cache_destroy(xfs_attrd_cache);
	kmem_cache_destroy(xfs_bui_cache);
	kmem_cache_destroy(xfs_bud_cache);
	kmem_cache_destroy(xfs_cui_cache);
	kmem_cache_destroy(xfs_cud_cache);
	kmem_cache_destroy(xfs_rui_cache);
	kmem_cache_destroy(xfs_rud_cache);
	kmem_cache_destroy(xfs_icreate_cache);
	kmem_cache_destroy(xfs_ili_cache);
	kmem_cache_destroy(xfs_inode_cache);
	kmem_cache_destroy(xfs_efi_cache);
	kmem_cache_destroy(xfs_efd_cache);
	kmem_cache_destroy(xfs_buf_item_cache);
	kmem_cache_destroy(xfs_trans_cache);
	kmem_cache_destroy(xfs_ifork_cache);
	kmem_cache_destroy(xfs_da_state_cache);
	xfs_defer_destroy_item_caches();
	rcbagbt_destroy_cur_cache();
	xfs_btree_destroy_cur_caches();
	kmem_cache_destroy(xfs_log_ticket_cache);
	kmem_cache_destroy(xfs_buf_cache);
}
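/*
 * A minimal sketch of the ordering xfs_destroy_caches() depends on (all
 * names hypothetical): objects freed through call_rcu() may still have
 * callbacks queued when teardown starts, so the callbacks must be drained
 * before the cache they free into is destroyed.
 *
 *	static void example_free_rcu(struct rcu_head *head)
 *	{
 *		struct example_obj *p =
 *			container_of(head, struct example_obj, rcu);
 *
 *		kmem_cache_free(example_cache, p);
 *	}
 *
 *	static void example_teardown(void)
 *	{
 *		rcu_barrier();	// wait for pending example_free_rcu() calls
 *		kmem_cache_destroy(example_cache);
 *	}
 *
 * Destroying example_cache before the barrier would let a late callback
 * free into a cache that no longer exists.
 */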
STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU),
			0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
			0);
	if (!xfs_discard_wq)
		goto out_free_alloc_wq;

	return 0;
out_free_alloc_wq:
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}

STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}

STATIC int __init
init_xfs_fs(void)
{
	int			error;

	xfs_check_ondisk_structs();

	error = xfs_dahash_test();
	if (error)
		return error;

	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");

	xfs_dir_startup();

	error = xfs_init_caches();
	if (error)
		goto out;

	error = xfs_init_workqueues();
	if (error)
		goto out_destroy_caches;

	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

	error = xfs_init_procfs();
	if (error)
		goto out_mru_cache_uninit;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;

	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_debugfs_unregister;
	}

	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
			       "stats");
	if (error)
		goto out_free_stats;

	error = xchk_global_stats_setup(xfs_debugfs);
	if (error)
		goto out_remove_stats_kobj;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_scrub_stats;
#endif

	error = xfs_qm_init();
	if (error)
		goto out_remove_dbg_kobj;

	error = register_filesystem(&xfs_fs_type);
	if (error)
		goto out_qm_exit;
	return 0;

out_qm_exit:
	xfs_qm_exit();
out_remove_dbg_kobj:
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
out_remove_scrub_stats:
#endif
	xchk_global_stats_teardown();
out_remove_stats_kobj:
	xfs_sysfs_del(&xfsstats.xs_kobj);
out_free_stats:
	free_percpu(xfsstats.xs_stats);
out_kset_unregister:
	kset_unregister(xfs_kset);
out_debugfs_unregister:
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
out_cleanup_procfs:
	xfs_cleanup_procfs();
out_mru_cache_uninit:
	xfs_mru_cache_uninit();
out_destroy_wq:
	xfs_destroy_workqueues();
out_destroy_caches:
	xfs_destroy_caches();
out:
	return error;
}
STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xchk_global_stats_teardown();
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_caches();
	xfs_uuid_table_free();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");