1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 7 #include "xfs_platform.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_btree.h" 16 #include "xfs_bmap.h" 17 #include "xfs_alloc.h" 18 #include "xfs_fsops.h" 19 #include "xfs_trans.h" 20 #include "xfs_buf_item.h" 21 #include "xfs_log.h" 22 #include "xfs_log_priv.h" 23 #include "xfs_dir2.h" 24 #include "xfs_extfree_item.h" 25 #include "xfs_mru_cache.h" 26 #include "xfs_inode_item.h" 27 #include "xfs_icache.h" 28 #include "xfs_trace.h" 29 #include "xfs_icreate_item.h" 30 #include "xfs_filestream.h" 31 #include "xfs_quota.h" 32 #include "xfs_sysfs.h" 33 #include "xfs_ondisk.h" 34 #include "xfs_rmap_item.h" 35 #include "xfs_refcount_item.h" 36 #include "xfs_bmap_item.h" 37 #include "xfs_reflink.h" 38 #include "xfs_pwork.h" 39 #include "xfs_ag.h" 40 #include "xfs_defer.h" 41 #include "xfs_attr_item.h" 42 #include "xfs_xattr.h" 43 #include "xfs_error.h" 44 #include "xfs_errortag.h" 45 #include "xfs_iunlink_item.h" 46 #include "xfs_dahash_test.h" 47 #include "xfs_rtbitmap.h" 48 #include "xfs_exchmaps_item.h" 49 #include "xfs_parent.h" 50 #include "xfs_rtalloc.h" 51 #include "xfs_zone_alloc.h" 52 #include "xfs_healthmon.h" 53 #include "scrub/stats.h" 54 #include "scrub/rcbag_btree.h" 55 56 #include <linux/magic.h> 57 #include <linux/fs_context.h> 58 #include <linux/fs_parser.h> 59 #include <linux/fserror.h> 60 61 static const struct super_operations xfs_super_operations; 62 63 static struct dentry *xfs_debugfs; /* top-level xfs debugfs dir */ 64 static struct kset *xfs_kset; /* top-level xfs sysfs dir */ 65 #ifdef DEBUG 66 static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ 67 #endif 68 69 enum xfs_dax_mode { 70 XFS_DAX_INODE = 0, 71 XFS_DAX_ALWAYS = 1, 72 XFS_DAX_NEVER = 2, 73 }; 74 75 /* Were quota mount options provided? Must use the upper 16 bits of qflags. */ 76 #define XFS_QFLAGS_MNTOPTS (1U << 31) 77 78 static void 79 xfs_mount_set_dax_mode( 80 struct xfs_mount *mp, 81 enum xfs_dax_mode mode) 82 { 83 switch (mode) { 84 case XFS_DAX_INODE: 85 mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); 86 break; 87 case XFS_DAX_ALWAYS: 88 mp->m_features |= XFS_FEAT_DAX_ALWAYS; 89 mp->m_features &= ~XFS_FEAT_DAX_NEVER; 90 break; 91 case XFS_DAX_NEVER: 92 mp->m_features |= XFS_FEAT_DAX_NEVER; 93 mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; 94 break; 95 } 96 } 97 98 static const struct constant_table dax_param_enums[] = { 99 {"inode", XFS_DAX_INODE }, 100 {"always", XFS_DAX_ALWAYS }, 101 {"never", XFS_DAX_NEVER }, 102 {} 103 }; 104 105 /* 106 * Table driven mount option parser. 107 */ 108 enum { 109 Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, 110 Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, 111 Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, 112 Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, 113 Opt_largeio, Opt_nolargeio, 114 Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, 115 Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, 116 Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, 117 Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones, 118 Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, Opt_errortag, 119 }; 120 121 #define fsparam_dead(NAME) \ 122 __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) 123 124 static const struct fs_parameter_spec xfs_fs_parameters[] = { 125 /* 126 * These mount options were supposed to be deprecated in September 2025 127 * but the deprecation warning was buggy, so not all users were 128 * notified. The deprecation is now obnoxiously loud and postponed to 129 * September 2030. 130 */ 131 fsparam_dead("attr2"), 132 fsparam_dead("noattr2"), 133 fsparam_dead("ikeep"), 134 fsparam_dead("noikeep"), 135 136 fsparam_u32("logbufs", Opt_logbufs), 137 fsparam_string("logbsize", Opt_logbsize), 138 fsparam_string("logdev", Opt_logdev), 139 fsparam_string("rtdev", Opt_rtdev), 140 fsparam_flag("wsync", Opt_wsync), 141 fsparam_flag("noalign", Opt_noalign), 142 fsparam_flag("swalloc", Opt_swalloc), 143 fsparam_u32("sunit", Opt_sunit), 144 fsparam_u32("swidth", Opt_swidth), 145 fsparam_flag("nouuid", Opt_nouuid), 146 fsparam_flag("grpid", Opt_grpid), 147 fsparam_flag("nogrpid", Opt_nogrpid), 148 fsparam_flag("bsdgroups", Opt_bsdgroups), 149 fsparam_flag("sysvgroups", Opt_sysvgroups), 150 fsparam_string("allocsize", Opt_allocsize), 151 fsparam_flag("norecovery", Opt_norecovery), 152 fsparam_flag("inode64", Opt_inode64), 153 fsparam_flag("inode32", Opt_inode32), 154 fsparam_flag("largeio", Opt_largeio), 155 fsparam_flag("nolargeio", Opt_nolargeio), 156 fsparam_flag("filestreams", Opt_filestreams), 157 fsparam_flag("quota", Opt_quota), 158 fsparam_flag("noquota", Opt_noquota), 159 fsparam_flag("usrquota", Opt_usrquota), 160 fsparam_flag("grpquota", Opt_grpquota), 161 fsparam_flag("prjquota", Opt_prjquota), 162 fsparam_flag("uquota", Opt_uquota), 163 fsparam_flag("gquota", Opt_gquota), 164 fsparam_flag("pquota", Opt_pquota), 165 fsparam_flag("uqnoenforce", Opt_uqnoenforce), 166 fsparam_flag("gqnoenforce", Opt_gqnoenforce), 167 fsparam_flag("pqnoenforce", Opt_pqnoenforce), 168 fsparam_flag("qnoenforce", Opt_qnoenforce), 169 fsparam_flag("discard", Opt_discard), 170 fsparam_flag("nodiscard", Opt_nodiscard), 171 fsparam_flag("dax", Opt_dax), 172 fsparam_enum("dax", Opt_dax_enum, dax_param_enums), 173 fsparam_u32("max_open_zones", Opt_max_open_zones), 174 fsparam_flag("lifetime", Opt_lifetime), 175 fsparam_flag("nolifetime", Opt_nolifetime), 176 fsparam_string("max_atomic_write", Opt_max_atomic_write), 177 fsparam_string("errortag", Opt_errortag), 178 {} 179 }; 180 181 struct proc_xfs_info { 182 uint64_t flag; 183 char *str; 184 }; 185 186 static int 187 xfs_fs_show_options( 188 struct seq_file *m, 189 struct dentry *root) 190 { 191 static struct proc_xfs_info xfs_info_set[] = { 192 /* the few simple ones we can get from the mount struct */ 193 { XFS_FEAT_WSYNC, ",wsync" }, 194 { XFS_FEAT_NOALIGN, ",noalign" }, 195 { XFS_FEAT_SWALLOC, ",swalloc" }, 196 { XFS_FEAT_NOUUID, ",nouuid" }, 197 { XFS_FEAT_NORECOVERY, ",norecovery" }, 198 { XFS_FEAT_FILESTREAMS, ",filestreams" }, 199 { XFS_FEAT_GRPID, ",grpid" }, 200 { XFS_FEAT_DISCARD, ",discard" }, 201 { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, 202 { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, 203 { XFS_FEAT_DAX_NEVER, ",dax=never" }, 204 { XFS_FEAT_NOLIFETIME, ",nolifetime" }, 205 { 0, NULL } 206 }; 207 struct xfs_mount *mp = XFS_M(root->d_sb); 208 struct proc_xfs_info *xfs_infop; 209 210 for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { 211 if (mp->m_features & xfs_infop->flag) 212 seq_puts(m, xfs_infop->str); 213 } 214 215 seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); 216 217 if (xfs_has_allocsize(mp)) 218 seq_printf(m, ",allocsize=%dk", 219 (1 << mp->m_allocsize_log) >> 10); 220 221 if (mp->m_logbufs > 0) 222 seq_printf(m, ",logbufs=%d", mp->m_logbufs); 223 if (mp->m_logbsize > 0) 224 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); 225 226 if (mp->m_logname) 227 seq_show_option(m, "logdev", mp->m_logname); 228 if (mp->m_rtname) 229 seq_show_option(m, "rtdev", mp->m_rtname); 230 231 if (mp->m_dalign > 0) 232 seq_printf(m, ",sunit=%d", 233 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 234 if (mp->m_swidth > 0) 235 seq_printf(m, ",swidth=%d", 236 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 237 238 if (mp->m_qflags & XFS_UQUOTA_ENFD) 239 seq_puts(m, ",usrquota"); 240 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 241 seq_puts(m, ",uqnoenforce"); 242 243 if (mp->m_qflags & XFS_PQUOTA_ENFD) 244 seq_puts(m, ",prjquota"); 245 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 246 seq_puts(m, ",pqnoenforce"); 247 248 if (mp->m_qflags & XFS_GQUOTA_ENFD) 249 seq_puts(m, ",grpquota"); 250 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 251 seq_puts(m, ",gqnoenforce"); 252 253 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 254 seq_puts(m, ",noquota"); 255 256 if (mp->m_max_open_zones) 257 seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones); 258 if (mp->m_awu_max_bytes) 259 seq_printf(m, ",max_atomic_write=%lluk", 260 mp->m_awu_max_bytes >> 10); 261 262 return 0; 263 } 264 265 static bool 266 xfs_set_inode_alloc_perag( 267 struct xfs_perag *pag, 268 xfs_ino_t ino, 269 xfs_agnumber_t max_metadata) 270 { 271 if (!xfs_is_inode32(pag_mount(pag))) { 272 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 273 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 274 return false; 275 } 276 277 if (ino > XFS_MAXINUMBER_32) { 278 clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 279 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 280 return false; 281 } 282 283 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 284 if (pag_agno(pag) < max_metadata) 285 set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 286 else 287 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 288 return true; 289 } 290 291 /* 292 * Set parameters for inode allocation heuristics, taking into account 293 * filesystem size and inode32/inode64 mount options; i.e. specifically 294 * whether or not XFS_FEAT_SMALL_INUMS is set. 295 * 296 * Inode allocation patterns are altered only if inode32 is requested 297 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. 298 * If altered, XFS_OPSTATE_INODE32 is set as well. 299 * 300 * An agcount independent of that in the mount structure is provided 301 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated 302 * to the potentially higher ag count. 303 * 304 * Returns the maximum AG index which may contain inodes. 305 */ 306 xfs_agnumber_t 307 xfs_set_inode_alloc( 308 struct xfs_mount *mp, 309 xfs_agnumber_t agcount) 310 { 311 xfs_agnumber_t index; 312 xfs_agnumber_t maxagi = 0; 313 xfs_sb_t *sbp = &mp->m_sb; 314 xfs_agnumber_t max_metadata; 315 xfs_agino_t agino; 316 xfs_ino_t ino; 317 318 /* 319 * Calculate how much should be reserved for inodes to meet 320 * the max inode percentage. Used only for inode32. 321 */ 322 if (M_IGEO(mp)->maxicount) { 323 uint64_t icount; 324 325 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 326 do_div(icount, 100); 327 icount += sbp->sb_agblocks - 1; 328 do_div(icount, sbp->sb_agblocks); 329 max_metadata = icount; 330 } else { 331 max_metadata = agcount; 332 } 333 334 /* Get the last possible inode in the filesystem */ 335 agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); 336 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 337 338 /* 339 * If user asked for no more than 32-bit inodes, and the fs is 340 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter 341 * the allocator to accommodate the request. 342 */ 343 if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) 344 xfs_set_inode32(mp); 345 else 346 xfs_clear_inode32(mp); 347 348 for (index = 0; index < agcount; index++) { 349 struct xfs_perag *pag; 350 351 ino = XFS_AGINO_TO_INO(mp, index, agino); 352 353 pag = xfs_perag_get(mp, index); 354 if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) 355 maxagi++; 356 xfs_perag_put(pag); 357 } 358 359 return xfs_is_inode32(mp) ? maxagi : agcount; 360 } 361 362 static int 363 xfs_setup_dax_always( 364 struct xfs_mount *mp) 365 { 366 if (!mp->m_ddev_targp->bt_daxdev && 367 (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { 368 xfs_alert(mp, 369 "DAX unsupported by block device. Turning off DAX."); 370 goto disable_dax; 371 } 372 373 if (mp->m_super->s_blocksize != PAGE_SIZE) { 374 xfs_alert(mp, 375 "DAX not supported for blocksize. Turning off DAX."); 376 goto disable_dax; 377 } 378 379 if (xfs_has_reflink(mp) && 380 bdev_is_partition(mp->m_ddev_targp->bt_bdev)) { 381 xfs_alert(mp, 382 "DAX and reflink cannot work with multi-partitions!"); 383 return -EINVAL; 384 } 385 386 return 0; 387 388 disable_dax: 389 xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); 390 return 0; 391 } 392 393 STATIC int 394 xfs_blkdev_get( 395 xfs_mount_t *mp, 396 const char *name, 397 struct file **bdev_filep) 398 { 399 int error = 0; 400 blk_mode_t mode; 401 402 mode = sb_open_mode(mp->m_super->s_flags); 403 *bdev_filep = bdev_file_open_by_path(name, mode, 404 mp->m_super, &fs_holder_ops); 405 if (IS_ERR(*bdev_filep)) { 406 error = PTR_ERR(*bdev_filep); 407 *bdev_filep = NULL; 408 xfs_warn(mp, "Invalid device [%s], error=%d", name, error); 409 } 410 411 return error; 412 } 413 414 STATIC void 415 xfs_shutdown_devices( 416 struct xfs_mount *mp) 417 { 418 /* 419 * Udev is triggered whenever anyone closes a block device or unmounts 420 * a file systemm on a block device. 421 * The default udev rules invoke blkid to read the fs super and create 422 * symlinks to the bdev under /dev/disk. For this, it uses buffered 423 * reads through the page cache. 424 * 425 * xfs_db also uses buffered reads to examine metadata. There is no 426 * coordination between xfs_db and udev, which means that they can run 427 * concurrently. Note there is no coordination between the kernel and 428 * blkid either. 429 * 430 * On a system with 64k pages, the page cache can cache the superblock 431 * and the root inode (and hence the root directory) with the same 64k 432 * page. If udev spawns blkid after the mkfs and the system is busy 433 * enough that it is still running when xfs_db starts up, they'll both 434 * read from the same page in the pagecache. 435 * 436 * The unmount writes updated inode metadata to disk directly. The XFS 437 * buffer cache does not use the bdev pagecache, so it needs to 438 * invalidate that pagecache on unmount. If the above scenario occurs, 439 * the pagecache no longer reflects what's on disk, xfs_db reads the 440 * stale metadata, and fails to find /a. Most of the time this succeeds 441 * because closing a bdev invalidates the page cache, but when processes 442 * race, everyone loses. 443 */ 444 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 445 blkdev_issue_flush(mp->m_logdev_targp->bt_bdev); 446 invalidate_bdev(mp->m_logdev_targp->bt_bdev); 447 } 448 if (mp->m_rtdev_targp) { 449 blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); 450 invalidate_bdev(mp->m_rtdev_targp->bt_bdev); 451 } 452 blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); 453 invalidate_bdev(mp->m_ddev_targp->bt_bdev); 454 } 455 456 /* 457 * The file system configurations are: 458 * (1) device (partition) with data and internal log 459 * (2) logical volume with data and log subvolumes. 460 * (3) logical volume with data, log, and realtime subvolumes. 461 * 462 * We only have to handle opening the log and realtime volumes here if 463 * they are present. The data subvolume has already been opened by 464 * get_sb_bdev() and is stored in sb->s_bdev. 465 */ 466 STATIC int 467 xfs_open_devices( 468 struct xfs_mount *mp) 469 { 470 struct super_block *sb = mp->m_super; 471 struct block_device *ddev = sb->s_bdev; 472 struct file *logdev_file = NULL, *rtdev_file = NULL; 473 int error; 474 475 /* 476 * Open real time and log devices - order is important. 477 */ 478 if (mp->m_logname) { 479 error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file); 480 if (error) 481 return error; 482 } 483 484 if (mp->m_rtname) { 485 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file); 486 if (error) 487 goto out_close_logdev; 488 489 if (file_bdev(rtdev_file) == ddev || 490 (logdev_file && 491 file_bdev(rtdev_file) == file_bdev(logdev_file))) { 492 xfs_warn(mp, 493 "Cannot mount filesystem with identical rtdev and ddev/logdev."); 494 error = -EINVAL; 495 goto out_close_rtdev; 496 } 497 } 498 499 /* 500 * Setup xfs_mount buffer target pointers 501 */ 502 mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file); 503 if (IS_ERR(mp->m_ddev_targp)) { 504 error = PTR_ERR(mp->m_ddev_targp); 505 mp->m_ddev_targp = NULL; 506 goto out_close_rtdev; 507 } 508 509 if (rtdev_file) { 510 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file); 511 if (IS_ERR(mp->m_rtdev_targp)) { 512 error = PTR_ERR(mp->m_rtdev_targp); 513 mp->m_rtdev_targp = NULL; 514 goto out_free_ddev_targ; 515 } 516 } 517 518 if (logdev_file && file_bdev(logdev_file) != ddev) { 519 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file); 520 if (IS_ERR(mp->m_logdev_targp)) { 521 error = PTR_ERR(mp->m_logdev_targp); 522 mp->m_logdev_targp = NULL; 523 goto out_free_rtdev_targ; 524 } 525 } else { 526 mp->m_logdev_targp = mp->m_ddev_targp; 527 /* Handle won't be used, drop it */ 528 if (logdev_file) 529 bdev_fput(logdev_file); 530 } 531 532 return 0; 533 534 out_free_rtdev_targ: 535 if (mp->m_rtdev_targp) 536 xfs_free_buftarg(mp->m_rtdev_targp); 537 out_free_ddev_targ: 538 xfs_free_buftarg(mp->m_ddev_targp); 539 out_close_rtdev: 540 if (rtdev_file) 541 bdev_fput(rtdev_file); 542 out_close_logdev: 543 if (logdev_file) 544 bdev_fput(logdev_file); 545 return error; 546 } 547 548 /* 549 * Setup xfs_mount buffer target pointers based on superblock 550 */ 551 STATIC int 552 xfs_setup_devices( 553 struct xfs_mount *mp) 554 { 555 int error; 556 557 error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize, 558 mp->m_sb.sb_dblocks); 559 if (error) 560 return error; 561 562 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 563 unsigned int log_sector_size = BBSIZE; 564 565 if (xfs_has_sector(mp)) 566 log_sector_size = mp->m_sb.sb_logsectsize; 567 error = xfs_configure_buftarg(mp->m_logdev_targp, 568 log_sector_size, mp->m_sb.sb_logblocks); 569 if (error) 570 return error; 571 } 572 573 if (mp->m_sb.sb_rtstart) { 574 if (mp->m_rtdev_targp) { 575 xfs_warn(mp, 576 "can't use internal and external rtdev at the same time"); 577 return -EINVAL; 578 } 579 mp->m_rtdev_targp = mp->m_ddev_targp; 580 } else if (mp->m_rtname) { 581 error = xfs_configure_buftarg(mp->m_rtdev_targp, 582 mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks); 583 if (error) 584 return error; 585 } 586 587 return 0; 588 } 589 590 STATIC int 591 xfs_init_mount_workqueues( 592 struct xfs_mount *mp) 593 { 594 mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", 595 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 596 1, mp->m_super->s_id); 597 if (!mp->m_buf_workqueue) 598 goto out; 599 600 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 601 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 602 0, mp->m_super->s_id); 603 if (!mp->m_unwritten_workqueue) 604 goto out_destroy_buf; 605 606 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 607 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 608 0, mp->m_super->s_id); 609 if (!mp->m_reclaim_workqueue) 610 goto out_destroy_unwritten; 611 612 mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", 613 XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 614 0, mp->m_super->s_id); 615 if (!mp->m_blockgc_wq) 616 goto out_destroy_reclaim; 617 618 mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", 619 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 620 1, mp->m_super->s_id); 621 if (!mp->m_inodegc_wq) 622 goto out_destroy_blockgc; 623 624 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", 625 XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0, 626 mp->m_super->s_id); 627 if (!mp->m_sync_workqueue) 628 goto out_destroy_inodegc; 629 630 return 0; 631 632 out_destroy_inodegc: 633 destroy_workqueue(mp->m_inodegc_wq); 634 out_destroy_blockgc: 635 destroy_workqueue(mp->m_blockgc_wq); 636 out_destroy_reclaim: 637 destroy_workqueue(mp->m_reclaim_workqueue); 638 out_destroy_unwritten: 639 destroy_workqueue(mp->m_unwritten_workqueue); 640 out_destroy_buf: 641 destroy_workqueue(mp->m_buf_workqueue); 642 out: 643 return -ENOMEM; 644 } 645 646 STATIC void 647 xfs_destroy_mount_workqueues( 648 struct xfs_mount *mp) 649 { 650 destroy_workqueue(mp->m_sync_workqueue); 651 destroy_workqueue(mp->m_blockgc_wq); 652 destroy_workqueue(mp->m_inodegc_wq); 653 destroy_workqueue(mp->m_reclaim_workqueue); 654 destroy_workqueue(mp->m_unwritten_workqueue); 655 destroy_workqueue(mp->m_buf_workqueue); 656 } 657 658 static void 659 xfs_flush_inodes_worker( 660 struct work_struct *work) 661 { 662 struct xfs_mount *mp = container_of(work, struct xfs_mount, 663 m_flush_inodes_work); 664 struct super_block *sb = mp->m_super; 665 666 if (down_read_trylock(&sb->s_umount)) { 667 sync_inodes_sb(sb); 668 up_read(&sb->s_umount); 669 } 670 } 671 672 /* 673 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK 674 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting 675 * for IO to complete so that we effectively throttle multiple callers to the 676 * rate at which IO is completing. 677 */ 678 void 679 xfs_flush_inodes( 680 struct xfs_mount *mp) 681 { 682 /* 683 * If flush_work() returns true then that means we waited for a flush 684 * which was already in progress. Don't bother running another scan. 685 */ 686 if (flush_work(&mp->m_flush_inodes_work)) 687 return; 688 689 queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); 690 flush_work(&mp->m_flush_inodes_work); 691 } 692 693 /* Catch misguided souls that try to use this interface on XFS */ 694 STATIC struct inode * 695 xfs_fs_alloc_inode( 696 struct super_block *sb) 697 { 698 BUG(); 699 return NULL; 700 } 701 702 /* 703 * Now that the generic code is guaranteed not to be accessing 704 * the linux inode, we can inactivate and reclaim the inode. 705 */ 706 STATIC void 707 xfs_fs_destroy_inode( 708 struct inode *inode) 709 { 710 struct xfs_inode *ip = XFS_I(inode); 711 712 trace_xfs_destroy_inode(ip); 713 714 ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 715 XFS_STATS_INC(ip->i_mount, vn_rele); 716 XFS_STATS_INC(ip->i_mount, vn_remove); 717 xfs_inode_mark_reclaimable(ip); 718 } 719 720 /* 721 * Slab object creation initialisation for the XFS inode. 722 * This covers only the idempotent fields in the XFS inode; 723 * all other fields need to be initialised on allocation 724 * from the slab. This avoids the need to repeatedly initialise 725 * fields in the xfs inode that left in the initialise state 726 * when freeing the inode. 727 */ 728 STATIC void 729 xfs_fs_inode_init_once( 730 void *inode) 731 { 732 struct xfs_inode *ip = inode; 733 734 memset(ip, 0, sizeof(struct xfs_inode)); 735 736 /* vfs inode */ 737 inode_init_once(VFS_I(ip)); 738 739 /* xfs inode */ 740 atomic_set(&ip->i_pincount, 0); 741 spin_lock_init(&ip->i_flags_lock); 742 init_rwsem(&ip->i_lock); 743 } 744 745 /* 746 * We do an unlocked check for XFS_IDONTCACHE here because we are already 747 * serialised against cache hits here via the inode->i_lock and igrab() in 748 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be 749 * racing with us, and it avoids needing to grab a spinlock here for every inode 750 * we drop the final reference on. 751 */ 752 STATIC int 753 xfs_fs_drop_inode( 754 struct inode *inode) 755 { 756 struct xfs_inode *ip = XFS_I(inode); 757 758 /* 759 * If this unlinked inode is in the middle of recovery, don't 760 * drop the inode just yet; log recovery will take care of 761 * that. See the comment for this inode flag. 762 */ 763 if (ip->i_flags & XFS_IRECOVERY) { 764 ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); 765 return 0; 766 } 767 768 return inode_generic_drop(inode); 769 } 770 771 STATIC void 772 xfs_fs_evict_inode( 773 struct inode *inode) 774 { 775 if (IS_DAX(inode)) 776 dax_break_layout_final(inode); 777 778 truncate_inode_pages_final(&inode->i_data); 779 clear_inode(inode); 780 781 if (IS_ENABLED(CONFIG_XFS_RT) && 782 S_ISREG(inode->i_mode) && inode->i_private) { 783 xfs_open_zone_put(inode->i_private); 784 inode->i_private = NULL; 785 } 786 } 787 788 static void 789 xfs_mount_free( 790 struct xfs_mount *mp) 791 { 792 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) 793 xfs_free_buftarg(mp->m_logdev_targp); 794 if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) 795 xfs_free_buftarg(mp->m_rtdev_targp); 796 if (mp->m_ddev_targp) 797 xfs_free_buftarg(mp->m_ddev_targp); 798 799 debugfs_remove(mp->m_debugfs); 800 kfree(mp->m_rtname); 801 kfree(mp->m_logname); 802 #ifdef DEBUG 803 kfree(mp->m_errortag); 804 #endif 805 kfree(mp); 806 } 807 808 STATIC int 809 xfs_fs_sync_fs( 810 struct super_block *sb, 811 int wait) 812 { 813 struct xfs_mount *mp = XFS_M(sb); 814 int error; 815 816 trace_xfs_fs_sync_fs(mp, __return_address); 817 818 /* 819 * Doing anything during the async pass would be counterproductive. 820 */ 821 if (!wait) 822 return 0; 823 824 error = xfs_log_force(mp, XFS_LOG_SYNC); 825 if (error) 826 return error; 827 828 if (laptop_mode) { 829 /* 830 * The disk must be active because we're syncing. 831 * We schedule log work now (now that the disk is 832 * active) instead of later (when it might not be). 833 */ 834 flush_delayed_work(&mp->m_log->l_work); 835 } 836 837 /* 838 * If we are called with page faults frozen out, it means we are about 839 * to freeze the transaction subsystem. Take the opportunity to shut 840 * down inodegc because once SB_FREEZE_FS is set it's too late to 841 * prevent inactivation races with freeze. The fs doesn't get called 842 * again by the freezing process until after SB_FREEZE_FS has been set, 843 * so it's now or never. Same logic applies to speculative allocation 844 * garbage collection. 845 * 846 * We don't care if this is a normal syncfs call that does this or 847 * freeze that does this - we can run this multiple times without issue 848 * and we won't race with a restart because a restart can only occur 849 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. 850 */ 851 if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { 852 xfs_inodegc_stop(mp); 853 xfs_blockgc_stop(mp); 854 xfs_zone_gc_stop(mp); 855 } 856 857 return 0; 858 } 859 860 static xfs_extlen_t 861 xfs_internal_log_size( 862 struct xfs_mount *mp) 863 { 864 if (!mp->m_sb.sb_logstart) 865 return 0; 866 return mp->m_sb.sb_logblocks; 867 } 868 869 static void 870 xfs_statfs_data( 871 struct xfs_mount *mp, 872 struct kstatfs *st) 873 { 874 int64_t fdblocks = 875 xfs_sum_freecounter(mp, XC_FREE_BLOCKS); 876 877 /* make sure st->f_bfree does not underflow */ 878 st->f_bfree = max(0LL, 879 fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS)); 880 881 /* 882 * sb_dblocks can change during growfs, but nothing cares about reporting 883 * the old or new value during growfs. 884 */ 885 st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp); 886 } 887 888 /* 889 * When stat(v)fs is called on a file with the realtime bit set or a directory 890 * with the rtinherit bit, report freespace information for the RT device 891 * instead of the main data device. 892 */ 893 static void 894 xfs_statfs_rt( 895 struct xfs_mount *mp, 896 struct kstatfs *st) 897 { 898 st->f_bfree = xfs_rtbxlen_to_blen(mp, 899 xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS)); 900 st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp, 901 mp->m_free[XC_FREE_RTEXTENTS].res_total); 902 } 903 904 static void 905 xfs_statfs_inodes( 906 struct xfs_mount *mp, 907 struct kstatfs *st) 908 { 909 uint64_t icount = percpu_counter_sum(&mp->m_icount); 910 uint64_t ifree = percpu_counter_sum(&mp->m_ifree); 911 uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree); 912 913 st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 914 if (M_IGEO(mp)->maxicount) 915 st->f_files = min_t(typeof(st->f_files), st->f_files, 916 M_IGEO(mp)->maxicount); 917 918 /* If sb_icount overshot maxicount, report actual allocation */ 919 st->f_files = max_t(typeof(st->f_files), st->f_files, 920 mp->m_sb.sb_icount); 921 922 /* Make sure st->f_ffree does not underflow */ 923 st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree)); 924 } 925 926 STATIC int 927 xfs_fs_statfs( 928 struct dentry *dentry, 929 struct kstatfs *st) 930 { 931 struct xfs_mount *mp = XFS_M(dentry->d_sb); 932 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 933 934 /* 935 * Expedite background inodegc but don't wait. We do not want to block 936 * here waiting hours for a billion extent file to be truncated. 937 */ 938 xfs_inodegc_push(mp); 939 940 st->f_type = XFS_SUPER_MAGIC; 941 st->f_namelen = MAXNAMELEN - 1; 942 st->f_bsize = mp->m_sb.sb_blocksize; 943 st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev)); 944 945 xfs_statfs_data(mp, st); 946 xfs_statfs_inodes(mp, st); 947 948 if (XFS_IS_REALTIME_MOUNT(mp) && 949 (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) 950 xfs_statfs_rt(mp, st); 951 952 if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && 953 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 954 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 955 xfs_qm_statvfs(ip, st); 956 957 /* 958 * XFS does not distinguish between blocks available to privileged and 959 * unprivileged users. 960 */ 961 st->f_bavail = st->f_bfree; 962 return 0; 963 } 964 965 STATIC void 966 xfs_save_resvblks( 967 struct xfs_mount *mp) 968 { 969 enum xfs_free_counter i; 970 971 for (i = 0; i < XC_FREE_NR; i++) { 972 mp->m_free[i].res_saved = mp->m_free[i].res_total; 973 xfs_reserve_blocks(mp, i, 0); 974 } 975 } 976 977 STATIC void 978 xfs_restore_resvblks( 979 struct xfs_mount *mp) 980 { 981 uint64_t resblks; 982 enum xfs_free_counter i; 983 984 for (i = 0; i < XC_FREE_NR; i++) { 985 if (mp->m_free[i].res_saved) { 986 resblks = mp->m_free[i].res_saved; 987 mp->m_free[i].res_saved = 0; 988 } else 989 resblks = xfs_default_resblks(mp, i); 990 xfs_reserve_blocks(mp, i, resblks); 991 } 992 } 993 994 /* 995 * Second stage of a freeze. The data is already frozen so we only 996 * need to take care of the metadata. Once that's done sync the superblock 997 * to the log to dirty it in case of a crash while frozen. This ensures that we 998 * will recover the unlinked inode lists on the next mount. 999 */ 1000 STATIC int 1001 xfs_fs_freeze( 1002 struct super_block *sb) 1003 { 1004 struct xfs_mount *mp = XFS_M(sb); 1005 unsigned int flags; 1006 int ret; 1007 1008 /* 1009 * The filesystem is now frozen far enough that memory reclaim 1010 * cannot safely operate on the filesystem. Hence we need to 1011 * set a GFP_NOFS context here to avoid recursion deadlocks. 1012 */ 1013 flags = memalloc_nofs_save(); 1014 xfs_save_resvblks(mp); 1015 ret = xfs_log_quiesce(mp); 1016 memalloc_nofs_restore(flags); 1017 1018 /* 1019 * For read-write filesystems, we need to restart the inodegc on error 1020 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not 1021 * going to be run to restart it now. We are at SB_FREEZE_FS level 1022 * here, so we can restart safely without racing with a stop in 1023 * xfs_fs_sync_fs(). 1024 */ 1025 if (ret && !xfs_is_readonly(mp)) { 1026 xfs_blockgc_start(mp); 1027 xfs_inodegc_start(mp); 1028 xfs_zone_gc_start(mp); 1029 } 1030 1031 return ret; 1032 } 1033 1034 STATIC int 1035 xfs_fs_unfreeze( 1036 struct super_block *sb) 1037 { 1038 struct xfs_mount *mp = XFS_M(sb); 1039 1040 xfs_restore_resvblks(mp); 1041 xfs_log_work_queue(mp); 1042 1043 /* 1044 * Don't reactivate the inodegc worker on a readonly filesystem because 1045 * inodes are sent directly to reclaim. Don't reactivate the blockgc 1046 * worker because there are no speculative preallocations on a readonly 1047 * filesystem. 1048 */ 1049 if (!xfs_is_readonly(mp)) { 1050 xfs_zone_gc_start(mp); 1051 xfs_blockgc_start(mp); 1052 xfs_inodegc_start(mp); 1053 } 1054 1055 return 0; 1056 } 1057 1058 /* 1059 * This function fills in xfs_mount_t fields based on mount args. 1060 * Note: the superblock _has_ now been read in. 1061 */ 1062 STATIC int 1063 xfs_finish_flags( 1064 struct xfs_mount *mp) 1065 { 1066 /* Fail a mount where the logbuf is smaller than the log stripe */ 1067 if (xfs_has_logv2(mp)) { 1068 if (mp->m_logbsize <= 0 && 1069 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { 1070 mp->m_logbsize = mp->m_sb.sb_logsunit; 1071 } else if (mp->m_logbsize > 0 && 1072 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1073 xfs_warn(mp, 1074 "logbuf size must be greater than or equal to log stripe size"); 1075 return -EINVAL; 1076 } 1077 } else { 1078 /* Fail a mount if the logbuf is larger than 32K */ 1079 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1080 xfs_warn(mp, 1081 "logbuf size for version 1 logs must be 16K or 32K"); 1082 return -EINVAL; 1083 } 1084 } 1085 1086 /* 1087 * prohibit r/w mounts of read-only filesystems 1088 */ 1089 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { 1090 xfs_warn(mp, 1091 "cannot mount a read-only filesystem as read-write"); 1092 return -EROFS; 1093 } 1094 1095 if ((mp->m_qflags & XFS_GQUOTA_ACCT) && 1096 (mp->m_qflags & XFS_PQUOTA_ACCT) && 1097 !xfs_has_pquotino(mp)) { 1098 xfs_warn(mp, 1099 "Super block does not support project and group quota together"); 1100 return -EINVAL; 1101 } 1102 1103 if (!xfs_has_zoned(mp)) { 1104 if (mp->m_max_open_zones) { 1105 xfs_warn(mp, 1106 "max_open_zones mount option only supported on zoned file systems."); 1107 return -EINVAL; 1108 } 1109 if (mp->m_features & XFS_FEAT_NOLIFETIME) { 1110 xfs_warn(mp, 1111 "nolifetime mount option only supported on zoned file systems."); 1112 return -EINVAL; 1113 } 1114 } 1115 1116 return 0; 1117 } 1118 1119 static int 1120 xfs_init_percpu_counters( 1121 struct xfs_mount *mp) 1122 { 1123 int error; 1124 int i; 1125 1126 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); 1127 if (error) 1128 return -ENOMEM; 1129 1130 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); 1131 if (error) 1132 goto free_icount; 1133 1134 error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); 1135 if (error) 1136 goto free_ifree; 1137 1138 error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL); 1139 if (error) 1140 goto free_delalloc; 1141 1142 for (i = 0; i < XC_FREE_NR; i++) { 1143 error = percpu_counter_init(&mp->m_free[i].count, 0, 1144 GFP_KERNEL); 1145 if (error) 1146 goto free_freecounters; 1147 } 1148 1149 return 0; 1150 1151 free_freecounters: 1152 while (--i >= 0) 1153 percpu_counter_destroy(&mp->m_free[i].count); 1154 percpu_counter_destroy(&mp->m_delalloc_rtextents); 1155 free_delalloc: 1156 percpu_counter_destroy(&mp->m_delalloc_blks); 1157 free_ifree: 1158 percpu_counter_destroy(&mp->m_ifree); 1159 free_icount: 1160 percpu_counter_destroy(&mp->m_icount); 1161 return -ENOMEM; 1162 } 1163 1164 void 1165 xfs_reinit_percpu_counters( 1166 struct xfs_mount *mp) 1167 { 1168 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); 1169 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); 1170 xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks); 1171 if (!xfs_has_zoned(mp)) 1172 xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, 1173 mp->m_sb.sb_frextents); 1174 } 1175 1176 static void 1177 xfs_destroy_percpu_counters( 1178 struct xfs_mount *mp) 1179 { 1180 enum xfs_free_counter i; 1181 1182 for (i = 0; i < XC_FREE_NR; i++) 1183 percpu_counter_destroy(&mp->m_free[i].count); 1184 percpu_counter_destroy(&mp->m_icount); 1185 percpu_counter_destroy(&mp->m_ifree); 1186 ASSERT(xfs_is_shutdown(mp) || 1187 percpu_counter_sum(&mp->m_delalloc_rtextents) == 0); 1188 percpu_counter_destroy(&mp->m_delalloc_rtextents); 1189 ASSERT(xfs_is_shutdown(mp) || 1190 percpu_counter_sum(&mp->m_delalloc_blks) == 0); 1191 percpu_counter_destroy(&mp->m_delalloc_blks); 1192 } 1193 1194 static int 1195 xfs_inodegc_init_percpu( 1196 struct xfs_mount *mp) 1197 { 1198 struct xfs_inodegc *gc; 1199 int cpu; 1200 1201 mp->m_inodegc = alloc_percpu(struct xfs_inodegc); 1202 if (!mp->m_inodegc) 1203 return -ENOMEM; 1204 1205 for_each_possible_cpu(cpu) { 1206 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1207 gc->cpu = cpu; 1208 gc->mp = mp; 1209 init_llist_head(&gc->list); 1210 gc->items = 0; 1211 gc->error = 0; 1212 INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); 1213 } 1214 return 0; 1215 } 1216 1217 static void 1218 xfs_inodegc_free_percpu( 1219 struct xfs_mount *mp) 1220 { 1221 if (!mp->m_inodegc) 1222 return; 1223 free_percpu(mp->m_inodegc); 1224 } 1225 1226 static void 1227 xfs_fs_put_super( 1228 struct super_block *sb) 1229 { 1230 struct xfs_mount *mp = XFS_M(sb); 1231 1232 xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); 1233 xfs_filestream_unmount(mp); 1234 xfs_unmountfs(mp); 1235 1236 xfs_rtmount_freesb(mp); 1237 xfs_freesb(mp); 1238 xchk_mount_stats_free(mp); 1239 free_percpu(mp->m_stats.xs_stats); 1240 xfs_inodegc_free_percpu(mp); 1241 xfs_destroy_percpu_counters(mp); 1242 xfs_destroy_mount_workqueues(mp); 1243 xfs_shutdown_devices(mp); 1244 } 1245 1246 static long 1247 xfs_fs_nr_cached_objects( 1248 struct super_block *sb, 1249 struct shrink_control *sc) 1250 { 1251 /* Paranoia: catch incorrect calls during mount setup or teardown */ 1252 if (WARN_ON_ONCE(!sb->s_fs_info)) 1253 return 0; 1254 return xfs_reclaim_inodes_count(XFS_M(sb)); 1255 } 1256 1257 static long 1258 xfs_fs_free_cached_objects( 1259 struct super_block *sb, 1260 struct shrink_control *sc) 1261 { 1262 return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); 1263 } 1264 1265 static void 1266 xfs_fs_shutdown( 1267 struct super_block *sb) 1268 { 1269 xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); 1270 } 1271 1272 static int 1273 xfs_fs_show_stats( 1274 struct seq_file *m, 1275 struct dentry *root) 1276 { 1277 struct xfs_mount *mp = XFS_M(root->d_sb); 1278 1279 if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT)) 1280 xfs_zoned_show_stats(m, mp); 1281 return 0; 1282 } 1283 1284 static void 1285 xfs_fs_report_error( 1286 const struct fserror_event *event) 1287 { 1288 /* healthmon already knows about non-inode and metadata errors */ 1289 if (event->inode && event->type != FSERR_METADATA) 1290 xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event); 1291 } 1292 1293 static const struct super_operations xfs_super_operations = { 1294 .alloc_inode = xfs_fs_alloc_inode, 1295 .destroy_inode = xfs_fs_destroy_inode, 1296 .drop_inode = xfs_fs_drop_inode, 1297 .evict_inode = xfs_fs_evict_inode, 1298 .put_super = xfs_fs_put_super, 1299 .sync_fs = xfs_fs_sync_fs, 1300 .freeze_fs = xfs_fs_freeze, 1301 .unfreeze_fs = xfs_fs_unfreeze, 1302 .statfs = xfs_fs_statfs, 1303 .show_options = xfs_fs_show_options, 1304 .nr_cached_objects = xfs_fs_nr_cached_objects, 1305 .free_cached_objects = xfs_fs_free_cached_objects, 1306 .shutdown = xfs_fs_shutdown, 1307 .show_stats = xfs_fs_show_stats, 1308 .report_error = xfs_fs_report_error, 1309 }; 1310 1311 static int 1312 suffix_kstrtoint( 1313 const char *s, 1314 unsigned int base, 1315 int *res) 1316 { 1317 int last, shift_left_factor = 0, _res; 1318 char *value; 1319 int ret = 0; 1320 1321 value = kstrdup(s, GFP_KERNEL); 1322 if (!value) 1323 return -ENOMEM; 1324 1325 last = strlen(value) - 1; 1326 if (value[last] == 'K' || value[last] == 'k') { 1327 shift_left_factor = 10; 1328 value[last] = '\0'; 1329 } 1330 if (value[last] == 'M' || value[last] == 'm') { 1331 shift_left_factor = 20; 1332 value[last] = '\0'; 1333 } 1334 if (value[last] == 'G' || value[last] == 'g') { 1335 shift_left_factor = 30; 1336 value[last] = '\0'; 1337 } 1338 1339 if (kstrtoint(value, base, &_res)) 1340 ret = -EINVAL; 1341 kfree(value); 1342 *res = _res << shift_left_factor; 1343 return ret; 1344 } 1345 1346 static int 1347 suffix_kstrtoull( 1348 const char *s, 1349 unsigned int base, 1350 unsigned long long *res) 1351 { 1352 int last, shift_left_factor = 0; 1353 unsigned long long _res; 1354 char *value; 1355 int ret = 0; 1356 1357 value = kstrdup(s, GFP_KERNEL); 1358 if (!value) 1359 return -ENOMEM; 1360 1361 last = strlen(value) - 1; 1362 if (value[last] == 'K' || value[last] == 'k') { 1363 shift_left_factor = 10; 1364 value[last] = '\0'; 1365 } 1366 if (value[last] == 'M' || value[last] == 'm') { 1367 shift_left_factor = 20; 1368 value[last] = '\0'; 1369 } 1370 if (value[last] == 'G' || value[last] == 'g') { 1371 shift_left_factor = 30; 1372 value[last] = '\0'; 1373 } 1374 1375 if (kstrtoull(value, base, &_res)) 1376 ret = -EINVAL; 1377 kfree(value); 1378 *res = _res << shift_left_factor; 1379 return ret; 1380 } 1381 1382 static inline void 1383 xfs_fs_warn_deprecated( 1384 struct fs_context *fc, 1385 struct fs_parameter *param) 1386 { 1387 /* 1388 * Always warn about someone passing in a deprecated mount option. 1389 * Previously we wouldn't print the warning if we were reconfiguring 1390 * and current mount point already had the flag set, but that was not 1391 * the right thing to do. 1392 * 1393 * Many distributions mount the root filesystem with no options in the 1394 * initramfs and rely on mount -a to remount the root fs with the 1395 * options in fstab. However, the old behavior meant that there would 1396 * never be a warning about deprecated mount options for the root fs in 1397 * /etc/fstab. On a single-fs system, that means no warning at all. 1398 * 1399 * Compounding this problem are distribution scripts that copy 1400 * /proc/mounts to fstab, which means that we can't remove mount 1401 * options unless we're 100% sure they have only ever been advertised 1402 * in /proc/mounts in response to explicitly provided mount options. 1403 */ 1404 xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); 1405 } 1406 1407 /* 1408 * Set mount state from a mount option. 1409 * 1410 * NOTE: mp->m_super is NULL here! 1411 */ 1412 static int 1413 xfs_fs_parse_param( 1414 struct fs_context *fc, 1415 struct fs_parameter *param) 1416 { 1417 struct xfs_mount *parsing_mp = fc->s_fs_info; 1418 struct fs_parse_result result; 1419 int size = 0; 1420 int opt; 1421 1422 BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL); 1423 1424 opt = fs_parse(fc, xfs_fs_parameters, param, &result); 1425 if (opt < 0) 1426 return opt; 1427 1428 switch (opt) { 1429 case Op_deprecated: 1430 xfs_fs_warn_deprecated(fc, param); 1431 return 0; 1432 case Opt_logbufs: 1433 parsing_mp->m_logbufs = result.uint_32; 1434 return 0; 1435 case Opt_logbsize: 1436 if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) 1437 return -EINVAL; 1438 return 0; 1439 case Opt_logdev: 1440 kfree(parsing_mp->m_logname); 1441 parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); 1442 if (!parsing_mp->m_logname) 1443 return -ENOMEM; 1444 return 0; 1445 case Opt_rtdev: 1446 kfree(parsing_mp->m_rtname); 1447 parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); 1448 if (!parsing_mp->m_rtname) 1449 return -ENOMEM; 1450 return 0; 1451 case Opt_allocsize: 1452 if (suffix_kstrtoint(param->string, 10, &size)) 1453 return -EINVAL; 1454 parsing_mp->m_allocsize_log = ffs(size) - 1; 1455 parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; 1456 return 0; 1457 case Opt_grpid: 1458 case Opt_bsdgroups: 1459 parsing_mp->m_features |= XFS_FEAT_GRPID; 1460 return 0; 1461 case Opt_nogrpid: 1462 case Opt_sysvgroups: 1463 parsing_mp->m_features &= ~XFS_FEAT_GRPID; 1464 return 0; 1465 case Opt_wsync: 1466 parsing_mp->m_features |= XFS_FEAT_WSYNC; 1467 return 0; 1468 case Opt_norecovery: 1469 parsing_mp->m_features |= XFS_FEAT_NORECOVERY; 1470 return 0; 1471 case Opt_noalign: 1472 parsing_mp->m_features |= XFS_FEAT_NOALIGN; 1473 return 0; 1474 case Opt_swalloc: 1475 parsing_mp->m_features |= XFS_FEAT_SWALLOC; 1476 return 0; 1477 case Opt_sunit: 1478 parsing_mp->m_dalign = result.uint_32; 1479 return 0; 1480 case Opt_swidth: 1481 parsing_mp->m_swidth = result.uint_32; 1482 return 0; 1483 case Opt_inode32: 1484 parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; 1485 return 0; 1486 case Opt_inode64: 1487 parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1488 return 0; 1489 case Opt_nouuid: 1490 parsing_mp->m_features |= XFS_FEAT_NOUUID; 1491 return 0; 1492 case Opt_largeio: 1493 parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; 1494 return 0; 1495 case Opt_nolargeio: 1496 parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; 1497 return 0; 1498 case Opt_filestreams: 1499 parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; 1500 return 0; 1501 case Opt_noquota: 1502 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; 1503 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; 1504 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1505 return 0; 1506 case Opt_quota: 1507 case Opt_uquota: 1508 case Opt_usrquota: 1509 parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); 1510 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1511 return 0; 1512 case Opt_qnoenforce: 1513 case Opt_uqnoenforce: 1514 parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; 1515 parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; 1516 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1517 return 0; 1518 case Opt_pquota: 1519 case Opt_prjquota: 1520 parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); 1521 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1522 return 0; 1523 case Opt_pqnoenforce: 1524 parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; 1525 parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; 1526 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1527 return 0; 1528 case Opt_gquota: 1529 case Opt_grpquota: 1530 parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); 1531 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1532 return 0; 1533 case Opt_gqnoenforce: 1534 parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; 1535 parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; 1536 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1537 return 0; 1538 case Opt_discard: 1539 parsing_mp->m_features |= XFS_FEAT_DISCARD; 1540 return 0; 1541 case Opt_nodiscard: 1542 parsing_mp->m_features &= ~XFS_FEAT_DISCARD; 1543 return 0; 1544 #ifdef CONFIG_FS_DAX 1545 case Opt_dax: 1546 xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); 1547 return 0; 1548 case Opt_dax_enum: 1549 xfs_mount_set_dax_mode(parsing_mp, result.uint_32); 1550 return 0; 1551 #endif 1552 case Opt_max_open_zones: 1553 parsing_mp->m_max_open_zones = result.uint_32; 1554 return 0; 1555 case Opt_lifetime: 1556 parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME; 1557 return 0; 1558 case Opt_nolifetime: 1559 parsing_mp->m_features |= XFS_FEAT_NOLIFETIME; 1560 return 0; 1561 case Opt_max_atomic_write: 1562 if (suffix_kstrtoull(param->string, 10, 1563 &parsing_mp->m_awu_max_bytes)) { 1564 xfs_warn(parsing_mp, 1565 "max atomic write size must be positive integer"); 1566 return -EINVAL; 1567 } 1568 return 0; 1569 case Opt_errortag: 1570 return xfs_errortag_add_name(parsing_mp, param->string); 1571 default: 1572 xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); 1573 return -EINVAL; 1574 } 1575 1576 return 0; 1577 } 1578 1579 static int 1580 xfs_fs_validate_params( 1581 struct xfs_mount *mp) 1582 { 1583 /* No recovery flag requires a read-only mount */ 1584 if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { 1585 xfs_warn(mp, "no-recovery mounts must be read-only."); 1586 return -EINVAL; 1587 } 1588 1589 if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { 1590 xfs_warn(mp, 1591 "sunit and swidth options incompatible with the noalign option"); 1592 return -EINVAL; 1593 } 1594 1595 if (!IS_ENABLED(CONFIG_XFS_QUOTA) && 1596 (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) { 1597 xfs_warn(mp, "quota support not available in this kernel."); 1598 return -EINVAL; 1599 } 1600 1601 if ((mp->m_dalign && !mp->m_swidth) || 1602 (!mp->m_dalign && mp->m_swidth)) { 1603 xfs_warn(mp, "sunit and swidth must be specified together"); 1604 return -EINVAL; 1605 } 1606 1607 if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { 1608 xfs_warn(mp, 1609 "stripe width (%d) must be a multiple of the stripe unit (%d)", 1610 mp->m_swidth, mp->m_dalign); 1611 return -EINVAL; 1612 } 1613 1614 if (mp->m_logbufs != -1 && 1615 mp->m_logbufs != 0 && 1616 (mp->m_logbufs < XLOG_MIN_ICLOGS || 1617 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 1618 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", 1619 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 1620 return -EINVAL; 1621 } 1622 1623 if (mp->m_logbsize != -1 && 1624 mp->m_logbsize != 0 && 1625 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 1626 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 1627 !is_power_of_2(mp->m_logbsize))) { 1628 xfs_warn(mp, 1629 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 1630 mp->m_logbsize); 1631 return -EINVAL; 1632 } 1633 1634 if (xfs_has_allocsize(mp) && 1635 (mp->m_allocsize_log > XFS_MAX_IO_LOG || 1636 mp->m_allocsize_log < XFS_MIN_IO_LOG)) { 1637 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", 1638 mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); 1639 return -EINVAL; 1640 } 1641 1642 return 0; 1643 } 1644 1645 struct dentry * 1646 xfs_debugfs_mkdir( 1647 const char *name, 1648 struct dentry *parent) 1649 { 1650 struct dentry *child; 1651 1652 /* Apparently we're expected to ignore error returns?? */ 1653 child = debugfs_create_dir(name, parent); 1654 if (IS_ERR(child)) 1655 return NULL; 1656 1657 return child; 1658 } 1659 1660 static int 1661 xfs_fs_fill_super( 1662 struct super_block *sb, 1663 struct fs_context *fc) 1664 { 1665 struct xfs_mount *mp = sb->s_fs_info; 1666 struct inode *root; 1667 int flags = 0, error; 1668 1669 mp->m_super = sb; 1670 1671 /* 1672 * Copy VFS mount flags from the context now that all parameter parsing 1673 * is guaranteed to have been completed by either the old mount API or 1674 * the newer fsopen/fsconfig API. 1675 */ 1676 if (fc->sb_flags & SB_RDONLY) 1677 xfs_set_readonly(mp); 1678 if (fc->sb_flags & SB_DIRSYNC) 1679 mp->m_features |= XFS_FEAT_DIRSYNC; 1680 if (fc->sb_flags & SB_SYNCHRONOUS) 1681 mp->m_features |= XFS_FEAT_WSYNC; 1682 1683 error = xfs_fs_validate_params(mp); 1684 if (error) 1685 return error; 1686 1687 if (!sb_min_blocksize(sb, BBSIZE)) { 1688 xfs_err(mp, "unable to set blocksize"); 1689 return -EINVAL; 1690 } 1691 sb->s_xattr = xfs_xattr_handlers; 1692 sb->s_export_op = &xfs_export_operations; 1693 #ifdef CONFIG_XFS_QUOTA 1694 sb->s_qcop = &xfs_quotactl_operations; 1695 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 1696 #endif 1697 sb->s_op = &xfs_super_operations; 1698 1699 /* 1700 * Delay mount work if the debug hook is set. This is debug 1701 * instrumention to coordinate simulation of xfs mount failures with 1702 * VFS superblock operations 1703 */ 1704 if (xfs_globals.mount_delay) { 1705 xfs_notice(mp, "Delaying mount for %d seconds.", 1706 xfs_globals.mount_delay); 1707 msleep(xfs_globals.mount_delay * 1000); 1708 } 1709 1710 if (fc->sb_flags & SB_SILENT) 1711 flags |= XFS_MFSI_QUIET; 1712 1713 error = xfs_open_devices(mp); 1714 if (error) 1715 return error; 1716 1717 if (xfs_debugfs) { 1718 mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id, 1719 xfs_debugfs); 1720 } else { 1721 mp->m_debugfs = NULL; 1722 } 1723 1724 error = xfs_init_mount_workqueues(mp); 1725 if (error) 1726 goto out_shutdown_devices; 1727 1728 error = xfs_init_percpu_counters(mp); 1729 if (error) 1730 goto out_destroy_workqueues; 1731 1732 error = xfs_inodegc_init_percpu(mp); 1733 if (error) 1734 goto out_destroy_counters; 1735 1736 /* Allocate stats memory before we do operations that might use it */ 1737 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1738 if (!mp->m_stats.xs_stats) { 1739 error = -ENOMEM; 1740 goto out_destroy_inodegc; 1741 } 1742 1743 error = xchk_mount_stats_alloc(mp); 1744 if (error) 1745 goto out_free_stats; 1746 1747 error = xfs_readsb(mp, flags); 1748 if (error) 1749 goto out_free_scrub_stats; 1750 1751 error = xfs_finish_flags(mp); 1752 if (error) 1753 goto out_free_sb; 1754 1755 error = xfs_setup_devices(mp); 1756 if (error) 1757 goto out_free_sb; 1758 1759 /* 1760 * V4 support is undergoing deprecation. 1761 * 1762 * Note: this has to use an open coded m_features check as xfs_has_crc 1763 * always returns false for !CONFIG_XFS_SUPPORT_V4. 1764 */ 1765 if (!(mp->m_features & XFS_FEAT_CRC)) { 1766 if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) { 1767 xfs_warn(mp, 1768 "Deprecated V4 format (crc=0) not supported by kernel."); 1769 error = -EINVAL; 1770 goto out_free_sb; 1771 } 1772 xfs_warn_once(mp, 1773 "Deprecated V4 format (crc=0) will not be supported after September 2030."); 1774 } 1775 1776 /* ASCII case insensitivity is undergoing deprecation. */ 1777 if (xfs_has_asciici(mp)) { 1778 #ifdef CONFIG_XFS_SUPPORT_ASCII_CI 1779 xfs_warn_once(mp, 1780 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030."); 1781 #else 1782 xfs_warn(mp, 1783 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel."); 1784 error = -EINVAL; 1785 goto out_free_sb; 1786 #endif 1787 } 1788 1789 /* 1790 * Filesystem claims it needs repair, so refuse the mount unless 1791 * norecovery is also specified, in which case the filesystem can 1792 * be mounted with no risk of further damage. 1793 */ 1794 if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { 1795 xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); 1796 error = -EFSCORRUPTED; 1797 goto out_free_sb; 1798 } 1799 1800 /* 1801 * Don't touch the filesystem if a user tool thinks it owns the primary 1802 * superblock. mkfs doesn't clear the flag from secondary supers, so 1803 * we don't check them at all. 1804 */ 1805 if (mp->m_sb.sb_inprogress) { 1806 xfs_warn(mp, "Offline file system operation in progress!"); 1807 error = -EFSCORRUPTED; 1808 goto out_free_sb; 1809 } 1810 1811 if (mp->m_sb.sb_blocksize > PAGE_SIZE) { 1812 size_t max_folio_size = mapping_max_folio_size_supported(); 1813 1814 if (!xfs_has_crc(mp)) { 1815 xfs_warn(mp, 1816 "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.", 1817 mp->m_sb.sb_blocksize, PAGE_SIZE); 1818 error = -ENOSYS; 1819 goto out_free_sb; 1820 } 1821 1822 if (mp->m_sb.sb_blocksize > max_folio_size) { 1823 xfs_warn(mp, 1824 "block size (%u bytes) not supported; Only block size (%zu) or less is supported", 1825 mp->m_sb.sb_blocksize, max_folio_size); 1826 error = -ENOSYS; 1827 goto out_free_sb; 1828 } 1829 } 1830 1831 /* Ensure this filesystem fits in the page cache limits */ 1832 if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || 1833 xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { 1834 xfs_warn(mp, 1835 "file system too large to be mounted on this system."); 1836 error = -EFBIG; 1837 goto out_free_sb; 1838 } 1839 1840 /* 1841 * XFS block mappings use 54 bits to store the logical block offset. 1842 * This should suffice to handle the maximum file size that the VFS 1843 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT 1844 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes 1845 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON 1846 * to check this assertion. 1847 * 1848 * Avoid integer overflow by comparing the maximum bmbt offset to the 1849 * maximum pagecache offset in units of fs blocks. 1850 */ 1851 if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { 1852 xfs_warn(mp, 1853 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", 1854 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), 1855 XFS_MAX_FILEOFF); 1856 error = -EINVAL; 1857 goto out_free_sb; 1858 } 1859 1860 error = xfs_rtmount_readsb(mp); 1861 if (error) 1862 goto out_free_sb; 1863 1864 error = xfs_filestream_mount(mp); 1865 if (error) 1866 goto out_free_rtsb; 1867 1868 /* 1869 * we must configure the block size in the superblock before we run the 1870 * full mount process as the mount process can lookup and cache inodes. 1871 */ 1872 sb->s_magic = XFS_SUPER_MAGIC; 1873 sb->s_blocksize = mp->m_sb.sb_blocksize; 1874 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1875 sb->s_maxbytes = MAX_LFS_FILESIZE; 1876 sb->s_max_links = XFS_MAXLINK; 1877 sb->s_time_gran = 1; 1878 if (xfs_has_bigtime(mp)) { 1879 sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); 1880 sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); 1881 } else { 1882 sb->s_time_min = XFS_LEGACY_TIME_MIN; 1883 sb->s_time_max = XFS_LEGACY_TIME_MAX; 1884 } 1885 trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); 1886 sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; 1887 1888 set_posix_acl_flag(sb); 1889 1890 /* version 5 superblocks support inode version counters. */ 1891 if (xfs_has_crc(mp)) 1892 sb->s_flags |= SB_I_VERSION; 1893 1894 if (xfs_has_dax_always(mp)) { 1895 error = xfs_setup_dax_always(mp); 1896 if (error) 1897 goto out_filestream_unmount; 1898 } 1899 1900 if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { 1901 xfs_warn(mp, 1902 "mounting with \"discard\" option, but the device does not support discard"); 1903 mp->m_features &= ~XFS_FEAT_DISCARD; 1904 } 1905 1906 if (xfs_has_zoned(mp)) { 1907 if (!xfs_has_metadir(mp)) { 1908 xfs_alert(mp, 1909 "metadir feature required for zoned realtime devices."); 1910 error = -EINVAL; 1911 goto out_filestream_unmount; 1912 } 1913 xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED); 1914 } 1915 1916 if (xfs_has_reflink(mp)) { 1917 if (xfs_has_realtime(mp) && 1918 !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) { 1919 xfs_alert(mp, 1920 "reflink not compatible with realtime extent size %u!", 1921 mp->m_sb.sb_rextsize); 1922 error = -EINVAL; 1923 goto out_filestream_unmount; 1924 } 1925 1926 if (xfs_has_zoned(mp)) { 1927 xfs_alert(mp, 1928 "reflink not compatible with zoned RT device!"); 1929 error = -EINVAL; 1930 goto out_filestream_unmount; 1931 } 1932 1933 if (xfs_globals.always_cow) { 1934 xfs_info(mp, "using DEBUG-only always_cow mode."); 1935 mp->m_always_cow = true; 1936 } 1937 } 1938 1939 /* 1940 * If no quota mount options were provided, maybe we'll try to pick 1941 * up the quota accounting and enforcement flags from the ondisk sb. 1942 */ 1943 if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS)) 1944 xfs_set_resuming_quotaon(mp); 1945 mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; 1946 1947 error = xfs_mountfs(mp); 1948 if (error) 1949 goto out_filestream_unmount; 1950 1951 root = igrab(VFS_I(mp->m_rootip)); 1952 if (!root) { 1953 error = -ENOENT; 1954 goto out_unmount; 1955 } 1956 sb->s_root = d_make_root(root); 1957 if (!sb->s_root) { 1958 error = -ENOMEM; 1959 goto out_unmount; 1960 } 1961 1962 return 0; 1963 1964 out_filestream_unmount: 1965 xfs_filestream_unmount(mp); 1966 out_free_rtsb: 1967 xfs_rtmount_freesb(mp); 1968 out_free_sb: 1969 xfs_freesb(mp); 1970 out_free_scrub_stats: 1971 xchk_mount_stats_free(mp); 1972 out_free_stats: 1973 free_percpu(mp->m_stats.xs_stats); 1974 out_destroy_inodegc: 1975 xfs_inodegc_free_percpu(mp); 1976 out_destroy_counters: 1977 xfs_destroy_percpu_counters(mp); 1978 out_destroy_workqueues: 1979 xfs_destroy_mount_workqueues(mp); 1980 out_shutdown_devices: 1981 xfs_shutdown_devices(mp); 1982 return error; 1983 1984 out_unmount: 1985 xfs_filestream_unmount(mp); 1986 xfs_unmountfs(mp); 1987 goto out_free_rtsb; 1988 } 1989 1990 static int 1991 xfs_fs_get_tree( 1992 struct fs_context *fc) 1993 { 1994 return get_tree_bdev(fc, xfs_fs_fill_super); 1995 } 1996 1997 static int 1998 xfs_remount_rw( 1999 struct xfs_mount *mp) 2000 { 2001 struct xfs_sb *sbp = &mp->m_sb; 2002 int error; 2003 2004 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && 2005 xfs_readonly_buftarg(mp->m_logdev_targp)) { 2006 xfs_warn(mp, 2007 "ro->rw transition prohibited by read-only logdev"); 2008 return -EACCES; 2009 } 2010 2011 if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { 2012 xfs_warn(mp, 2013 "ro->rw transition prohibited by read-only rtdev"); 2014 return -EACCES; 2015 } 2016 2017 if (xfs_has_norecovery(mp)) { 2018 xfs_warn(mp, 2019 "ro->rw transition prohibited on norecovery mount"); 2020 return -EINVAL; 2021 } 2022 2023 if (xfs_sb_is_v5(sbp) && 2024 xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 2025 xfs_warn(mp, 2026 "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", 2027 (sbp->sb_features_ro_compat & 2028 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 2029 return -EINVAL; 2030 } 2031 2032 xfs_clear_readonly(mp); 2033 2034 /* 2035 * If this is the first remount to writeable state we might have some 2036 * superblock changes to update. 2037 */ 2038 if (mp->m_update_sb) { 2039 error = xfs_sync_sb(mp, false); 2040 if (error) { 2041 xfs_warn(mp, "failed to write sb changes"); 2042 return error; 2043 } 2044 mp->m_update_sb = false; 2045 } 2046 2047 /* 2048 * Fill out the reserve pool if it is empty. Use the stashed value if 2049 * it is non-zero, otherwise go with the default. 2050 */ 2051 xfs_restore_resvblks(mp); 2052 xfs_log_work_queue(mp); 2053 xfs_blockgc_start(mp); 2054 2055 /* Create the per-AG metadata reservation pool .*/ 2056 error = xfs_fs_reserve_ag_blocks(mp); 2057 if (error && error != -ENOSPC) 2058 return error; 2059 2060 /* Re-enable the background inode inactivation worker. */ 2061 xfs_inodegc_start(mp); 2062 2063 /* Restart zone reclaim */ 2064 xfs_zone_gc_start(mp); 2065 2066 return 0; 2067 } 2068 2069 static int 2070 xfs_remount_ro( 2071 struct xfs_mount *mp) 2072 { 2073 struct xfs_icwalk icw = { 2074 .icw_flags = XFS_ICWALK_FLAG_SYNC, 2075 }; 2076 int error; 2077 2078 /* Flush all the dirty data to disk. */ 2079 error = sync_filesystem(mp->m_super); 2080 if (error) 2081 return error; 2082 2083 /* 2084 * Cancel background eofb scanning so it cannot race with the final 2085 * log force+buftarg wait and deadlock the remount. 2086 */ 2087 xfs_blockgc_stop(mp); 2088 2089 /* 2090 * Clear out all remaining COW staging extents and speculative post-EOF 2091 * preallocations so that we don't leave inodes requiring inactivation 2092 * cleanups during reclaim on a read-only mount. We must process every 2093 * cached inode, so this requires a synchronous cache scan. 2094 */ 2095 error = xfs_blockgc_free_space(mp, &icw); 2096 if (error) { 2097 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2098 return error; 2099 } 2100 2101 /* 2102 * Stop the inodegc background worker. xfs_fs_reconfigure already 2103 * flushed all pending inodegc work when it sync'd the filesystem. 2104 * The VFS holds s_umount, so we know that inodes cannot enter 2105 * xfs_fs_destroy_inode during a remount operation. In readonly mode 2106 * we send inodes straight to reclaim, so no inodes will be queued. 2107 */ 2108 xfs_inodegc_stop(mp); 2109 2110 /* Stop zone reclaim */ 2111 xfs_zone_gc_stop(mp); 2112 2113 /* Free the per-AG metadata reservation pool. */ 2114 xfs_fs_unreserve_ag_blocks(mp); 2115 2116 /* 2117 * Before we sync the metadata, we need to free up the reserve block 2118 * pool so that the used block count in the superblock on disk is 2119 * correct at the end of the remount. Stash the current* reserve pool 2120 * size so that if we get remounted rw, we can return it to the same 2121 * size. 2122 */ 2123 xfs_save_resvblks(mp); 2124 2125 xfs_log_clean(mp); 2126 xfs_set_readonly(mp); 2127 2128 return 0; 2129 } 2130 2131 /* 2132 * Logically we would return an error here to prevent users from believing 2133 * they might have changed mount options using remount which can't be changed. 2134 * 2135 * But unfortunately mount(8) adds all options from mtab and fstab to the mount 2136 * arguments in some cases so we can't blindly reject options, but have to 2137 * check for each specified option if it actually differs from the currently 2138 * set option and only reject it if that's the case. 2139 * 2140 * Until that is implemented we return success for every remount request, and 2141 * silently ignore all options that we can't actually change. 2142 */ 2143 static int 2144 xfs_fs_reconfigure( 2145 struct fs_context *fc) 2146 { 2147 struct xfs_mount *mp = XFS_M(fc->root->d_sb); 2148 struct xfs_mount *new_mp = fc->s_fs_info; 2149 int flags = fc->sb_flags; 2150 int error; 2151 2152 new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; 2153 2154 /* version 5 superblocks always support version counters. */ 2155 if (xfs_has_crc(mp)) 2156 fc->sb_flags |= SB_I_VERSION; 2157 2158 error = xfs_fs_validate_params(new_mp); 2159 if (error) 2160 return error; 2161 2162 xfs_errortag_copy(mp, new_mp); 2163 2164 /* Validate new max_atomic_write option before making other changes */ 2165 if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) { 2166 error = xfs_set_max_atomic_write_opt(mp, 2167 new_mp->m_awu_max_bytes); 2168 if (error) 2169 return error; 2170 } 2171 2172 /* inode32 -> inode64 */ 2173 if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { 2174 mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 2175 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 2176 } 2177 2178 /* inode64 -> inode32 */ 2179 if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { 2180 mp->m_features |= XFS_FEAT_SMALL_INUMS; 2181 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 2182 } 2183 2184 /* 2185 * Now that mp has been modified according to the remount options, we 2186 * do a final option validation with xfs_finish_flags() just like it is 2187 * just like it is done during mount. We cannot use 2188 * done during mount. We cannot use xfs_finish_flags() on new_mp as it 2189 * contains only the user given options. 2190 */ 2191 error = xfs_finish_flags(mp); 2192 if (error) 2193 return error; 2194 2195 /* ro -> rw */ 2196 if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { 2197 error = xfs_remount_rw(mp); 2198 if (error) 2199 return error; 2200 } 2201 2202 /* rw -> ro */ 2203 if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { 2204 error = xfs_remount_ro(mp); 2205 if (error) 2206 return error; 2207 } 2208 2209 return 0; 2210 } 2211 2212 static void 2213 xfs_fs_free( 2214 struct fs_context *fc) 2215 { 2216 struct xfs_mount *mp = fc->s_fs_info; 2217 2218 /* 2219 * mp is stored in the fs_context when it is initialized. 2220 * mp is transferred to the superblock on a successful mount, 2221 * but if an error occurs before the transfer we have to free 2222 * it here. 2223 */ 2224 if (mp) 2225 xfs_mount_free(mp); 2226 } 2227 2228 static const struct fs_context_operations xfs_context_ops = { 2229 .parse_param = xfs_fs_parse_param, 2230 .get_tree = xfs_fs_get_tree, 2231 .reconfigure = xfs_fs_reconfigure, 2232 .free = xfs_fs_free, 2233 }; 2234 2235 /* 2236 * WARNING: do not initialise any parameters in this function that depend on 2237 * mount option parsing having already been performed as this can be called from 2238 * fsopen() before any parameters have been set. 2239 */ 2240 static int 2241 xfs_init_fs_context( 2242 struct fs_context *fc) 2243 { 2244 struct xfs_mount *mp; 2245 int i; 2246 2247 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 2248 if (!mp) 2249 return -ENOMEM; 2250 #ifdef DEBUG 2251 mp->m_errortag = kcalloc(XFS_ERRTAG_MAX, sizeof(*mp->m_errortag), 2252 GFP_KERNEL); 2253 if (!mp->m_errortag) { 2254 kfree(mp); 2255 return -ENOMEM; 2256 } 2257 #endif 2258 2259 spin_lock_init(&mp->m_sb_lock); 2260 for (i = 0; i < XG_TYPE_MAX; i++) 2261 xa_init(&mp->m_groups[i].xa); 2262 mutex_init(&mp->m_growlock); 2263 mutex_init(&mp->m_metafile_resv_lock); 2264 INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); 2265 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 2266 mp->m_kobj.kobject.kset = xfs_kset; 2267 /* 2268 * We don't create the finobt per-ag space reservation until after log 2269 * recovery, so we must set this to true so that an ifree transaction 2270 * started during log recovery will not depend on space reservations 2271 * for finobt expansion. 2272 */ 2273 mp->m_finobt_nores = true; 2274 2275 /* 2276 * These can be overridden by the mount option parsing. 2277 */ 2278 mp->m_logbufs = -1; 2279 mp->m_logbsize = -1; 2280 mp->m_allocsize_log = 16; /* 64k */ 2281 2282 xfs_hooks_init(&mp->m_dir_update_hooks); 2283 2284 fc->s_fs_info = mp; 2285 fc->ops = &xfs_context_ops; 2286 2287 return 0; 2288 } 2289 2290 static void 2291 xfs_kill_sb( 2292 struct super_block *sb) 2293 { 2294 kill_block_super(sb); 2295 xfs_mount_free(XFS_M(sb)); 2296 } 2297 2298 static struct file_system_type xfs_fs_type = { 2299 .owner = THIS_MODULE, 2300 .name = "xfs", 2301 .init_fs_context = xfs_init_fs_context, 2302 .parameters = xfs_fs_parameters, 2303 .kill_sb = xfs_kill_sb, 2304 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME | 2305 FS_LBS, 2306 }; 2307 MODULE_ALIAS_FS("xfs"); 2308 2309 STATIC int __init 2310 xfs_init_caches(void) 2311 { 2312 int error; 2313 2314 xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, 2315 SLAB_HWCACHE_ALIGN | 2316 SLAB_RECLAIM_ACCOUNT, 2317 NULL); 2318 if (!xfs_buf_cache) 2319 goto out; 2320 2321 xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", 2322 sizeof(struct xlog_ticket), 2323 0, 0, NULL); 2324 if (!xfs_log_ticket_cache) 2325 goto out_destroy_buf_cache; 2326 2327 error = xfs_btree_init_cur_caches(); 2328 if (error) 2329 goto out_destroy_log_ticket_cache; 2330 2331 error = rcbagbt_init_cur_cache(); 2332 if (error) 2333 goto out_destroy_btree_cur_cache; 2334 2335 error = xfs_defer_init_item_caches(); 2336 if (error) 2337 goto out_destroy_rcbagbt_cur_cache; 2338 2339 xfs_da_state_cache = kmem_cache_create("xfs_da_state", 2340 sizeof(struct xfs_da_state), 2341 0, 0, NULL); 2342 if (!xfs_da_state_cache) 2343 goto out_destroy_defer_item_cache; 2344 2345 xfs_ifork_cache = kmem_cache_create("xfs_ifork", 2346 sizeof(struct xfs_ifork), 2347 0, 0, NULL); 2348 if (!xfs_ifork_cache) 2349 goto out_destroy_da_state_cache; 2350 2351 xfs_trans_cache = kmem_cache_create("xfs_trans", 2352 sizeof(struct xfs_trans), 2353 0, 0, NULL); 2354 if (!xfs_trans_cache) 2355 goto out_destroy_ifork_cache; 2356 2357 2358 /* 2359 * The size of the cache-allocated buf log item is the maximum 2360 * size possible under XFS. This wastes a little bit of memory, 2361 * but it is much faster. 2362 */ 2363 xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", 2364 sizeof(struct xfs_buf_log_item), 2365 0, 0, NULL); 2366 if (!xfs_buf_item_cache) 2367 goto out_destroy_trans_cache; 2368 2369 xfs_efd_cache = kmem_cache_create("xfs_efd_item", 2370 xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), 2371 0, 0, NULL); 2372 if (!xfs_efd_cache) 2373 goto out_destroy_buf_item_cache; 2374 2375 xfs_efi_cache = kmem_cache_create("xfs_efi_item", 2376 xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), 2377 0, 0, NULL); 2378 if (!xfs_efi_cache) 2379 goto out_destroy_efd_cache; 2380 2381 xfs_inode_cache = kmem_cache_create("xfs_inode", 2382 sizeof(struct xfs_inode), 0, 2383 (SLAB_HWCACHE_ALIGN | 2384 SLAB_RECLAIM_ACCOUNT | 2385 SLAB_ACCOUNT), 2386 xfs_fs_inode_init_once); 2387 if (!xfs_inode_cache) 2388 goto out_destroy_efi_cache; 2389 2390 xfs_ili_cache = kmem_cache_create("xfs_ili", 2391 sizeof(struct xfs_inode_log_item), 0, 2392 SLAB_RECLAIM_ACCOUNT, 2393 NULL); 2394 if (!xfs_ili_cache) 2395 goto out_destroy_inode_cache; 2396 2397 xfs_icreate_cache = kmem_cache_create("xfs_icr", 2398 sizeof(struct xfs_icreate_item), 2399 0, 0, NULL); 2400 if (!xfs_icreate_cache) 2401 goto out_destroy_ili_cache; 2402 2403 xfs_rud_cache = kmem_cache_create("xfs_rud_item", 2404 sizeof(struct xfs_rud_log_item), 2405 0, 0, NULL); 2406 if (!xfs_rud_cache) 2407 goto out_destroy_icreate_cache; 2408 2409 xfs_rui_cache = kmem_cache_create("xfs_rui_item", 2410 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 2411 0, 0, NULL); 2412 if (!xfs_rui_cache) 2413 goto out_destroy_rud_cache; 2414 2415 xfs_cud_cache = kmem_cache_create("xfs_cud_item", 2416 sizeof(struct xfs_cud_log_item), 2417 0, 0, NULL); 2418 if (!xfs_cud_cache) 2419 goto out_destroy_rui_cache; 2420 2421 xfs_cui_cache = kmem_cache_create("xfs_cui_item", 2422 xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 2423 0, 0, NULL); 2424 if (!xfs_cui_cache) 2425 goto out_destroy_cud_cache; 2426 2427 xfs_bud_cache = kmem_cache_create("xfs_bud_item", 2428 sizeof(struct xfs_bud_log_item), 2429 0, 0, NULL); 2430 if (!xfs_bud_cache) 2431 goto out_destroy_cui_cache; 2432 2433 xfs_bui_cache = kmem_cache_create("xfs_bui_item", 2434 xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 2435 0, 0, NULL); 2436 if (!xfs_bui_cache) 2437 goto out_destroy_bud_cache; 2438 2439 xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", 2440 sizeof(struct xfs_attrd_log_item), 2441 0, 0, NULL); 2442 if (!xfs_attrd_cache) 2443 goto out_destroy_bui_cache; 2444 2445 xfs_attri_cache = kmem_cache_create("xfs_attri_item", 2446 sizeof(struct xfs_attri_log_item), 2447 0, 0, NULL); 2448 if (!xfs_attri_cache) 2449 goto out_destroy_attrd_cache; 2450 2451 xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", 2452 sizeof(struct xfs_iunlink_item), 2453 0, 0, NULL); 2454 if (!xfs_iunlink_cache) 2455 goto out_destroy_attri_cache; 2456 2457 xfs_xmd_cache = kmem_cache_create("xfs_xmd_item", 2458 sizeof(struct xfs_xmd_log_item), 2459 0, 0, NULL); 2460 if (!xfs_xmd_cache) 2461 goto out_destroy_iul_cache; 2462 2463 xfs_xmi_cache = kmem_cache_create("xfs_xmi_item", 2464 sizeof(struct xfs_xmi_log_item), 2465 0, 0, NULL); 2466 if (!xfs_xmi_cache) 2467 goto out_destroy_xmd_cache; 2468 2469 xfs_parent_args_cache = kmem_cache_create("xfs_parent_args", 2470 sizeof(struct xfs_parent_args), 2471 0, 0, NULL); 2472 if (!xfs_parent_args_cache) 2473 goto out_destroy_xmi_cache; 2474 2475 return 0; 2476 2477 out_destroy_xmi_cache: 2478 kmem_cache_destroy(xfs_xmi_cache); 2479 out_destroy_xmd_cache: 2480 kmem_cache_destroy(xfs_xmd_cache); 2481 out_destroy_iul_cache: 2482 kmem_cache_destroy(xfs_iunlink_cache); 2483 out_destroy_attri_cache: 2484 kmem_cache_destroy(xfs_attri_cache); 2485 out_destroy_attrd_cache: 2486 kmem_cache_destroy(xfs_attrd_cache); 2487 out_destroy_bui_cache: 2488 kmem_cache_destroy(xfs_bui_cache); 2489 out_destroy_bud_cache: 2490 kmem_cache_destroy(xfs_bud_cache); 2491 out_destroy_cui_cache: 2492 kmem_cache_destroy(xfs_cui_cache); 2493 out_destroy_cud_cache: 2494 kmem_cache_destroy(xfs_cud_cache); 2495 out_destroy_rui_cache: 2496 kmem_cache_destroy(xfs_rui_cache); 2497 out_destroy_rud_cache: 2498 kmem_cache_destroy(xfs_rud_cache); 2499 out_destroy_icreate_cache: 2500 kmem_cache_destroy(xfs_icreate_cache); 2501 out_destroy_ili_cache: 2502 kmem_cache_destroy(xfs_ili_cache); 2503 out_destroy_inode_cache: 2504 kmem_cache_destroy(xfs_inode_cache); 2505 out_destroy_efi_cache: 2506 kmem_cache_destroy(xfs_efi_cache); 2507 out_destroy_efd_cache: 2508 kmem_cache_destroy(xfs_efd_cache); 2509 out_destroy_buf_item_cache: 2510 kmem_cache_destroy(xfs_buf_item_cache); 2511 out_destroy_trans_cache: 2512 kmem_cache_destroy(xfs_trans_cache); 2513 out_destroy_ifork_cache: 2514 kmem_cache_destroy(xfs_ifork_cache); 2515 out_destroy_da_state_cache: 2516 kmem_cache_destroy(xfs_da_state_cache); 2517 out_destroy_defer_item_cache: 2518 xfs_defer_destroy_item_caches(); 2519 out_destroy_rcbagbt_cur_cache: 2520 rcbagbt_destroy_cur_cache(); 2521 out_destroy_btree_cur_cache: 2522 xfs_btree_destroy_cur_caches(); 2523 out_destroy_log_ticket_cache: 2524 kmem_cache_destroy(xfs_log_ticket_cache); 2525 out_destroy_buf_cache: 2526 kmem_cache_destroy(xfs_buf_cache); 2527 out: 2528 return -ENOMEM; 2529 } 2530 2531 STATIC void 2532 xfs_destroy_caches(void) 2533 { 2534 /* 2535 * Make sure all delayed rcu free are flushed before we 2536 * destroy caches. 2537 */ 2538 rcu_barrier(); 2539 kmem_cache_destroy(xfs_parent_args_cache); 2540 kmem_cache_destroy(xfs_xmd_cache); 2541 kmem_cache_destroy(xfs_xmi_cache); 2542 kmem_cache_destroy(xfs_iunlink_cache); 2543 kmem_cache_destroy(xfs_attri_cache); 2544 kmem_cache_destroy(xfs_attrd_cache); 2545 kmem_cache_destroy(xfs_bui_cache); 2546 kmem_cache_destroy(xfs_bud_cache); 2547 kmem_cache_destroy(xfs_cui_cache); 2548 kmem_cache_destroy(xfs_cud_cache); 2549 kmem_cache_destroy(xfs_rui_cache); 2550 kmem_cache_destroy(xfs_rud_cache); 2551 kmem_cache_destroy(xfs_icreate_cache); 2552 kmem_cache_destroy(xfs_ili_cache); 2553 kmem_cache_destroy(xfs_inode_cache); 2554 kmem_cache_destroy(xfs_efi_cache); 2555 kmem_cache_destroy(xfs_efd_cache); 2556 kmem_cache_destroy(xfs_buf_item_cache); 2557 kmem_cache_destroy(xfs_trans_cache); 2558 kmem_cache_destroy(xfs_ifork_cache); 2559 kmem_cache_destroy(xfs_da_state_cache); 2560 xfs_defer_destroy_item_caches(); 2561 rcbagbt_destroy_cur_cache(); 2562 xfs_btree_destroy_cur_caches(); 2563 kmem_cache_destroy(xfs_log_ticket_cache); 2564 kmem_cache_destroy(xfs_buf_cache); 2565 } 2566 2567 STATIC int __init 2568 xfs_init_workqueues(void) 2569 { 2570 /* 2571 * The allocation workqueue can be used in memory reclaim situations 2572 * (writepage path), and parallelism is only limited by the number of 2573 * AGs in all the filesystems mounted. Hence use the default large 2574 * max_active value for this workqueue. 2575 */ 2576 xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU), 2577 0); 2578 if (!xfs_alloc_wq) 2579 return -ENOMEM; 2580 2581 xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 2582 0); 2583 if (!xfs_discard_wq) 2584 goto out_free_alloc_wq; 2585 2586 return 0; 2587 out_free_alloc_wq: 2588 destroy_workqueue(xfs_alloc_wq); 2589 return -ENOMEM; 2590 } 2591 2592 STATIC void 2593 xfs_destroy_workqueues(void) 2594 { 2595 destroy_workqueue(xfs_discard_wq); 2596 destroy_workqueue(xfs_alloc_wq); 2597 } 2598 2599 STATIC int __init 2600 init_xfs_fs(void) 2601 { 2602 int error; 2603 2604 xfs_check_ondisk_structs(); 2605 2606 error = xfs_dahash_test(); 2607 if (error) 2608 return error; 2609 2610 printk(KERN_INFO XFS_VERSION_STRING " with " 2611 XFS_BUILD_OPTIONS " enabled\n"); 2612 2613 xfs_dir_startup(); 2614 2615 error = xfs_init_caches(); 2616 if (error) 2617 goto out; 2618 2619 error = xfs_init_workqueues(); 2620 if (error) 2621 goto out_destroy_caches; 2622 2623 error = xfs_mru_cache_init(); 2624 if (error) 2625 goto out_destroy_wq; 2626 2627 error = xfs_init_procfs(); 2628 if (error) 2629 goto out_mru_cache_uninit; 2630 2631 error = xfs_sysctl_register(); 2632 if (error) 2633 goto out_cleanup_procfs; 2634 2635 xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL); 2636 2637 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 2638 if (!xfs_kset) { 2639 error = -ENOMEM; 2640 goto out_debugfs_unregister; 2641 } 2642 2643 xfsstats.xs_kobj.kobject.kset = xfs_kset; 2644 2645 xfsstats.xs_stats = alloc_percpu(struct xfsstats); 2646 if (!xfsstats.xs_stats) { 2647 error = -ENOMEM; 2648 goto out_kset_unregister; 2649 } 2650 2651 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, 2652 "stats"); 2653 if (error) 2654 goto out_free_stats; 2655 2656 error = xchk_global_stats_setup(xfs_debugfs); 2657 if (error) 2658 goto out_remove_stats_kobj; 2659 2660 #ifdef DEBUG 2661 xfs_dbg_kobj.kobject.kset = xfs_kset; 2662 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 2663 if (error) 2664 goto out_remove_scrub_stats; 2665 #endif 2666 2667 error = xfs_qm_init(); 2668 if (error) 2669 goto out_remove_dbg_kobj; 2670 2671 error = register_filesystem(&xfs_fs_type); 2672 if (error) 2673 goto out_qm_exit; 2674 return 0; 2675 2676 out_qm_exit: 2677 xfs_qm_exit(); 2678 out_remove_dbg_kobj: 2679 #ifdef DEBUG 2680 xfs_sysfs_del(&xfs_dbg_kobj); 2681 out_remove_scrub_stats: 2682 #endif 2683 xchk_global_stats_teardown(); 2684 out_remove_stats_kobj: 2685 xfs_sysfs_del(&xfsstats.xs_kobj); 2686 out_free_stats: 2687 free_percpu(xfsstats.xs_stats); 2688 out_kset_unregister: 2689 kset_unregister(xfs_kset); 2690 out_debugfs_unregister: 2691 debugfs_remove(xfs_debugfs); 2692 xfs_sysctl_unregister(); 2693 out_cleanup_procfs: 2694 xfs_cleanup_procfs(); 2695 out_mru_cache_uninit: 2696 xfs_mru_cache_uninit(); 2697 out_destroy_wq: 2698 xfs_destroy_workqueues(); 2699 out_destroy_caches: 2700 xfs_destroy_caches(); 2701 out: 2702 return error; 2703 } 2704 2705 STATIC void __exit 2706 exit_xfs_fs(void) 2707 { 2708 xfs_qm_exit(); 2709 unregister_filesystem(&xfs_fs_type); 2710 #ifdef DEBUG 2711 xfs_sysfs_del(&xfs_dbg_kobj); 2712 #endif 2713 xchk_global_stats_teardown(); 2714 xfs_sysfs_del(&xfsstats.xs_kobj); 2715 free_percpu(xfsstats.xs_stats); 2716 kset_unregister(xfs_kset); 2717 debugfs_remove(xfs_debugfs); 2718 xfs_sysctl_unregister(); 2719 xfs_cleanup_procfs(); 2720 xfs_mru_cache_uninit(); 2721 xfs_destroy_workqueues(); 2722 xfs_destroy_caches(); 2723 xfs_uuid_table_free(); 2724 } 2725 2726 module_init(init_xfs_fs); 2727 module_exit(exit_xfs_fs); 2728 2729 MODULE_AUTHOR("Silicon Graphics, Inc."); 2730 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); 2731 MODULE_LICENSE("GPL"); 2732