1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_btree.h" 16 #include "xfs_bmap.h" 17 #include "xfs_alloc.h" 18 #include "xfs_fsops.h" 19 #include "xfs_trans.h" 20 #include "xfs_buf_item.h" 21 #include "xfs_log.h" 22 #include "xfs_log_priv.h" 23 #include "xfs_dir2.h" 24 #include "xfs_extfree_item.h" 25 #include "xfs_mru_cache.h" 26 #include "xfs_inode_item.h" 27 #include "xfs_icache.h" 28 #include "xfs_trace.h" 29 #include "xfs_icreate_item.h" 30 #include "xfs_filestream.h" 31 #include "xfs_quota.h" 32 #include "xfs_sysfs.h" 33 #include "xfs_ondisk.h" 34 #include "xfs_rmap_item.h" 35 #include "xfs_refcount_item.h" 36 #include "xfs_bmap_item.h" 37 #include "xfs_reflink.h" 38 #include "xfs_pwork.h" 39 #include "xfs_ag.h" 40 #include "xfs_defer.h" 41 #include "xfs_attr_item.h" 42 #include "xfs_xattr.h" 43 #include "xfs_iunlink_item.h" 44 #include "xfs_dahash_test.h" 45 #include "xfs_rtbitmap.h" 46 #include "xfs_exchmaps_item.h" 47 #include "xfs_parent.h" 48 #include "xfs_rtalloc.h" 49 #include "xfs_zone_alloc.h" 50 #include "scrub/stats.h" 51 #include "scrub/rcbag_btree.h" 52 53 #include <linux/magic.h> 54 #include <linux/fs_context.h> 55 #include <linux/fs_parser.h> 56 57 static const struct super_operations xfs_super_operations; 58 59 static struct dentry *xfs_debugfs; /* top-level xfs debugfs dir */ 60 static struct kset *xfs_kset; /* top-level xfs sysfs dir */ 61 #ifdef DEBUG 62 static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ 63 #endif 64 65 enum xfs_dax_mode { 66 XFS_DAX_INODE = 0, 67 XFS_DAX_ALWAYS = 1, 68 XFS_DAX_NEVER = 2, 69 }; 70 71 /* Were quota mount options provided? Must use the upper 16 bits of qflags. */ 72 #define XFS_QFLAGS_MNTOPTS (1U << 31) 73 74 static void 75 xfs_mount_set_dax_mode( 76 struct xfs_mount *mp, 77 enum xfs_dax_mode mode) 78 { 79 switch (mode) { 80 case XFS_DAX_INODE: 81 mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); 82 break; 83 case XFS_DAX_ALWAYS: 84 mp->m_features |= XFS_FEAT_DAX_ALWAYS; 85 mp->m_features &= ~XFS_FEAT_DAX_NEVER; 86 break; 87 case XFS_DAX_NEVER: 88 mp->m_features |= XFS_FEAT_DAX_NEVER; 89 mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; 90 break; 91 } 92 } 93 94 static const struct constant_table dax_param_enums[] = { 95 {"inode", XFS_DAX_INODE }, 96 {"always", XFS_DAX_ALWAYS }, 97 {"never", XFS_DAX_NEVER }, 98 {} 99 }; 100 101 /* 102 * Table driven mount option parser. 103 */ 104 enum { 105 Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, 106 Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, 107 Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, 108 Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, 109 Opt_largeio, Opt_nolargeio, 110 Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, 111 Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, 112 Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, 113 Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones, 114 Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, 115 }; 116 117 #define fsparam_dead(NAME) \ 118 __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) 119 120 static const struct fs_parameter_spec xfs_fs_parameters[] = { 121 /* 122 * These mount options were supposed to be deprecated in September 2025 123 * but the deprecation warning was buggy, so not all users were 124 * notified. The deprecation is now obnoxiously loud and postponed to 125 * September 2030. 126 */ 127 fsparam_dead("attr2"), 128 fsparam_dead("noattr2"), 129 fsparam_dead("ikeep"), 130 fsparam_dead("noikeep"), 131 132 fsparam_u32("logbufs", Opt_logbufs), 133 fsparam_string("logbsize", Opt_logbsize), 134 fsparam_string("logdev", Opt_logdev), 135 fsparam_string("rtdev", Opt_rtdev), 136 fsparam_flag("wsync", Opt_wsync), 137 fsparam_flag("noalign", Opt_noalign), 138 fsparam_flag("swalloc", Opt_swalloc), 139 fsparam_u32("sunit", Opt_sunit), 140 fsparam_u32("swidth", Opt_swidth), 141 fsparam_flag("nouuid", Opt_nouuid), 142 fsparam_flag("grpid", Opt_grpid), 143 fsparam_flag("nogrpid", Opt_nogrpid), 144 fsparam_flag("bsdgroups", Opt_bsdgroups), 145 fsparam_flag("sysvgroups", Opt_sysvgroups), 146 fsparam_string("allocsize", Opt_allocsize), 147 fsparam_flag("norecovery", Opt_norecovery), 148 fsparam_flag("inode64", Opt_inode64), 149 fsparam_flag("inode32", Opt_inode32), 150 fsparam_flag("largeio", Opt_largeio), 151 fsparam_flag("nolargeio", Opt_nolargeio), 152 fsparam_flag("filestreams", Opt_filestreams), 153 fsparam_flag("quota", Opt_quota), 154 fsparam_flag("noquota", Opt_noquota), 155 fsparam_flag("usrquota", Opt_usrquota), 156 fsparam_flag("grpquota", Opt_grpquota), 157 fsparam_flag("prjquota", Opt_prjquota), 158 fsparam_flag("uquota", Opt_uquota), 159 fsparam_flag("gquota", Opt_gquota), 160 fsparam_flag("pquota", Opt_pquota), 161 fsparam_flag("uqnoenforce", Opt_uqnoenforce), 162 fsparam_flag("gqnoenforce", Opt_gqnoenforce), 163 fsparam_flag("pqnoenforce", Opt_pqnoenforce), 164 fsparam_flag("qnoenforce", Opt_qnoenforce), 165 fsparam_flag("discard", Opt_discard), 166 fsparam_flag("nodiscard", Opt_nodiscard), 167 fsparam_flag("dax", Opt_dax), 168 fsparam_enum("dax", Opt_dax_enum, dax_param_enums), 169 fsparam_u32("max_open_zones", Opt_max_open_zones), 170 fsparam_flag("lifetime", Opt_lifetime), 171 fsparam_flag("nolifetime", Opt_nolifetime), 172 fsparam_string("max_atomic_write", Opt_max_atomic_write), 173 {} 174 }; 175 176 struct proc_xfs_info { 177 uint64_t flag; 178 char *str; 179 }; 180 181 static int 182 xfs_fs_show_options( 183 struct seq_file *m, 184 struct dentry *root) 185 { 186 static struct proc_xfs_info xfs_info_set[] = { 187 /* the few simple ones we can get from the mount struct */ 188 { XFS_FEAT_WSYNC, ",wsync" }, 189 { XFS_FEAT_NOALIGN, ",noalign" }, 190 { XFS_FEAT_SWALLOC, ",swalloc" }, 191 { XFS_FEAT_NOUUID, ",nouuid" }, 192 { XFS_FEAT_NORECOVERY, ",norecovery" }, 193 { XFS_FEAT_FILESTREAMS, ",filestreams" }, 194 { XFS_FEAT_GRPID, ",grpid" }, 195 { XFS_FEAT_DISCARD, ",discard" }, 196 { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, 197 { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, 198 { XFS_FEAT_DAX_NEVER, ",dax=never" }, 199 { XFS_FEAT_NOLIFETIME, ",nolifetime" }, 200 { 0, NULL } 201 }; 202 struct xfs_mount *mp = XFS_M(root->d_sb); 203 struct proc_xfs_info *xfs_infop; 204 205 for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { 206 if (mp->m_features & xfs_infop->flag) 207 seq_puts(m, xfs_infop->str); 208 } 209 210 seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); 211 212 if (xfs_has_allocsize(mp)) 213 seq_printf(m, ",allocsize=%dk", 214 (1 << mp->m_allocsize_log) >> 10); 215 216 if (mp->m_logbufs > 0) 217 seq_printf(m, ",logbufs=%d", mp->m_logbufs); 218 if (mp->m_logbsize > 0) 219 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); 220 221 if (mp->m_logname) 222 seq_show_option(m, "logdev", mp->m_logname); 223 if (mp->m_rtname) 224 seq_show_option(m, "rtdev", mp->m_rtname); 225 226 if (mp->m_dalign > 0) 227 seq_printf(m, ",sunit=%d", 228 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 229 if (mp->m_swidth > 0) 230 seq_printf(m, ",swidth=%d", 231 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 232 233 if (mp->m_qflags & XFS_UQUOTA_ENFD) 234 seq_puts(m, ",usrquota"); 235 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 236 seq_puts(m, ",uqnoenforce"); 237 238 if (mp->m_qflags & XFS_PQUOTA_ENFD) 239 seq_puts(m, ",prjquota"); 240 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 241 seq_puts(m, ",pqnoenforce"); 242 243 if (mp->m_qflags & XFS_GQUOTA_ENFD) 244 seq_puts(m, ",grpquota"); 245 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 246 seq_puts(m, ",gqnoenforce"); 247 248 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 249 seq_puts(m, ",noquota"); 250 251 if (mp->m_max_open_zones) 252 seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones); 253 if (mp->m_awu_max_bytes) 254 seq_printf(m, ",max_atomic_write=%lluk", 255 mp->m_awu_max_bytes >> 10); 256 257 return 0; 258 } 259 260 static bool 261 xfs_set_inode_alloc_perag( 262 struct xfs_perag *pag, 263 xfs_ino_t ino, 264 xfs_agnumber_t max_metadata) 265 { 266 if (!xfs_is_inode32(pag_mount(pag))) { 267 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 268 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 269 return false; 270 } 271 272 if (ino > XFS_MAXINUMBER_32) { 273 clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 274 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 275 return false; 276 } 277 278 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 279 if (pag_agno(pag) < max_metadata) 280 set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 281 else 282 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 283 return true; 284 } 285 286 /* 287 * Set parameters for inode allocation heuristics, taking into account 288 * filesystem size and inode32/inode64 mount options; i.e. specifically 289 * whether or not XFS_FEAT_SMALL_INUMS is set. 290 * 291 * Inode allocation patterns are altered only if inode32 is requested 292 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. 293 * If altered, XFS_OPSTATE_INODE32 is set as well. 294 * 295 * An agcount independent of that in the mount structure is provided 296 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated 297 * to the potentially higher ag count. 298 * 299 * Returns the maximum AG index which may contain inodes. 300 */ 301 xfs_agnumber_t 302 xfs_set_inode_alloc( 303 struct xfs_mount *mp, 304 xfs_agnumber_t agcount) 305 { 306 xfs_agnumber_t index; 307 xfs_agnumber_t maxagi = 0; 308 xfs_sb_t *sbp = &mp->m_sb; 309 xfs_agnumber_t max_metadata; 310 xfs_agino_t agino; 311 xfs_ino_t ino; 312 313 /* 314 * Calculate how much should be reserved for inodes to meet 315 * the max inode percentage. Used only for inode32. 316 */ 317 if (M_IGEO(mp)->maxicount) { 318 uint64_t icount; 319 320 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 321 do_div(icount, 100); 322 icount += sbp->sb_agblocks - 1; 323 do_div(icount, sbp->sb_agblocks); 324 max_metadata = icount; 325 } else { 326 max_metadata = agcount; 327 } 328 329 /* Get the last possible inode in the filesystem */ 330 agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); 331 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 332 333 /* 334 * If user asked for no more than 32-bit inodes, and the fs is 335 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter 336 * the allocator to accommodate the request. 337 */ 338 if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) 339 xfs_set_inode32(mp); 340 else 341 xfs_clear_inode32(mp); 342 343 for (index = 0; index < agcount; index++) { 344 struct xfs_perag *pag; 345 346 ino = XFS_AGINO_TO_INO(mp, index, agino); 347 348 pag = xfs_perag_get(mp, index); 349 if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) 350 maxagi++; 351 xfs_perag_put(pag); 352 } 353 354 return xfs_is_inode32(mp) ? maxagi : agcount; 355 } 356 357 static int 358 xfs_setup_dax_always( 359 struct xfs_mount *mp) 360 { 361 if (!mp->m_ddev_targp->bt_daxdev && 362 (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { 363 xfs_alert(mp, 364 "DAX unsupported by block device. Turning off DAX."); 365 goto disable_dax; 366 } 367 368 if (mp->m_super->s_blocksize != PAGE_SIZE) { 369 xfs_alert(mp, 370 "DAX not supported for blocksize. Turning off DAX."); 371 goto disable_dax; 372 } 373 374 if (xfs_has_reflink(mp) && 375 bdev_is_partition(mp->m_ddev_targp->bt_bdev)) { 376 xfs_alert(mp, 377 "DAX and reflink cannot work with multi-partitions!"); 378 return -EINVAL; 379 } 380 381 return 0; 382 383 disable_dax: 384 xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); 385 return 0; 386 } 387 388 STATIC int 389 xfs_blkdev_get( 390 xfs_mount_t *mp, 391 const char *name, 392 struct file **bdev_filep) 393 { 394 int error = 0; 395 blk_mode_t mode; 396 397 mode = sb_open_mode(mp->m_super->s_flags); 398 *bdev_filep = bdev_file_open_by_path(name, mode, 399 mp->m_super, &fs_holder_ops); 400 if (IS_ERR(*bdev_filep)) { 401 error = PTR_ERR(*bdev_filep); 402 *bdev_filep = NULL; 403 xfs_warn(mp, "Invalid device [%s], error=%d", name, error); 404 } 405 406 return error; 407 } 408 409 STATIC void 410 xfs_shutdown_devices( 411 struct xfs_mount *mp) 412 { 413 /* 414 * Udev is triggered whenever anyone closes a block device or unmounts 415 * a file systemm on a block device. 416 * The default udev rules invoke blkid to read the fs super and create 417 * symlinks to the bdev under /dev/disk. For this, it uses buffered 418 * reads through the page cache. 419 * 420 * xfs_db also uses buffered reads to examine metadata. There is no 421 * coordination between xfs_db and udev, which means that they can run 422 * concurrently. Note there is no coordination between the kernel and 423 * blkid either. 424 * 425 * On a system with 64k pages, the page cache can cache the superblock 426 * and the root inode (and hence the root directory) with the same 64k 427 * page. If udev spawns blkid after the mkfs and the system is busy 428 * enough that it is still running when xfs_db starts up, they'll both 429 * read from the same page in the pagecache. 430 * 431 * The unmount writes updated inode metadata to disk directly. The XFS 432 * buffer cache does not use the bdev pagecache, so it needs to 433 * invalidate that pagecache on unmount. If the above scenario occurs, 434 * the pagecache no longer reflects what's on disk, xfs_db reads the 435 * stale metadata, and fails to find /a. Most of the time this succeeds 436 * because closing a bdev invalidates the page cache, but when processes 437 * race, everyone loses. 438 */ 439 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 440 blkdev_issue_flush(mp->m_logdev_targp->bt_bdev); 441 invalidate_bdev(mp->m_logdev_targp->bt_bdev); 442 } 443 if (mp->m_rtdev_targp) { 444 blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); 445 invalidate_bdev(mp->m_rtdev_targp->bt_bdev); 446 } 447 blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); 448 invalidate_bdev(mp->m_ddev_targp->bt_bdev); 449 } 450 451 /* 452 * The file system configurations are: 453 * (1) device (partition) with data and internal log 454 * (2) logical volume with data and log subvolumes. 455 * (3) logical volume with data, log, and realtime subvolumes. 456 * 457 * We only have to handle opening the log and realtime volumes here if 458 * they are present. The data subvolume has already been opened by 459 * get_sb_bdev() and is stored in sb->s_bdev. 460 */ 461 STATIC int 462 xfs_open_devices( 463 struct xfs_mount *mp) 464 { 465 struct super_block *sb = mp->m_super; 466 struct block_device *ddev = sb->s_bdev; 467 struct file *logdev_file = NULL, *rtdev_file = NULL; 468 int error; 469 470 /* 471 * Open real time and log devices - order is important. 472 */ 473 if (mp->m_logname) { 474 error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file); 475 if (error) 476 return error; 477 } 478 479 if (mp->m_rtname) { 480 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file); 481 if (error) 482 goto out_close_logdev; 483 484 if (file_bdev(rtdev_file) == ddev || 485 (logdev_file && 486 file_bdev(rtdev_file) == file_bdev(logdev_file))) { 487 xfs_warn(mp, 488 "Cannot mount filesystem with identical rtdev and ddev/logdev."); 489 error = -EINVAL; 490 goto out_close_rtdev; 491 } 492 } 493 494 /* 495 * Setup xfs_mount buffer target pointers 496 */ 497 mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file); 498 if (IS_ERR(mp->m_ddev_targp)) { 499 error = PTR_ERR(mp->m_ddev_targp); 500 mp->m_ddev_targp = NULL; 501 goto out_close_rtdev; 502 } 503 504 if (rtdev_file) { 505 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file); 506 if (IS_ERR(mp->m_rtdev_targp)) { 507 error = PTR_ERR(mp->m_rtdev_targp); 508 mp->m_rtdev_targp = NULL; 509 goto out_free_ddev_targ; 510 } 511 } 512 513 if (logdev_file && file_bdev(logdev_file) != ddev) { 514 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file); 515 if (IS_ERR(mp->m_logdev_targp)) { 516 error = PTR_ERR(mp->m_logdev_targp); 517 mp->m_logdev_targp = NULL; 518 goto out_free_rtdev_targ; 519 } 520 } else { 521 mp->m_logdev_targp = mp->m_ddev_targp; 522 /* Handle won't be used, drop it */ 523 if (logdev_file) 524 bdev_fput(logdev_file); 525 } 526 527 return 0; 528 529 out_free_rtdev_targ: 530 if (mp->m_rtdev_targp) 531 xfs_free_buftarg(mp->m_rtdev_targp); 532 out_free_ddev_targ: 533 xfs_free_buftarg(mp->m_ddev_targp); 534 out_close_rtdev: 535 if (rtdev_file) 536 bdev_fput(rtdev_file); 537 out_close_logdev: 538 if (logdev_file) 539 bdev_fput(logdev_file); 540 return error; 541 } 542 543 /* 544 * Setup xfs_mount buffer target pointers based on superblock 545 */ 546 STATIC int 547 xfs_setup_devices( 548 struct xfs_mount *mp) 549 { 550 int error; 551 552 error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize, 553 mp->m_sb.sb_dblocks); 554 if (error) 555 return error; 556 557 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 558 unsigned int log_sector_size = BBSIZE; 559 560 if (xfs_has_sector(mp)) 561 log_sector_size = mp->m_sb.sb_logsectsize; 562 error = xfs_configure_buftarg(mp->m_logdev_targp, 563 log_sector_size, mp->m_sb.sb_logblocks); 564 if (error) 565 return error; 566 } 567 568 if (mp->m_sb.sb_rtstart) { 569 if (mp->m_rtdev_targp) { 570 xfs_warn(mp, 571 "can't use internal and external rtdev at the same time"); 572 return -EINVAL; 573 } 574 mp->m_rtdev_targp = mp->m_ddev_targp; 575 } else if (mp->m_rtname) { 576 error = xfs_configure_buftarg(mp->m_rtdev_targp, 577 mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks); 578 if (error) 579 return error; 580 } 581 582 return 0; 583 } 584 585 STATIC int 586 xfs_init_mount_workqueues( 587 struct xfs_mount *mp) 588 { 589 mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", 590 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 591 1, mp->m_super->s_id); 592 if (!mp->m_buf_workqueue) 593 goto out; 594 595 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 596 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 597 0, mp->m_super->s_id); 598 if (!mp->m_unwritten_workqueue) 599 goto out_destroy_buf; 600 601 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 602 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 603 0, mp->m_super->s_id); 604 if (!mp->m_reclaim_workqueue) 605 goto out_destroy_unwritten; 606 607 mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", 608 XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 609 0, mp->m_super->s_id); 610 if (!mp->m_blockgc_wq) 611 goto out_destroy_reclaim; 612 613 mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", 614 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 615 1, mp->m_super->s_id); 616 if (!mp->m_inodegc_wq) 617 goto out_destroy_blockgc; 618 619 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", 620 XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0, 621 mp->m_super->s_id); 622 if (!mp->m_sync_workqueue) 623 goto out_destroy_inodegc; 624 625 return 0; 626 627 out_destroy_inodegc: 628 destroy_workqueue(mp->m_inodegc_wq); 629 out_destroy_blockgc: 630 destroy_workqueue(mp->m_blockgc_wq); 631 out_destroy_reclaim: 632 destroy_workqueue(mp->m_reclaim_workqueue); 633 out_destroy_unwritten: 634 destroy_workqueue(mp->m_unwritten_workqueue); 635 out_destroy_buf: 636 destroy_workqueue(mp->m_buf_workqueue); 637 out: 638 return -ENOMEM; 639 } 640 641 STATIC void 642 xfs_destroy_mount_workqueues( 643 struct xfs_mount *mp) 644 { 645 destroy_workqueue(mp->m_sync_workqueue); 646 destroy_workqueue(mp->m_blockgc_wq); 647 destroy_workqueue(mp->m_inodegc_wq); 648 destroy_workqueue(mp->m_reclaim_workqueue); 649 destroy_workqueue(mp->m_unwritten_workqueue); 650 destroy_workqueue(mp->m_buf_workqueue); 651 } 652 653 static void 654 xfs_flush_inodes_worker( 655 struct work_struct *work) 656 { 657 struct xfs_mount *mp = container_of(work, struct xfs_mount, 658 m_flush_inodes_work); 659 struct super_block *sb = mp->m_super; 660 661 if (down_read_trylock(&sb->s_umount)) { 662 sync_inodes_sb(sb); 663 up_read(&sb->s_umount); 664 } 665 } 666 667 /* 668 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK 669 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting 670 * for IO to complete so that we effectively throttle multiple callers to the 671 * rate at which IO is completing. 672 */ 673 void 674 xfs_flush_inodes( 675 struct xfs_mount *mp) 676 { 677 /* 678 * If flush_work() returns true then that means we waited for a flush 679 * which was already in progress. Don't bother running another scan. 680 */ 681 if (flush_work(&mp->m_flush_inodes_work)) 682 return; 683 684 queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); 685 flush_work(&mp->m_flush_inodes_work); 686 } 687 688 /* Catch misguided souls that try to use this interface on XFS */ 689 STATIC struct inode * 690 xfs_fs_alloc_inode( 691 struct super_block *sb) 692 { 693 BUG(); 694 return NULL; 695 } 696 697 /* 698 * Now that the generic code is guaranteed not to be accessing 699 * the linux inode, we can inactivate and reclaim the inode. 700 */ 701 STATIC void 702 xfs_fs_destroy_inode( 703 struct inode *inode) 704 { 705 struct xfs_inode *ip = XFS_I(inode); 706 707 trace_xfs_destroy_inode(ip); 708 709 ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 710 XFS_STATS_INC(ip->i_mount, vn_rele); 711 XFS_STATS_INC(ip->i_mount, vn_remove); 712 xfs_inode_mark_reclaimable(ip); 713 } 714 715 static void 716 xfs_fs_dirty_inode( 717 struct inode *inode, 718 int flags) 719 { 720 struct xfs_inode *ip = XFS_I(inode); 721 struct xfs_mount *mp = ip->i_mount; 722 struct xfs_trans *tp; 723 724 if (!(inode->i_sb->s_flags & SB_LAZYTIME)) 725 return; 726 727 /* 728 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) 729 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed 730 * in flags possibly together with I_DIRTY_SYNC. 731 */ 732 if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) 733 return; 734 735 if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) 736 return; 737 xfs_ilock(ip, XFS_ILOCK_EXCL); 738 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 739 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); 740 xfs_trans_commit(tp); 741 } 742 743 /* 744 * Slab object creation initialisation for the XFS inode. 745 * This covers only the idempotent fields in the XFS inode; 746 * all other fields need to be initialised on allocation 747 * from the slab. This avoids the need to repeatedly initialise 748 * fields in the xfs inode that left in the initialise state 749 * when freeing the inode. 750 */ 751 STATIC void 752 xfs_fs_inode_init_once( 753 void *inode) 754 { 755 struct xfs_inode *ip = inode; 756 757 memset(ip, 0, sizeof(struct xfs_inode)); 758 759 /* vfs inode */ 760 inode_init_once(VFS_I(ip)); 761 762 /* xfs inode */ 763 atomic_set(&ip->i_pincount, 0); 764 spin_lock_init(&ip->i_flags_lock); 765 init_rwsem(&ip->i_lock); 766 } 767 768 /* 769 * We do an unlocked check for XFS_IDONTCACHE here because we are already 770 * serialised against cache hits here via the inode->i_lock and igrab() in 771 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be 772 * racing with us, and it avoids needing to grab a spinlock here for every inode 773 * we drop the final reference on. 774 */ 775 STATIC int 776 xfs_fs_drop_inode( 777 struct inode *inode) 778 { 779 struct xfs_inode *ip = XFS_I(inode); 780 781 /* 782 * If this unlinked inode is in the middle of recovery, don't 783 * drop the inode just yet; log recovery will take care of 784 * that. See the comment for this inode flag. 785 */ 786 if (ip->i_flags & XFS_IRECOVERY) { 787 ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); 788 return 0; 789 } 790 791 return inode_generic_drop(inode); 792 } 793 794 STATIC void 795 xfs_fs_evict_inode( 796 struct inode *inode) 797 { 798 if (IS_DAX(inode)) 799 dax_break_layout_final(inode); 800 801 truncate_inode_pages_final(&inode->i_data); 802 clear_inode(inode); 803 804 if (IS_ENABLED(CONFIG_XFS_RT) && 805 S_ISREG(inode->i_mode) && inode->i_private) { 806 xfs_open_zone_put(inode->i_private); 807 inode->i_private = NULL; 808 } 809 } 810 811 static void 812 xfs_mount_free( 813 struct xfs_mount *mp) 814 { 815 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) 816 xfs_free_buftarg(mp->m_logdev_targp); 817 if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) 818 xfs_free_buftarg(mp->m_rtdev_targp); 819 if (mp->m_ddev_targp) 820 xfs_free_buftarg(mp->m_ddev_targp); 821 822 debugfs_remove(mp->m_debugfs); 823 kfree(mp->m_rtname); 824 kfree(mp->m_logname); 825 kfree(mp); 826 } 827 828 STATIC int 829 xfs_fs_sync_fs( 830 struct super_block *sb, 831 int wait) 832 { 833 struct xfs_mount *mp = XFS_M(sb); 834 int error; 835 836 trace_xfs_fs_sync_fs(mp, __return_address); 837 838 /* 839 * Doing anything during the async pass would be counterproductive. 840 */ 841 if (!wait) 842 return 0; 843 844 error = xfs_log_force(mp, XFS_LOG_SYNC); 845 if (error) 846 return error; 847 848 /* 849 * If we are called with page faults frozen out, it means we are about 850 * to freeze the transaction subsystem. Take the opportunity to shut 851 * down inodegc because once SB_FREEZE_FS is set it's too late to 852 * prevent inactivation races with freeze. The fs doesn't get called 853 * again by the freezing process until after SB_FREEZE_FS has been set, 854 * so it's now or never. Same logic applies to speculative allocation 855 * garbage collection. 856 * 857 * We don't care if this is a normal syncfs call that does this or 858 * freeze that does this - we can run this multiple times without issue 859 * and we won't race with a restart because a restart can only occur 860 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. 861 */ 862 if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { 863 xfs_inodegc_stop(mp); 864 xfs_blockgc_stop(mp); 865 xfs_zone_gc_stop(mp); 866 } 867 868 return 0; 869 } 870 871 static xfs_extlen_t 872 xfs_internal_log_size( 873 struct xfs_mount *mp) 874 { 875 if (!mp->m_sb.sb_logstart) 876 return 0; 877 return mp->m_sb.sb_logblocks; 878 } 879 880 static void 881 xfs_statfs_data( 882 struct xfs_mount *mp, 883 struct kstatfs *st) 884 { 885 int64_t fdblocks = 886 xfs_sum_freecounter(mp, XC_FREE_BLOCKS); 887 888 /* make sure st->f_bfree does not underflow */ 889 st->f_bfree = max(0LL, 890 fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS)); 891 892 /* 893 * sb_dblocks can change during growfs, but nothing cares about reporting 894 * the old or new value during growfs. 895 */ 896 st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp); 897 } 898 899 /* 900 * When stat(v)fs is called on a file with the realtime bit set or a directory 901 * with the rtinherit bit, report freespace information for the RT device 902 * instead of the main data device. 903 */ 904 static void 905 xfs_statfs_rt( 906 struct xfs_mount *mp, 907 struct kstatfs *st) 908 { 909 st->f_bfree = xfs_rtbxlen_to_blen(mp, 910 xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS)); 911 st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp, 912 mp->m_free[XC_FREE_RTEXTENTS].res_total); 913 } 914 915 static void 916 xfs_statfs_inodes( 917 struct xfs_mount *mp, 918 struct kstatfs *st) 919 { 920 uint64_t icount = percpu_counter_sum(&mp->m_icount); 921 uint64_t ifree = percpu_counter_sum(&mp->m_ifree); 922 uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree); 923 924 st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 925 if (M_IGEO(mp)->maxicount) 926 st->f_files = min_t(typeof(st->f_files), st->f_files, 927 M_IGEO(mp)->maxicount); 928 929 /* If sb_icount overshot maxicount, report actual allocation */ 930 st->f_files = max_t(typeof(st->f_files), st->f_files, 931 mp->m_sb.sb_icount); 932 933 /* Make sure st->f_ffree does not underflow */ 934 st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree)); 935 } 936 937 STATIC int 938 xfs_fs_statfs( 939 struct dentry *dentry, 940 struct kstatfs *st) 941 { 942 struct xfs_mount *mp = XFS_M(dentry->d_sb); 943 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 944 945 /* 946 * Expedite background inodegc but don't wait. We do not want to block 947 * here waiting hours for a billion extent file to be truncated. 948 */ 949 xfs_inodegc_push(mp); 950 951 st->f_type = XFS_SUPER_MAGIC; 952 st->f_namelen = MAXNAMELEN - 1; 953 st->f_bsize = mp->m_sb.sb_blocksize; 954 st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev)); 955 956 xfs_statfs_data(mp, st); 957 xfs_statfs_inodes(mp, st); 958 959 if (XFS_IS_REALTIME_MOUNT(mp) && 960 (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) 961 xfs_statfs_rt(mp, st); 962 963 if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && 964 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 965 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 966 xfs_qm_statvfs(ip, st); 967 968 /* 969 * XFS does not distinguish between blocks available to privileged and 970 * unprivileged users. 971 */ 972 st->f_bavail = st->f_bfree; 973 return 0; 974 } 975 976 STATIC void 977 xfs_save_resvblks( 978 struct xfs_mount *mp) 979 { 980 enum xfs_free_counter i; 981 982 for (i = 0; i < XC_FREE_NR; i++) { 983 mp->m_free[i].res_saved = mp->m_free[i].res_total; 984 xfs_reserve_blocks(mp, i, 0); 985 } 986 } 987 988 STATIC void 989 xfs_restore_resvblks( 990 struct xfs_mount *mp) 991 { 992 uint64_t resblks; 993 enum xfs_free_counter i; 994 995 for (i = 0; i < XC_FREE_NR; i++) { 996 if (mp->m_free[i].res_saved) { 997 resblks = mp->m_free[i].res_saved; 998 mp->m_free[i].res_saved = 0; 999 } else 1000 resblks = xfs_default_resblks(mp, i); 1001 xfs_reserve_blocks(mp, i, resblks); 1002 } 1003 } 1004 1005 /* 1006 * Second stage of a freeze. The data is already frozen so we only 1007 * need to take care of the metadata. Once that's done sync the superblock 1008 * to the log to dirty it in case of a crash while frozen. This ensures that we 1009 * will recover the unlinked inode lists on the next mount. 1010 */ 1011 STATIC int 1012 xfs_fs_freeze( 1013 struct super_block *sb) 1014 { 1015 struct xfs_mount *mp = XFS_M(sb); 1016 unsigned int flags; 1017 int ret; 1018 1019 /* 1020 * The filesystem is now frozen far enough that memory reclaim 1021 * cannot safely operate on the filesystem. Hence we need to 1022 * set a GFP_NOFS context here to avoid recursion deadlocks. 1023 */ 1024 flags = memalloc_nofs_save(); 1025 xfs_save_resvblks(mp); 1026 ret = xfs_log_quiesce(mp); 1027 memalloc_nofs_restore(flags); 1028 1029 /* 1030 * For read-write filesystems, we need to restart the inodegc on error 1031 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not 1032 * going to be run to restart it now. We are at SB_FREEZE_FS level 1033 * here, so we can restart safely without racing with a stop in 1034 * xfs_fs_sync_fs(). 1035 */ 1036 if (ret && !xfs_is_readonly(mp)) { 1037 xfs_blockgc_start(mp); 1038 xfs_inodegc_start(mp); 1039 xfs_zone_gc_start(mp); 1040 } 1041 1042 return ret; 1043 } 1044 1045 STATIC int 1046 xfs_fs_unfreeze( 1047 struct super_block *sb) 1048 { 1049 struct xfs_mount *mp = XFS_M(sb); 1050 1051 xfs_restore_resvblks(mp); 1052 xfs_log_work_queue(mp); 1053 1054 /* 1055 * Don't reactivate the inodegc worker on a readonly filesystem because 1056 * inodes are sent directly to reclaim. Don't reactivate the blockgc 1057 * worker because there are no speculative preallocations on a readonly 1058 * filesystem. 1059 */ 1060 if (!xfs_is_readonly(mp)) { 1061 xfs_zone_gc_start(mp); 1062 xfs_blockgc_start(mp); 1063 xfs_inodegc_start(mp); 1064 } 1065 1066 return 0; 1067 } 1068 1069 /* 1070 * This function fills in xfs_mount_t fields based on mount args. 1071 * Note: the superblock _has_ now been read in. 1072 */ 1073 STATIC int 1074 xfs_finish_flags( 1075 struct xfs_mount *mp) 1076 { 1077 /* Fail a mount where the logbuf is smaller than the log stripe */ 1078 if (xfs_has_logv2(mp)) { 1079 if (mp->m_logbsize <= 0 && 1080 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { 1081 mp->m_logbsize = mp->m_sb.sb_logsunit; 1082 } else if (mp->m_logbsize > 0 && 1083 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1084 xfs_warn(mp, 1085 "logbuf size must be greater than or equal to log stripe size"); 1086 return -EINVAL; 1087 } 1088 } else { 1089 /* Fail a mount if the logbuf is larger than 32K */ 1090 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1091 xfs_warn(mp, 1092 "logbuf size for version 1 logs must be 16K or 32K"); 1093 return -EINVAL; 1094 } 1095 } 1096 1097 /* 1098 * prohibit r/w mounts of read-only filesystems 1099 */ 1100 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { 1101 xfs_warn(mp, 1102 "cannot mount a read-only filesystem as read-write"); 1103 return -EROFS; 1104 } 1105 1106 if ((mp->m_qflags & XFS_GQUOTA_ACCT) && 1107 (mp->m_qflags & XFS_PQUOTA_ACCT) && 1108 !xfs_has_pquotino(mp)) { 1109 xfs_warn(mp, 1110 "Super block does not support project and group quota together"); 1111 return -EINVAL; 1112 } 1113 1114 if (!xfs_has_zoned(mp)) { 1115 if (mp->m_max_open_zones) { 1116 xfs_warn(mp, 1117 "max_open_zones mount option only supported on zoned file systems."); 1118 return -EINVAL; 1119 } 1120 if (mp->m_features & XFS_FEAT_NOLIFETIME) { 1121 xfs_warn(mp, 1122 "nolifetime mount option only supported on zoned file systems."); 1123 return -EINVAL; 1124 } 1125 } 1126 1127 return 0; 1128 } 1129 1130 static int 1131 xfs_init_percpu_counters( 1132 struct xfs_mount *mp) 1133 { 1134 int error; 1135 int i; 1136 1137 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); 1138 if (error) 1139 return -ENOMEM; 1140 1141 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); 1142 if (error) 1143 goto free_icount; 1144 1145 error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); 1146 if (error) 1147 goto free_ifree; 1148 1149 error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL); 1150 if (error) 1151 goto free_delalloc; 1152 1153 for (i = 0; i < XC_FREE_NR; i++) { 1154 error = percpu_counter_init(&mp->m_free[i].count, 0, 1155 GFP_KERNEL); 1156 if (error) 1157 goto free_freecounters; 1158 } 1159 1160 return 0; 1161 1162 free_freecounters: 1163 while (--i >= 0) 1164 percpu_counter_destroy(&mp->m_free[i].count); 1165 percpu_counter_destroy(&mp->m_delalloc_rtextents); 1166 free_delalloc: 1167 percpu_counter_destroy(&mp->m_delalloc_blks); 1168 free_ifree: 1169 percpu_counter_destroy(&mp->m_ifree); 1170 free_icount: 1171 percpu_counter_destroy(&mp->m_icount); 1172 return -ENOMEM; 1173 } 1174 1175 void 1176 xfs_reinit_percpu_counters( 1177 struct xfs_mount *mp) 1178 { 1179 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); 1180 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); 1181 xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks); 1182 if (!xfs_has_zoned(mp)) 1183 xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, 1184 mp->m_sb.sb_frextents); 1185 } 1186 1187 static void 1188 xfs_destroy_percpu_counters( 1189 struct xfs_mount *mp) 1190 { 1191 enum xfs_free_counter i; 1192 1193 for (i = 0; i < XC_FREE_NR; i++) 1194 percpu_counter_destroy(&mp->m_free[i].count); 1195 percpu_counter_destroy(&mp->m_icount); 1196 percpu_counter_destroy(&mp->m_ifree); 1197 ASSERT(xfs_is_shutdown(mp) || 1198 percpu_counter_sum(&mp->m_delalloc_rtextents) == 0); 1199 percpu_counter_destroy(&mp->m_delalloc_rtextents); 1200 ASSERT(xfs_is_shutdown(mp) || 1201 percpu_counter_sum(&mp->m_delalloc_blks) == 0); 1202 percpu_counter_destroy(&mp->m_delalloc_blks); 1203 } 1204 1205 static int 1206 xfs_inodegc_init_percpu( 1207 struct xfs_mount *mp) 1208 { 1209 struct xfs_inodegc *gc; 1210 int cpu; 1211 1212 mp->m_inodegc = alloc_percpu(struct xfs_inodegc); 1213 if (!mp->m_inodegc) 1214 return -ENOMEM; 1215 1216 for_each_possible_cpu(cpu) { 1217 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1218 gc->cpu = cpu; 1219 gc->mp = mp; 1220 init_llist_head(&gc->list); 1221 gc->items = 0; 1222 gc->error = 0; 1223 INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); 1224 } 1225 return 0; 1226 } 1227 1228 static void 1229 xfs_inodegc_free_percpu( 1230 struct xfs_mount *mp) 1231 { 1232 if (!mp->m_inodegc) 1233 return; 1234 free_percpu(mp->m_inodegc); 1235 } 1236 1237 static void 1238 xfs_fs_put_super( 1239 struct super_block *sb) 1240 { 1241 struct xfs_mount *mp = XFS_M(sb); 1242 1243 xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); 1244 xfs_filestream_unmount(mp); 1245 xfs_unmountfs(mp); 1246 1247 xfs_rtmount_freesb(mp); 1248 xfs_freesb(mp); 1249 xchk_mount_stats_free(mp); 1250 free_percpu(mp->m_stats.xs_stats); 1251 xfs_inodegc_free_percpu(mp); 1252 xfs_destroy_percpu_counters(mp); 1253 xfs_destroy_mount_workqueues(mp); 1254 xfs_shutdown_devices(mp); 1255 } 1256 1257 static long 1258 xfs_fs_nr_cached_objects( 1259 struct super_block *sb, 1260 struct shrink_control *sc) 1261 { 1262 /* Paranoia: catch incorrect calls during mount setup or teardown */ 1263 if (WARN_ON_ONCE(!sb->s_fs_info)) 1264 return 0; 1265 return xfs_reclaim_inodes_count(XFS_M(sb)); 1266 } 1267 1268 static long 1269 xfs_fs_free_cached_objects( 1270 struct super_block *sb, 1271 struct shrink_control *sc) 1272 { 1273 return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); 1274 } 1275 1276 static void 1277 xfs_fs_shutdown( 1278 struct super_block *sb) 1279 { 1280 xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); 1281 } 1282 1283 static int 1284 xfs_fs_show_stats( 1285 struct seq_file *m, 1286 struct dentry *root) 1287 { 1288 struct xfs_mount *mp = XFS_M(root->d_sb); 1289 1290 if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT)) 1291 xfs_zoned_show_stats(m, mp); 1292 return 0; 1293 } 1294 1295 static const struct super_operations xfs_super_operations = { 1296 .alloc_inode = xfs_fs_alloc_inode, 1297 .destroy_inode = xfs_fs_destroy_inode, 1298 .dirty_inode = xfs_fs_dirty_inode, 1299 .drop_inode = xfs_fs_drop_inode, 1300 .evict_inode = xfs_fs_evict_inode, 1301 .put_super = xfs_fs_put_super, 1302 .sync_fs = xfs_fs_sync_fs, 1303 .freeze_fs = xfs_fs_freeze, 1304 .unfreeze_fs = xfs_fs_unfreeze, 1305 .statfs = xfs_fs_statfs, 1306 .show_options = xfs_fs_show_options, 1307 .nr_cached_objects = xfs_fs_nr_cached_objects, 1308 .free_cached_objects = xfs_fs_free_cached_objects, 1309 .shutdown = xfs_fs_shutdown, 1310 .show_stats = xfs_fs_show_stats, 1311 }; 1312 1313 static int 1314 suffix_kstrtoint( 1315 const char *s, 1316 unsigned int base, 1317 int *res) 1318 { 1319 int last, shift_left_factor = 0, _res; 1320 char *value; 1321 int ret = 0; 1322 1323 value = kstrdup(s, GFP_KERNEL); 1324 if (!value) 1325 return -ENOMEM; 1326 1327 last = strlen(value) - 1; 1328 if (value[last] == 'K' || value[last] == 'k') { 1329 shift_left_factor = 10; 1330 value[last] = '\0'; 1331 } 1332 if (value[last] == 'M' || value[last] == 'm') { 1333 shift_left_factor = 20; 1334 value[last] = '\0'; 1335 } 1336 if (value[last] == 'G' || value[last] == 'g') { 1337 shift_left_factor = 30; 1338 value[last] = '\0'; 1339 } 1340 1341 if (kstrtoint(value, base, &_res)) 1342 ret = -EINVAL; 1343 kfree(value); 1344 *res = _res << shift_left_factor; 1345 return ret; 1346 } 1347 1348 static int 1349 suffix_kstrtoull( 1350 const char *s, 1351 unsigned int base, 1352 unsigned long long *res) 1353 { 1354 int last, shift_left_factor = 0; 1355 unsigned long long _res; 1356 char *value; 1357 int ret = 0; 1358 1359 value = kstrdup(s, GFP_KERNEL); 1360 if (!value) 1361 return -ENOMEM; 1362 1363 last = strlen(value) - 1; 1364 if (value[last] == 'K' || value[last] == 'k') { 1365 shift_left_factor = 10; 1366 value[last] = '\0'; 1367 } 1368 if (value[last] == 'M' || value[last] == 'm') { 1369 shift_left_factor = 20; 1370 value[last] = '\0'; 1371 } 1372 if (value[last] == 'G' || value[last] == 'g') { 1373 shift_left_factor = 30; 1374 value[last] = '\0'; 1375 } 1376 1377 if (kstrtoull(value, base, &_res)) 1378 ret = -EINVAL; 1379 kfree(value); 1380 *res = _res << shift_left_factor; 1381 return ret; 1382 } 1383 1384 static inline void 1385 xfs_fs_warn_deprecated( 1386 struct fs_context *fc, 1387 struct fs_parameter *param) 1388 { 1389 /* 1390 * Always warn about someone passing in a deprecated mount option. 1391 * Previously we wouldn't print the warning if we were reconfiguring 1392 * and current mount point already had the flag set, but that was not 1393 * the right thing to do. 1394 * 1395 * Many distributions mount the root filesystem with no options in the 1396 * initramfs and rely on mount -a to remount the root fs with the 1397 * options in fstab. However, the old behavior meant that there would 1398 * never be a warning about deprecated mount options for the root fs in 1399 * /etc/fstab. On a single-fs system, that means no warning at all. 1400 * 1401 * Compounding this problem are distribution scripts that copy 1402 * /proc/mounts to fstab, which means that we can't remove mount 1403 * options unless we're 100% sure they have only ever been advertised 1404 * in /proc/mounts in response to explicitly provided mount options. 1405 */ 1406 xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); 1407 } 1408 1409 /* 1410 * Set mount state from a mount option. 1411 * 1412 * NOTE: mp->m_super is NULL here! 1413 */ 1414 static int 1415 xfs_fs_parse_param( 1416 struct fs_context *fc, 1417 struct fs_parameter *param) 1418 { 1419 struct xfs_mount *parsing_mp = fc->s_fs_info; 1420 struct fs_parse_result result; 1421 int size = 0; 1422 int opt; 1423 1424 BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL); 1425 1426 opt = fs_parse(fc, xfs_fs_parameters, param, &result); 1427 if (opt < 0) 1428 return opt; 1429 1430 switch (opt) { 1431 case Op_deprecated: 1432 xfs_fs_warn_deprecated(fc, param); 1433 return 0; 1434 case Opt_logbufs: 1435 parsing_mp->m_logbufs = result.uint_32; 1436 return 0; 1437 case Opt_logbsize: 1438 if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) 1439 return -EINVAL; 1440 return 0; 1441 case Opt_logdev: 1442 kfree(parsing_mp->m_logname); 1443 parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); 1444 if (!parsing_mp->m_logname) 1445 return -ENOMEM; 1446 return 0; 1447 case Opt_rtdev: 1448 kfree(parsing_mp->m_rtname); 1449 parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); 1450 if (!parsing_mp->m_rtname) 1451 return -ENOMEM; 1452 return 0; 1453 case Opt_allocsize: 1454 if (suffix_kstrtoint(param->string, 10, &size)) 1455 return -EINVAL; 1456 parsing_mp->m_allocsize_log = ffs(size) - 1; 1457 parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; 1458 return 0; 1459 case Opt_grpid: 1460 case Opt_bsdgroups: 1461 parsing_mp->m_features |= XFS_FEAT_GRPID; 1462 return 0; 1463 case Opt_nogrpid: 1464 case Opt_sysvgroups: 1465 parsing_mp->m_features &= ~XFS_FEAT_GRPID; 1466 return 0; 1467 case Opt_wsync: 1468 parsing_mp->m_features |= XFS_FEAT_WSYNC; 1469 return 0; 1470 case Opt_norecovery: 1471 parsing_mp->m_features |= XFS_FEAT_NORECOVERY; 1472 return 0; 1473 case Opt_noalign: 1474 parsing_mp->m_features |= XFS_FEAT_NOALIGN; 1475 return 0; 1476 case Opt_swalloc: 1477 parsing_mp->m_features |= XFS_FEAT_SWALLOC; 1478 return 0; 1479 case Opt_sunit: 1480 parsing_mp->m_dalign = result.uint_32; 1481 return 0; 1482 case Opt_swidth: 1483 parsing_mp->m_swidth = result.uint_32; 1484 return 0; 1485 case Opt_inode32: 1486 parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; 1487 return 0; 1488 case Opt_inode64: 1489 parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1490 return 0; 1491 case Opt_nouuid: 1492 parsing_mp->m_features |= XFS_FEAT_NOUUID; 1493 return 0; 1494 case Opt_largeio: 1495 parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; 1496 return 0; 1497 case Opt_nolargeio: 1498 parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; 1499 return 0; 1500 case Opt_filestreams: 1501 parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; 1502 return 0; 1503 case Opt_noquota: 1504 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; 1505 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; 1506 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1507 return 0; 1508 case Opt_quota: 1509 case Opt_uquota: 1510 case Opt_usrquota: 1511 parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); 1512 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1513 return 0; 1514 case Opt_qnoenforce: 1515 case Opt_uqnoenforce: 1516 parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; 1517 parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; 1518 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1519 return 0; 1520 case Opt_pquota: 1521 case Opt_prjquota: 1522 parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); 1523 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1524 return 0; 1525 case Opt_pqnoenforce: 1526 parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; 1527 parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; 1528 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1529 return 0; 1530 case Opt_gquota: 1531 case Opt_grpquota: 1532 parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); 1533 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1534 return 0; 1535 case Opt_gqnoenforce: 1536 parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; 1537 parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; 1538 parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; 1539 return 0; 1540 case Opt_discard: 1541 parsing_mp->m_features |= XFS_FEAT_DISCARD; 1542 return 0; 1543 case Opt_nodiscard: 1544 parsing_mp->m_features &= ~XFS_FEAT_DISCARD; 1545 return 0; 1546 #ifdef CONFIG_FS_DAX 1547 case Opt_dax: 1548 xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); 1549 return 0; 1550 case Opt_dax_enum: 1551 xfs_mount_set_dax_mode(parsing_mp, result.uint_32); 1552 return 0; 1553 #endif 1554 case Opt_max_open_zones: 1555 parsing_mp->m_max_open_zones = result.uint_32; 1556 return 0; 1557 case Opt_lifetime: 1558 parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME; 1559 return 0; 1560 case Opt_nolifetime: 1561 parsing_mp->m_features |= XFS_FEAT_NOLIFETIME; 1562 return 0; 1563 case Opt_max_atomic_write: 1564 if (suffix_kstrtoull(param->string, 10, 1565 &parsing_mp->m_awu_max_bytes)) { 1566 xfs_warn(parsing_mp, 1567 "max atomic write size must be positive integer"); 1568 return -EINVAL; 1569 } 1570 return 0; 1571 default: 1572 xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); 1573 return -EINVAL; 1574 } 1575 1576 return 0; 1577 } 1578 1579 static int 1580 xfs_fs_validate_params( 1581 struct xfs_mount *mp) 1582 { 1583 /* No recovery flag requires a read-only mount */ 1584 if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { 1585 xfs_warn(mp, "no-recovery mounts must be read-only."); 1586 return -EINVAL; 1587 } 1588 1589 if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { 1590 xfs_warn(mp, 1591 "sunit and swidth options incompatible with the noalign option"); 1592 return -EINVAL; 1593 } 1594 1595 if (!IS_ENABLED(CONFIG_XFS_QUOTA) && 1596 (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) { 1597 xfs_warn(mp, "quota support not available in this kernel."); 1598 return -EINVAL; 1599 } 1600 1601 if ((mp->m_dalign && !mp->m_swidth) || 1602 (!mp->m_dalign && mp->m_swidth)) { 1603 xfs_warn(mp, "sunit and swidth must be specified together"); 1604 return -EINVAL; 1605 } 1606 1607 if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { 1608 xfs_warn(mp, 1609 "stripe width (%d) must be a multiple of the stripe unit (%d)", 1610 mp->m_swidth, mp->m_dalign); 1611 return -EINVAL; 1612 } 1613 1614 if (mp->m_logbufs != -1 && 1615 mp->m_logbufs != 0 && 1616 (mp->m_logbufs < XLOG_MIN_ICLOGS || 1617 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 1618 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", 1619 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 1620 return -EINVAL; 1621 } 1622 1623 if (mp->m_logbsize != -1 && 1624 mp->m_logbsize != 0 && 1625 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 1626 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 1627 !is_power_of_2(mp->m_logbsize))) { 1628 xfs_warn(mp, 1629 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 1630 mp->m_logbsize); 1631 return -EINVAL; 1632 } 1633 1634 if (xfs_has_allocsize(mp) && 1635 (mp->m_allocsize_log > XFS_MAX_IO_LOG || 1636 mp->m_allocsize_log < XFS_MIN_IO_LOG)) { 1637 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", 1638 mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); 1639 return -EINVAL; 1640 } 1641 1642 return 0; 1643 } 1644 1645 struct dentry * 1646 xfs_debugfs_mkdir( 1647 const char *name, 1648 struct dentry *parent) 1649 { 1650 struct dentry *child; 1651 1652 /* Apparently we're expected to ignore error returns?? */ 1653 child = debugfs_create_dir(name, parent); 1654 if (IS_ERR(child)) 1655 return NULL; 1656 1657 return child; 1658 } 1659 1660 static int 1661 xfs_fs_fill_super( 1662 struct super_block *sb, 1663 struct fs_context *fc) 1664 { 1665 struct xfs_mount *mp = sb->s_fs_info; 1666 struct inode *root; 1667 int flags = 0, error; 1668 1669 mp->m_super = sb; 1670 1671 /* 1672 * Copy VFS mount flags from the context now that all parameter parsing 1673 * is guaranteed to have been completed by either the old mount API or 1674 * the newer fsopen/fsconfig API. 1675 */ 1676 if (fc->sb_flags & SB_RDONLY) 1677 xfs_set_readonly(mp); 1678 if (fc->sb_flags & SB_DIRSYNC) 1679 mp->m_features |= XFS_FEAT_DIRSYNC; 1680 if (fc->sb_flags & SB_SYNCHRONOUS) 1681 mp->m_features |= XFS_FEAT_WSYNC; 1682 1683 error = xfs_fs_validate_params(mp); 1684 if (error) 1685 return error; 1686 1687 if (!sb_min_blocksize(sb, BBSIZE)) { 1688 xfs_err(mp, "unable to set blocksize"); 1689 return -EINVAL; 1690 } 1691 sb->s_xattr = xfs_xattr_handlers; 1692 sb->s_export_op = &xfs_export_operations; 1693 #ifdef CONFIG_XFS_QUOTA 1694 sb->s_qcop = &xfs_quotactl_operations; 1695 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 1696 #endif 1697 sb->s_op = &xfs_super_operations; 1698 1699 /* 1700 * Delay mount work if the debug hook is set. This is debug 1701 * instrumention to coordinate simulation of xfs mount failures with 1702 * VFS superblock operations 1703 */ 1704 if (xfs_globals.mount_delay) { 1705 xfs_notice(mp, "Delaying mount for %d seconds.", 1706 xfs_globals.mount_delay); 1707 msleep(xfs_globals.mount_delay * 1000); 1708 } 1709 1710 if (fc->sb_flags & SB_SILENT) 1711 flags |= XFS_MFSI_QUIET; 1712 1713 error = xfs_open_devices(mp); 1714 if (error) 1715 return error; 1716 1717 if (xfs_debugfs) { 1718 mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id, 1719 xfs_debugfs); 1720 } else { 1721 mp->m_debugfs = NULL; 1722 } 1723 1724 error = xfs_init_mount_workqueues(mp); 1725 if (error) 1726 goto out_shutdown_devices; 1727 1728 error = xfs_init_percpu_counters(mp); 1729 if (error) 1730 goto out_destroy_workqueues; 1731 1732 error = xfs_inodegc_init_percpu(mp); 1733 if (error) 1734 goto out_destroy_counters; 1735 1736 /* Allocate stats memory before we do operations that might use it */ 1737 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1738 if (!mp->m_stats.xs_stats) { 1739 error = -ENOMEM; 1740 goto out_destroy_inodegc; 1741 } 1742 1743 error = xchk_mount_stats_alloc(mp); 1744 if (error) 1745 goto out_free_stats; 1746 1747 error = xfs_readsb(mp, flags); 1748 if (error) 1749 goto out_free_scrub_stats; 1750 1751 error = xfs_finish_flags(mp); 1752 if (error) 1753 goto out_free_sb; 1754 1755 error = xfs_setup_devices(mp); 1756 if (error) 1757 goto out_free_sb; 1758 1759 /* 1760 * V4 support is undergoing deprecation. 1761 * 1762 * Note: this has to use an open coded m_features check as xfs_has_crc 1763 * always returns false for !CONFIG_XFS_SUPPORT_V4. 1764 */ 1765 if (!(mp->m_features & XFS_FEAT_CRC)) { 1766 if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) { 1767 xfs_warn(mp, 1768 "Deprecated V4 format (crc=0) not supported by kernel."); 1769 error = -EINVAL; 1770 goto out_free_sb; 1771 } 1772 xfs_warn_once(mp, 1773 "Deprecated V4 format (crc=0) will not be supported after September 2030."); 1774 } 1775 1776 /* ASCII case insensitivity is undergoing deprecation. */ 1777 if (xfs_has_asciici(mp)) { 1778 #ifdef CONFIG_XFS_SUPPORT_ASCII_CI 1779 xfs_warn_once(mp, 1780 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030."); 1781 #else 1782 xfs_warn(mp, 1783 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel."); 1784 error = -EINVAL; 1785 goto out_free_sb; 1786 #endif 1787 } 1788 1789 /* 1790 * Filesystem claims it needs repair, so refuse the mount unless 1791 * norecovery is also specified, in which case the filesystem can 1792 * be mounted with no risk of further damage. 1793 */ 1794 if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { 1795 xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); 1796 error = -EFSCORRUPTED; 1797 goto out_free_sb; 1798 } 1799 1800 /* 1801 * Don't touch the filesystem if a user tool thinks it owns the primary 1802 * superblock. mkfs doesn't clear the flag from secondary supers, so 1803 * we don't check them at all. 1804 */ 1805 if (mp->m_sb.sb_inprogress) { 1806 xfs_warn(mp, "Offline file system operation in progress!"); 1807 error = -EFSCORRUPTED; 1808 goto out_free_sb; 1809 } 1810 1811 if (mp->m_sb.sb_blocksize > PAGE_SIZE) { 1812 size_t max_folio_size = mapping_max_folio_size_supported(); 1813 1814 if (!xfs_has_crc(mp)) { 1815 xfs_warn(mp, 1816 "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.", 1817 mp->m_sb.sb_blocksize, PAGE_SIZE); 1818 error = -ENOSYS; 1819 goto out_free_sb; 1820 } 1821 1822 if (mp->m_sb.sb_blocksize > max_folio_size) { 1823 xfs_warn(mp, 1824 "block size (%u bytes) not supported; Only block size (%zu) or less is supported", 1825 mp->m_sb.sb_blocksize, max_folio_size); 1826 error = -ENOSYS; 1827 goto out_free_sb; 1828 } 1829 1830 xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS); 1831 } 1832 1833 /* Ensure this filesystem fits in the page cache limits */ 1834 if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || 1835 xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { 1836 xfs_warn(mp, 1837 "file system too large to be mounted on this system."); 1838 error = -EFBIG; 1839 goto out_free_sb; 1840 } 1841 1842 /* 1843 * XFS block mappings use 54 bits to store the logical block offset. 1844 * This should suffice to handle the maximum file size that the VFS 1845 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT 1846 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes 1847 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON 1848 * to check this assertion. 1849 * 1850 * Avoid integer overflow by comparing the maximum bmbt offset to the 1851 * maximum pagecache offset in units of fs blocks. 1852 */ 1853 if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { 1854 xfs_warn(mp, 1855 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", 1856 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), 1857 XFS_MAX_FILEOFF); 1858 error = -EINVAL; 1859 goto out_free_sb; 1860 } 1861 1862 error = xfs_rtmount_readsb(mp); 1863 if (error) 1864 goto out_free_sb; 1865 1866 error = xfs_filestream_mount(mp); 1867 if (error) 1868 goto out_free_rtsb; 1869 1870 /* 1871 * we must configure the block size in the superblock before we run the 1872 * full mount process as the mount process can lookup and cache inodes. 1873 */ 1874 sb->s_magic = XFS_SUPER_MAGIC; 1875 sb->s_blocksize = mp->m_sb.sb_blocksize; 1876 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1877 sb->s_maxbytes = MAX_LFS_FILESIZE; 1878 sb->s_max_links = XFS_MAXLINK; 1879 sb->s_time_gran = 1; 1880 if (xfs_has_bigtime(mp)) { 1881 sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); 1882 sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); 1883 } else { 1884 sb->s_time_min = XFS_LEGACY_TIME_MIN; 1885 sb->s_time_max = XFS_LEGACY_TIME_MAX; 1886 } 1887 trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); 1888 sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; 1889 1890 set_posix_acl_flag(sb); 1891 1892 /* version 5 superblocks support inode version counters. */ 1893 if (xfs_has_crc(mp)) 1894 sb->s_flags |= SB_I_VERSION; 1895 1896 if (xfs_has_dax_always(mp)) { 1897 error = xfs_setup_dax_always(mp); 1898 if (error) 1899 goto out_filestream_unmount; 1900 } 1901 1902 if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { 1903 xfs_warn(mp, 1904 "mounting with \"discard\" option, but the device does not support discard"); 1905 mp->m_features &= ~XFS_FEAT_DISCARD; 1906 } 1907 1908 if (xfs_has_zoned(mp)) { 1909 if (!xfs_has_metadir(mp)) { 1910 xfs_alert(mp, 1911 "metadir feature required for zoned realtime devices."); 1912 error = -EINVAL; 1913 goto out_filestream_unmount; 1914 } 1915 xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED); 1916 } else if (xfs_has_metadir(mp)) { 1917 xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); 1918 } 1919 1920 if (xfs_has_reflink(mp)) { 1921 if (xfs_has_realtime(mp) && 1922 !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) { 1923 xfs_alert(mp, 1924 "reflink not compatible with realtime extent size %u!", 1925 mp->m_sb.sb_rextsize); 1926 error = -EINVAL; 1927 goto out_filestream_unmount; 1928 } 1929 1930 if (xfs_has_zoned(mp)) { 1931 xfs_alert(mp, 1932 "reflink not compatible with zoned RT device!"); 1933 error = -EINVAL; 1934 goto out_filestream_unmount; 1935 } 1936 1937 if (xfs_globals.always_cow) { 1938 xfs_info(mp, "using DEBUG-only always_cow mode."); 1939 mp->m_always_cow = true; 1940 } 1941 } 1942 1943 /* 1944 * If no quota mount options were provided, maybe we'll try to pick 1945 * up the quota accounting and enforcement flags from the ondisk sb. 1946 */ 1947 if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS)) 1948 xfs_set_resuming_quotaon(mp); 1949 mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; 1950 1951 error = xfs_mountfs(mp); 1952 if (error) 1953 goto out_filestream_unmount; 1954 1955 root = igrab(VFS_I(mp->m_rootip)); 1956 if (!root) { 1957 error = -ENOENT; 1958 goto out_unmount; 1959 } 1960 sb->s_root = d_make_root(root); 1961 if (!sb->s_root) { 1962 error = -ENOMEM; 1963 goto out_unmount; 1964 } 1965 1966 return 0; 1967 1968 out_filestream_unmount: 1969 xfs_filestream_unmount(mp); 1970 out_free_rtsb: 1971 xfs_rtmount_freesb(mp); 1972 out_free_sb: 1973 xfs_freesb(mp); 1974 out_free_scrub_stats: 1975 xchk_mount_stats_free(mp); 1976 out_free_stats: 1977 free_percpu(mp->m_stats.xs_stats); 1978 out_destroy_inodegc: 1979 xfs_inodegc_free_percpu(mp); 1980 out_destroy_counters: 1981 xfs_destroy_percpu_counters(mp); 1982 out_destroy_workqueues: 1983 xfs_destroy_mount_workqueues(mp); 1984 out_shutdown_devices: 1985 xfs_shutdown_devices(mp); 1986 return error; 1987 1988 out_unmount: 1989 xfs_filestream_unmount(mp); 1990 xfs_unmountfs(mp); 1991 goto out_free_rtsb; 1992 } 1993 1994 static int 1995 xfs_fs_get_tree( 1996 struct fs_context *fc) 1997 { 1998 return get_tree_bdev(fc, xfs_fs_fill_super); 1999 } 2000 2001 static int 2002 xfs_remount_rw( 2003 struct xfs_mount *mp) 2004 { 2005 struct xfs_sb *sbp = &mp->m_sb; 2006 int error; 2007 2008 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && 2009 xfs_readonly_buftarg(mp->m_logdev_targp)) { 2010 xfs_warn(mp, 2011 "ro->rw transition prohibited by read-only logdev"); 2012 return -EACCES; 2013 } 2014 2015 if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { 2016 xfs_warn(mp, 2017 "ro->rw transition prohibited by read-only rtdev"); 2018 return -EACCES; 2019 } 2020 2021 if (xfs_has_norecovery(mp)) { 2022 xfs_warn(mp, 2023 "ro->rw transition prohibited on norecovery mount"); 2024 return -EINVAL; 2025 } 2026 2027 if (xfs_sb_is_v5(sbp) && 2028 xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 2029 xfs_warn(mp, 2030 "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", 2031 (sbp->sb_features_ro_compat & 2032 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 2033 return -EINVAL; 2034 } 2035 2036 xfs_clear_readonly(mp); 2037 2038 /* 2039 * If this is the first remount to writeable state we might have some 2040 * superblock changes to update. 2041 */ 2042 if (mp->m_update_sb) { 2043 error = xfs_sync_sb(mp, false); 2044 if (error) { 2045 xfs_warn(mp, "failed to write sb changes"); 2046 return error; 2047 } 2048 mp->m_update_sb = false; 2049 } 2050 2051 /* 2052 * Fill out the reserve pool if it is empty. Use the stashed value if 2053 * it is non-zero, otherwise go with the default. 2054 */ 2055 xfs_restore_resvblks(mp); 2056 xfs_log_work_queue(mp); 2057 xfs_blockgc_start(mp); 2058 2059 /* Create the per-AG metadata reservation pool .*/ 2060 error = xfs_fs_reserve_ag_blocks(mp); 2061 if (error && error != -ENOSPC) 2062 return error; 2063 2064 /* Re-enable the background inode inactivation worker. */ 2065 xfs_inodegc_start(mp); 2066 2067 /* Restart zone reclaim */ 2068 xfs_zone_gc_start(mp); 2069 2070 return 0; 2071 } 2072 2073 static int 2074 xfs_remount_ro( 2075 struct xfs_mount *mp) 2076 { 2077 struct xfs_icwalk icw = { 2078 .icw_flags = XFS_ICWALK_FLAG_SYNC, 2079 }; 2080 int error; 2081 2082 /* Flush all the dirty data to disk. */ 2083 error = sync_filesystem(mp->m_super); 2084 if (error) 2085 return error; 2086 2087 /* 2088 * Cancel background eofb scanning so it cannot race with the final 2089 * log force+buftarg wait and deadlock the remount. 2090 */ 2091 xfs_blockgc_stop(mp); 2092 2093 /* 2094 * Clear out all remaining COW staging extents and speculative post-EOF 2095 * preallocations so that we don't leave inodes requiring inactivation 2096 * cleanups during reclaim on a read-only mount. We must process every 2097 * cached inode, so this requires a synchronous cache scan. 2098 */ 2099 error = xfs_blockgc_free_space(mp, &icw); 2100 if (error) { 2101 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 2102 return error; 2103 } 2104 2105 /* 2106 * Stop the inodegc background worker. xfs_fs_reconfigure already 2107 * flushed all pending inodegc work when it sync'd the filesystem. 2108 * The VFS holds s_umount, so we know that inodes cannot enter 2109 * xfs_fs_destroy_inode during a remount operation. In readonly mode 2110 * we send inodes straight to reclaim, so no inodes will be queued. 2111 */ 2112 xfs_inodegc_stop(mp); 2113 2114 /* Stop zone reclaim */ 2115 xfs_zone_gc_stop(mp); 2116 2117 /* Free the per-AG metadata reservation pool. */ 2118 xfs_fs_unreserve_ag_blocks(mp); 2119 2120 /* 2121 * Before we sync the metadata, we need to free up the reserve block 2122 * pool so that the used block count in the superblock on disk is 2123 * correct at the end of the remount. Stash the current* reserve pool 2124 * size so that if we get remounted rw, we can return it to the same 2125 * size. 2126 */ 2127 xfs_save_resvblks(mp); 2128 2129 xfs_log_clean(mp); 2130 xfs_set_readonly(mp); 2131 2132 return 0; 2133 } 2134 2135 /* 2136 * Logically we would return an error here to prevent users from believing 2137 * they might have changed mount options using remount which can't be changed. 2138 * 2139 * But unfortunately mount(8) adds all options from mtab and fstab to the mount 2140 * arguments in some cases so we can't blindly reject options, but have to 2141 * check for each specified option if it actually differs from the currently 2142 * set option and only reject it if that's the case. 2143 * 2144 * Until that is implemented we return success for every remount request, and 2145 * silently ignore all options that we can't actually change. 2146 */ 2147 static int 2148 xfs_fs_reconfigure( 2149 struct fs_context *fc) 2150 { 2151 struct xfs_mount *mp = XFS_M(fc->root->d_sb); 2152 struct xfs_mount *new_mp = fc->s_fs_info; 2153 int flags = fc->sb_flags; 2154 int error; 2155 2156 new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; 2157 2158 /* version 5 superblocks always support version counters. */ 2159 if (xfs_has_crc(mp)) 2160 fc->sb_flags |= SB_I_VERSION; 2161 2162 error = xfs_fs_validate_params(new_mp); 2163 if (error) 2164 return error; 2165 2166 /* Validate new max_atomic_write option before making other changes */ 2167 if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) { 2168 error = xfs_set_max_atomic_write_opt(mp, 2169 new_mp->m_awu_max_bytes); 2170 if (error) 2171 return error; 2172 } 2173 2174 /* inode32 -> inode64 */ 2175 if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { 2176 mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 2177 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 2178 } 2179 2180 /* inode64 -> inode32 */ 2181 if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { 2182 mp->m_features |= XFS_FEAT_SMALL_INUMS; 2183 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 2184 } 2185 2186 /* 2187 * Now that mp has been modified according to the remount options, we 2188 * do a final option validation with xfs_finish_flags() just like it is 2189 * just like it is done during mount. We cannot use 2190 * done during mount. We cannot use xfs_finish_flags() on new_mp as it 2191 * contains only the user given options. 2192 */ 2193 error = xfs_finish_flags(mp); 2194 if (error) 2195 return error; 2196 2197 /* ro -> rw */ 2198 if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { 2199 error = xfs_remount_rw(mp); 2200 if (error) 2201 return error; 2202 } 2203 2204 /* rw -> ro */ 2205 if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { 2206 error = xfs_remount_ro(mp); 2207 if (error) 2208 return error; 2209 } 2210 2211 return 0; 2212 } 2213 2214 static void 2215 xfs_fs_free( 2216 struct fs_context *fc) 2217 { 2218 struct xfs_mount *mp = fc->s_fs_info; 2219 2220 /* 2221 * mp is stored in the fs_context when it is initialized. 2222 * mp is transferred to the superblock on a successful mount, 2223 * but if an error occurs before the transfer we have to free 2224 * it here. 2225 */ 2226 if (mp) 2227 xfs_mount_free(mp); 2228 } 2229 2230 static const struct fs_context_operations xfs_context_ops = { 2231 .parse_param = xfs_fs_parse_param, 2232 .get_tree = xfs_fs_get_tree, 2233 .reconfigure = xfs_fs_reconfigure, 2234 .free = xfs_fs_free, 2235 }; 2236 2237 /* 2238 * WARNING: do not initialise any parameters in this function that depend on 2239 * mount option parsing having already been performed as this can be called from 2240 * fsopen() before any parameters have been set. 2241 */ 2242 static int 2243 xfs_init_fs_context( 2244 struct fs_context *fc) 2245 { 2246 struct xfs_mount *mp; 2247 int i; 2248 2249 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 2250 if (!mp) 2251 return -ENOMEM; 2252 2253 spin_lock_init(&mp->m_sb_lock); 2254 for (i = 0; i < XG_TYPE_MAX; i++) 2255 xa_init(&mp->m_groups[i].xa); 2256 mutex_init(&mp->m_growlock); 2257 mutex_init(&mp->m_metafile_resv_lock); 2258 INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); 2259 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 2260 mp->m_kobj.kobject.kset = xfs_kset; 2261 /* 2262 * We don't create the finobt per-ag space reservation until after log 2263 * recovery, so we must set this to true so that an ifree transaction 2264 * started during log recovery will not depend on space reservations 2265 * for finobt expansion. 2266 */ 2267 mp->m_finobt_nores = true; 2268 2269 /* 2270 * These can be overridden by the mount option parsing. 2271 */ 2272 mp->m_logbufs = -1; 2273 mp->m_logbsize = -1; 2274 mp->m_allocsize_log = 16; /* 64k */ 2275 2276 xfs_hooks_init(&mp->m_dir_update_hooks); 2277 2278 fc->s_fs_info = mp; 2279 fc->ops = &xfs_context_ops; 2280 2281 return 0; 2282 } 2283 2284 static void 2285 xfs_kill_sb( 2286 struct super_block *sb) 2287 { 2288 kill_block_super(sb); 2289 xfs_mount_free(XFS_M(sb)); 2290 } 2291 2292 static struct file_system_type xfs_fs_type = { 2293 .owner = THIS_MODULE, 2294 .name = "xfs", 2295 .init_fs_context = xfs_init_fs_context, 2296 .parameters = xfs_fs_parameters, 2297 .kill_sb = xfs_kill_sb, 2298 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME | 2299 FS_LBS, 2300 }; 2301 MODULE_ALIAS_FS("xfs"); 2302 2303 STATIC int __init 2304 xfs_init_caches(void) 2305 { 2306 int error; 2307 2308 xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, 2309 SLAB_HWCACHE_ALIGN | 2310 SLAB_RECLAIM_ACCOUNT, 2311 NULL); 2312 if (!xfs_buf_cache) 2313 goto out; 2314 2315 xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", 2316 sizeof(struct xlog_ticket), 2317 0, 0, NULL); 2318 if (!xfs_log_ticket_cache) 2319 goto out_destroy_buf_cache; 2320 2321 error = xfs_btree_init_cur_caches(); 2322 if (error) 2323 goto out_destroy_log_ticket_cache; 2324 2325 error = rcbagbt_init_cur_cache(); 2326 if (error) 2327 goto out_destroy_btree_cur_cache; 2328 2329 error = xfs_defer_init_item_caches(); 2330 if (error) 2331 goto out_destroy_rcbagbt_cur_cache; 2332 2333 xfs_da_state_cache = kmem_cache_create("xfs_da_state", 2334 sizeof(struct xfs_da_state), 2335 0, 0, NULL); 2336 if (!xfs_da_state_cache) 2337 goto out_destroy_defer_item_cache; 2338 2339 xfs_ifork_cache = kmem_cache_create("xfs_ifork", 2340 sizeof(struct xfs_ifork), 2341 0, 0, NULL); 2342 if (!xfs_ifork_cache) 2343 goto out_destroy_da_state_cache; 2344 2345 xfs_trans_cache = kmem_cache_create("xfs_trans", 2346 sizeof(struct xfs_trans), 2347 0, 0, NULL); 2348 if (!xfs_trans_cache) 2349 goto out_destroy_ifork_cache; 2350 2351 2352 /* 2353 * The size of the cache-allocated buf log item is the maximum 2354 * size possible under XFS. This wastes a little bit of memory, 2355 * but it is much faster. 2356 */ 2357 xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", 2358 sizeof(struct xfs_buf_log_item), 2359 0, 0, NULL); 2360 if (!xfs_buf_item_cache) 2361 goto out_destroy_trans_cache; 2362 2363 xfs_efd_cache = kmem_cache_create("xfs_efd_item", 2364 xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), 2365 0, 0, NULL); 2366 if (!xfs_efd_cache) 2367 goto out_destroy_buf_item_cache; 2368 2369 xfs_efi_cache = kmem_cache_create("xfs_efi_item", 2370 xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), 2371 0, 0, NULL); 2372 if (!xfs_efi_cache) 2373 goto out_destroy_efd_cache; 2374 2375 xfs_inode_cache = kmem_cache_create("xfs_inode", 2376 sizeof(struct xfs_inode), 0, 2377 (SLAB_HWCACHE_ALIGN | 2378 SLAB_RECLAIM_ACCOUNT | 2379 SLAB_ACCOUNT), 2380 xfs_fs_inode_init_once); 2381 if (!xfs_inode_cache) 2382 goto out_destroy_efi_cache; 2383 2384 xfs_ili_cache = kmem_cache_create("xfs_ili", 2385 sizeof(struct xfs_inode_log_item), 0, 2386 SLAB_RECLAIM_ACCOUNT, 2387 NULL); 2388 if (!xfs_ili_cache) 2389 goto out_destroy_inode_cache; 2390 2391 xfs_icreate_cache = kmem_cache_create("xfs_icr", 2392 sizeof(struct xfs_icreate_item), 2393 0, 0, NULL); 2394 if (!xfs_icreate_cache) 2395 goto out_destroy_ili_cache; 2396 2397 xfs_rud_cache = kmem_cache_create("xfs_rud_item", 2398 sizeof(struct xfs_rud_log_item), 2399 0, 0, NULL); 2400 if (!xfs_rud_cache) 2401 goto out_destroy_icreate_cache; 2402 2403 xfs_rui_cache = kmem_cache_create("xfs_rui_item", 2404 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 2405 0, 0, NULL); 2406 if (!xfs_rui_cache) 2407 goto out_destroy_rud_cache; 2408 2409 xfs_cud_cache = kmem_cache_create("xfs_cud_item", 2410 sizeof(struct xfs_cud_log_item), 2411 0, 0, NULL); 2412 if (!xfs_cud_cache) 2413 goto out_destroy_rui_cache; 2414 2415 xfs_cui_cache = kmem_cache_create("xfs_cui_item", 2416 xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 2417 0, 0, NULL); 2418 if (!xfs_cui_cache) 2419 goto out_destroy_cud_cache; 2420 2421 xfs_bud_cache = kmem_cache_create("xfs_bud_item", 2422 sizeof(struct xfs_bud_log_item), 2423 0, 0, NULL); 2424 if (!xfs_bud_cache) 2425 goto out_destroy_cui_cache; 2426 2427 xfs_bui_cache = kmem_cache_create("xfs_bui_item", 2428 xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 2429 0, 0, NULL); 2430 if (!xfs_bui_cache) 2431 goto out_destroy_bud_cache; 2432 2433 xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", 2434 sizeof(struct xfs_attrd_log_item), 2435 0, 0, NULL); 2436 if (!xfs_attrd_cache) 2437 goto out_destroy_bui_cache; 2438 2439 xfs_attri_cache = kmem_cache_create("xfs_attri_item", 2440 sizeof(struct xfs_attri_log_item), 2441 0, 0, NULL); 2442 if (!xfs_attri_cache) 2443 goto out_destroy_attrd_cache; 2444 2445 xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", 2446 sizeof(struct xfs_iunlink_item), 2447 0, 0, NULL); 2448 if (!xfs_iunlink_cache) 2449 goto out_destroy_attri_cache; 2450 2451 xfs_xmd_cache = kmem_cache_create("xfs_xmd_item", 2452 sizeof(struct xfs_xmd_log_item), 2453 0, 0, NULL); 2454 if (!xfs_xmd_cache) 2455 goto out_destroy_iul_cache; 2456 2457 xfs_xmi_cache = kmem_cache_create("xfs_xmi_item", 2458 sizeof(struct xfs_xmi_log_item), 2459 0, 0, NULL); 2460 if (!xfs_xmi_cache) 2461 goto out_destroy_xmd_cache; 2462 2463 xfs_parent_args_cache = kmem_cache_create("xfs_parent_args", 2464 sizeof(struct xfs_parent_args), 2465 0, 0, NULL); 2466 if (!xfs_parent_args_cache) 2467 goto out_destroy_xmi_cache; 2468 2469 return 0; 2470 2471 out_destroy_xmi_cache: 2472 kmem_cache_destroy(xfs_xmi_cache); 2473 out_destroy_xmd_cache: 2474 kmem_cache_destroy(xfs_xmd_cache); 2475 out_destroy_iul_cache: 2476 kmem_cache_destroy(xfs_iunlink_cache); 2477 out_destroy_attri_cache: 2478 kmem_cache_destroy(xfs_attri_cache); 2479 out_destroy_attrd_cache: 2480 kmem_cache_destroy(xfs_attrd_cache); 2481 out_destroy_bui_cache: 2482 kmem_cache_destroy(xfs_bui_cache); 2483 out_destroy_bud_cache: 2484 kmem_cache_destroy(xfs_bud_cache); 2485 out_destroy_cui_cache: 2486 kmem_cache_destroy(xfs_cui_cache); 2487 out_destroy_cud_cache: 2488 kmem_cache_destroy(xfs_cud_cache); 2489 out_destroy_rui_cache: 2490 kmem_cache_destroy(xfs_rui_cache); 2491 out_destroy_rud_cache: 2492 kmem_cache_destroy(xfs_rud_cache); 2493 out_destroy_icreate_cache: 2494 kmem_cache_destroy(xfs_icreate_cache); 2495 out_destroy_ili_cache: 2496 kmem_cache_destroy(xfs_ili_cache); 2497 out_destroy_inode_cache: 2498 kmem_cache_destroy(xfs_inode_cache); 2499 out_destroy_efi_cache: 2500 kmem_cache_destroy(xfs_efi_cache); 2501 out_destroy_efd_cache: 2502 kmem_cache_destroy(xfs_efd_cache); 2503 out_destroy_buf_item_cache: 2504 kmem_cache_destroy(xfs_buf_item_cache); 2505 out_destroy_trans_cache: 2506 kmem_cache_destroy(xfs_trans_cache); 2507 out_destroy_ifork_cache: 2508 kmem_cache_destroy(xfs_ifork_cache); 2509 out_destroy_da_state_cache: 2510 kmem_cache_destroy(xfs_da_state_cache); 2511 out_destroy_defer_item_cache: 2512 xfs_defer_destroy_item_caches(); 2513 out_destroy_rcbagbt_cur_cache: 2514 rcbagbt_destroy_cur_cache(); 2515 out_destroy_btree_cur_cache: 2516 xfs_btree_destroy_cur_caches(); 2517 out_destroy_log_ticket_cache: 2518 kmem_cache_destroy(xfs_log_ticket_cache); 2519 out_destroy_buf_cache: 2520 kmem_cache_destroy(xfs_buf_cache); 2521 out: 2522 return -ENOMEM; 2523 } 2524 2525 STATIC void 2526 xfs_destroy_caches(void) 2527 { 2528 /* 2529 * Make sure all delayed rcu free are flushed before we 2530 * destroy caches. 2531 */ 2532 rcu_barrier(); 2533 kmem_cache_destroy(xfs_parent_args_cache); 2534 kmem_cache_destroy(xfs_xmd_cache); 2535 kmem_cache_destroy(xfs_xmi_cache); 2536 kmem_cache_destroy(xfs_iunlink_cache); 2537 kmem_cache_destroy(xfs_attri_cache); 2538 kmem_cache_destroy(xfs_attrd_cache); 2539 kmem_cache_destroy(xfs_bui_cache); 2540 kmem_cache_destroy(xfs_bud_cache); 2541 kmem_cache_destroy(xfs_cui_cache); 2542 kmem_cache_destroy(xfs_cud_cache); 2543 kmem_cache_destroy(xfs_rui_cache); 2544 kmem_cache_destroy(xfs_rud_cache); 2545 kmem_cache_destroy(xfs_icreate_cache); 2546 kmem_cache_destroy(xfs_ili_cache); 2547 kmem_cache_destroy(xfs_inode_cache); 2548 kmem_cache_destroy(xfs_efi_cache); 2549 kmem_cache_destroy(xfs_efd_cache); 2550 kmem_cache_destroy(xfs_buf_item_cache); 2551 kmem_cache_destroy(xfs_trans_cache); 2552 kmem_cache_destroy(xfs_ifork_cache); 2553 kmem_cache_destroy(xfs_da_state_cache); 2554 xfs_defer_destroy_item_caches(); 2555 rcbagbt_destroy_cur_cache(); 2556 xfs_btree_destroy_cur_caches(); 2557 kmem_cache_destroy(xfs_log_ticket_cache); 2558 kmem_cache_destroy(xfs_buf_cache); 2559 } 2560 2561 STATIC int __init 2562 xfs_init_workqueues(void) 2563 { 2564 /* 2565 * The allocation workqueue can be used in memory reclaim situations 2566 * (writepage path), and parallelism is only limited by the number of 2567 * AGs in all the filesystems mounted. Hence use the default large 2568 * max_active value for this workqueue. 2569 */ 2570 xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU), 2571 0); 2572 if (!xfs_alloc_wq) 2573 return -ENOMEM; 2574 2575 xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 2576 0); 2577 if (!xfs_discard_wq) 2578 goto out_free_alloc_wq; 2579 2580 return 0; 2581 out_free_alloc_wq: 2582 destroy_workqueue(xfs_alloc_wq); 2583 return -ENOMEM; 2584 } 2585 2586 STATIC void 2587 xfs_destroy_workqueues(void) 2588 { 2589 destroy_workqueue(xfs_discard_wq); 2590 destroy_workqueue(xfs_alloc_wq); 2591 } 2592 2593 STATIC int __init 2594 init_xfs_fs(void) 2595 { 2596 int error; 2597 2598 xfs_check_ondisk_structs(); 2599 2600 error = xfs_dahash_test(); 2601 if (error) 2602 return error; 2603 2604 printk(KERN_INFO XFS_VERSION_STRING " with " 2605 XFS_BUILD_OPTIONS " enabled\n"); 2606 2607 xfs_dir_startup(); 2608 2609 error = xfs_init_caches(); 2610 if (error) 2611 goto out; 2612 2613 error = xfs_init_workqueues(); 2614 if (error) 2615 goto out_destroy_caches; 2616 2617 error = xfs_mru_cache_init(); 2618 if (error) 2619 goto out_destroy_wq; 2620 2621 error = xfs_init_procfs(); 2622 if (error) 2623 goto out_mru_cache_uninit; 2624 2625 error = xfs_sysctl_register(); 2626 if (error) 2627 goto out_cleanup_procfs; 2628 2629 xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL); 2630 2631 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 2632 if (!xfs_kset) { 2633 error = -ENOMEM; 2634 goto out_debugfs_unregister; 2635 } 2636 2637 xfsstats.xs_kobj.kobject.kset = xfs_kset; 2638 2639 xfsstats.xs_stats = alloc_percpu(struct xfsstats); 2640 if (!xfsstats.xs_stats) { 2641 error = -ENOMEM; 2642 goto out_kset_unregister; 2643 } 2644 2645 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, 2646 "stats"); 2647 if (error) 2648 goto out_free_stats; 2649 2650 error = xchk_global_stats_setup(xfs_debugfs); 2651 if (error) 2652 goto out_remove_stats_kobj; 2653 2654 #ifdef DEBUG 2655 xfs_dbg_kobj.kobject.kset = xfs_kset; 2656 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 2657 if (error) 2658 goto out_remove_scrub_stats; 2659 #endif 2660 2661 error = xfs_qm_init(); 2662 if (error) 2663 goto out_remove_dbg_kobj; 2664 2665 error = register_filesystem(&xfs_fs_type); 2666 if (error) 2667 goto out_qm_exit; 2668 return 0; 2669 2670 out_qm_exit: 2671 xfs_qm_exit(); 2672 out_remove_dbg_kobj: 2673 #ifdef DEBUG 2674 xfs_sysfs_del(&xfs_dbg_kobj); 2675 out_remove_scrub_stats: 2676 #endif 2677 xchk_global_stats_teardown(); 2678 out_remove_stats_kobj: 2679 xfs_sysfs_del(&xfsstats.xs_kobj); 2680 out_free_stats: 2681 free_percpu(xfsstats.xs_stats); 2682 out_kset_unregister: 2683 kset_unregister(xfs_kset); 2684 out_debugfs_unregister: 2685 debugfs_remove(xfs_debugfs); 2686 xfs_sysctl_unregister(); 2687 out_cleanup_procfs: 2688 xfs_cleanup_procfs(); 2689 out_mru_cache_uninit: 2690 xfs_mru_cache_uninit(); 2691 out_destroy_wq: 2692 xfs_destroy_workqueues(); 2693 out_destroy_caches: 2694 xfs_destroy_caches(); 2695 out: 2696 return error; 2697 } 2698 2699 STATIC void __exit 2700 exit_xfs_fs(void) 2701 { 2702 xfs_qm_exit(); 2703 unregister_filesystem(&xfs_fs_type); 2704 #ifdef DEBUG 2705 xfs_sysfs_del(&xfs_dbg_kobj); 2706 #endif 2707 xchk_global_stats_teardown(); 2708 xfs_sysfs_del(&xfsstats.xs_kobj); 2709 free_percpu(xfsstats.xs_stats); 2710 kset_unregister(xfs_kset); 2711 debugfs_remove(xfs_debugfs); 2712 xfs_sysctl_unregister(); 2713 xfs_cleanup_procfs(); 2714 xfs_mru_cache_uninit(); 2715 xfs_destroy_workqueues(); 2716 xfs_destroy_caches(); 2717 xfs_uuid_table_free(); 2718 } 2719 2720 module_init(init_xfs_fs); 2721 module_exit(exit_xfs_fs); 2722 2723 MODULE_AUTHOR("Silicon Graphics, Inc."); 2724 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); 2725 MODULE_LICENSE("GPL"); 2726