1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_btree.h" 16 #include "xfs_bmap.h" 17 #include "xfs_alloc.h" 18 #include "xfs_fsops.h" 19 #include "xfs_trans.h" 20 #include "xfs_buf_item.h" 21 #include "xfs_log.h" 22 #include "xfs_log_priv.h" 23 #include "xfs_dir2.h" 24 #include "xfs_extfree_item.h" 25 #include "xfs_mru_cache.h" 26 #include "xfs_inode_item.h" 27 #include "xfs_icache.h" 28 #include "xfs_trace.h" 29 #include "xfs_icreate_item.h" 30 #include "xfs_filestream.h" 31 #include "xfs_quota.h" 32 #include "xfs_sysfs.h" 33 #include "xfs_ondisk.h" 34 #include "xfs_rmap_item.h" 35 #include "xfs_refcount_item.h" 36 #include "xfs_bmap_item.h" 37 #include "xfs_reflink.h" 38 #include "xfs_pwork.h" 39 #include "xfs_ag.h" 40 #include "xfs_defer.h" 41 #include "xfs_attr_item.h" 42 #include "xfs_xattr.h" 43 #include "xfs_iunlink_item.h" 44 #include "xfs_dahash_test.h" 45 #include "scrub/stats.h" 46 47 #include <linux/magic.h> 48 #include <linux/fs_context.h> 49 #include <linux/fs_parser.h> 50 51 static const struct super_operations xfs_super_operations; 52 53 static struct dentry *xfs_debugfs; /* top-level xfs debugfs dir */ 54 static struct kset *xfs_kset; /* top-level xfs sysfs dir */ 55 #ifdef DEBUG 56 static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ 57 #endif 58 59 enum xfs_dax_mode { 60 XFS_DAX_INODE = 0, 61 XFS_DAX_ALWAYS = 1, 62 XFS_DAX_NEVER = 2, 63 }; 64 65 static void 66 xfs_mount_set_dax_mode( 67 struct xfs_mount *mp, 68 enum xfs_dax_mode mode) 69 { 70 switch (mode) { 71 case XFS_DAX_INODE: 72 mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); 73 break; 74 case XFS_DAX_ALWAYS: 75 mp->m_features |= XFS_FEAT_DAX_ALWAYS; 76 
mp->m_features &= ~XFS_FEAT_DAX_NEVER; 77 break; 78 case XFS_DAX_NEVER: 79 mp->m_features |= XFS_FEAT_DAX_NEVER; 80 mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; 81 break; 82 } 83 } 84 85 static const struct constant_table dax_param_enums[] = { 86 {"inode", XFS_DAX_INODE }, 87 {"always", XFS_DAX_ALWAYS }, 88 {"never", XFS_DAX_NEVER }, 89 {} 90 }; 91 92 /* 93 * Table driven mount option parser. 94 */ 95 enum { 96 Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, 97 Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, 98 Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, 99 Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep, 100 Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, 101 Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, 102 Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, 103 Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, 104 Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, 105 }; 106 107 static const struct fs_parameter_spec xfs_fs_parameters[] = { 108 fsparam_u32("logbufs", Opt_logbufs), 109 fsparam_string("logbsize", Opt_logbsize), 110 fsparam_string("logdev", Opt_logdev), 111 fsparam_string("rtdev", Opt_rtdev), 112 fsparam_flag("wsync", Opt_wsync), 113 fsparam_flag("noalign", Opt_noalign), 114 fsparam_flag("swalloc", Opt_swalloc), 115 fsparam_u32("sunit", Opt_sunit), 116 fsparam_u32("swidth", Opt_swidth), 117 fsparam_flag("nouuid", Opt_nouuid), 118 fsparam_flag("grpid", Opt_grpid), 119 fsparam_flag("nogrpid", Opt_nogrpid), 120 fsparam_flag("bsdgroups", Opt_bsdgroups), 121 fsparam_flag("sysvgroups", Opt_sysvgroups), 122 fsparam_string("allocsize", Opt_allocsize), 123 fsparam_flag("norecovery", Opt_norecovery), 124 fsparam_flag("inode64", Opt_inode64), 125 fsparam_flag("inode32", Opt_inode32), 126 fsparam_flag("ikeep", Opt_ikeep), 127 fsparam_flag("noikeep", Opt_noikeep), 128 fsparam_flag("largeio", Opt_largeio), 129 fsparam_flag("nolargeio", Opt_nolargeio), 130 
fsparam_flag("attr2", Opt_attr2), 131 fsparam_flag("noattr2", Opt_noattr2), 132 fsparam_flag("filestreams", Opt_filestreams), 133 fsparam_flag("quota", Opt_quota), 134 fsparam_flag("noquota", Opt_noquota), 135 fsparam_flag("usrquota", Opt_usrquota), 136 fsparam_flag("grpquota", Opt_grpquota), 137 fsparam_flag("prjquota", Opt_prjquota), 138 fsparam_flag("uquota", Opt_uquota), 139 fsparam_flag("gquota", Opt_gquota), 140 fsparam_flag("pquota", Opt_pquota), 141 fsparam_flag("uqnoenforce", Opt_uqnoenforce), 142 fsparam_flag("gqnoenforce", Opt_gqnoenforce), 143 fsparam_flag("pqnoenforce", Opt_pqnoenforce), 144 fsparam_flag("qnoenforce", Opt_qnoenforce), 145 fsparam_flag("discard", Opt_discard), 146 fsparam_flag("nodiscard", Opt_nodiscard), 147 fsparam_flag("dax", Opt_dax), 148 fsparam_enum("dax", Opt_dax_enum, dax_param_enums), 149 {} 150 }; 151 152 struct proc_xfs_info { 153 uint64_t flag; 154 char *str; 155 }; 156 157 static int 158 xfs_fs_show_options( 159 struct seq_file *m, 160 struct dentry *root) 161 { 162 static struct proc_xfs_info xfs_info_set[] = { 163 /* the few simple ones we can get from the mount struct */ 164 { XFS_FEAT_IKEEP, ",ikeep" }, 165 { XFS_FEAT_WSYNC, ",wsync" }, 166 { XFS_FEAT_NOALIGN, ",noalign" }, 167 { XFS_FEAT_SWALLOC, ",swalloc" }, 168 { XFS_FEAT_NOUUID, ",nouuid" }, 169 { XFS_FEAT_NORECOVERY, ",norecovery" }, 170 { XFS_FEAT_ATTR2, ",attr2" }, 171 { XFS_FEAT_FILESTREAMS, ",filestreams" }, 172 { XFS_FEAT_GRPID, ",grpid" }, 173 { XFS_FEAT_DISCARD, ",discard" }, 174 { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, 175 { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, 176 { XFS_FEAT_DAX_NEVER, ",dax=never" }, 177 { 0, NULL } 178 }; 179 struct xfs_mount *mp = XFS_M(root->d_sb); 180 struct proc_xfs_info *xfs_infop; 181 182 for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { 183 if (mp->m_features & xfs_infop->flag) 184 seq_puts(m, xfs_infop->str); 185 } 186 187 seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 
32 : 64); 188 189 if (xfs_has_allocsize(mp)) 190 seq_printf(m, ",allocsize=%dk", 191 (1 << mp->m_allocsize_log) >> 10); 192 193 if (mp->m_logbufs > 0) 194 seq_printf(m, ",logbufs=%d", mp->m_logbufs); 195 if (mp->m_logbsize > 0) 196 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); 197 198 if (mp->m_logname) 199 seq_show_option(m, "logdev", mp->m_logname); 200 if (mp->m_rtname) 201 seq_show_option(m, "rtdev", mp->m_rtname); 202 203 if (mp->m_dalign > 0) 204 seq_printf(m, ",sunit=%d", 205 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 206 if (mp->m_swidth > 0) 207 seq_printf(m, ",swidth=%d", 208 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 209 210 if (mp->m_qflags & XFS_UQUOTA_ENFD) 211 seq_puts(m, ",usrquota"); 212 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 213 seq_puts(m, ",uqnoenforce"); 214 215 if (mp->m_qflags & XFS_PQUOTA_ENFD) 216 seq_puts(m, ",prjquota"); 217 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 218 seq_puts(m, ",pqnoenforce"); 219 220 if (mp->m_qflags & XFS_GQUOTA_ENFD) 221 seq_puts(m, ",grpquota"); 222 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 223 seq_puts(m, ",gqnoenforce"); 224 225 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 226 seq_puts(m, ",noquota"); 227 228 return 0; 229 } 230 231 static bool 232 xfs_set_inode_alloc_perag( 233 struct xfs_perag *pag, 234 xfs_ino_t ino, 235 xfs_agnumber_t max_metadata) 236 { 237 if (!xfs_is_inode32(pag->pag_mount)) { 238 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 239 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 240 return false; 241 } 242 243 if (ino > XFS_MAXINUMBER_32) { 244 clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 245 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 246 return false; 247 } 248 249 set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); 250 if (pag->pag_agno < max_metadata) 251 set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 252 else 253 clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); 254 return true; 255 } 256 257 /* 258 * Set parameters 
for inode allocation heuristics, taking into account 259 * filesystem size and inode32/inode64 mount options; i.e. specifically 260 * whether or not XFS_FEAT_SMALL_INUMS is set. 261 * 262 * Inode allocation patterns are altered only if inode32 is requested 263 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. 264 * If altered, XFS_OPSTATE_INODE32 is set as well. 265 * 266 * An agcount independent of that in the mount structure is provided 267 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated 268 * to the potentially higher ag count. 269 * 270 * Returns the maximum AG index which may contain inodes. 271 */ 272 xfs_agnumber_t 273 xfs_set_inode_alloc( 274 struct xfs_mount *mp, 275 xfs_agnumber_t agcount) 276 { 277 xfs_agnumber_t index; 278 xfs_agnumber_t maxagi = 0; 279 xfs_sb_t *sbp = &mp->m_sb; 280 xfs_agnumber_t max_metadata; 281 xfs_agino_t agino; 282 xfs_ino_t ino; 283 284 /* 285 * Calculate how much should be reserved for inodes to meet 286 * the max inode percentage. Used only for inode32. 287 */ 288 if (M_IGEO(mp)->maxicount) { 289 uint64_t icount; 290 291 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 292 do_div(icount, 100); 293 icount += sbp->sb_agblocks - 1; 294 do_div(icount, sbp->sb_agblocks); 295 max_metadata = icount; 296 } else { 297 max_metadata = agcount; 298 } 299 300 /* Get the last possible inode in the filesystem */ 301 agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); 302 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 303 304 /* 305 * If user asked for no more than 32-bit inodes, and the fs is 306 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter 307 * the allocator to accommodate the request. 
308 */ 309 if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) 310 set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); 311 else 312 clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); 313 314 for (index = 0; index < agcount; index++) { 315 struct xfs_perag *pag; 316 317 ino = XFS_AGINO_TO_INO(mp, index, agino); 318 319 pag = xfs_perag_get(mp, index); 320 if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) 321 maxagi++; 322 xfs_perag_put(pag); 323 } 324 325 return xfs_is_inode32(mp) ? maxagi : agcount; 326 } 327 328 static int 329 xfs_setup_dax_always( 330 struct xfs_mount *mp) 331 { 332 if (!mp->m_ddev_targp->bt_daxdev && 333 (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { 334 xfs_alert(mp, 335 "DAX unsupported by block device. Turning off DAX."); 336 goto disable_dax; 337 } 338 339 if (mp->m_super->s_blocksize != PAGE_SIZE) { 340 xfs_alert(mp, 341 "DAX not supported for blocksize. Turning off DAX."); 342 goto disable_dax; 343 } 344 345 if (xfs_has_reflink(mp) && 346 bdev_is_partition(mp->m_ddev_targp->bt_bdev)) { 347 xfs_alert(mp, 348 "DAX and reflink cannot work with multi-partitions!"); 349 return -EINVAL; 350 } 351 352 xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); 353 return 0; 354 355 disable_dax: 356 xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); 357 return 0; 358 } 359 360 STATIC int 361 xfs_blkdev_get( 362 xfs_mount_t *mp, 363 const char *name, 364 struct bdev_handle **handlep) 365 { 366 int error = 0; 367 368 *handlep = bdev_open_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, 369 mp->m_super, &fs_holder_ops); 370 if (IS_ERR(*handlep)) { 371 error = PTR_ERR(*handlep); 372 *handlep = NULL; 373 xfs_warn(mp, "Invalid device [%s], error=%d", name, error); 374 } 375 376 return error; 377 } 378 379 STATIC void 380 xfs_shutdown_devices( 381 struct xfs_mount *mp) 382 { 383 /* 384 * Udev is triggered whenever anyone closes a block device or unmounts 385 * a file systemm on a block device. 
386 * The default udev rules invoke blkid to read the fs super and create 387 * symlinks to the bdev under /dev/disk. For this, it uses buffered 388 * reads through the page cache. 389 * 390 * xfs_db also uses buffered reads to examine metadata. There is no 391 * coordination between xfs_db and udev, which means that they can run 392 * concurrently. Note there is no coordination between the kernel and 393 * blkid either. 394 * 395 * On a system with 64k pages, the page cache can cache the superblock 396 * and the root inode (and hence the root directory) with the same 64k 397 * page. If udev spawns blkid after the mkfs and the system is busy 398 * enough that it is still running when xfs_db starts up, they'll both 399 * read from the same page in the pagecache. 400 * 401 * The unmount writes updated inode metadata to disk directly. The XFS 402 * buffer cache does not use the bdev pagecache, so it needs to 403 * invalidate that pagecache on unmount. If the above scenario occurs, 404 * the pagecache no longer reflects what's on disk, xfs_db reads the 405 * stale metadata, and fails to find /a. Most of the time this succeeds 406 * because closing a bdev invalidates the page cache, but when processes 407 * race, everyone loses. 408 */ 409 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 410 blkdev_issue_flush(mp->m_logdev_targp->bt_bdev); 411 invalidate_bdev(mp->m_logdev_targp->bt_bdev); 412 } 413 if (mp->m_rtdev_targp) { 414 blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); 415 invalidate_bdev(mp->m_rtdev_targp->bt_bdev); 416 } 417 blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); 418 invalidate_bdev(mp->m_ddev_targp->bt_bdev); 419 } 420 421 /* 422 * The file system configurations are: 423 * (1) device (partition) with data and internal log 424 * (2) logical volume with data and log subvolumes. 425 * (3) logical volume with data, log, and realtime subvolumes. 
426 * 427 * We only have to handle opening the log and realtime volumes here if 428 * they are present. The data subvolume has already been opened by 429 * get_sb_bdev() and is stored in sb->s_bdev. 430 */ 431 STATIC int 432 xfs_open_devices( 433 struct xfs_mount *mp) 434 { 435 struct super_block *sb = mp->m_super; 436 struct block_device *ddev = sb->s_bdev; 437 struct bdev_handle *logdev_handle = NULL, *rtdev_handle = NULL; 438 int error; 439 440 /* 441 * blkdev_put() can't be called under s_umount, see the comment 442 * in get_tree_bdev() for more details 443 */ 444 up_write(&sb->s_umount); 445 446 /* 447 * Open real time and log devices - order is important. 448 */ 449 if (mp->m_logname) { 450 error = xfs_blkdev_get(mp, mp->m_logname, &logdev_handle); 451 if (error) 452 goto out_relock; 453 } 454 455 if (mp->m_rtname) { 456 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_handle); 457 if (error) 458 goto out_close_logdev; 459 460 if (rtdev_handle->bdev == ddev || 461 (logdev_handle && 462 rtdev_handle->bdev == logdev_handle->bdev)) { 463 xfs_warn(mp, 464 "Cannot mount filesystem with identical rtdev and ddev/logdev."); 465 error = -EINVAL; 466 goto out_close_rtdev; 467 } 468 } 469 470 /* 471 * Setup xfs_mount buffer target pointers 472 */ 473 error = -ENOMEM; 474 mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle); 475 if (!mp->m_ddev_targp) 476 goto out_close_rtdev; 477 478 if (rtdev_handle) { 479 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_handle); 480 if (!mp->m_rtdev_targp) 481 goto out_free_ddev_targ; 482 } 483 484 if (logdev_handle && logdev_handle->bdev != ddev) { 485 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_handle); 486 if (!mp->m_logdev_targp) 487 goto out_free_rtdev_targ; 488 } else { 489 mp->m_logdev_targp = mp->m_ddev_targp; 490 /* Handle won't be used, drop it */ 491 if (logdev_handle) 492 bdev_release(logdev_handle); 493 } 494 495 error = 0; 496 out_relock: 497 down_write(&sb->s_umount); 498 return error; 499 500 
out_free_rtdev_targ: 501 if (mp->m_rtdev_targp) 502 xfs_free_buftarg(mp->m_rtdev_targp); 503 out_free_ddev_targ: 504 xfs_free_buftarg(mp->m_ddev_targp); 505 out_close_rtdev: 506 if (rtdev_handle) 507 bdev_release(rtdev_handle); 508 out_close_logdev: 509 if (logdev_handle) 510 bdev_release(logdev_handle); 511 goto out_relock; 512 } 513 514 /* 515 * Setup xfs_mount buffer target pointers based on superblock 516 */ 517 STATIC int 518 xfs_setup_devices( 519 struct xfs_mount *mp) 520 { 521 int error; 522 523 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize); 524 if (error) 525 return error; 526 527 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 528 unsigned int log_sector_size = BBSIZE; 529 530 if (xfs_has_sector(mp)) 531 log_sector_size = mp->m_sb.sb_logsectsize; 532 error = xfs_setsize_buftarg(mp->m_logdev_targp, 533 log_sector_size); 534 if (error) 535 return error; 536 } 537 if (mp->m_rtdev_targp) { 538 error = xfs_setsize_buftarg(mp->m_rtdev_targp, 539 mp->m_sb.sb_sectsize); 540 if (error) 541 return error; 542 } 543 544 return 0; 545 } 546 547 STATIC int 548 xfs_init_mount_workqueues( 549 struct xfs_mount *mp) 550 { 551 mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", 552 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 553 1, mp->m_super->s_id); 554 if (!mp->m_buf_workqueue) 555 goto out; 556 557 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 558 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 559 0, mp->m_super->s_id); 560 if (!mp->m_unwritten_workqueue) 561 goto out_destroy_buf; 562 563 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 564 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 565 0, mp->m_super->s_id); 566 if (!mp->m_reclaim_workqueue) 567 goto out_destroy_unwritten; 568 569 mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", 570 XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 571 0, mp->m_super->s_id); 572 if (!mp->m_blockgc_wq) 573 goto out_destroy_reclaim; 574 575 mp->m_inodegc_wq = 
alloc_workqueue("xfs-inodegc/%s", 576 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 577 1, mp->m_super->s_id); 578 if (!mp->m_inodegc_wq) 579 goto out_destroy_blockgc; 580 581 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", 582 XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id); 583 if (!mp->m_sync_workqueue) 584 goto out_destroy_inodegc; 585 586 return 0; 587 588 out_destroy_inodegc: 589 destroy_workqueue(mp->m_inodegc_wq); 590 out_destroy_blockgc: 591 destroy_workqueue(mp->m_blockgc_wq); 592 out_destroy_reclaim: 593 destroy_workqueue(mp->m_reclaim_workqueue); 594 out_destroy_unwritten: 595 destroy_workqueue(mp->m_unwritten_workqueue); 596 out_destroy_buf: 597 destroy_workqueue(mp->m_buf_workqueue); 598 out: 599 return -ENOMEM; 600 } 601 602 STATIC void 603 xfs_destroy_mount_workqueues( 604 struct xfs_mount *mp) 605 { 606 destroy_workqueue(mp->m_sync_workqueue); 607 destroy_workqueue(mp->m_blockgc_wq); 608 destroy_workqueue(mp->m_inodegc_wq); 609 destroy_workqueue(mp->m_reclaim_workqueue); 610 destroy_workqueue(mp->m_unwritten_workqueue); 611 destroy_workqueue(mp->m_buf_workqueue); 612 } 613 614 static void 615 xfs_flush_inodes_worker( 616 struct work_struct *work) 617 { 618 struct xfs_mount *mp = container_of(work, struct xfs_mount, 619 m_flush_inodes_work); 620 struct super_block *sb = mp->m_super; 621 622 if (down_read_trylock(&sb->s_umount)) { 623 sync_inodes_sb(sb); 624 up_read(&sb->s_umount); 625 } 626 } 627 628 /* 629 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK 630 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting 631 * for IO to complete so that we effectively throttle multiple callers to the 632 * rate at which IO is completing. 633 */ 634 void 635 xfs_flush_inodes( 636 struct xfs_mount *mp) 637 { 638 /* 639 * If flush_work() returns true then that means we waited for a flush 640 * which was already in progress. Don't bother running another scan. 
641 */ 642 if (flush_work(&mp->m_flush_inodes_work)) 643 return; 644 645 queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); 646 flush_work(&mp->m_flush_inodes_work); 647 } 648 649 /* Catch misguided souls that try to use this interface on XFS */ 650 STATIC struct inode * 651 xfs_fs_alloc_inode( 652 struct super_block *sb) 653 { 654 BUG(); 655 return NULL; 656 } 657 658 /* 659 * Now that the generic code is guaranteed not to be accessing 660 * the linux inode, we can inactivate and reclaim the inode. 661 */ 662 STATIC void 663 xfs_fs_destroy_inode( 664 struct inode *inode) 665 { 666 struct xfs_inode *ip = XFS_I(inode); 667 668 trace_xfs_destroy_inode(ip); 669 670 ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 671 XFS_STATS_INC(ip->i_mount, vn_rele); 672 XFS_STATS_INC(ip->i_mount, vn_remove); 673 xfs_inode_mark_reclaimable(ip); 674 } 675 676 static void 677 xfs_fs_dirty_inode( 678 struct inode *inode, 679 int flags) 680 { 681 struct xfs_inode *ip = XFS_I(inode); 682 struct xfs_mount *mp = ip->i_mount; 683 struct xfs_trans *tp; 684 685 if (!(inode->i_sb->s_flags & SB_LAZYTIME)) 686 return; 687 688 /* 689 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC) 690 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed 691 * in flags possibly together with I_DIRTY_SYNC. 692 */ 693 if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME)) 694 return; 695 696 if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) 697 return; 698 xfs_ilock(ip, XFS_ILOCK_EXCL); 699 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 700 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); 701 xfs_trans_commit(tp); 702 } 703 704 /* 705 * Slab object creation initialisation for the XFS inode. 706 * This covers only the idempotent fields in the XFS inode; 707 * all other fields need to be initialised on allocation 708 * from the slab. 
This avoids the need to repeatedly initialise 709 * fields in the xfs inode that left in the initialise state 710 * when freeing the inode. 711 */ 712 STATIC void 713 xfs_fs_inode_init_once( 714 void *inode) 715 { 716 struct xfs_inode *ip = inode; 717 718 memset(ip, 0, sizeof(struct xfs_inode)); 719 720 /* vfs inode */ 721 inode_init_once(VFS_I(ip)); 722 723 /* xfs inode */ 724 atomic_set(&ip->i_pincount, 0); 725 spin_lock_init(&ip->i_flags_lock); 726 727 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 728 "xfsino", ip->i_ino); 729 } 730 731 /* 732 * We do an unlocked check for XFS_IDONTCACHE here because we are already 733 * serialised against cache hits here via the inode->i_lock and igrab() in 734 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be 735 * racing with us, and it avoids needing to grab a spinlock here for every inode 736 * we drop the final reference on. 737 */ 738 STATIC int 739 xfs_fs_drop_inode( 740 struct inode *inode) 741 { 742 struct xfs_inode *ip = XFS_I(inode); 743 744 /* 745 * If this unlinked inode is in the middle of recovery, don't 746 * drop the inode just yet; log recovery will take care of 747 * that. See the comment for this inode flag. 748 */ 749 if (ip->i_flags & XFS_IRECOVERY) { 750 ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); 751 return 0; 752 } 753 754 return generic_drop_inode(inode); 755 } 756 757 static void 758 xfs_mount_free( 759 struct xfs_mount *mp) 760 { 761 /* 762 * Free the buftargs here because blkdev_put needs to be called outside 763 * of sb->s_umount, which is held around the call to ->put_super. 
764 */ 765 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) 766 xfs_free_buftarg(mp->m_logdev_targp); 767 if (mp->m_rtdev_targp) 768 xfs_free_buftarg(mp->m_rtdev_targp); 769 if (mp->m_ddev_targp) 770 xfs_free_buftarg(mp->m_ddev_targp); 771 772 debugfs_remove(mp->m_debugfs); 773 kfree(mp->m_rtname); 774 kfree(mp->m_logname); 775 kmem_free(mp); 776 } 777 778 STATIC int 779 xfs_fs_sync_fs( 780 struct super_block *sb, 781 int wait) 782 { 783 struct xfs_mount *mp = XFS_M(sb); 784 int error; 785 786 trace_xfs_fs_sync_fs(mp, __return_address); 787 788 /* 789 * Doing anything during the async pass would be counterproductive. 790 */ 791 if (!wait) 792 return 0; 793 794 error = xfs_log_force(mp, XFS_LOG_SYNC); 795 if (error) 796 return error; 797 798 if (laptop_mode) { 799 /* 800 * The disk must be active because we're syncing. 801 * We schedule log work now (now that the disk is 802 * active) instead of later (when it might not be). 803 */ 804 flush_delayed_work(&mp->m_log->l_work); 805 } 806 807 /* 808 * If we are called with page faults frozen out, it means we are about 809 * to freeze the transaction subsystem. Take the opportunity to shut 810 * down inodegc because once SB_FREEZE_FS is set it's too late to 811 * prevent inactivation races with freeze. The fs doesn't get called 812 * again by the freezing process until after SB_FREEZE_FS has been set, 813 * so it's now or never. Same logic applies to speculative allocation 814 * garbage collection. 815 * 816 * We don't care if this is a normal syncfs call that does this or 817 * freeze that does this - we can run this multiple times without issue 818 * and we won't race with a restart because a restart can only occur 819 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. 
820 */ 821 if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { 822 xfs_inodegc_stop(mp); 823 xfs_blockgc_stop(mp); 824 } 825 826 return 0; 827 } 828 829 STATIC int 830 xfs_fs_statfs( 831 struct dentry *dentry, 832 struct kstatfs *statp) 833 { 834 struct xfs_mount *mp = XFS_M(dentry->d_sb); 835 xfs_sb_t *sbp = &mp->m_sb; 836 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 837 uint64_t fakeinos, id; 838 uint64_t icount; 839 uint64_t ifree; 840 uint64_t fdblocks; 841 xfs_extlen_t lsize; 842 int64_t ffree; 843 844 /* 845 * Expedite background inodegc but don't wait. We do not want to block 846 * here waiting hours for a billion extent file to be truncated. 847 */ 848 xfs_inodegc_push(mp); 849 850 statp->f_type = XFS_SUPER_MAGIC; 851 statp->f_namelen = MAXNAMELEN - 1; 852 853 id = huge_encode_dev(mp->m_ddev_targp->bt_dev); 854 statp->f_fsid = u64_to_fsid(id); 855 856 icount = percpu_counter_sum(&mp->m_icount); 857 ifree = percpu_counter_sum(&mp->m_ifree); 858 fdblocks = percpu_counter_sum(&mp->m_fdblocks); 859 860 spin_lock(&mp->m_sb_lock); 861 statp->f_bsize = sbp->sb_blocksize; 862 lsize = sbp->sb_logstart ? 
sbp->sb_logblocks : 0; 863 statp->f_blocks = sbp->sb_dblocks - lsize; 864 spin_unlock(&mp->m_sb_lock); 865 866 /* make sure statp->f_bfree does not underflow */ 867 statp->f_bfree = max_t(int64_t, 0, 868 fdblocks - xfs_fdblocks_unavailable(mp)); 869 statp->f_bavail = statp->f_bfree; 870 871 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); 872 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 873 if (M_IGEO(mp)->maxicount) 874 statp->f_files = min_t(typeof(statp->f_files), 875 statp->f_files, 876 M_IGEO(mp)->maxicount); 877 878 /* If sb_icount overshot maxicount, report actual allocation */ 879 statp->f_files = max_t(typeof(statp->f_files), 880 statp->f_files, 881 sbp->sb_icount); 882 883 /* make sure statp->f_ffree does not underflow */ 884 ffree = statp->f_files - (icount - ifree); 885 statp->f_ffree = max_t(int64_t, ffree, 0); 886 887 888 if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && 889 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 890 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 891 xfs_qm_statvfs(ip, statp); 892 893 if (XFS_IS_REALTIME_MOUNT(mp) && 894 (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { 895 s64 freertx; 896 897 statp->f_blocks = sbp->sb_rblocks; 898 freertx = percpu_counter_sum_positive(&mp->m_frextents); 899 statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize; 900 } 901 902 return 0; 903 } 904 905 STATIC void 906 xfs_save_resvblks(struct xfs_mount *mp) 907 { 908 uint64_t resblks = 0; 909 910 mp->m_resblks_save = mp->m_resblks; 911 xfs_reserve_blocks(mp, &resblks, NULL); 912 } 913 914 STATIC void 915 xfs_restore_resvblks(struct xfs_mount *mp) 916 { 917 uint64_t resblks; 918 919 if (mp->m_resblks_save) { 920 resblks = mp->m_resblks_save; 921 mp->m_resblks_save = 0; 922 } else 923 resblks = xfs_default_resblks(mp); 924 925 xfs_reserve_blocks(mp, &resblks, NULL); 926 } 927 928 /* 929 * Second stage of a freeze. The data is already frozen so we only 930 * need to take care of the metadata. 
Once that's done sync the superblock 931 * to the log to dirty it in case of a crash while frozen. This ensures that we 932 * will recover the unlinked inode lists on the next mount. 933 */ 934 STATIC int 935 xfs_fs_freeze( 936 struct super_block *sb) 937 { 938 struct xfs_mount *mp = XFS_M(sb); 939 unsigned int flags; 940 int ret; 941 942 /* 943 * The filesystem is now frozen far enough that memory reclaim 944 * cannot safely operate on the filesystem. Hence we need to 945 * set a GFP_NOFS context here to avoid recursion deadlocks. 946 */ 947 flags = memalloc_nofs_save(); 948 xfs_save_resvblks(mp); 949 ret = xfs_log_quiesce(mp); 950 memalloc_nofs_restore(flags); 951 952 /* 953 * For read-write filesystems, we need to restart the inodegc on error 954 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not 955 * going to be run to restart it now. We are at SB_FREEZE_FS level 956 * here, so we can restart safely without racing with a stop in 957 * xfs_fs_sync_fs(). 958 */ 959 if (ret && !xfs_is_readonly(mp)) { 960 xfs_blockgc_start(mp); 961 xfs_inodegc_start(mp); 962 } 963 964 return ret; 965 } 966 967 STATIC int 968 xfs_fs_unfreeze( 969 struct super_block *sb) 970 { 971 struct xfs_mount *mp = XFS_M(sb); 972 973 xfs_restore_resvblks(mp); 974 xfs_log_work_queue(mp); 975 976 /* 977 * Don't reactivate the inodegc worker on a readonly filesystem because 978 * inodes are sent directly to reclaim. Don't reactivate the blockgc 979 * worker because there are no speculative preallocations on a readonly 980 * filesystem. 981 */ 982 if (!xfs_is_readonly(mp)) { 983 xfs_blockgc_start(mp); 984 xfs_inodegc_start(mp); 985 } 986 987 return 0; 988 } 989 990 /* 991 * This function fills in xfs_mount_t fields based on mount args. 992 * Note: the superblock _has_ now been read in. 
993 */ 994 STATIC int 995 xfs_finish_flags( 996 struct xfs_mount *mp) 997 { 998 /* Fail a mount where the logbuf is smaller than the log stripe */ 999 if (xfs_has_logv2(mp)) { 1000 if (mp->m_logbsize <= 0 && 1001 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { 1002 mp->m_logbsize = mp->m_sb.sb_logsunit; 1003 } else if (mp->m_logbsize > 0 && 1004 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1005 xfs_warn(mp, 1006 "logbuf size must be greater than or equal to log stripe size"); 1007 return -EINVAL; 1008 } 1009 } else { 1010 /* Fail a mount if the logbuf is larger than 32K */ 1011 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1012 xfs_warn(mp, 1013 "logbuf size for version 1 logs must be 16K or 32K"); 1014 return -EINVAL; 1015 } 1016 } 1017 1018 /* 1019 * V5 filesystems always use attr2 format for attributes. 1020 */ 1021 if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) { 1022 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. " 1023 "attr2 is always enabled for V5 filesystems."); 1024 return -EINVAL; 1025 } 1026 1027 /* 1028 * prohibit r/w mounts of read-only filesystems 1029 */ 1030 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { 1031 xfs_warn(mp, 1032 "cannot mount a read-only filesystem as read-write"); 1033 return -EROFS; 1034 } 1035 1036 if ((mp->m_qflags & XFS_GQUOTA_ACCT) && 1037 (mp->m_qflags & XFS_PQUOTA_ACCT) && 1038 !xfs_has_pquotino(mp)) { 1039 xfs_warn(mp, 1040 "Super block does not support project and group quota together"); 1041 return -EINVAL; 1042 } 1043 1044 return 0; 1045 } 1046 1047 static int 1048 xfs_init_percpu_counters( 1049 struct xfs_mount *mp) 1050 { 1051 int error; 1052 1053 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); 1054 if (error) 1055 return -ENOMEM; 1056 1057 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); 1058 if (error) 1059 goto free_icount; 1060 1061 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); 1062 if (error) 1063 goto free_ifree; 1064 1065 error = 
percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); 1066 if (error) 1067 goto free_fdblocks; 1068 1069 error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL); 1070 if (error) 1071 goto free_delalloc; 1072 1073 return 0; 1074 1075 free_delalloc: 1076 percpu_counter_destroy(&mp->m_delalloc_blks); 1077 free_fdblocks: 1078 percpu_counter_destroy(&mp->m_fdblocks); 1079 free_ifree: 1080 percpu_counter_destroy(&mp->m_ifree); 1081 free_icount: 1082 percpu_counter_destroy(&mp->m_icount); 1083 return -ENOMEM; 1084 } 1085 1086 void 1087 xfs_reinit_percpu_counters( 1088 struct xfs_mount *mp) 1089 { 1090 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); 1091 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); 1092 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); 1093 percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents); 1094 } 1095 1096 static void 1097 xfs_destroy_percpu_counters( 1098 struct xfs_mount *mp) 1099 { 1100 percpu_counter_destroy(&mp->m_icount); 1101 percpu_counter_destroy(&mp->m_ifree); 1102 percpu_counter_destroy(&mp->m_fdblocks); 1103 ASSERT(xfs_is_shutdown(mp) || 1104 percpu_counter_sum(&mp->m_delalloc_blks) == 0); 1105 percpu_counter_destroy(&mp->m_delalloc_blks); 1106 percpu_counter_destroy(&mp->m_frextents); 1107 } 1108 1109 static int 1110 xfs_inodegc_init_percpu( 1111 struct xfs_mount *mp) 1112 { 1113 struct xfs_inodegc *gc; 1114 int cpu; 1115 1116 mp->m_inodegc = alloc_percpu(struct xfs_inodegc); 1117 if (!mp->m_inodegc) 1118 return -ENOMEM; 1119 1120 for_each_possible_cpu(cpu) { 1121 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1122 gc->cpu = cpu; 1123 gc->mp = mp; 1124 init_llist_head(&gc->list); 1125 gc->items = 0; 1126 gc->error = 0; 1127 INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); 1128 } 1129 return 0; 1130 } 1131 1132 static void 1133 xfs_inodegc_free_percpu( 1134 struct xfs_mount *mp) 1135 { 1136 if (!mp->m_inodegc) 1137 return; 1138 free_percpu(mp->m_inodegc); 1139 } 1140 1141 static void 1142 xfs_fs_put_super( 
1143 struct super_block *sb) 1144 { 1145 struct xfs_mount *mp = XFS_M(sb); 1146 1147 xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); 1148 xfs_filestream_unmount(mp); 1149 xfs_unmountfs(mp); 1150 1151 xfs_freesb(mp); 1152 xchk_mount_stats_free(mp); 1153 free_percpu(mp->m_stats.xs_stats); 1154 xfs_inodegc_free_percpu(mp); 1155 xfs_destroy_percpu_counters(mp); 1156 xfs_destroy_mount_workqueues(mp); 1157 xfs_shutdown_devices(mp); 1158 } 1159 1160 static long 1161 xfs_fs_nr_cached_objects( 1162 struct super_block *sb, 1163 struct shrink_control *sc) 1164 { 1165 /* Paranoia: catch incorrect calls during mount setup or teardown */ 1166 if (WARN_ON_ONCE(!sb->s_fs_info)) 1167 return 0; 1168 return xfs_reclaim_inodes_count(XFS_M(sb)); 1169 } 1170 1171 static long 1172 xfs_fs_free_cached_objects( 1173 struct super_block *sb, 1174 struct shrink_control *sc) 1175 { 1176 return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); 1177 } 1178 1179 static void 1180 xfs_fs_shutdown( 1181 struct super_block *sb) 1182 { 1183 xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); 1184 } 1185 1186 static const struct super_operations xfs_super_operations = { 1187 .alloc_inode = xfs_fs_alloc_inode, 1188 .destroy_inode = xfs_fs_destroy_inode, 1189 .dirty_inode = xfs_fs_dirty_inode, 1190 .drop_inode = xfs_fs_drop_inode, 1191 .put_super = xfs_fs_put_super, 1192 .sync_fs = xfs_fs_sync_fs, 1193 .freeze_fs = xfs_fs_freeze, 1194 .unfreeze_fs = xfs_fs_unfreeze, 1195 .statfs = xfs_fs_statfs, 1196 .show_options = xfs_fs_show_options, 1197 .nr_cached_objects = xfs_fs_nr_cached_objects, 1198 .free_cached_objects = xfs_fs_free_cached_objects, 1199 .shutdown = xfs_fs_shutdown, 1200 }; 1201 1202 static int 1203 suffix_kstrtoint( 1204 const char *s, 1205 unsigned int base, 1206 int *res) 1207 { 1208 int last, shift_left_factor = 0, _res; 1209 char *value; 1210 int ret = 0; 1211 1212 value = kstrdup(s, GFP_KERNEL); 1213 if (!value) 1214 return -ENOMEM; 1215 1216 last = strlen(value) - 1; 
1217 if (value[last] == 'K' || value[last] == 'k') { 1218 shift_left_factor = 10; 1219 value[last] = '\0'; 1220 } 1221 if (value[last] == 'M' || value[last] == 'm') { 1222 shift_left_factor = 20; 1223 value[last] = '\0'; 1224 } 1225 if (value[last] == 'G' || value[last] == 'g') { 1226 shift_left_factor = 30; 1227 value[last] = '\0'; 1228 } 1229 1230 if (kstrtoint(value, base, &_res)) 1231 ret = -EINVAL; 1232 kfree(value); 1233 *res = _res << shift_left_factor; 1234 return ret; 1235 } 1236 1237 static inline void 1238 xfs_fs_warn_deprecated( 1239 struct fs_context *fc, 1240 struct fs_parameter *param, 1241 uint64_t flag, 1242 bool value) 1243 { 1244 /* Don't print the warning if reconfiguring and current mount point 1245 * already had the flag set 1246 */ 1247 if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && 1248 !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) 1249 return; 1250 xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); 1251 } 1252 1253 /* 1254 * Set mount state from a mount option. 1255 * 1256 * NOTE: mp->m_super is NULL here! 
1257 */ 1258 static int 1259 xfs_fs_parse_param( 1260 struct fs_context *fc, 1261 struct fs_parameter *param) 1262 { 1263 struct xfs_mount *parsing_mp = fc->s_fs_info; 1264 struct fs_parse_result result; 1265 int size = 0; 1266 int opt; 1267 1268 opt = fs_parse(fc, xfs_fs_parameters, param, &result); 1269 if (opt < 0) 1270 return opt; 1271 1272 switch (opt) { 1273 case Opt_logbufs: 1274 parsing_mp->m_logbufs = result.uint_32; 1275 return 0; 1276 case Opt_logbsize: 1277 if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) 1278 return -EINVAL; 1279 return 0; 1280 case Opt_logdev: 1281 kfree(parsing_mp->m_logname); 1282 parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); 1283 if (!parsing_mp->m_logname) 1284 return -ENOMEM; 1285 return 0; 1286 case Opt_rtdev: 1287 kfree(parsing_mp->m_rtname); 1288 parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); 1289 if (!parsing_mp->m_rtname) 1290 return -ENOMEM; 1291 return 0; 1292 case Opt_allocsize: 1293 if (suffix_kstrtoint(param->string, 10, &size)) 1294 return -EINVAL; 1295 parsing_mp->m_allocsize_log = ffs(size) - 1; 1296 parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; 1297 return 0; 1298 case Opt_grpid: 1299 case Opt_bsdgroups: 1300 parsing_mp->m_features |= XFS_FEAT_GRPID; 1301 return 0; 1302 case Opt_nogrpid: 1303 case Opt_sysvgroups: 1304 parsing_mp->m_features &= ~XFS_FEAT_GRPID; 1305 return 0; 1306 case Opt_wsync: 1307 parsing_mp->m_features |= XFS_FEAT_WSYNC; 1308 return 0; 1309 case Opt_norecovery: 1310 parsing_mp->m_features |= XFS_FEAT_NORECOVERY; 1311 return 0; 1312 case Opt_noalign: 1313 parsing_mp->m_features |= XFS_FEAT_NOALIGN; 1314 return 0; 1315 case Opt_swalloc: 1316 parsing_mp->m_features |= XFS_FEAT_SWALLOC; 1317 return 0; 1318 case Opt_sunit: 1319 parsing_mp->m_dalign = result.uint_32; 1320 return 0; 1321 case Opt_swidth: 1322 parsing_mp->m_swidth = result.uint_32; 1323 return 0; 1324 case Opt_inode32: 1325 parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; 1326 return 0; 1327 
case Opt_inode64: 1328 parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1329 return 0; 1330 case Opt_nouuid: 1331 parsing_mp->m_features |= XFS_FEAT_NOUUID; 1332 return 0; 1333 case Opt_largeio: 1334 parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; 1335 return 0; 1336 case Opt_nolargeio: 1337 parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; 1338 return 0; 1339 case Opt_filestreams: 1340 parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; 1341 return 0; 1342 case Opt_noquota: 1343 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; 1344 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; 1345 return 0; 1346 case Opt_quota: 1347 case Opt_uquota: 1348 case Opt_usrquota: 1349 parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); 1350 return 0; 1351 case Opt_qnoenforce: 1352 case Opt_uqnoenforce: 1353 parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; 1354 parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; 1355 return 0; 1356 case Opt_pquota: 1357 case Opt_prjquota: 1358 parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); 1359 return 0; 1360 case Opt_pqnoenforce: 1361 parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; 1362 parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; 1363 return 0; 1364 case Opt_gquota: 1365 case Opt_grpquota: 1366 parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); 1367 return 0; 1368 case Opt_gqnoenforce: 1369 parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; 1370 parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; 1371 return 0; 1372 case Opt_discard: 1373 parsing_mp->m_features |= XFS_FEAT_DISCARD; 1374 return 0; 1375 case Opt_nodiscard: 1376 parsing_mp->m_features &= ~XFS_FEAT_DISCARD; 1377 return 0; 1378 #ifdef CONFIG_FS_DAX 1379 case Opt_dax: 1380 xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); 1381 return 0; 1382 case Opt_dax_enum: 1383 xfs_mount_set_dax_mode(parsing_mp, result.uint_32); 1384 return 0; 1385 #endif 1386 /* Following mount options will be removed in September 2025 */ 1387 case Opt_ikeep: 1388 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); 1389 
parsing_mp->m_features |= XFS_FEAT_IKEEP; 1390 return 0; 1391 case Opt_noikeep: 1392 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); 1393 parsing_mp->m_features &= ~XFS_FEAT_IKEEP; 1394 return 0; 1395 case Opt_attr2: 1396 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); 1397 parsing_mp->m_features |= XFS_FEAT_ATTR2; 1398 return 0; 1399 case Opt_noattr2: 1400 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); 1401 parsing_mp->m_features |= XFS_FEAT_NOATTR2; 1402 return 0; 1403 default: 1404 xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); 1405 return -EINVAL; 1406 } 1407 1408 return 0; 1409 } 1410 1411 static int 1412 xfs_fs_validate_params( 1413 struct xfs_mount *mp) 1414 { 1415 /* No recovery flag requires a read-only mount */ 1416 if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { 1417 xfs_warn(mp, "no-recovery mounts must be read-only."); 1418 return -EINVAL; 1419 } 1420 1421 /* 1422 * We have not read the superblock at this point, so only the attr2 1423 * mount option can set the attr2 feature by this stage. 
1424 */ 1425 if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) { 1426 xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); 1427 return -EINVAL; 1428 } 1429 1430 1431 if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { 1432 xfs_warn(mp, 1433 "sunit and swidth options incompatible with the noalign option"); 1434 return -EINVAL; 1435 } 1436 1437 if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) { 1438 xfs_warn(mp, "quota support not available in this kernel."); 1439 return -EINVAL; 1440 } 1441 1442 if ((mp->m_dalign && !mp->m_swidth) || 1443 (!mp->m_dalign && mp->m_swidth)) { 1444 xfs_warn(mp, "sunit and swidth must be specified together"); 1445 return -EINVAL; 1446 } 1447 1448 if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { 1449 xfs_warn(mp, 1450 "stripe width (%d) must be a multiple of the stripe unit (%d)", 1451 mp->m_swidth, mp->m_dalign); 1452 return -EINVAL; 1453 } 1454 1455 if (mp->m_logbufs != -1 && 1456 mp->m_logbufs != 0 && 1457 (mp->m_logbufs < XLOG_MIN_ICLOGS || 1458 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 1459 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", 1460 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 1461 return -EINVAL; 1462 } 1463 1464 if (mp->m_logbsize != -1 && 1465 mp->m_logbsize != 0 && 1466 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 1467 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 1468 !is_power_of_2(mp->m_logbsize))) { 1469 xfs_warn(mp, 1470 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 1471 mp->m_logbsize); 1472 return -EINVAL; 1473 } 1474 1475 if (xfs_has_allocsize(mp) && 1476 (mp->m_allocsize_log > XFS_MAX_IO_LOG || 1477 mp->m_allocsize_log < XFS_MIN_IO_LOG)) { 1478 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", 1479 mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); 1480 return -EINVAL; 1481 } 1482 1483 return 0; 1484 } 1485 1486 struct dentry * 1487 xfs_debugfs_mkdir( 1488 const char *name, 1489 struct dentry *parent) 1490 { 1491 struct dentry *child; 1492 1493 /* Apparently we're 
expected to ignore error returns?? */ 1494 child = debugfs_create_dir(name, parent); 1495 if (IS_ERR(child)) 1496 return NULL; 1497 1498 return child; 1499 } 1500 1501 static int 1502 xfs_fs_fill_super( 1503 struct super_block *sb, 1504 struct fs_context *fc) 1505 { 1506 struct xfs_mount *mp = sb->s_fs_info; 1507 struct inode *root; 1508 int flags = 0, error; 1509 1510 mp->m_super = sb; 1511 1512 error = xfs_fs_validate_params(mp); 1513 if (error) 1514 return error; 1515 1516 sb_min_blocksize(sb, BBSIZE); 1517 sb->s_xattr = xfs_xattr_handlers; 1518 sb->s_export_op = &xfs_export_operations; 1519 #ifdef CONFIG_XFS_QUOTA 1520 sb->s_qcop = &xfs_quotactl_operations; 1521 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 1522 #endif 1523 sb->s_op = &xfs_super_operations; 1524 1525 /* 1526 * Delay mount work if the debug hook is set. This is debug 1527 * instrumention to coordinate simulation of xfs mount failures with 1528 * VFS superblock operations 1529 */ 1530 if (xfs_globals.mount_delay) { 1531 xfs_notice(mp, "Delaying mount for %d seconds.", 1532 xfs_globals.mount_delay); 1533 msleep(xfs_globals.mount_delay * 1000); 1534 } 1535 1536 if (fc->sb_flags & SB_SILENT) 1537 flags |= XFS_MFSI_QUIET; 1538 1539 error = xfs_open_devices(mp); 1540 if (error) 1541 return error; 1542 1543 if (xfs_debugfs) { 1544 mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id, 1545 xfs_debugfs); 1546 } else { 1547 mp->m_debugfs = NULL; 1548 } 1549 1550 error = xfs_init_mount_workqueues(mp); 1551 if (error) 1552 goto out_shutdown_devices; 1553 1554 error = xfs_init_percpu_counters(mp); 1555 if (error) 1556 goto out_destroy_workqueues; 1557 1558 error = xfs_inodegc_init_percpu(mp); 1559 if (error) 1560 goto out_destroy_counters; 1561 1562 /* Allocate stats memory before we do operations that might use it */ 1563 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1564 if (!mp->m_stats.xs_stats) { 1565 error = -ENOMEM; 1566 goto out_destroy_inodegc; 1567 } 1568 1569 error = 
xchk_mount_stats_alloc(mp); 1570 if (error) 1571 goto out_free_stats; 1572 1573 error = xfs_readsb(mp, flags); 1574 if (error) 1575 goto out_free_scrub_stats; 1576 1577 error = xfs_finish_flags(mp); 1578 if (error) 1579 goto out_free_sb; 1580 1581 error = xfs_setup_devices(mp); 1582 if (error) 1583 goto out_free_sb; 1584 1585 /* V4 support is undergoing deprecation. */ 1586 if (!xfs_has_crc(mp)) { 1587 #ifdef CONFIG_XFS_SUPPORT_V4 1588 xfs_warn_once(mp, 1589 "Deprecated V4 format (crc=0) will not be supported after September 2030."); 1590 #else 1591 xfs_warn(mp, 1592 "Deprecated V4 format (crc=0) not supported by kernel."); 1593 error = -EINVAL; 1594 goto out_free_sb; 1595 #endif 1596 } 1597 1598 /* ASCII case insensitivity is undergoing deprecation. */ 1599 if (xfs_has_asciici(mp)) { 1600 #ifdef CONFIG_XFS_SUPPORT_ASCII_CI 1601 xfs_warn_once(mp, 1602 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030."); 1603 #else 1604 xfs_warn(mp, 1605 "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel."); 1606 error = -EINVAL; 1607 goto out_free_sb; 1608 #endif 1609 } 1610 1611 /* Filesystem claims it needs repair, so refuse the mount. */ 1612 if (xfs_has_needsrepair(mp)) { 1613 xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); 1614 error = -EFSCORRUPTED; 1615 goto out_free_sb; 1616 } 1617 1618 /* 1619 * Don't touch the filesystem if a user tool thinks it owns the primary 1620 * superblock. mkfs doesn't clear the flag from secondary supers, so 1621 * we don't check them at all. 1622 */ 1623 if (mp->m_sb.sb_inprogress) { 1624 xfs_warn(mp, "Offline file system operation in progress!"); 1625 error = -EFSCORRUPTED; 1626 goto out_free_sb; 1627 } 1628 1629 /* 1630 * Until this is fixed only page-sized or smaller data blocks work. 1631 */ 1632 if (mp->m_sb.sb_blocksize > PAGE_SIZE) { 1633 xfs_warn(mp, 1634 "File system with blocksize %d bytes. 
" 1635 "Only pagesize (%ld) or less will currently work.", 1636 mp->m_sb.sb_blocksize, PAGE_SIZE); 1637 error = -ENOSYS; 1638 goto out_free_sb; 1639 } 1640 1641 /* Ensure this filesystem fits in the page cache limits */ 1642 if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || 1643 xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { 1644 xfs_warn(mp, 1645 "file system too large to be mounted on this system."); 1646 error = -EFBIG; 1647 goto out_free_sb; 1648 } 1649 1650 /* 1651 * XFS block mappings use 54 bits to store the logical block offset. 1652 * This should suffice to handle the maximum file size that the VFS 1653 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT 1654 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes 1655 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON 1656 * to check this assertion. 1657 * 1658 * Avoid integer overflow by comparing the maximum bmbt offset to the 1659 * maximum pagecache offset in units of fs blocks. 1660 */ 1661 if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { 1662 xfs_warn(mp, 1663 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", 1664 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), 1665 XFS_MAX_FILEOFF); 1666 error = -EINVAL; 1667 goto out_free_sb; 1668 } 1669 1670 error = xfs_filestream_mount(mp); 1671 if (error) 1672 goto out_free_sb; 1673 1674 /* 1675 * we must configure the block size in the superblock before we run the 1676 * full mount process as the mount process can lookup and cache inodes. 
1677 */ 1678 sb->s_magic = XFS_SUPER_MAGIC; 1679 sb->s_blocksize = mp->m_sb.sb_blocksize; 1680 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1681 sb->s_maxbytes = MAX_LFS_FILESIZE; 1682 sb->s_max_links = XFS_MAXLINK; 1683 sb->s_time_gran = 1; 1684 if (xfs_has_bigtime(mp)) { 1685 sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); 1686 sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); 1687 } else { 1688 sb->s_time_min = XFS_LEGACY_TIME_MIN; 1689 sb->s_time_max = XFS_LEGACY_TIME_MAX; 1690 } 1691 trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); 1692 sb->s_iflags |= SB_I_CGROUPWB; 1693 1694 set_posix_acl_flag(sb); 1695 1696 /* version 5 superblocks support inode version counters. */ 1697 if (xfs_has_crc(mp)) 1698 sb->s_flags |= SB_I_VERSION; 1699 1700 if (xfs_has_dax_always(mp)) { 1701 error = xfs_setup_dax_always(mp); 1702 if (error) 1703 goto out_filestream_unmount; 1704 } 1705 1706 if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { 1707 xfs_warn(mp, 1708 "mounting with \"discard\" option, but the device does not support discard"); 1709 mp->m_features &= ~XFS_FEAT_DISCARD; 1710 } 1711 1712 if (xfs_has_reflink(mp)) { 1713 if (mp->m_sb.sb_rblocks) { 1714 xfs_alert(mp, 1715 "reflink not compatible with realtime device!"); 1716 error = -EINVAL; 1717 goto out_filestream_unmount; 1718 } 1719 1720 if (xfs_globals.always_cow) { 1721 xfs_info(mp, "using DEBUG-only always_cow mode."); 1722 mp->m_always_cow = true; 1723 } 1724 } 1725 1726 if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) { 1727 xfs_alert(mp, 1728 "reverse mapping btree not compatible with realtime device!"); 1729 error = -EINVAL; 1730 goto out_filestream_unmount; 1731 } 1732 1733 error = xfs_mountfs(mp); 1734 if (error) 1735 goto out_filestream_unmount; 1736 1737 root = igrab(VFS_I(mp->m_rootip)); 1738 if (!root) { 1739 error = -ENOENT; 1740 goto out_unmount; 1741 } 1742 sb->s_root = d_make_root(root); 1743 if (!sb->s_root) { 1744 error = -ENOMEM; 1745 goto 
out_unmount; 1746 } 1747 1748 return 0; 1749 1750 out_filestream_unmount: 1751 xfs_filestream_unmount(mp); 1752 out_free_sb: 1753 xfs_freesb(mp); 1754 out_free_scrub_stats: 1755 xchk_mount_stats_free(mp); 1756 out_free_stats: 1757 free_percpu(mp->m_stats.xs_stats); 1758 out_destroy_inodegc: 1759 xfs_inodegc_free_percpu(mp); 1760 out_destroy_counters: 1761 xfs_destroy_percpu_counters(mp); 1762 out_destroy_workqueues: 1763 xfs_destroy_mount_workqueues(mp); 1764 out_shutdown_devices: 1765 xfs_shutdown_devices(mp); 1766 return error; 1767 1768 out_unmount: 1769 xfs_filestream_unmount(mp); 1770 xfs_unmountfs(mp); 1771 goto out_free_sb; 1772 } 1773 1774 static int 1775 xfs_fs_get_tree( 1776 struct fs_context *fc) 1777 { 1778 return get_tree_bdev(fc, xfs_fs_fill_super); 1779 } 1780 1781 static int 1782 xfs_remount_rw( 1783 struct xfs_mount *mp) 1784 { 1785 struct xfs_sb *sbp = &mp->m_sb; 1786 int error; 1787 1788 if (xfs_has_norecovery(mp)) { 1789 xfs_warn(mp, 1790 "ro->rw transition prohibited on norecovery mount"); 1791 return -EINVAL; 1792 } 1793 1794 if (xfs_sb_is_v5(sbp) && 1795 xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 1796 xfs_warn(mp, 1797 "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", 1798 (sbp->sb_features_ro_compat & 1799 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 1800 return -EINVAL; 1801 } 1802 1803 clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 1804 1805 /* 1806 * If this is the first remount to writeable state we might have some 1807 * superblock changes to update. 1808 */ 1809 if (mp->m_update_sb) { 1810 error = xfs_sync_sb(mp, false); 1811 if (error) { 1812 xfs_warn(mp, "failed to write sb changes"); 1813 return error; 1814 } 1815 mp->m_update_sb = false; 1816 } 1817 1818 /* 1819 * Fill out the reserve pool if it is empty. Use the stashed value if 1820 * it is non-zero, otherwise go with the default. 
1821 */ 1822 xfs_restore_resvblks(mp); 1823 xfs_log_work_queue(mp); 1824 xfs_blockgc_start(mp); 1825 1826 /* Create the per-AG metadata reservation pool .*/ 1827 error = xfs_fs_reserve_ag_blocks(mp); 1828 if (error && error != -ENOSPC) 1829 return error; 1830 1831 /* Re-enable the background inode inactivation worker. */ 1832 xfs_inodegc_start(mp); 1833 1834 return 0; 1835 } 1836 1837 static int 1838 xfs_remount_ro( 1839 struct xfs_mount *mp) 1840 { 1841 struct xfs_icwalk icw = { 1842 .icw_flags = XFS_ICWALK_FLAG_SYNC, 1843 }; 1844 int error; 1845 1846 /* Flush all the dirty data to disk. */ 1847 error = sync_filesystem(mp->m_super); 1848 if (error) 1849 return error; 1850 1851 /* 1852 * Cancel background eofb scanning so it cannot race with the final 1853 * log force+buftarg wait and deadlock the remount. 1854 */ 1855 xfs_blockgc_stop(mp); 1856 1857 /* 1858 * Clear out all remaining COW staging extents and speculative post-EOF 1859 * preallocations so that we don't leave inodes requiring inactivation 1860 * cleanups during reclaim on a read-only mount. We must process every 1861 * cached inode, so this requires a synchronous cache scan. 1862 */ 1863 error = xfs_blockgc_free_space(mp, &icw); 1864 if (error) { 1865 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1866 return error; 1867 } 1868 1869 /* 1870 * Stop the inodegc background worker. xfs_fs_reconfigure already 1871 * flushed all pending inodegc work when it sync'd the filesystem. 1872 * The VFS holds s_umount, so we know that inodes cannot enter 1873 * xfs_fs_destroy_inode during a remount operation. In readonly mode 1874 * we send inodes straight to reclaim, so no inodes will be queued. 1875 */ 1876 xfs_inodegc_stop(mp); 1877 1878 /* Free the per-AG metadata reservation pool. 
*/ 1879 error = xfs_fs_unreserve_ag_blocks(mp); 1880 if (error) { 1881 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1882 return error; 1883 } 1884 1885 /* 1886 * Before we sync the metadata, we need to free up the reserve block 1887 * pool so that the used block count in the superblock on disk is 1888 * correct at the end of the remount. Stash the current* reserve pool 1889 * size so that if we get remounted rw, we can return it to the same 1890 * size. 1891 */ 1892 xfs_save_resvblks(mp); 1893 1894 xfs_log_clean(mp); 1895 set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 1896 1897 return 0; 1898 } 1899 1900 /* 1901 * Logically we would return an error here to prevent users from believing 1902 * they might have changed mount options using remount which can't be changed. 1903 * 1904 * But unfortunately mount(8) adds all options from mtab and fstab to the mount 1905 * arguments in some cases so we can't blindly reject options, but have to 1906 * check for each specified option if it actually differs from the currently 1907 * set option and only reject it if that's the case. 1908 * 1909 * Until that is implemented we return success for every remount request, and 1910 * silently ignore all options that we can't actually change. 1911 */ 1912 static int 1913 xfs_fs_reconfigure( 1914 struct fs_context *fc) 1915 { 1916 struct xfs_mount *mp = XFS_M(fc->root->d_sb); 1917 struct xfs_mount *new_mp = fc->s_fs_info; 1918 int flags = fc->sb_flags; 1919 int error; 1920 1921 /* version 5 superblocks always support version counters. 
*/ 1922 if (xfs_has_crc(mp)) 1923 fc->sb_flags |= SB_I_VERSION; 1924 1925 error = xfs_fs_validate_params(new_mp); 1926 if (error) 1927 return error; 1928 1929 /* inode32 -> inode64 */ 1930 if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { 1931 mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1932 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1933 } 1934 1935 /* inode64 -> inode32 */ 1936 if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { 1937 mp->m_features |= XFS_FEAT_SMALL_INUMS; 1938 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1939 } 1940 1941 /* ro -> rw */ 1942 if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { 1943 error = xfs_remount_rw(mp); 1944 if (error) 1945 return error; 1946 } 1947 1948 /* rw -> ro */ 1949 if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { 1950 error = xfs_remount_ro(mp); 1951 if (error) 1952 return error; 1953 } 1954 1955 return 0; 1956 } 1957 1958 static void 1959 xfs_fs_free( 1960 struct fs_context *fc) 1961 { 1962 struct xfs_mount *mp = fc->s_fs_info; 1963 1964 /* 1965 * mp is stored in the fs_context when it is initialized. 1966 * mp is transferred to the superblock on a successful mount, 1967 * but if an error occurs before the transfer we have to free 1968 * it here. 
1969 */ 1970 if (mp) 1971 xfs_mount_free(mp); 1972 } 1973 1974 static const struct fs_context_operations xfs_context_ops = { 1975 .parse_param = xfs_fs_parse_param, 1976 .get_tree = xfs_fs_get_tree, 1977 .reconfigure = xfs_fs_reconfigure, 1978 .free = xfs_fs_free, 1979 }; 1980 1981 static int xfs_init_fs_context( 1982 struct fs_context *fc) 1983 { 1984 struct xfs_mount *mp; 1985 1986 mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO); 1987 if (!mp) 1988 return -ENOMEM; 1989 1990 spin_lock_init(&mp->m_sb_lock); 1991 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1992 spin_lock_init(&mp->m_perag_lock); 1993 mutex_init(&mp->m_growlock); 1994 INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); 1995 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 1996 mp->m_kobj.kobject.kset = xfs_kset; 1997 /* 1998 * We don't create the finobt per-ag space reservation until after log 1999 * recovery, so we must set this to true so that an ifree transaction 2000 * started during log recovery will not depend on space reservations 2001 * for finobt expansion. 2002 */ 2003 mp->m_finobt_nores = true; 2004 2005 /* 2006 * These can be overridden by the mount option parsing. 2007 */ 2008 mp->m_logbufs = -1; 2009 mp->m_logbsize = -1; 2010 mp->m_allocsize_log = 16; /* 64k */ 2011 2012 /* 2013 * Copy binary VFS mount flags we are interested in. 
2014 */ 2015 if (fc->sb_flags & SB_RDONLY) 2016 set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 2017 if (fc->sb_flags & SB_DIRSYNC) 2018 mp->m_features |= XFS_FEAT_DIRSYNC; 2019 if (fc->sb_flags & SB_SYNCHRONOUS) 2020 mp->m_features |= XFS_FEAT_WSYNC; 2021 2022 fc->s_fs_info = mp; 2023 fc->ops = &xfs_context_ops; 2024 2025 return 0; 2026 } 2027 2028 static void 2029 xfs_kill_sb( 2030 struct super_block *sb) 2031 { 2032 kill_block_super(sb); 2033 xfs_mount_free(XFS_M(sb)); 2034 } 2035 2036 static struct file_system_type xfs_fs_type = { 2037 .owner = THIS_MODULE, 2038 .name = "xfs", 2039 .init_fs_context = xfs_init_fs_context, 2040 .parameters = xfs_fs_parameters, 2041 .kill_sb = xfs_kill_sb, 2042 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, 2043 }; 2044 MODULE_ALIAS_FS("xfs"); 2045 2046 STATIC int __init 2047 xfs_init_caches(void) 2048 { 2049 int error; 2050 2051 xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, 2052 SLAB_HWCACHE_ALIGN | 2053 SLAB_RECLAIM_ACCOUNT | 2054 SLAB_MEM_SPREAD, 2055 NULL); 2056 if (!xfs_buf_cache) 2057 goto out; 2058 2059 xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", 2060 sizeof(struct xlog_ticket), 2061 0, 0, NULL); 2062 if (!xfs_log_ticket_cache) 2063 goto out_destroy_buf_cache; 2064 2065 error = xfs_btree_init_cur_caches(); 2066 if (error) 2067 goto out_destroy_log_ticket_cache; 2068 2069 error = xfs_defer_init_item_caches(); 2070 if (error) 2071 goto out_destroy_btree_cur_cache; 2072 2073 xfs_da_state_cache = kmem_cache_create("xfs_da_state", 2074 sizeof(struct xfs_da_state), 2075 0, 0, NULL); 2076 if (!xfs_da_state_cache) 2077 goto out_destroy_defer_item_cache; 2078 2079 xfs_ifork_cache = kmem_cache_create("xfs_ifork", 2080 sizeof(struct xfs_ifork), 2081 0, 0, NULL); 2082 if (!xfs_ifork_cache) 2083 goto out_destroy_da_state_cache; 2084 2085 xfs_trans_cache = kmem_cache_create("xfs_trans", 2086 sizeof(struct xfs_trans), 2087 0, 0, NULL); 2088 if (!xfs_trans_cache) 2089 goto out_destroy_ifork_cache; 
2090 2091 2092 /* 2093 * The size of the cache-allocated buf log item is the maximum 2094 * size possible under XFS. This wastes a little bit of memory, 2095 * but it is much faster. 2096 */ 2097 xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", 2098 sizeof(struct xfs_buf_log_item), 2099 0, 0, NULL); 2100 if (!xfs_buf_item_cache) 2101 goto out_destroy_trans_cache; 2102 2103 xfs_efd_cache = kmem_cache_create("xfs_efd_item", 2104 xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), 2105 0, 0, NULL); 2106 if (!xfs_efd_cache) 2107 goto out_destroy_buf_item_cache; 2108 2109 xfs_efi_cache = kmem_cache_create("xfs_efi_item", 2110 xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), 2111 0, 0, NULL); 2112 if (!xfs_efi_cache) 2113 goto out_destroy_efd_cache; 2114 2115 xfs_inode_cache = kmem_cache_create("xfs_inode", 2116 sizeof(struct xfs_inode), 0, 2117 (SLAB_HWCACHE_ALIGN | 2118 SLAB_RECLAIM_ACCOUNT | 2119 SLAB_MEM_SPREAD | SLAB_ACCOUNT), 2120 xfs_fs_inode_init_once); 2121 if (!xfs_inode_cache) 2122 goto out_destroy_efi_cache; 2123 2124 xfs_ili_cache = kmem_cache_create("xfs_ili", 2125 sizeof(struct xfs_inode_log_item), 0, 2126 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, 2127 NULL); 2128 if (!xfs_ili_cache) 2129 goto out_destroy_inode_cache; 2130 2131 xfs_icreate_cache = kmem_cache_create("xfs_icr", 2132 sizeof(struct xfs_icreate_item), 2133 0, 0, NULL); 2134 if (!xfs_icreate_cache) 2135 goto out_destroy_ili_cache; 2136 2137 xfs_rud_cache = kmem_cache_create("xfs_rud_item", 2138 sizeof(struct xfs_rud_log_item), 2139 0, 0, NULL); 2140 if (!xfs_rud_cache) 2141 goto out_destroy_icreate_cache; 2142 2143 xfs_rui_cache = kmem_cache_create("xfs_rui_item", 2144 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 2145 0, 0, NULL); 2146 if (!xfs_rui_cache) 2147 goto out_destroy_rud_cache; 2148 2149 xfs_cud_cache = kmem_cache_create("xfs_cud_item", 2150 sizeof(struct xfs_cud_log_item), 2151 0, 0, NULL); 2152 if (!xfs_cud_cache) 2153 goto out_destroy_rui_cache; 2154 2155 xfs_cui_cache = 
kmem_cache_create("xfs_cui_item", 2156 xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 2157 0, 0, NULL); 2158 if (!xfs_cui_cache) 2159 goto out_destroy_cud_cache; 2160 2161 xfs_bud_cache = kmem_cache_create("xfs_bud_item", 2162 sizeof(struct xfs_bud_log_item), 2163 0, 0, NULL); 2164 if (!xfs_bud_cache) 2165 goto out_destroy_cui_cache; 2166 2167 xfs_bui_cache = kmem_cache_create("xfs_bui_item", 2168 xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 2169 0, 0, NULL); 2170 if (!xfs_bui_cache) 2171 goto out_destroy_bud_cache; 2172 2173 xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", 2174 sizeof(struct xfs_attrd_log_item), 2175 0, 0, NULL); 2176 if (!xfs_attrd_cache) 2177 goto out_destroy_bui_cache; 2178 2179 xfs_attri_cache = kmem_cache_create("xfs_attri_item", 2180 sizeof(struct xfs_attri_log_item), 2181 0, 0, NULL); 2182 if (!xfs_attri_cache) 2183 goto out_destroy_attrd_cache; 2184 2185 xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", 2186 sizeof(struct xfs_iunlink_item), 2187 0, 0, NULL); 2188 if (!xfs_iunlink_cache) 2189 goto out_destroy_attri_cache; 2190 2191 return 0; 2192 2193 out_destroy_attri_cache: 2194 kmem_cache_destroy(xfs_attri_cache); 2195 out_destroy_attrd_cache: 2196 kmem_cache_destroy(xfs_attrd_cache); 2197 out_destroy_bui_cache: 2198 kmem_cache_destroy(xfs_bui_cache); 2199 out_destroy_bud_cache: 2200 kmem_cache_destroy(xfs_bud_cache); 2201 out_destroy_cui_cache: 2202 kmem_cache_destroy(xfs_cui_cache); 2203 out_destroy_cud_cache: 2204 kmem_cache_destroy(xfs_cud_cache); 2205 out_destroy_rui_cache: 2206 kmem_cache_destroy(xfs_rui_cache); 2207 out_destroy_rud_cache: 2208 kmem_cache_destroy(xfs_rud_cache); 2209 out_destroy_icreate_cache: 2210 kmem_cache_destroy(xfs_icreate_cache); 2211 out_destroy_ili_cache: 2212 kmem_cache_destroy(xfs_ili_cache); 2213 out_destroy_inode_cache: 2214 kmem_cache_destroy(xfs_inode_cache); 2215 out_destroy_efi_cache: 2216 kmem_cache_destroy(xfs_efi_cache); 2217 out_destroy_efd_cache: 2218 
kmem_cache_destroy(xfs_efd_cache);
 out_destroy_buf_item_cache:
	kmem_cache_destroy(xfs_buf_item_cache);
 out_destroy_trans_cache:
	kmem_cache_destroy(xfs_trans_cache);
 out_destroy_ifork_cache:
	kmem_cache_destroy(xfs_ifork_cache);
 out_destroy_da_state_cache:
	kmem_cache_destroy(xfs_da_state_cache);
 out_destroy_defer_item_cache:
	xfs_defer_destroy_item_caches();
 out_destroy_btree_cur_cache:
	xfs_btree_destroy_cur_caches();
 out_destroy_log_ticket_cache:
	kmem_cache_destroy(xfs_log_ticket_cache);
 out_destroy_buf_cache:
	kmem_cache_destroy(xfs_buf_cache);
 out:
	return -ENOMEM;
}

/*
 * Tear down every cache handled by this file.
 *
 * The calls are issued in the same order as the error-unwind labels at the
 * end of xfs_init_caches(), preceded by the iunlink item cache, which has no
 * unwind label of its own there.  Keep the two lists in step when adding or
 * removing a cache.
 */
STATIC void
xfs_destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_iunlink_cache);
	kmem_cache_destroy(xfs_attri_cache);
	kmem_cache_destroy(xfs_attrd_cache);
	kmem_cache_destroy(xfs_bui_cache);
	kmem_cache_destroy(xfs_bud_cache);
	kmem_cache_destroy(xfs_cui_cache);
	kmem_cache_destroy(xfs_cud_cache);
	kmem_cache_destroy(xfs_rui_cache);
	kmem_cache_destroy(xfs_rud_cache);
	kmem_cache_destroy(xfs_icreate_cache);
	kmem_cache_destroy(xfs_ili_cache);
	kmem_cache_destroy(xfs_inode_cache);
	kmem_cache_destroy(xfs_efi_cache);
	kmem_cache_destroy(xfs_efd_cache);
	kmem_cache_destroy(xfs_buf_item_cache);
	kmem_cache_destroy(xfs_trans_cache);
	kmem_cache_destroy(xfs_ifork_cache);
	kmem_cache_destroy(xfs_da_state_cache);
	/* These two groups are released through their own helpers. */
	xfs_defer_destroy_item_caches();
	xfs_btree_destroy_cur_caches();
	kmem_cache_destroy(xfs_log_ticket_cache);
	kmem_cache_destroy(xfs_buf_cache);
}

/*
 * Create the two global workqueues, xfs_alloc_wq and xfs_discard_wq.
 *
 * Returns 0 on success, or -ENOMEM if either alloc_workqueue() call returns
 * NULL.  If the second allocation fails, the first workqueue is destroyed
 * before returning, so no workqueue is left behind on failure.
 */
STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	/* The discard workqueue is also created with max_active == 0. */
	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
			0);
	if (!xfs_discard_wq)
		goto out_free_alloc_wq;

	return 0;
out_free_alloc_wq:
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}

/*
 * Destroy the workqueues created by xfs_init_workqueues(), in the reverse
 * order of their creation.
 */
STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}

/*
 * Module initialisation entry point (registered with module_init() below).
 *
 * Runs the on-disk structure checks and the dahash self test, then sets up
 * in order: caches, workqueues, the MRU cache, procfs, sysctls, the debugfs
 * directory, the "xfs" kset, the per-cpu stats and their "stats" sysfs
 * object, the scrub stats, the "debug" sysfs object (DEBUG builds only),
 * the quota manager, and finally registers the filesystem type.
 *
 * Returns 0 on success.  On failure, returns the error from the failing
 * step (or -ENOMEM for the kset and per-cpu allocations) after unwinding
 * every step that had already succeeded, via the label ladder at the end
 * of the function.
 */
STATIC int __init
init_xfs_fs(void)
{
	int error;

	xfs_check_ondisk_structs();

	/* Nothing has been set up yet, so a failure here needs no unwind. */
	error = xfs_dahash_test();
	if (error)
		return error;

	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");

	xfs_dir_startup();

	error = xfs_init_caches();
	if (error)
		goto out;

	error = xfs_init_workqueues();
	if (error)
		goto out_destroy_caches;

	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

	error = xfs_init_procfs();
	if (error)
		goto out_mru_cache_uninit;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;

	/*
	 * The result of xfs_debugfs_mkdir() is not checked here; the handle
	 * is passed on as-is to xchk_global_stats_setup() and, on teardown,
	 * to debugfs_remove().
	 */
	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_debugfs_unregister;
	}

	/* The kset is assigned before the stats kobject is initialised. */
	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
			"stats");
	if (error)
		goto out_free_stats;

	error = xchk_global_stats_setup(xfs_debugfs);
	if (error)
		goto out_remove_stats_kobj;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_scrub_stats;
#endif

	error = xfs_qm_init();
	if (error)
		goto out_remove_dbg_kobj;

	error = register_filesystem(&xfs_fs_type);
	if (error)
		goto out_qm_exit;
	return 0;

	/*
	 * Error unwind: each label undoes the step that precedes the
	 * corresponding goto, then falls through to undo everything earlier.
	 */
 out_qm_exit:
	xfs_qm_exit();
 out_remove_dbg_kobj:
	/*
	 * out_remove_scrub_stats is only jumped to from the DEBUG-only block
	 * above, so the label lives under the same #ifdef and does not exist
	 * in non-DEBUG builds.  Without DEBUG, out_remove_dbg_kobj falls
	 * straight through to the scrub stats teardown.
	 */
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_scrub_stats:
#endif
	xchk_global_stats_teardown();
 out_remove_stats_kobj:
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
	kset_unregister(xfs_kset);
 out_debugfs_unregister:
	/*
	 * The debugfs directory and the sysctl registration share one label:
	 * the only goto that targets it comes after both were set up, and a
	 * sysctl registration failure jumps past it to out_cleanup_procfs.
	 */
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
 out_mru_cache_uninit:
	xfs_mru_cache_uninit();
 out_destroy_wq:
	xfs_destroy_workqueues();
 out_destroy_caches:
	xfs_destroy_caches();
 out:
	return error;
}

/*
 * Module exit entry point (registered with module_exit() below).
 *
 * Releases everything init_xfs_fs() set up.  After the first two calls the
 * sequence is the same as the init_xfs_fs() error ladder from
 * out_remove_dbg_kobj downwards.
 */
STATIC void __exit
exit_xfs_fs(void)
{
	/*
	 * NOTE(review): the quota manager is shut down before the filesystem
	 * type is unregistered, i.e. these two calls are not in strict
	 * reverse order of init_xfs_fs() - presumably intentional; confirm
	 * before reordering.
	 */
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xchk_global_stats_teardown();
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_caches();
	/*
	 * No matching setup call appears in init_xfs_fs(); presumably the
	 * UUID table is populated elsewhere (e.g. at mount time) - TODO
	 * confirm.
	 */
	xfs_uuid_table_free();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");