// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

static void
xfs_mount_set_dax_mode(
	struct xfs_mount	*mp,
	enum xfs_dax_mode	mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_features |= XFS_FEAT_DAX_NEVER;
		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
		break;
	}
}
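
/*
 * Note that XFS_DAX_INODE clears both feature bits: with neither "always"
 * nor "never" forced at mount time, the per-inode DAX flag decides whether
 * a given file uses DAX I/O.
 */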

static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};

/*
 * Table driven mount option parser.
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};

static const struct fs_parameter_spec xfs_fs_parameters[] = {
	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("ikeep",		Opt_ikeep),
	fsparam_flag("noikeep",		Opt_noikeep),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("attr2",		Opt_attr2),
	fsparam_flag("noattr2",		Opt_noattr2),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	{}
};
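
/*
 * "dax" is deliberately listed twice above: a bare "dax" flag matches the
 * fsparam_flag entry (Opt_dax) and is treated like dax=always, while
 * "dax=inode|always|never" matches the fsparam_enum entry (Opt_dax_enum)
 * via dax_param_enums. See xfs_fs_parse_param().
 */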

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

static int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_FEAT_IKEEP,		",ikeep" },
		{ XFS_FEAT_WSYNC,		",wsync" },
		{ XFS_FEAT_NOALIGN,		",noalign" },
		{ XFS_FEAT_SWALLOC,		",swalloc" },
		{ XFS_FEAT_NOUUID,		",nouuid" },
		{ XFS_FEAT_NORECOVERY,		",norecovery" },
		{ XFS_FEAT_ATTR2,		",attr2" },
		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
		{ XFS_FEAT_GRPID,		",grpid" },
		{ XFS_FEAT_DISCARD,		",discard" },
		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
		{ 0, NULL }
	};
	struct xfs_mount	*mp = XFS_M(root->d_sb);
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_features & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}

	seq_printf(m, ",inode%d",
		   xfs_has_small_inums(mp) ? 32 : 64);

	if (xfs_has_allocsize(mp))
		seq_printf(m, ",allocsize=%dk",
			   (1 << mp->m_allocsize_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & XFS_UQUOTA_ENFD)
		seq_puts(m, ",usrquota");
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, ",uqnoenforce");

	if (mp->m_qflags & XFS_PQUOTA_ENFD)
		seq_puts(m, ",prjquota");
	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
		seq_puts(m, ",pqnoenforce");

	if (mp->m_qflags & XFS_GQUOTA_ENFD)
		seq_puts(m, ",grpquota");
	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
		seq_puts(m, ",gqnoenforce");

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	return 0;
}

static bool
xfs_set_inode_alloc_perag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino,
	xfs_agnumber_t		max_metadata)
{
	if (!xfs_is_inode32(pag->pag_mount)) {
		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	if (ino > XFS_MAXINUMBER_32) {
		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
	if (pag->pag_agno < max_metadata)
		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	else
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	return true;
}

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agcount)
{
	xfs_agnumber_t		index;
	xfs_agnumber_t		maxagi = 0;
	xfs_sb_t		*sbp = &mp->m_sb;
	xfs_agnumber_t		max_metadata;
	xfs_agino_t		agino;
	xfs_ino_t		ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
	 * the allocator to accommodate the request.
	 */
	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
		set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
	else
		clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);
		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
			maxagi++;
		xfs_perag_put(pag);
	}

	return xfs_is_inode32(mp) ? maxagi : agcount;
}

static int
xfs_setup_dax_always(
	struct xfs_mount	*mp)
{
	if (!mp->m_ddev_targp->bt_daxdev &&
	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
		xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
		goto disable_dax;
	}

	if (mp->m_super->s_blocksize != PAGE_SIZE) {
		xfs_alert(mp,
			"DAX not supported for blocksize. Turning off DAX.");
		goto disable_dax;
	}

	if (xfs_has_reflink(mp) &&
	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
		xfs_alert(mp,
			"DAX and reflink cannot work with multi-partitions!");
		return -EINVAL;
	}

	return 0;

disable_dax:
	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
	return 0;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct file		**bdev_filep)
{
	int			error = 0;

	*bdev_filep = bdev_file_open_by_path(name,
		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
		mp->m_super, &fs_holder_ops);
	if (IS_ERR(*bdev_filep)) {
		error = PTR_ERR(*bdev_filep);
		*bdev_filep = NULL;
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}
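
/*
 * A note on the open flags above: BLK_OPEN_RESTRICT_WRITES should keep other
 * openers from writing to a device that we have claimed, and registering the
 * superblock as holder via fs_holder_ops lets the block layer call back into
 * the VFS if the underlying device goes away; that is what eventually drives
 * xfs_fs_shutdown() on device removal.
 */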

STATIC void
xfs_shutdown_devices(
	struct xfs_mount	*mp)
{
	/*
	 * Udev is triggered whenever anyone closes a block device or unmounts
	 * a file system on a block device.
	 * The default udev rules invoke blkid to read the fs super and create
	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
	 * reads through the page cache.
	 *
	 * xfs_db also uses buffered reads to examine metadata.  There is no
	 * coordination between xfs_db and udev, which means that they can run
	 * concurrently.  Note there is no coordination between the kernel and
	 * blkid either.
	 *
	 * On a system with 64k pages, the page cache can cache the superblock
	 * and the root inode (and hence the root directory) with the same 64k
	 * page.  If udev spawns blkid after the mkfs and the system is busy
	 * enough that it is still running when xfs_db starts up, they'll both
	 * read from the same page in the pagecache.
	 *
	 * The unmount writes updated inode metadata to disk directly.  The XFS
	 * buffer cache does not use the bdev pagecache, so it needs to
	 * invalidate that pagecache on unmount.  If the above scenario occurs,
	 * the pagecache no longer reflects what's on disk, xfs_db reads the
	 * stale metadata, and fails to find /a.  Most of the time this succeeds
	 * because closing a bdev invalidates the page cache, but when processes
	 * race, everyone loses.
	 */
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
	}
	if (mp->m_rtdev_targp) {
		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
	}
	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}

/*
 * The file system configurations are:
 * (1) device (partition) with data and internal log
 * (2) logical volume with data and log subvolumes.
 * (3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct super_block	*sb = mp->m_super;
	struct block_device	*ddev = sb->s_bdev;
	struct file		*logdev_file = NULL, *rtdev_file = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
		if (error)
			return error;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
		if (error)
			goto out_close_logdev;

		if (file_bdev(rtdev_file) == ddev ||
		    (logdev_file &&
		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev_file) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev_file && file_bdev(logdev_file) != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
		/* Handle won't be used, drop it */
		if (logdev_file)
			fput(logdev_file);
	}

	return 0;

out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
	if (rtdev_file)
		fput(rtdev_file);
out_close_logdev:
	if (logdev_file)
		fput(logdev_file);
	return error;
}
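
/*
 * Device setup is two-phase: xfs_open_devices() runs before the superblock
 * has been read, so each buftarg starts out with a provisional sector size;
 * after xfs_readsb() has run, the function below resizes every buftarg to
 * the sector size recorded on disk.
 */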

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_has_sector(mp))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}

STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_unwritten;

	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_blockgc_wq)
		goto out_destroy_reclaim;

	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_inodegc_wq)
		goto out_destroy_blockgc;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_inodegc;

	return 0;

out_destroy_inodegc:
	destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
	destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_blockgc_wq);
	destroy_workqueue(mp->m_inodegc_wq);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
						   m_flush_inodes_work);
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while
 * waiting for IO to complete so that we effectively throttle multiple callers
 * to the rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);
	xfs_inode_mark_reclaimable(ip);
}

static void
xfs_fs_dirty_inode(
	struct inode		*inode,
	int			flags)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;

	/*
	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
	 * in flags possibly together with I_DIRTY_SYNC.
	 */
	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.  This covers only
 * the idempotent fields in the XFS inode; all other fields need to be
 * initialised on allocation from the slab.  This avoids the need to
 * repeatedly initialise fields in the xfs inode that are left in the
 * initialised state when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);
	init_rwsem(&ip->i_lock);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every
 * inode we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
		return 0;
	}

	return generic_drop_inode(inode);
}

static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
	if (mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_ddev_targp);

	debugfs_remove(mp->m_debugfs);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
	kfree(mp);
}

STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);
	int			error;

	trace_xfs_fs_sync_fs(mp, __return_address);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;

	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	/*
	 * If we are called with page faults frozen out, it means we are about
	 * to freeze the transaction subsystem. Take the opportunity to shut
	 * down inodegc because once SB_FREEZE_FS is set it's too late to
	 * prevent inactivation races with freeze. The fs doesn't get called
	 * again by the freezing process until after SB_FREEZE_FS has been set,
	 * so it's now or never.  Same logic applies to speculative allocation
	 * garbage collection.
	 *
	 * We don't care if this is a normal syncfs call that does this or
	 * freeze that does this - we can run this multiple times without issue
	 * and we won't race with a restart because a restart can only occur
	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
	 */
	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
		xfs_inodegc_stop(mp);
		xfs_blockgc_stop(mp);
	}

	return 0;
}
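
/*
 * The statfs implementation below sums the free-running percpu counters
 * (m_icount, m_ifree, m_fdblocks) to produce exact values; keeping them
 * percpu means the allocation fast paths never serialise on a global lock,
 * at the price of an O(ncpus) walk here.  The max_t()/min_t() clamps keep
 * f_bfree and f_ffree from going negative while reservations are in flight.
 */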

STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	/*
	 * Expedite background inodegc but don't wait. We do not want to block
	 * here waiting hours for a billion extent file to be truncated.
	 */
	xfs_inodegc_push(mp);

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid = u64_to_fsid(id);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	/* make sure statp->f_bfree does not underflow */
	statp->f_bfree = max_t(int64_t, 0,
				fdblocks - xfs_fdblocks_unavailable(mp));
	statp->f_bavail = statp->f_bfree;

	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);

	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		s64	freertx;

		statp->f_blocks = sbp->sb_rblocks;
		freertx = percpu_counter_sum_positive(&mp->m_frextents);
		statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
	}

	return 0;
}

STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, 0);
}

STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks;

	if (mp->m_resblks_save) {
		resblks = mp->m_resblks_save;
		mp->m_resblks_save = 0;
	} else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, resblks);
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);
	unsigned int		flags;
	int			ret;

	/*
	 * The filesystem is now frozen far enough that memory reclaim
	 * cannot safely operate on the filesystem. Hence we need to
	 * set a GFP_NOFS context here to avoid recursion deadlocks.
	 */
	flags = memalloc_nofs_save();
	xfs_save_resvblks(mp);
	ret = xfs_log_quiesce(mp);
	memalloc_nofs_restore(flags);

	/*
	 * For read-write filesystems, we need to restart the inodegc on error
	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
	 * here, so we can restart safely without racing with a stop in
	 * xfs_fs_sync_fs().
	 */
	if (ret && !xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return ret;
}

STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/*
	 * Don't reactivate the inodegc worker on a readonly filesystem because
	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
	 * worker because there are no speculative preallocations on a readonly
	 * filesystem.
	 */
	if (!xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return 0;
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_has_logv2(mp)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
	    !xfs_has_pquotino(mp)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
		return -EINVAL;
	}

	return 0;
}

static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int			error;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_fdblocks;

	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
	if (error)
		goto free_delalloc;

	return 0;

free_delalloc:
	percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
	percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
}

static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
	percpu_counter_destroy(&mp->m_frextents);
}
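
/*
 * Background inode inactivation is also per-cpu: each CPU gets its own llist
 * of inodes awaiting inactivation plus a delayed work item, so queueing an
 * inode for gc from xfs_fs_destroy_inode() stays cheap and cache-local.
 * The per-cpu structures are set up below.
 */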

static int
xfs_inodegc_init_percpu(
	struct xfs_mount	*mp)
{
	struct xfs_inodegc	*gc;
	int			cpu;

	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
	if (!mp->m_inodegc)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		gc->cpu = cpu;
		gc->mp = mp;
		init_llist_head(&gc->list);
		gc->items = 0;
		gc->error = 0;
		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
	}
	return 0;
}

static void
xfs_inodegc_free_percpu(
	struct xfs_mount	*mp)
{
	if (!mp->m_inodegc)
		return;
	free_percpu(mp->m_inodegc);
}

static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
	xchk_mount_stats_free(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_shutdown_devices(mp);
}

static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static void
xfs_fs_shutdown(
	struct super_block	*sb)
{
	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
	.shutdown		= xfs_fs_shutdown,
};

static int
suffix_kstrtoint(
	const char	*s,
	unsigned int	base,
	int		*res)
{
	int		last, shift_left_factor = 0, _res;
	char		*value;
	int		ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	kfree(value);
	*res = _res << shift_left_factor;
	return ret;
}
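
/*
 * For example, "logbsize=32k" parses to 32 << 10 = 32768 and "allocsize=1g"
 * to 1 << 30.  Note that *res is only meaningful when 0 is returned; on a
 * parse failure the shifted value is garbage.
 */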

static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param,
	uint64_t		flag,
	bool			value)
{
	/* Don't print the warning if reconfiguring and current mount point
	 * already had the flag set
	 */
	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
	    !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
		return;
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}

/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	struct xfs_mount	*parsing_mp = fc->s_fs_info;
	struct fs_parse_result	result;
	int			size = 0;
	int			opt;

	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_features |= XFS_FEAT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_features |= XFS_FEAT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_features |= XFS_FEAT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
		return 0;
	case Opt_pqnoenforce:
		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
		return 0;
	case Opt_gqnoenforce:
		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		return 0;
	case Opt_discard:
		parsing_mp->m_features |= XFS_FEAT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	/* Following mount options will be removed in September 2025 */
	case Opt_ikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
		parsing_mp->m_features |= XFS_FEAT_IKEEP;
		return 0;
	case Opt_noikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
		return 0;
	case Opt_attr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_ATTR2;
		return 0;
	case Opt_noattr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
		return 0;
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}
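
/*
 * As an example, mounting with "-o logbufs=8,ikeep,dax=never" lands in the
 * switch above three times: Opt_logbufs stores 8, Opt_ikeep sets
 * XFS_FEAT_IKEEP (emitting a deprecation warning), and Opt_dax_enum switches
 * the DAX mode.  Keys that are not in xfs_fs_parameters[] are rejected by
 * fs_parse() before the switch is ever reached.
 */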

static int
xfs_fs_validate_params(
	struct xfs_mount	*mp)
{
	/* No recovery flag requires a read-only mount */
	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	/*
	 * We have not read the superblock at this point, so only the attr2
	 * mount option can set the attr2 feature by this stage.
	 */
	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
		return -EINVAL;
	}

	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize != 0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if (xfs_has_allocsize(mp) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}
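
/*
 * Examples of combinations rejected above: "sunit=64" without a matching
 * "swidth", "noalign" together with sunit/swidth, a logbufs count outside
 * the XLOG_MIN_ICLOGS..XLOG_MAX_ICLOGS range, and any logbsize that is not
 * a power of two between 16k and 256k.
 */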

struct dentry *
xfs_debugfs_mkdir(
	const char		*name,
	struct dentry		*parent)
{
	struct dentry		*child;

	/* Apparently we're expected to ignore error returns?? */
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}

static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	/*
	 * Copy VFS mount flags from the context now that all parameter parsing
	 * is guaranteed to have been completed by either the old mount API or
	 * the newer fsopen/fsconfig API.
	 */
	if (fc->sb_flags & SB_RDONLY)
		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	error = xfs_fs_validate_params(mp);
	if (error)
		return error;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		return error;

	if (xfs_debugfs) {
		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
						  xfs_debugfs);
	} else {
		mp->m_debugfs = NULL;
	}

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_shutdown_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xchk_mount_stats_alloc(mp);
	if (error)
		goto out_free_stats;

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_scrub_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/* V4 support is undergoing deprecation. */
	if (!xfs_has_crc(mp)) {
#ifdef CONFIG_XFS_SUPPORT_V4
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* ASCII case insensitivity is undergoing deprecation. */
	if (xfs_has_asciici(mp)) {
#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
		xfs_warn_once(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_has_needsrepair(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		xfs_warn(mp,
		"File system with blocksize %d bytes. "
		"Only pagesize (%ld) or less will currently work.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
		error = -ENOSYS;
		goto out_free_sb;
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}
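
	/*
	 * Worked example for the check above, assuming 4k blocks: the largest
	 * pagecache offset is MAX_LFS_FILESIZE = 2^63 - 1 bytes, i.e. just
	 * under 2^51 blocks, comfortably below the 2^54 - 1 block limit of
	 * the bmbt offset field, so this should never trip on a sane
	 * configuration.
	 */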

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_reflink(mp)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

out_filestream_unmount:
	xfs_filestream_unmount(mp);
out_free_sb:
	xfs_freesb(mp);
out_free_scrub_stats:
	xchk_mount_stats_free(mp);
out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
out_destroy_inodegc:
	xfs_inodegc_free_percpu(mp);
out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
out_shutdown_devices:
	xfs_shutdown_devices(mp);
	return error;

out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}

static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}
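
/*
 * Illustrative only: with the new mount API, the flow driven by
 * fsopen(2)/fsconfig(2) is roughly
 *
 *	fd = fsopen("xfs", FSOPEN_CLOEXEC);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "/dev/sdb1", 0);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "logbsize", "64k", 0);
 *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *
 * where each FSCONFIG_SET_* call ends up in xfs_fs_parse_param() and
 * FSCONFIG_CMD_CREATE in xfs_fs_get_tree() above.  A classic mount(2)
 * reaches the same code through the legacy parameter wrappers.
 */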

static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int			error;

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	return 0;
}

static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount.  We must process every
	 * cached inode, so this requires a synchronous cache scan.
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker. xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation. In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	error = xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	xfs_log_clean(mp);
	set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	return 0;
}
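
/*
 * For example, "mount -o remount,ro /mnt" arrives as a reconfigure request
 * with SB_RDONLY set and is routed to xfs_remount_ro() above by
 * xfs_fs_reconfigure() below; clearing SB_RDONLY again routes to
 * xfs_remount_rw().
 */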
*/ 1919 if (xfs_has_crc(mp)) 1920 fc->sb_flags |= SB_I_VERSION; 1921 1922 error = xfs_fs_validate_params(new_mp); 1923 if (error) 1924 return error; 1925 1926 /* inode32 -> inode64 */ 1927 if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { 1928 mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1929 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1930 } 1931 1932 /* inode64 -> inode32 */ 1933 if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { 1934 mp->m_features |= XFS_FEAT_SMALL_INUMS; 1935 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1936 } 1937 1938 /* ro -> rw */ 1939 if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { 1940 error = xfs_remount_rw(mp); 1941 if (error) 1942 return error; 1943 } 1944 1945 /* rw -> ro */ 1946 if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { 1947 error = xfs_remount_ro(mp); 1948 if (error) 1949 return error; 1950 } 1951 1952 return 0; 1953 } 1954 1955 static void 1956 xfs_fs_free( 1957 struct fs_context *fc) 1958 { 1959 struct xfs_mount *mp = fc->s_fs_info; 1960 1961 /* 1962 * mp is stored in the fs_context when it is initialized. 1963 * mp is transferred to the superblock on a successful mount, 1964 * but if an error occurs before the transfer we have to free 1965 * it here. 1966 */ 1967 if (mp) 1968 xfs_mount_free(mp); 1969 } 1970 1971 static const struct fs_context_operations xfs_context_ops = { 1972 .parse_param = xfs_fs_parse_param, 1973 .get_tree = xfs_fs_get_tree, 1974 .reconfigure = xfs_fs_reconfigure, 1975 .free = xfs_fs_free, 1976 }; 1977 1978 /* 1979 * WARNING: do not initialise any parameters in this function that depend on 1980 * mount option parsing having already been performed as this can be called from 1981 * fsopen() before any parameters have been set. 1982 */ 1983 static int xfs_init_fs_context( 1984 struct fs_context *fc) 1985 { 1986 struct xfs_mount *mp; 1987 1988 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL); 1989 if (!mp) 1990 return -ENOMEM; 1991 1992 spin_lock_init(&mp->m_sb_lock); 1993 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1994 spin_lock_init(&mp->m_perag_lock); 1995 mutex_init(&mp->m_growlock); 1996 INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); 1997 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 1998 mp->m_kobj.kobject.kset = xfs_kset; 1999 /* 2000 * We don't create the finobt per-ag space reservation until after log 2001 * recovery, so we must set this to true so that an ifree transaction 2002 * started during log recovery will not depend on space reservations 2003 * for finobt expansion. 2004 */ 2005 mp->m_finobt_nores = true; 2006 2007 /* 2008 * These can be overridden by the mount option parsing. 
/*
 * WARNING: do not initialise any parameters in this function that depend on
 * mount option parsing having already been performed, as this can be called
 * from fsopen() before any parameters have been set.
 */
static int xfs_init_fs_context(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp;

	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
	if (!mp)
		return -ENOMEM;

	spin_lock_init(&mp->m_sb_lock);
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	spin_lock_init(&mp->m_perag_lock);
	mutex_init(&mp->m_growlock);
	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
	mp->m_allocsize_log = 16; /* 64k */

	xfs_hooks_init(&mp->m_dir_update_hooks);

	fc->s_fs_info = mp;
	fc->ops = &xfs_context_ops;

	return 0;
}

static void
xfs_kill_sb(
	struct super_block	*sb)
{
	kill_block_super(sb);
	xfs_mount_free(XFS_M(sb));
}

static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.init_fs_context	= xfs_init_fs_context,
	.parameters		= xfs_fs_parameters,
	.kill_sb		= xfs_kill_sb,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("xfs");
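
/*
 * MODULE_ALIAS_FS("xfs") registers the "fs-xfs" module alias, so mounting
 * an xfs filesystem before the module is loaded makes the VFS
 * request_module() it automatically.  Illustrative only; the device and
 * mountpoint are examples:
 *
 *	# mount -t xfs /dev/sdb1 /mnt		(auto-loads xfs.ko if needed)
 */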
2093 */ 2094 xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", 2095 sizeof(struct xfs_buf_log_item), 2096 0, 0, NULL); 2097 if (!xfs_buf_item_cache) 2098 goto out_destroy_trans_cache; 2099 2100 xfs_efd_cache = kmem_cache_create("xfs_efd_item", 2101 xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), 2102 0, 0, NULL); 2103 if (!xfs_efd_cache) 2104 goto out_destroy_buf_item_cache; 2105 2106 xfs_efi_cache = kmem_cache_create("xfs_efi_item", 2107 xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), 2108 0, 0, NULL); 2109 if (!xfs_efi_cache) 2110 goto out_destroy_efd_cache; 2111 2112 xfs_inode_cache = kmem_cache_create("xfs_inode", 2113 sizeof(struct xfs_inode), 0, 2114 (SLAB_HWCACHE_ALIGN | 2115 SLAB_RECLAIM_ACCOUNT | 2116 SLAB_ACCOUNT), 2117 xfs_fs_inode_init_once); 2118 if (!xfs_inode_cache) 2119 goto out_destroy_efi_cache; 2120 2121 xfs_ili_cache = kmem_cache_create("xfs_ili", 2122 sizeof(struct xfs_inode_log_item), 0, 2123 SLAB_RECLAIM_ACCOUNT, 2124 NULL); 2125 if (!xfs_ili_cache) 2126 goto out_destroy_inode_cache; 2127 2128 xfs_icreate_cache = kmem_cache_create("xfs_icr", 2129 sizeof(struct xfs_icreate_item), 2130 0, 0, NULL); 2131 if (!xfs_icreate_cache) 2132 goto out_destroy_ili_cache; 2133 2134 xfs_rud_cache = kmem_cache_create("xfs_rud_item", 2135 sizeof(struct xfs_rud_log_item), 2136 0, 0, NULL); 2137 if (!xfs_rud_cache) 2138 goto out_destroy_icreate_cache; 2139 2140 xfs_rui_cache = kmem_cache_create("xfs_rui_item", 2141 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 2142 0, 0, NULL); 2143 if (!xfs_rui_cache) 2144 goto out_destroy_rud_cache; 2145 2146 xfs_cud_cache = kmem_cache_create("xfs_cud_item", 2147 sizeof(struct xfs_cud_log_item), 2148 0, 0, NULL); 2149 if (!xfs_cud_cache) 2150 goto out_destroy_rui_cache; 2151 2152 xfs_cui_cache = kmem_cache_create("xfs_cui_item", 2153 xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 2154 0, 0, NULL); 2155 if (!xfs_cui_cache) 2156 goto out_destroy_cud_cache; 2157 2158 xfs_bud_cache = kmem_cache_create("xfs_bud_item", 2159 sizeof(struct xfs_bud_log_item), 2160 0, 0, NULL); 2161 if (!xfs_bud_cache) 2162 goto out_destroy_cui_cache; 2163 2164 xfs_bui_cache = kmem_cache_create("xfs_bui_item", 2165 xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 2166 0, 0, NULL); 2167 if (!xfs_bui_cache) 2168 goto out_destroy_bud_cache; 2169 2170 xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", 2171 sizeof(struct xfs_attrd_log_item), 2172 0, 0, NULL); 2173 if (!xfs_attrd_cache) 2174 goto out_destroy_bui_cache; 2175 2176 xfs_attri_cache = kmem_cache_create("xfs_attri_item", 2177 sizeof(struct xfs_attri_log_item), 2178 0, 0, NULL); 2179 if (!xfs_attri_cache) 2180 goto out_destroy_attrd_cache; 2181 2182 xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", 2183 sizeof(struct xfs_iunlink_item), 2184 0, 0, NULL); 2185 if (!xfs_iunlink_cache) 2186 goto out_destroy_attri_cache; 2187 2188 return 0; 2189 2190 out_destroy_attri_cache: 2191 kmem_cache_destroy(xfs_attri_cache); 2192 out_destroy_attrd_cache: 2193 kmem_cache_destroy(xfs_attrd_cache); 2194 out_destroy_bui_cache: 2195 kmem_cache_destroy(xfs_bui_cache); 2196 out_destroy_bud_cache: 2197 kmem_cache_destroy(xfs_bud_cache); 2198 out_destroy_cui_cache: 2199 kmem_cache_destroy(xfs_cui_cache); 2200 out_destroy_cud_cache: 2201 kmem_cache_destroy(xfs_cud_cache); 2202 out_destroy_rui_cache: 2203 kmem_cache_destroy(xfs_rui_cache); 2204 out_destroy_rud_cache: 2205 kmem_cache_destroy(xfs_rud_cache); 2206 out_destroy_icreate_cache: 2207 kmem_cache_destroy(xfs_icreate_cache); 2208 out_destroy_ili_cache: 2209 
STATIC void
xfs_destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free callbacks are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_iunlink_cache);
	kmem_cache_destroy(xfs_attri_cache);
	kmem_cache_destroy(xfs_attrd_cache);
	kmem_cache_destroy(xfs_bui_cache);
	kmem_cache_destroy(xfs_bud_cache);
	kmem_cache_destroy(xfs_cui_cache);
	kmem_cache_destroy(xfs_cud_cache);
	kmem_cache_destroy(xfs_rui_cache);
	kmem_cache_destroy(xfs_rud_cache);
	kmem_cache_destroy(xfs_icreate_cache);
	kmem_cache_destroy(xfs_ili_cache);
	kmem_cache_destroy(xfs_inode_cache);
	kmem_cache_destroy(xfs_efi_cache);
	kmem_cache_destroy(xfs_efd_cache);
	kmem_cache_destroy(xfs_buf_item_cache);
	kmem_cache_destroy(xfs_trans_cache);
	kmem_cache_destroy(xfs_ifork_cache);
	kmem_cache_destroy(xfs_da_state_cache);
	xfs_defer_destroy_item_caches();
	rcbagbt_destroy_cur_cache();
	xfs_btree_destroy_cur_caches();
	kmem_cache_destroy(xfs_log_ticket_cache);
	kmem_cache_destroy(xfs_buf_cache);
}
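
/*
 * A note on the workqueue flags used below: WQ_MEM_RECLAIM guarantees a
 * rescuer thread so queued work can make forward progress even under
 * memory pressure, which matters because the allocation workqueue can run
 * in the writeback path.  WQ_FREEZABLE makes the queue quiesce across
 * system suspend.  A max_active of 0 selects the workqueue subsystem's
 * default concurrency limit.
 */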
2279 */ 2280 xfs_alloc_wq = alloc_workqueue("xfsalloc", 2281 XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0); 2282 if (!xfs_alloc_wq) 2283 return -ENOMEM; 2284 2285 xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 2286 0); 2287 if (!xfs_discard_wq) 2288 goto out_free_alloc_wq; 2289 2290 return 0; 2291 out_free_alloc_wq: 2292 destroy_workqueue(xfs_alloc_wq); 2293 return -ENOMEM; 2294 } 2295 2296 STATIC void 2297 xfs_destroy_workqueues(void) 2298 { 2299 destroy_workqueue(xfs_discard_wq); 2300 destroy_workqueue(xfs_alloc_wq); 2301 } 2302 2303 STATIC int __init 2304 init_xfs_fs(void) 2305 { 2306 int error; 2307 2308 xfs_check_ondisk_structs(); 2309 2310 error = xfs_dahash_test(); 2311 if (error) 2312 return error; 2313 2314 printk(KERN_INFO XFS_VERSION_STRING " with " 2315 XFS_BUILD_OPTIONS " enabled\n"); 2316 2317 xfs_dir_startup(); 2318 2319 error = xfs_init_caches(); 2320 if (error) 2321 goto out; 2322 2323 error = xfs_init_workqueues(); 2324 if (error) 2325 goto out_destroy_caches; 2326 2327 error = xfs_mru_cache_init(); 2328 if (error) 2329 goto out_destroy_wq; 2330 2331 error = xfs_init_procfs(); 2332 if (error) 2333 goto out_mru_cache_uninit; 2334 2335 error = xfs_sysctl_register(); 2336 if (error) 2337 goto out_cleanup_procfs; 2338 2339 xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL); 2340 2341 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 2342 if (!xfs_kset) { 2343 error = -ENOMEM; 2344 goto out_debugfs_unregister; 2345 } 2346 2347 xfsstats.xs_kobj.kobject.kset = xfs_kset; 2348 2349 xfsstats.xs_stats = alloc_percpu(struct xfsstats); 2350 if (!xfsstats.xs_stats) { 2351 error = -ENOMEM; 2352 goto out_kset_unregister; 2353 } 2354 2355 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, 2356 "stats"); 2357 if (error) 2358 goto out_free_stats; 2359 2360 error = xchk_global_stats_setup(xfs_debugfs); 2361 if (error) 2362 goto out_remove_stats_kobj; 2363 2364 #ifdef DEBUG 2365 xfs_dbg_kobj.kobject.kset = xfs_kset; 2366 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 2367 if (error) 2368 goto out_remove_scrub_stats; 2369 #endif 2370 2371 error = xfs_qm_init(); 2372 if (error) 2373 goto out_remove_dbg_kobj; 2374 2375 error = register_filesystem(&xfs_fs_type); 2376 if (error) 2377 goto out_qm_exit; 2378 return 0; 2379 2380 out_qm_exit: 2381 xfs_qm_exit(); 2382 out_remove_dbg_kobj: 2383 #ifdef DEBUG 2384 xfs_sysfs_del(&xfs_dbg_kobj); 2385 out_remove_scrub_stats: 2386 #endif 2387 xchk_global_stats_teardown(); 2388 out_remove_stats_kobj: 2389 xfs_sysfs_del(&xfsstats.xs_kobj); 2390 out_free_stats: 2391 free_percpu(xfsstats.xs_stats); 2392 out_kset_unregister: 2393 kset_unregister(xfs_kset); 2394 out_debugfs_unregister: 2395 debugfs_remove(xfs_debugfs); 2396 xfs_sysctl_unregister(); 2397 out_cleanup_procfs: 2398 xfs_cleanup_procfs(); 2399 out_mru_cache_uninit: 2400 xfs_mru_cache_uninit(); 2401 out_destroy_wq: 2402 xfs_destroy_workqueues(); 2403 out_destroy_caches: 2404 xfs_destroy_caches(); 2405 out: 2406 return error; 2407 } 2408 2409 STATIC void __exit 2410 exit_xfs_fs(void) 2411 { 2412 xfs_qm_exit(); 2413 unregister_filesystem(&xfs_fs_type); 2414 #ifdef DEBUG 2415 xfs_sysfs_del(&xfs_dbg_kobj); 2416 #endif 2417 xchk_global_stats_teardown(); 2418 xfs_sysfs_del(&xfsstats.xs_kobj); 2419 free_percpu(xfsstats.xs_stats); 2420 kset_unregister(xfs_kset); 2421 debugfs_remove(xfs_debugfs); 2422 xfs_sysctl_unregister(); 2423 xfs_cleanup_procfs(); 2424 xfs_mru_cache_uninit(); 2425 xfs_destroy_workqueues(); 2426 xfs_destroy_caches(); 2427 
STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xchk_global_stats_teardown();
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_caches();
	xfs_uuid_table_free();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");
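
/*
 * Illustrative usage (not part of the module): once built and installed,
 * a filesystem can be created and mounted with the usual userspace tools;
 * the device and mountpoint below are examples only.
 *
 *	# modprobe xfs
 *	# mkfs.xfs /dev/sdb1
 *	# mount -t xfs /dev/sdb1 /mnt
 */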