// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

static void
xfs_mount_set_dax_mode(
	struct xfs_mount	*mp,
	enum xfs_dax_mode	mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_flags |= XFS_MOUNT_DAX_ALWAYS;
		mp->m_flags &= ~XFS_MOUNT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_flags |= XFS_MOUNT_DAX_NEVER;
		mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS;
		break;
	}
}

static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};

/*
 * Table driven mount option parser.
 */
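
/*
 * For example (illustrative values only), a mount such as
 *
 *	mount -o logbufs=8,logbsize=256k,usrquota /dev/sdb1 /mnt
 *
 * is tokenised by fs_parse() against xfs_fs_parameters below, with each
 * recognised key handled by the corresponding case in xfs_fs_parse_param().
 */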

enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};

static const struct fs_parameter_spec xfs_fs_parameters[] = {
	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("ikeep",		Opt_ikeep),
	fsparam_flag("noikeep",		Opt_noikeep),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("attr2",		Opt_attr2),
	fsparam_flag("noattr2",		Opt_noattr2),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	{}
};
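
/*
 * Note that "dax" appears twice above: once as a bare flag (Opt_dax) and
 * once as an enumerated option (Opt_dax_enum, dax=inode|always|never).
 * fs_parse() distinguishes the two by whether a value was supplied, which
 * lets the legacy flag form coexist with the newer enumerated form.
 */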

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

static int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_MOUNT_IKEEP,		",ikeep" },
		{ XFS_MOUNT_WSYNC,		",wsync" },
		{ XFS_MOUNT_NOALIGN,		",noalign" },
		{ XFS_MOUNT_SWALLOC,		",swalloc" },
		{ XFS_MOUNT_NOUUID,		",nouuid" },
		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
		{ XFS_MOUNT_ATTR2,		",attr2" },
		{ XFS_MOUNT_FILESTREAMS,	",filestreams" },
		{ XFS_MOUNT_GRPID,		",grpid" },
		{ XFS_MOUNT_DISCARD,		",discard" },
		{ XFS_MOUNT_LARGEIO,		",largeio" },
		{ XFS_MOUNT_DAX_ALWAYS,		",dax=always" },
		{ XFS_MOUNT_DAX_NEVER,		",dax=never" },
		{ 0, NULL }
	};
	struct xfs_mount	*mp = XFS_M(root->d_sb);
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_flags & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}

	seq_printf(m, ",inode%d",
		   (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);

	if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
		seq_printf(m, ",allocsize=%dk",
			   (1 << mp->m_allocsize_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
		if (mp->m_qflags & XFS_UQUOTA_ENFD)
			seq_puts(m, ",usrquota");
		else
			seq_puts(m, ",uqnoenforce");
	}

	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
		if (mp->m_qflags & XFS_PQUOTA_ENFD)
			seq_puts(m, ",prjquota");
		else
			seq_puts(m, ",pqnoenforce");
	}
	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
		if (mp->m_qflags & XFS_GQUOTA_ENFD)
			seq_puts(m, ",grpquota");
		else
			seq_puts(m, ",gqnoenforce");
	}

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	return 0;
}

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_MOUNT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_MOUNT_32BITINODES is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
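
/*
 * Worked example (illustrative numbers only): with sb_dblocks = 1000000,
 * sb_imax_pct = 25 and sb_agblocks = 250000, the blocks reserved for
 * inodes come to 1000000 * 25 / 100 = 250000, which rounds up to exactly
 * one AG's worth, so max_metadata = 1 and only AG 0 is marked as
 * preferred for metadata when inode32 allocation is active.
 */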

xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino;
	xfs_ino_t	ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
	 * the allocator to accommodate the request.
	 */
	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	else
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);

		if (mp->m_flags & XFS_MOUNT_32BITINODES) {
			if (ino > XFS_MAXINUMBER_32) {
				pag->pagi_inodeok = 0;
				pag->pagf_metadata = 0;
			} else {
				pag->pagi_inodeok = 1;
				maxagi++;
				if (index < max_metadata)
					pag->pagf_metadata = 1;
				else
					pag->pagf_metadata = 0;
			}
		} else {
			pag->pagi_inodeok = 1;
			pag->pagf_metadata = 0;
		}

		xfs_perag_put(pag);
	}

	return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct block_device	**bdevp)
{
	int			error = 0;

	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				    mp);
	if (IS_ERR(*bdevp)) {
		error = PTR_ERR(*bdevp);
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}
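
/*
 * blkdev_get_by_path() above is called with FMODE_EXCL and the mount as
 * holder, so the log and realtime devices are claimed exclusively for the
 * lifetime of the mount and released again by xfs_blkdev_put().
 */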

STATIC void
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (bdev)
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void
xfs_blkdev_issue_flush(
	xfs_buftarg_t		*buftarg)
{
	blkdev_issue_flush(buftarg->bt_bdev);
}

STATIC void
xfs_close_devices(
	struct xfs_mount	*mp)
{
	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_logdev_targp);
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
	if (mp->m_rtdev_targp) {
		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_rtdev_targp);
		xfs_blkdev_put(rtdev);
		fs_put_dax(dax_rtdev);
	}
	xfs_free_buftarg(mp->m_ddev_targp);
	fs_put_dax(dax_ddev);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
		if (error)
			goto out;
		dax_logdev = fs_dax_get_by_bdev(logdev);
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
		if (error)
			goto out_close_logdev;

		if (rtdev == ddev || rtdev == logdev) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
		dax_rtdev = fs_dax_get_by_bdev(rtdev);
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev && logdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	xfs_blkdev_put(rtdev);
	fs_put_dax(dax_rtdev);
 out_close_logdev:
	if (logdev && logdev != ddev) {
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
 out:
	fs_put_dax(dax_ddev);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_sb_version_hassector(&mp->m_sb))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}
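
/*
 * Most of the per-mount workqueues below are created with WQ_MEM_RECLAIM
 * so that each gets a rescuer thread and can guarantee forward progress
 * when work is queued or flushed from the memory reclaim path.
 */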

STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND),
			0, mp->m_super->s_id);
	if (!mp->m_cil_workqueue)
		goto out_destroy_unwritten;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
			0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_cil;

	mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s",
			WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM,
			0, mp->m_super->s_id);
	if (!mp->m_gc_workqueue)
		goto out_destroy_reclaim;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_gc;

	return 0;

out_destroy_gc:
	destroy_workqueue(mp->m_gc_workqueue);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
	destroy_workqueue(mp->m_cil_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_gc_workqueue);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_cil_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
						   m_flush_inodes_work);
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}
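
/*
 * XFS inodes are instead allocated from the xfs_inode_zone slab by the
 * inode cache (see xfs_iget()), with the VFS inode embedded in struct
 * xfs_inode, so ->alloc_inode must never be reached.
 */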

#ifdef DEBUG
static void
xfs_check_delalloc(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;

	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
		return;
	do {
		if (isnullstartblock(got.br_startblock)) {
			xfs_warn(ip->i_mount,
	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
				ip->i_ino,
				whichfork == XFS_DATA_FORK ? "data" : "cow",
				got.br_startoff, got.br_blockcount);
		}
	} while (xfs_iext_next_extent(ifp, &icur, &got));
}
#else
#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
#endif

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);

	xfs_inactive(ip);

	if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
		xfs_check_delalloc(ip, XFS_DATA_FORK);
		xfs_check_delalloc(ip, XFS_COW_FORK);
		ASSERT(0);
	}

	XFS_STATS_INC(ip->i_mount, vn_reclaim);

	/*
	 * We should never get here with one of the reclaim flags already set.
	 */
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));

	/*
	 * We always use background reclaim here because even if the inode is
	 * clean, it still may be under IO and hence we have to wait for IO
	 * completion to occur before we can reclaim the inode. The background
	 * reclaim path handles this more efficiently than we can here, so
	 * simply let background reclaim tear down all inodes.
	 */
	xfs_inode_set_reclaim_tag(ip);
}
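
/*
 * Called by the VFS to write back a lazytime inode. Only the I_DIRTY_SYNC
 * flush of an inode that was previously marked I_DIRTY_TIME matters here;
 * the timestamps are made durable by logging the inode.
 */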

static void
xfs_fs_dirty_inode(
	struct inode		*inode,
	int			flag)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;
	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
		return 0;
	}

	return generic_drop_inode(inode);
}

static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
	kmem_free(mp);
}

STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	xfs_log_force(mp, XFS_LOG_SYNC);
	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	return 0;
}

STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid = u64_to_fsid(id);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	/* make sure statp->f_bfree does not underflow */
	statp->f_bfree = max_t(int64_t, fdblocks - mp->m_alloc_set_aside, 0);
	statp->f_bavail = statp->f_bfree;

	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);

	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		statp->f_blocks = sbp->sb_rblocks;
		statp->f_bavail = statp->f_bfree =
			sbp->sb_frextents * sbp->sb_rextsize;
	}

	return 0;
}
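
/*
 * The reserve pool helpers below are used around freeze and ro remount:
 * passing a target of zero to xfs_reserve_blocks() drains the pool, and
 * the previous size is stashed in m_resblks_save so it can be restored on
 * thaw or rw remount.
 */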

STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks = 0;

	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, &resblks, NULL);
}

STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks;

	if (mp->m_resblks_save) {
		resblks = mp->m_resblks_save;
		mp->m_resblks_save = 0;
	} else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, &resblks, NULL);
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);
	unsigned int		flags;
	int			ret;

	/*
	 * The filesystem is now frozen far enough that memory reclaim
	 * cannot safely operate on the filesystem. Hence we need to
	 * set a GFP_NOFS context here to avoid recursion deadlocks.
	 */
	flags = memalloc_nofs_save();
	xfs_blockgc_stop(mp);
	xfs_save_resvblks(mp);
	ret = xfs_log_quiesce(mp);
	memalloc_nofs_restore(flags);
	return ret;
}

STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);
	return 0;
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
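
/*
 * For example (illustrative values): with a v2 log and a 256k log stripe
 * unit, an unset logbsize is bumped to 256k automatically below, while an
 * explicit logbsize=64k mount would be rejected.
 */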

STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);

	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}

	/*
	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
	 * told by noattr2 to turn it off
	 */
	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
		mp->m_flags |= XFS_MOUNT_ATTR2;

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
		return -EINVAL;
	}

	return 0;
}

static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int			error;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_fdblocks;

	return 0;

free_fdblocks:
	percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}

static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
}

static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/* if ->fill_super failed, we have no mount to tear down */
	if (!sb->s_fs_info)
		return;

	xfs_notice(mp, "Unmounting Filesystem");
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_close_devices(mp);

	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
}

static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
};

static int
suffix_kstrtoint(
	const char	*s,
	unsigned int	base,
	int		*res)
{
	int		last, shift_left_factor = 0, _res;
	char		*value;
	int		ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	/* only write back a result on success; _res is undefined on failure */
	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	else
		*res = _res << shift_left_factor;
	kfree(value);
	return ret;
}
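
/*
 * For example, "logbsize=256k" yields 256 << 10 = 262144 and
 * "allocsize=64k" yields 65536; plain numbers pass through unshifted.
 */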

static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param,
	uint64_t		flag,
	bool			value)
{
	/*
	 * Don't print the warning if reconfiguring and current mount point
	 * already had the flag set
	 */
	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
	    !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value)
		return;
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}

/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	struct xfs_mount	*parsing_mp = fc->s_fs_info;
	struct fs_parse_result	result;
	int			size = 0;
	int			opt;

	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_flags |= XFS_MOUNT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_flags &= ~XFS_MOUNT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_flags |= XFS_MOUNT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_flags |= XFS_MOUNT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_flags |= XFS_MOUNT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_flags |= XFS_MOUNT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_flags |= XFS_MOUNT_LARGEIO;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
					 XFS_UQUOTA_ENFD);
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
					 XFS_PQUOTA_ENFD);
		return 0;
	case Opt_pqnoenforce:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
					 XFS_GQUOTA_ENFD);
		return 0;
	case Opt_gqnoenforce:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		return 0;
	case Opt_discard:
		parsing_mp->m_flags |= XFS_MOUNT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	/* Following mount options will be removed in September 2025 */
	case Opt_ikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true);
		parsing_mp->m_flags |= XFS_MOUNT_IKEEP;
		return 0;
	case Opt_noikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false);
		parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP;
		return 0;
	case Opt_attr2:
		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true);
		parsing_mp->m_flags |= XFS_MOUNT_ATTR2;
		return 0;
	case Opt_noattr2:
		xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true);
		parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2;
		parsing_mp->m_flags |= XFS_MOUNT_NOATTR2;
		return 0;
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}
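
/*
 * Sanity-check the assembled mount options before the superblock has been
 * read; cross-option constraints (norecovery implies read-only, sunit and
 * swidth travel together, and so on) are enforced here.
 */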

static int
xfs_fs_validate_params(
	struct xfs_mount	*mp)
{
	/*
	 * no recovery flag requires a read-only mount
	 */
	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
	    (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize !=  0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}
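
/*
 * Assemble the superblock: validate the parsed options, open the
 * underlying devices, read and verify the on-disk superblock, then run
 * the full mount. Every step is unwound in reverse order on failure.
 */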

static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	error = xfs_fs_validate_params(mp);
	if (error)
		goto out_free_names;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		goto out_free_names;

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_close_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_counters;
	}

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/* V4 support is undergoing deprecation. */
	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
#ifdef CONFIG_XFS_SUPPORT_V4
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_sb_version_needsrepair(&mp->m_sb)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		xfs_warn(mp,
		"File system with blocksize %d bytes. "
		"Only pagesize (%ld) or less will currently work.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
		error = -ENOSYS;
		goto out_free_sb;
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	if (xfs_sb_version_hasbigtime(&mp->m_sb)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
		sb->s_flags |= SB_I_VERSION;

	if (xfs_sb_version_hasbigtime(&mp->m_sb))
		xfs_warn(mp,
 "EXPERIMENTAL big timestamp feature in use. Use at your own risk!");

	if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) {
		bool rtdev_is_dax = false, datadev_is_dax;

		xfs_warn(mp,
		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");

		datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
			sb->s_blocksize);
		if (mp->m_rtdev_targp)
			rtdev_is_dax = bdev_dax_supported(
				mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
		if (!rtdev_is_dax && !datadev_is_dax) {
			xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
			xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
		}
		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
			xfs_alert(mp,
				"DAX and reflink cannot be used together!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}
	}

	if (mp->m_flags & XFS_MOUNT_DISCARD) {
		struct request_queue *q = bdev_get_queue(sb->s_bdev);

		if (!blk_queue_discard(q)) {
			xfs_warn(mp, "mounting with \"discard\" option, but "
					"the device does not support discard");
			mp->m_flags &= ~XFS_MOUNT_DISCARD;
		}
	}

	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	if (xfs_sb_version_hasinobtcounts(&mp->m_sb))
		xfs_warn(mp,
 "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!");

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_close_devices:
	xfs_close_devices(mp);
 out_free_names:
	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
	return error;

 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}

static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}
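
/*
 * ro -> rw remount: write back any deferred superblock changes, refill the
 * reserve pool, restart background log and blockgc work, recover leftover
 * CoW extents and re-create the per-AG reservations.
 */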

static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int			error;

	if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	mp->m_flags &= ~XFS_MOUNT_RDONLY;

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/* Recover any CoW blocks that never got remapped. */
	error = xfs_reflink_recover_cow(mp);
	if (error) {
		xfs_err(mp,
			"Error %d recovering leftover CoW allocations.", error);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	return 0;
}

static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	int			error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/* Get rid of any leftover CoW reservations... */
	error = xfs_blockgc_free_space(mp, NULL);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/* Free the per-AG metadata reservation pool. */
	error = xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	xfs_log_clean(mp);
	mp->m_flags |= XFS_MOUNT_RDONLY;

	return 0;
}

/*
 * Logically we would return an error here to prevent users from believing
 * they might have changed mount options using remount which can't be changed.
 *
 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
 * arguments in some cases so we can't blindly reject options, but have to
 * check for each specified option if it actually differs from the currently
 * set option and only reject it if that's the case.
 *
 * Until that is implemented we return success for every remount request, and
 * silently ignore all options that we can't actually change.
 */
static int
xfs_fs_reconfigure(
	struct fs_context *fc)
{
	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
	struct xfs_mount	*new_mp = fc->s_fs_info;
	xfs_sb_t		*sbp = &mp->m_sb;
	int			flags = fc->sb_flags;
	int			error;

	/* version 5 superblocks always support version counters. */
	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
		fc->sb_flags |= SB_I_VERSION;

	error = xfs_fs_validate_params(new_mp);
	if (error)
		return error;

	sync_filesystem(mp->m_super);

	/* inode32 -> inode64 */
	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
	    !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
		mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
	}

	/* inode64 -> inode32 */
	if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
	    (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
		mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
	}

	/* ro -> rw */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
		error = xfs_remount_rw(mp);
		if (error)
			return error;
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
		error = xfs_remount_ro(mp);
		if (error)
			return error;
	}

	return 0;
}

static void xfs_fs_free(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = fc->s_fs_info;

	/*
	 * mp is stored in the fs_context when it is initialized.
	 * mp is transferred to the superblock on a successful mount,
	 * but if an error occurs before the transfer we have to free
	 * it here.
	 */
	if (mp)
		xfs_mount_free(mp);
}
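
/*
 * These hooks implement the new mount API: ->parse_param runs once per
 * mount option, ->get_tree creates or finds the superblock, ->reconfigure
 * handles remounts, and ->free releases a context that never made it to a
 * mounted superblock.
 */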

static const struct fs_context_operations xfs_context_ops = {
	.parse_param = xfs_fs_parse_param,
	.get_tree    = xfs_fs_get_tree,
	.reconfigure = xfs_fs_reconfigure,
	.free        = xfs_fs_free,
};

static int xfs_init_fs_context(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp;

	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
	if (!mp)
		return -ENOMEM;

	spin_lock_init(&mp->m_sb_lock);
	spin_lock_init(&mp->m_agirotor_lock);
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	spin_lock_init(&mp->m_perag_lock);
	mutex_init(&mp->m_growlock);
	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
	mp->m_allocsize_log = 16; /* 64k */

	/*
	 * Copy binary VFS mount flags we are interested in.
	 */
	if (fc->sb_flags & SB_RDONLY)
		mp->m_flags |= XFS_MOUNT_RDONLY;
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_flags |= XFS_MOUNT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_flags |= XFS_MOUNT_WSYNC;

	fc->s_fs_info = mp;
	fc->ops = &xfs_context_ops;

	return 0;
}

static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.init_fs_context	= xfs_init_fs_context,
	.parameters		= xfs_fs_parameters,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("xfs");
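
/*
 * The "zones" below are global slab caches shared by every XFS mount; they
 * are created once at module load and torn down, after an rcu_barrier(),
 * at module exit.
 */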

STATIC int __init
xfs_init_zones(void)
{
	xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket",
						sizeof(struct xlog_ticket),
						0, 0, NULL);
	if (!xfs_log_ticket_zone)
		goto out;

	xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item",
					sizeof(struct xfs_extent_free_item),
					0, 0, NULL);
	if (!xfs_bmap_free_item_zone)
		goto out_destroy_log_ticket_zone;

	xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur",
					       sizeof(struct xfs_btree_cur),
					       0, 0, NULL);
	if (!xfs_btree_cur_zone)
		goto out_destroy_bmap_free_item_zone;

	xfs_da_state_zone = kmem_cache_create("xfs_da_state",
					      sizeof(struct xfs_da_state),
					      0, 0, NULL);
	if (!xfs_da_state_zone)
		goto out_destroy_btree_cur_zone;

	xfs_ifork_zone = kmem_cache_create("xfs_ifork",
					   sizeof(struct xfs_ifork),
					   0, 0, NULL);
	if (!xfs_ifork_zone)
		goto out_destroy_da_state_zone;

	xfs_trans_zone = kmem_cache_create("xfs_trans",
					   sizeof(struct xfs_trans),
					   0, 0, NULL);
	if (!xfs_trans_zone)
		goto out_destroy_ifork_zone;

	/*
	 * The size of the zone allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_zone = kmem_cache_create("xfs_buf_item",
					      sizeof(struct xfs_buf_log_item),
					      0, 0, NULL);
	if (!xfs_buf_item_zone)
		goto out_destroy_trans_zone;

	xfs_efd_zone = kmem_cache_create("xfs_efd_item",
					(sizeof(struct xfs_efd_log_item) +
					(XFS_EFD_MAX_FAST_EXTENTS - 1) *
					sizeof(struct xfs_extent)),
					0, 0, NULL);
	if (!xfs_efd_zone)
		goto out_destroy_buf_item_zone;

	xfs_efi_zone = kmem_cache_create("xfs_efi_item",
					 (sizeof(struct xfs_efi_log_item) +
					 (XFS_EFI_MAX_FAST_EXTENTS - 1) *
					 sizeof(struct xfs_extent)),
					 0, 0, NULL);
	if (!xfs_efi_zone)
		goto out_destroy_efd_zone;

	xfs_inode_zone = kmem_cache_create("xfs_inode",
					   sizeof(struct xfs_inode), 0,
					   (SLAB_HWCACHE_ALIGN |
					    SLAB_RECLAIM_ACCOUNT |
					    SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					   xfs_fs_inode_init_once);
	if (!xfs_inode_zone)
		goto out_destroy_efi_zone;

	xfs_ili_zone = kmem_cache_create("xfs_ili",
					 sizeof(struct xfs_inode_log_item), 0,
					 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					 NULL);
	if (!xfs_ili_zone)
		goto out_destroy_inode_zone;

	xfs_icreate_zone = kmem_cache_create("xfs_icr",
					     sizeof(struct xfs_icreate_item),
					     0, 0, NULL);
	if (!xfs_icreate_zone)
		goto out_destroy_ili_zone;

	xfs_rud_zone = kmem_cache_create("xfs_rud_item",
					 sizeof(struct xfs_rud_log_item),
					 0, 0, NULL);
	if (!xfs_rud_zone)
		goto out_destroy_icreate_zone;

	xfs_rui_zone = kmem_cache_create("xfs_rui_item",
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_rui_zone)
		goto out_destroy_rud_zone;

	xfs_cud_zone = kmem_cache_create("xfs_cud_item",
					 sizeof(struct xfs_cud_log_item),
					 0, 0, NULL);
	if (!xfs_cud_zone)
		goto out_destroy_rui_zone;

	xfs_cui_zone = kmem_cache_create("xfs_cui_item",
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_cui_zone)
		goto out_destroy_cud_zone;

	xfs_bud_zone = kmem_cache_create("xfs_bud_item",
					 sizeof(struct xfs_bud_log_item),
					 0, 0, NULL);
	if (!xfs_bud_zone)
		goto out_destroy_cui_zone;

	xfs_bui_zone = kmem_cache_create("xfs_bui_item",
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_bui_zone)
		goto out_destroy_bud_zone;

	return 0;
STATIC void
xfs_destroy_zones(void)
{
	/*
	 * Make sure all delayed RCU free callbacks are flushed before we
	 * destroy the caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_bui_zone);
	kmem_cache_destroy(xfs_bud_zone);
	kmem_cache_destroy(xfs_cui_zone);
	kmem_cache_destroy(xfs_cud_zone);
	kmem_cache_destroy(xfs_rui_zone);
	kmem_cache_destroy(xfs_rud_zone);
	kmem_cache_destroy(xfs_icreate_zone);
	kmem_cache_destroy(xfs_ili_zone);
	kmem_cache_destroy(xfs_inode_zone);
	kmem_cache_destroy(xfs_efi_zone);
	kmem_cache_destroy(xfs_efd_zone);
	kmem_cache_destroy(xfs_buf_item_zone);
	kmem_cache_destroy(xfs_trans_zone);
	kmem_cache_destroy(xfs_ifork_zone);
	kmem_cache_destroy(xfs_da_state_zone);
	kmem_cache_destroy(xfs_btree_cur_zone);
	kmem_cache_destroy(xfs_bmap_free_item_zone);
	kmem_cache_destroy(xfs_log_ticket_zone);
}

STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
			0);
	if (!xfs_discard_wq)
		goto out_free_alloc_wq;

	return 0;
 out_free_alloc_wq:
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}

STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}

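/*
 * Module init: construct the global state (caches, workqueues, MRU cache,
 * buffer subsystem, procfs/sysctl/sysfs entries, quota support) and only
 * then register the filesystem type. Every failure point unwinds exactly
 * the steps that already succeeded, in reverse order.
 */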
STATIC int __init
init_xfs_fs(void)
{
	int			error;

	xfs_check_ondisk_structs();

	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");

	xfs_dir_startup();

	error = xfs_init_zones();
	if (error)
		goto out;

	error = xfs_init_workqueues();
	if (error)
		goto out_destroy_zones;

	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

	error = xfs_buf_init();
	if (error)
		goto out_mru_cache_uninit;

	error = xfs_init_procfs();
	if (error)
		goto out_buf_terminate;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_sysctl_unregister;
	}

	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
			       "stats");
	if (error)
		goto out_free_stats;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_stats_kobj;
#endif

	error = xfs_qm_init();
	if (error)
		goto out_remove_dbg_kobj;

	error = register_filesystem(&xfs_fs_type);
	if (error)
		goto out_qm_exit;
	return 0;

 out_qm_exit:
	xfs_qm_exit();
 out_remove_dbg_kobj:
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_stats_kobj:
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
	kset_unregister(xfs_kset);
 out_sysctl_unregister:
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
 out_buf_terminate:
	xfs_buf_terminate();
 out_mru_cache_uninit:
	xfs_mru_cache_uninit();
 out_destroy_wq:
	xfs_destroy_workqueues();
 out_destroy_zones:
	xfs_destroy_zones();
 out:
	return error;
}

STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_zones();
	xfs_uuid_table_free();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");
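
/*
 * Usage note: with xfs_fs_type registered and the MODULE_ALIAS_FS("xfs")
 * alias above in place, a request such as "mount -t xfs /dev/sdb1 /mnt"
 * (device path purely illustrative) can autoload this module; the VFS then
 * calls xfs_init_fs_context() to begin the new-style mount sequence.
 */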