// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"

#include <linux/namei.h>
#include <linux/dax.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/mount.h>
#include <linux/mempool.h>
#include <linux/writeback.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/parser.h>

static const struct super_operations xfs_super_operations;
struct bio_set xfs_ioend_bioset;

static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

/*
 * Table driven mount option parser.
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
};

static const match_table_t tokens = {
	{Opt_logbufs,	"logbufs=%u"},	/* number of XFS log buffers */
	{Opt_logbsize,	"logbsize=%s"},	/* size of XFS log buffers */
	{Opt_logdev,	"logdev=%s"},	/* log device */
	{Opt_rtdev,	"rtdev=%s"},	/* realtime I/O device */
	{Opt_biosize,	"biosize=%u"},	/* log2 of preferred buffered io size */
	{Opt_wsync,	"wsync"},	/* safe-mode nfs compatible mount */
	{Opt_noalign,	"noalign"},	/* turn off stripe alignment */
	{Opt_swalloc,	"swalloc"},	/* turn on stripe width allocation */
	{Opt_sunit,	"sunit=%u"},	/* data volume stripe unit */
	{Opt_swidth,	"swidth=%u"},	/* data volume stripe width */
	{Opt_nouuid,	"nouuid"},	/* ignore filesystem UUID */
	{Opt_mtpt,	"mtpt"},	/* filesystem mount point */
	{Opt_grpid,	"grpid"},	/* group-ID from parent directory */
	{Opt_nogrpid,	"nogrpid"},	/* group-ID from current process */
	{Opt_bsdgroups,	"bsdgroups"},	/* group-ID from parent directory */
	{Opt_sysvgroups,"sysvgroups"},	/* group-ID from current process */
	{Opt_allocsize,	"allocsize=%s"},/* preferred allocation size */
	{Opt_norecovery,"norecovery"},	/* don't run XFS recovery */
	{Opt_inode64,	"inode64"},	/* inodes can be allocated anywhere */
	{Opt_inode32,	"inode32"},	/* inode allocation limited to
					 * XFS_MAXINUMBER_32 */
	{Opt_ikeep,	"ikeep"},	/* do not free empty inode clusters */
	{Opt_noikeep,	"noikeep"},	/* free empty inode clusters */
	{Opt_largeio,	"largeio"},	/* report large I/O sizes in stat() */
	{Opt_nolargeio,	"nolargeio"},	/* do not report large I/O sizes
					 * in stat(). */
	{Opt_attr2,	"attr2"},	/* do use attr2 attribute format */
	{Opt_noattr2,	"noattr2"},	/* do not use attr2 attribute format */
	{Opt_filestreams,"filestreams"},/* use filestreams allocator */
	{Opt_quota,	"quota"},	/* disk quotas (user) */
	{Opt_noquota,	"noquota"},	/* no quotas */
	{Opt_usrquota,	"usrquota"},	/* user quota enabled */
	{Opt_grpquota,	"grpquota"},	/* group quota enabled */
	{Opt_prjquota,	"prjquota"},	/* project quota enabled */
	{Opt_uquota,	"uquota"},	/* user quota (IRIX variant) */
	{Opt_gquota,	"gquota"},	/* group quota (IRIX variant) */
	{Opt_pquota,	"pquota"},	/* project quota (IRIX variant) */
	{Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
	{Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
	{Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
	{Opt_qnoenforce, "qnoenforce"},	/* same as uqnoenforce */
	{Opt_discard,	"discard"},	/* Discard unused blocks */
	{Opt_nodiscard,	"nodiscard"},	/* Do not discard unused blocks */
	{Opt_dax,	"dax"},		/* Enable direct access to bdev pages */
	{Opt_err,	NULL},
};

STATIC int
suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
{
	int	last, shift_left_factor = 0, _res;
	char	*value;
	int	ret = 0;

	value = match_strdup(s);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	/*
	 * Only write the result back on success so that a failed parse
	 * does not copy an uninitialised value to the caller.
	 */
	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	else
		*res = _res << shift_left_factor;
	kfree(value);
	return ret;
}
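/*
 * For example, a mount option of "logbsize=32k" is parsed by
 * suffix_kstrtoint() as 32 << 10 = 32768 bytes, and "logbsize=256k"
 * yields 262144, the XLOG_MAX_RECORD_BSIZE limit checked below in
 * xfs_parseargs().  Options without a size suffix, such as
 * "logbufs=8", go through plain match_int() instead.
 */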
/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock has _not_ yet been read in.
 *
 * Note that this function leaks the various device name allocations on
 * failure.  The caller takes care of them.
 *
 * *sb is const because this is also used to test options on the remount
 * path, and we don't want this to have any side effects at remount time.
 * Today this function does not change *sb, but just to future-proof...
 */
STATIC int
xfs_parseargs(
	struct xfs_mount	*mp,
	char			*options)
{
	const struct super_block *sb = mp->m_super;
	char			*p;
	substring_t		args[MAX_OPT_ARGS];
	int			dsunit = 0;
	int			dswidth = 0;
	int			iosize = 0;
	uint8_t			iosizelog = 0;

	/*
	 * set up the mount name first so all the errors will refer to the
	 * correct device.
	 */
	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
	if (!mp->m_fsname)
		return -ENOMEM;
	mp->m_fsname_len = strlen(mp->m_fsname) + 1;

	/*
	 * Copy binary VFS mount flags we are interested in.
	 */
	if (sb_rdonly(sb))
		mp->m_flags |= XFS_MOUNT_RDONLY;
	if (sb->s_flags & SB_DIRSYNC)
		mp->m_flags |= XFS_MOUNT_DIRSYNC;
	if (sb->s_flags & SB_SYNCHRONOUS)
		mp->m_flags |= XFS_MOUNT_WSYNC;

	/*
	 * Set some default flags that could be cleared by the mount option
	 * parsing.
	 */
	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;

	if (!options)
		goto done;

	while ((p = strsep(&options, ",")) != NULL) {
		int		token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_logbufs:
			if (match_int(args, &mp->m_logbufs))
				return -EINVAL;
			break;
		case Opt_logbsize:
			if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
				return -EINVAL;
			break;
		case Opt_logdev:
			kfree(mp->m_logname);
			mp->m_logname = match_strdup(args);
			if (!mp->m_logname)
				return -ENOMEM;
			break;
		case Opt_mtpt:
			xfs_warn(mp, "%s option not allowed on this system", p);
			return -EINVAL;
		case Opt_rtdev:
			kfree(mp->m_rtname);
			mp->m_rtname = match_strdup(args);
			if (!mp->m_rtname)
				return -ENOMEM;
			break;
		case Opt_allocsize:
		case Opt_biosize:
			if (suffix_kstrtoint(args, 10, &iosize))
				return -EINVAL;
			iosizelog = ffs(iosize) - 1;
			break;
		case Opt_grpid:
		case Opt_bsdgroups:
			mp->m_flags |= XFS_MOUNT_GRPID;
			break;
		case Opt_nogrpid:
		case Opt_sysvgroups:
			mp->m_flags &= ~XFS_MOUNT_GRPID;
			break;
		case Opt_wsync:
			mp->m_flags |= XFS_MOUNT_WSYNC;
			break;
		case Opt_norecovery:
			mp->m_flags |= XFS_MOUNT_NORECOVERY;
			break;
		case Opt_noalign:
			mp->m_flags |= XFS_MOUNT_NOALIGN;
			break;
		case Opt_swalloc:
			mp->m_flags |= XFS_MOUNT_SWALLOC;
			break;
		case Opt_sunit:
			if (match_int(args, &dsunit))
				return -EINVAL;
			break;
		case Opt_swidth:
			if (match_int(args, &dswidth))
				return -EINVAL;
			break;
		case Opt_inode32:
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
			break;
		case Opt_inode64:
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
			break;
		case Opt_nouuid:
			mp->m_flags |= XFS_MOUNT_NOUUID;
			break;
		case Opt_ikeep:
			mp->m_flags |= XFS_MOUNT_IKEEP;
			break;
		case Opt_noikeep:
			mp->m_flags &= ~XFS_MOUNT_IKEEP;
			break;
		case Opt_largeio:
			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
			break;
		case Opt_nolargeio:
			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
			break;
		case Opt_attr2:
			mp->m_flags |= XFS_MOUNT_ATTR2;
			break;
		case Opt_noattr2:
			mp->m_flags &= ~XFS_MOUNT_ATTR2;
			mp->m_flags |= XFS_MOUNT_NOATTR2;
			break;
		case Opt_filestreams:
			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
			break;
		case Opt_noquota:
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
			break;
		case Opt_quota:
		case Opt_uquota:
		case Opt_usrquota:
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
					 XFS_UQUOTA_ENFD);
			break;
		case Opt_qnoenforce:
		case Opt_uqnoenforce:
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
			break;
		case Opt_pquota:
		case Opt_prjquota:
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
					 XFS_PQUOTA_ENFD);
			break;
		case Opt_pqnoenforce:
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_PQUOTA_ENFD;
			break;
		case Opt_gquota:
		case Opt_grpquota:
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
					 XFS_GQUOTA_ENFD);
			break;
		case Opt_gqnoenforce:
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_GQUOTA_ENFD;
			break;
		case Opt_discard:
			mp->m_flags |= XFS_MOUNT_DISCARD;
			break;
		case Opt_nodiscard:
			mp->m_flags &= ~XFS_MOUNT_DISCARD;
			break;
#ifdef CONFIG_FS_DAX
		case Opt_dax:
			mp->m_flags |= XFS_MOUNT_DAX;
			break;
#endif
		default:
			xfs_warn(mp, "unknown mount option [%s].", p);
			return -EINVAL;
		}
	}

	/*
	 * no recovery flag requires a read-only mount
	 */
	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

#ifndef CONFIG_XFS_QUOTA
	if (XFS_IS_QUOTA_RUNNING(mp)) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}
#endif

	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (dsunit && (dswidth % dsunit != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			dswidth, dsunit);
		return -EINVAL;
	}

done:
	if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
		/*
		 * At this point the superblock has not been read
		 * in, therefore we do not know the block size.
		 * Before the mount call ends we will convert
		 * these to FSBs.
		 */
		mp->m_dalign = dsunit;
		mp->m_swidth = dswidth;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}
	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize != 0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if (iosizelog) {
		if (iosizelog > XFS_MAX_IO_LOG ||
		    iosizelog < XFS_MIN_IO_LOG) {
			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
				iosizelog, XFS_MIN_IO_LOG,
				XFS_MAX_IO_LOG);
			return -EINVAL;
		}

		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
		mp->m_readio_log = iosizelog;
		mp->m_writeio_log = iosizelog;
	}

	return 0;
}

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

STATIC int
xfs_showargs(
	struct xfs_mount	*mp,
	struct seq_file		*m)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_MOUNT_IKEEP,		",ikeep" },
		{ XFS_MOUNT_WSYNC,		",wsync" },
		{ XFS_MOUNT_NOALIGN,		",noalign" },
		{ XFS_MOUNT_SWALLOC,		",swalloc" },
		{ XFS_MOUNT_NOUUID,		",nouuid" },
		{ XFS_MOUNT_NORECOVERY,		",norecovery" },
		{ XFS_MOUNT_ATTR2,		",attr2" },
		{ XFS_MOUNT_FILESTREAMS,	",filestreams" },
		{ XFS_MOUNT_GRPID,		",grpid" },
		{ XFS_MOUNT_DISCARD,		",discard" },
		{ XFS_MOUNT_SMALL_INUMS,	",inode32" },
		{ XFS_MOUNT_DAX,		",dax" },
		{ 0, NULL }
	};
	static struct proc_xfs_info xfs_info_unset[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_MOUNT_COMPAT_IOSIZE,	",largeio" },
		{ XFS_MOUNT_SMALL_INUMS,	",inode64" },
		{ 0, NULL }
	};
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_flags & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}
	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
		if (!(mp->m_flags & xfs_infop->flag))
			seq_puts(m, xfs_infop->str);
	}

	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
		seq_printf(m, ",allocsize=%dk",
			   (int)(1 << mp->m_writeio_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
		seq_puts(m, ",usrquota");
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, ",uqnoenforce");

	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
		if (mp->m_qflags & XFS_PQUOTA_ENFD)
			seq_puts(m, ",prjquota");
		else
			seq_puts(m, ",pqnoenforce");
	}
	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
		if (mp->m_qflags & XFS_GQUOTA_ENFD)
			seq_puts(m, ",grpquota");
		else
			seq_puts(m, ",gqnoenforce");
	}

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	return 0;
}

static uint64_t
xfs_max_file_offset(
	unsigned int		blockshift)
{
	unsigned int		pagefactor = 1;
	unsigned int		bitshift = BITS_PER_LONG - 1;

	/* Figure out maximum filesize, on Linux this can depend on
	 * the filesystem blocksize (on 32 bit platforms).
	 * __block_write_begin does this in an [unsigned] long...
	 *      page->index << (PAGE_SHIFT - bbits)
	 * So, for page sized blocks (4K on 32 bit platforms),
	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
	 *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
	 * but for smaller blocksizes it is less (bbits = log2 bsize).
	 * Note1: get_block_t takes a long (implicit cast from above)
	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
	 * can optionally convert the [unsigned] long from above into
	 * an [unsigned] long long.
	 */

#if BITS_PER_LONG == 32
# if defined(CONFIG_LBDAF)
	ASSERT(sizeof(sector_t) == 8);
	pagefactor = PAGE_SIZE;
	bitshift = BITS_PER_LONG;
# else
	pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
# endif
#endif

	return (((uint64_t)pagefactor) << bitshift) - 1;
}
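/*
 * Worked example of the math above: on a 64-bit kernel pagefactor = 1
 * and bitshift = 63, so the limit is 2^63 - 1.  On a 32-bit kernel
 * without CONFIG_LBDAF and 4k blocks (blockshift = PAGE_SHIFT = 12),
 * pagefactor = 4096 and bitshift = 31, giving (4096 << 31) - 1 =
 * 2^43 - 1, the ~8Tb wrap point mentioned in the comment; 1k blocks
 * shrink that to 2^41 - 1.
 */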
/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_MOUNT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_MOUNT_32BITINODES is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino;
	xfs_ino_t	ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (mp->m_maxicount) {
		uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
	 * the allocator to accommodate the request.
	 */
	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	else
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);

		if (mp->m_flags & XFS_MOUNT_32BITINODES) {
			if (ino > XFS_MAXINUMBER_32) {
				pag->pagi_inodeok = 0;
				pag->pagf_metadata = 0;
			} else {
				pag->pagi_inodeok = 1;
				maxagi++;
				if (index < max_metadata)
					pag->pagf_metadata = 1;
				else
					pag->pagf_metadata = 0;
			}
		} else {
			pag->pagi_inodeok = 1;
			pag->pagf_metadata = 0;
		}

		xfs_perag_put(pag);
	}

	return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
}
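/*
 * Illustrative example (hypothetical sizes): with inode32 on a
 * filesystem large enough that the last possible inode number in AG 4
 * exceeds XFS_MAXINUMBER_32, the loop above leaves AGs 0-3 with
 * pagi_inodeok = 1 (the first max_metadata of them also taking
 * metadata), disables inode allocation in AG 4 and beyond, and so
 * returns maxagi = 4.
 */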
STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct block_device	**bdevp)
{
	int			error = 0;

	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				    mp);
	if (IS_ERR(*bdevp)) {
		error = PTR_ERR(*bdevp);
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}

STATIC void
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (bdev)
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

void
xfs_blkdev_issue_flush(
	xfs_buftarg_t		*buftarg)
{
	blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
}

STATIC void
xfs_close_devices(
	struct xfs_mount	*mp)
{
	struct dax_device *dax_ddev = mp->m_ddev_targp->bt_daxdev;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_logdev_targp);
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
	if (mp->m_rtdev_targp) {
		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;

		xfs_free_buftarg(mp->m_rtdev_targp);
		xfs_blkdev_put(rtdev);
		fs_put_dax(dax_rtdev);
	}
	xfs_free_buftarg(mp->m_ddev_targp);
	fs_put_dax(dax_ddev);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct dax_device	*dax_ddev = fs_dax_get_by_bdev(ddev);
	struct dax_device	*dax_logdev = NULL, *dax_rtdev = NULL;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
		if (error)
			goto out;
		dax_logdev = fs_dax_get_by_bdev(logdev);
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
		if (error)
			goto out_close_logdev;

		if (rtdev == ddev || rtdev == logdev) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
		dax_rtdev = fs_dax_get_by_bdev(rtdev);
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, dax_ddev);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, dax_rtdev);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev && logdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, dax_logdev);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	xfs_blkdev_put(rtdev);
	fs_put_dax(dax_rtdev);
 out_close_logdev:
	if (logdev && logdev != ddev) {
		xfs_blkdev_put(logdev);
		fs_put_dax(dax_logdev);
	}
 out:
	fs_put_dax(dax_ddev);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_sb_version_hassector(&mp->m_sb))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}

STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_data_workqueue)
		goto out_destroy_buf;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_data_iodone_queue;

	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_cil_workqueue)
		goto out_destroy_unwritten;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_cil;

	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
			mp->m_fsname);
	if (!mp->m_log_workqueue)
		goto out_destroy_reclaim;

	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
	if (!mp->m_eofblocks_workqueue)
		goto out_destroy_log;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
					       mp->m_fsname);
	if (!mp->m_sync_workqueue)
		goto out_destroy_eofb;

	return 0;

out_destroy_eofb:
	destroy_workqueue(mp->m_eofblocks_workqueue);
out_destroy_log:
	destroy_workqueue(mp->m_log_workqueue);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
	destroy_workqueue(mp->m_cil_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_data_iodone_queue:
	destroy_workqueue(mp->m_data_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_eofblocks_workqueue);
	destroy_workqueue(mp->m_log_workqueue);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_cil_workqueue);
	destroy_workqueue(mp->m_data_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}

#ifdef DEBUG
static void
xfs_check_delalloc(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;

	if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
		return;
	do {
		if (isnullstartblock(got.br_startblock)) {
			xfs_warn(ip->i_mount,
	"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
				ip->i_ino,
				whichfork == XFS_DATA_FORK ? "data" : "cow",
				got.br_startoff, got.br_blockcount);
		}
	} while (xfs_iext_next_extent(ifp, &icur, &got));
}
#else
#define xfs_check_delalloc(ip, whichfork)	do { } while (0)
#endif

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);

	xfs_inactive(ip);

	if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
		xfs_check_delalloc(ip, XFS_DATA_FORK);
		xfs_check_delalloc(ip, XFS_COW_FORK);
		ASSERT(0);
	}

	XFS_STATS_INC(ip->i_mount, vn_reclaim);

	/*
	 * We should never get here with one of the reclaim flags already set.
	 */
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));

	/*
	 * We always use background reclaim here because even if the
	 * inode is clean, it still may be under IO and hence we have
	 * to take the flush lock. The background reclaim path handles
	 * this more efficiently than we can here, so simply let background
	 * reclaim tear down all inodes.
	 */
	xfs_inode_set_reclaim_tag(ip);
}

static void
xfs_fs_dirty_inode(
	struct inode			*inode,
	int				flag)
{
	struct xfs_inode		*ip = XFS_I(inode);
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_trans		*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;
	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED);
		return 0;
	}

	return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
}

STATIC void
xfs_free_fsname(
	struct xfs_mount	*mp)
{
	kfree(mp->m_fsname);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
}

STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	xfs_log_force(mp, XFS_LOG_SYNC);
	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	return 0;
}

STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid.val[0] = (u32)id;
	statp->f_fsid.val[1] = (u32)(id >> 32);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	statp->f_bfree = fdblocks - mp->m_alloc_set_aside;
	statp->f_bavail = statp->f_bfree;

	fakeinos = statp->f_bfree << sbp->sb_inopblog;
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (mp->m_maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					mp->m_maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);

	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
	    (mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) ==
			    (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_d.di_flags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		statp->f_blocks = sbp->sb_rblocks;
		statp->f_bavail = statp->f_bfree =
			sbp->sb_frextents * sbp->sb_rextsize;
	}

	return 0;
}
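/*
 * Worked example for the f_files estimate above (illustrative numbers):
 * with f_bfree = 1000 blocks and sb_inopblog = 5 (32 inodes per block),
 * fakeinos = 1000 << 5 = 32000, i.e. the free space could still hold up
 * to 32000 new inodes on top of the icount already allocated, capped by
 * XFS_MAXINUMBER and, when imaxpct is in effect, by m_maxicount.
 */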
STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks = 0;

	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, &resblks, NULL);
}

STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks;

	if (mp->m_resblks_save) {
		resblks = mp->m_resblks_save;
		mp->m_resblks_save = 0;
	} else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, &resblks, NULL);
}

/*
 * Trigger writeback of all the dirty metadata in the file system.
 *
 * This ensures that the metadata is written to their location on disk rather
 * than just existing in transactions in the log. This means after a quiesce
 * there is no log replay required to write the inodes to disk - this is the
 * primary difference between a sync and a quiesce.
 *
 * Note: xfs_log_quiesce() stops background log work - the callers must ensure
 * it is started again when appropriate.
 */
void
xfs_quiesce_attr(
	struct xfs_mount	*mp)
{
	int	error = 0;

	/* wait for all modifications to complete */
	while (atomic_read(&mp->m_active_trans) > 0)
		delay(100);

	/* force the log to unpin objects from the now complete transactions */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/* reclaim inodes to do any IO before the freeze completes */
	xfs_reclaim_inodes(mp, 0);
	xfs_reclaim_inodes(mp, SYNC_WAIT);

	/* Push the superblock and write an unmount record */
	error = xfs_log_sbcount(mp);
	if (error)
		xfs_warn(mp, "xfs_quiesce_attr: failed to log sb changes. "
				"Frozen image may not be consistent.");
	/*
	 * Just warn here till VFS can correctly support
	 * read-only remount without racing.
	 */
	WARN_ON(atomic_read(&mp->m_active_trans) != 0);

	xfs_log_quiesce(mp);
}

STATIC int
xfs_test_remount_options(
	struct super_block	*sb,
	char			*options)
{
	int			error = 0;
	struct xfs_mount	*tmp_mp;

	tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
	if (!tmp_mp)
		return -ENOMEM;

	tmp_mp->m_super = sb;
	error = xfs_parseargs(tmp_mp, options);
	xfs_free_fsname(tmp_mp);
	kmem_free(tmp_mp);

	return error;
}

STATIC int
xfs_fs_remount(
	struct super_block	*sb,
	int			*flags,
	char			*options)
{
	struct xfs_mount	*mp = XFS_M(sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	substring_t		args[MAX_OPT_ARGS];
	char			*p;
	int			error;

	/* First, check for complete junk; i.e. invalid options */
	error = xfs_test_remount_options(sb, options);
	if (error)
		return error;

	sync_filesystem(sb);
	while ((p = strsep(&options, ",")) != NULL) {
		int token;

		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_inode64:
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		case Opt_inode32:
			mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
			mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
			break;
		default:
			/*
			 * Logically we would return an error here to prevent
			 * users from believing they might have changed
			 * mount options using remount which can't be changed.
			 *
			 * But unfortunately mount(8) adds all options from
			 * mtab and fstab to the mount arguments in some cases
			 * so we can't blindly reject options, but have to
			 * check for each specified option if it actually
			 * differs from the currently set option and only
			 * reject it if that's the case.
			 *
			 * Until that is implemented we return success for
			 * every remount request, and silently ignore all
			 * options that we can't actually change.
			 */
#if 0
			xfs_info(mp,
		"mount option \"%s\" not supported for remount", p);
			return -EINVAL;
#else
			break;
#endif
		}
	}

	/* ro -> rw */
	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
		if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
			xfs_warn(mp,
		"ro->rw transition prohibited on norecovery mount");
			return -EINVAL;
		}

		if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
		    xfs_sb_has_ro_compat_feature(sbp,
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
			xfs_warn(mp,
"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
				(sbp->sb_features_ro_compat &
					XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
			return -EINVAL;
		}

		mp->m_flags &= ~XFS_MOUNT_RDONLY;

		/*
		 * If this is the first remount to writeable state we
		 * might have some superblock changes to update.
		 */
		if (mp->m_update_sb) {
			error = xfs_sync_sb(mp, false);
			if (error) {
				xfs_warn(mp, "failed to write sb changes");
				return error;
			}
			mp->m_update_sb = false;
		}

		/*
		 * Fill out the reserve pool if it is empty. Use the stashed
		 * value if it is non-zero, otherwise go with the default.
		 */
		xfs_restore_resvblks(mp);
		xfs_log_work_queue(mp);

		/* Recover any CoW blocks that never got remapped. */
		error = xfs_reflink_recover_cow(mp);
		if (error) {
			xfs_err(mp,
	"Error %d recovering leftover CoW allocations.", error);
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}
		xfs_icache_enable_reclaim(mp);

		/* Create the per-AG metadata reservation pool. */
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error && error != -ENOSPC)
			return error;
	}

	/* rw -> ro */
	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
		/*
		 * Cancel background eofb scanning so it cannot race with the
		 * final log force+buftarg wait and deadlock the remount.
		 */
		xfs_icache_disable_reclaim(mp);

		/* Get rid of any leftover CoW reservations... */
		error = xfs_icache_free_cowblocks(mp, NULL);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/* Free the per-AG metadata reservation pool. */
		error = xfs_fs_unreserve_ag_blocks(mp);
		if (error) {
			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			return error;
		}

		/*
		 * Before we sync the metadata, we need to free up the reserve
		 * block pool so that the used block count in the superblock on
		 * disk is correct at the end of the remount. Stash the current
		 * reserve pool size so that if we get remounted rw, we can
		 * return it to the same size.
		 */
		xfs_save_resvblks(mp);

		xfs_quiesce_attr(mp);
		mp->m_flags |= XFS_MOUNT_RDONLY;
	}

	return 0;
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_icache_disable_reclaim(mp);
	xfs_save_resvblks(mp);
	xfs_quiesce_attr(mp);
	return xfs_sync_sb(mp, true);
}

STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_icache_enable_reclaim(mp);
	return 0;
}

STATIC int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	return xfs_showargs(XFS_M(root->d_sb), m);
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);

	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    (mp->m_flags & XFS_MOUNT_NOATTR2)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}
" 1498 "attr2 is always enabled for V5 filesystems."); 1499 return -EINVAL; 1500 } 1501 1502 /* 1503 * mkfs'ed attr2 will turn on attr2 mount unless explicitly 1504 * told by noattr2 to turn it off 1505 */ 1506 if (xfs_sb_version_hasattr2(&mp->m_sb) && 1507 !(mp->m_flags & XFS_MOUNT_NOATTR2)) 1508 mp->m_flags |= XFS_MOUNT_ATTR2; 1509 1510 /* 1511 * prohibit r/w mounts of read-only filesystems 1512 */ 1513 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1514 xfs_warn(mp, 1515 "cannot mount a read-only filesystem as read-write"); 1516 return -EROFS; 1517 } 1518 1519 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 1520 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) && 1521 !xfs_sb_version_has_pquotino(&mp->m_sb)) { 1522 xfs_warn(mp, 1523 "Super block does not support project and group quota together"); 1524 return -EINVAL; 1525 } 1526 1527 return 0; 1528 } 1529 1530 static int 1531 xfs_init_percpu_counters( 1532 struct xfs_mount *mp) 1533 { 1534 int error; 1535 1536 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); 1537 if (error) 1538 return -ENOMEM; 1539 1540 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); 1541 if (error) 1542 goto free_icount; 1543 1544 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); 1545 if (error) 1546 goto free_ifree; 1547 1548 return 0; 1549 1550 free_ifree: 1551 percpu_counter_destroy(&mp->m_ifree); 1552 free_icount: 1553 percpu_counter_destroy(&mp->m_icount); 1554 return -ENOMEM; 1555 } 1556 1557 void 1558 xfs_reinit_percpu_counters( 1559 struct xfs_mount *mp) 1560 { 1561 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); 1562 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); 1563 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); 1564 } 1565 1566 static void 1567 xfs_destroy_percpu_counters( 1568 struct xfs_mount *mp) 1569 { 1570 percpu_counter_destroy(&mp->m_icount); 1571 percpu_counter_destroy(&mp->m_ifree); 1572 percpu_counter_destroy(&mp->m_fdblocks); 1573 } 1574 1575 static struct xfs_mount * 1576 xfs_mount_alloc( 1577 struct super_block *sb) 1578 { 1579 struct xfs_mount *mp; 1580 1581 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); 1582 if (!mp) 1583 return NULL; 1584 1585 mp->m_super = sb; 1586 spin_lock_init(&mp->m_sb_lock); 1587 spin_lock_init(&mp->m_agirotor_lock); 1588 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1589 spin_lock_init(&mp->m_perag_lock); 1590 mutex_init(&mp->m_growlock); 1591 atomic_set(&mp->m_active_trans, 0); 1592 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 1593 INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); 1594 INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); 1595 mp->m_kobj.kobject.kset = xfs_kset; 1596 return mp; 1597 } 1598 1599 1600 STATIC int 1601 xfs_fs_fill_super( 1602 struct super_block *sb, 1603 void *data, 1604 int silent) 1605 { 1606 struct inode *root; 1607 struct xfs_mount *mp = NULL; 1608 int flags = 0, error = -ENOMEM; 1609 1610 /* 1611 * allocate mp and do all low-level struct initializations before we 1612 * attach it to the super 1613 */ 1614 mp = xfs_mount_alloc(sb); 1615 if (!mp) 1616 goto out; 1617 sb->s_fs_info = mp; 1618 1619 error = xfs_parseargs(mp, (char *)data); 1620 if (error) 1621 goto out_free_fsname; 1622 1623 sb_min_blocksize(sb, BBSIZE); 1624 sb->s_xattr = xfs_xattr_handlers; 1625 sb->s_export_op = &xfs_export_operations; 1626 #ifdef CONFIG_XFS_QUOTA 1627 sb->s_qcop = &xfs_quotactl_operations; 1628 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 1629 
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations.
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (silent)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		goto out_free_fsname;

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_close_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_counters;
	}

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
		sb->s_flags |= SB_I_VERSION;

	if (mp->m_flags & XFS_MOUNT_DAX) {
		bool rtdev_is_dax = false, datadev_is_dax;

		xfs_warn(mp,
		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");

		datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
			sb->s_blocksize);
		if (mp->m_rtdev_targp)
			rtdev_is_dax = bdev_dax_supported(
				mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
		if (!rtdev_is_dax && !datadev_is_dax) {
			xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
Turning off DAX."); 1711 mp->m_flags &= ~XFS_MOUNT_DAX; 1712 } 1713 if (xfs_sb_version_hasreflink(&mp->m_sb)) { 1714 xfs_alert(mp, 1715 "DAX and reflink cannot be used together!"); 1716 error = -EINVAL; 1717 goto out_filestream_unmount; 1718 } 1719 } 1720 1721 if (mp->m_flags & XFS_MOUNT_DISCARD) { 1722 struct request_queue *q = bdev_get_queue(sb->s_bdev); 1723 1724 if (!blk_queue_discard(q)) { 1725 xfs_warn(mp, "mounting with \"discard\" option, but " 1726 "the device does not support discard"); 1727 mp->m_flags &= ~XFS_MOUNT_DISCARD; 1728 } 1729 } 1730 1731 if (xfs_sb_version_hasreflink(&mp->m_sb) && mp->m_sb.sb_rblocks) { 1732 xfs_alert(mp, 1733 "reflink not compatible with realtime device!"); 1734 error = -EINVAL; 1735 goto out_filestream_unmount; 1736 } 1737 1738 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { 1739 xfs_alert(mp, 1740 "reverse mapping btree not compatible with realtime device!"); 1741 error = -EINVAL; 1742 goto out_filestream_unmount; 1743 } 1744 1745 error = xfs_mountfs(mp); 1746 if (error) 1747 goto out_filestream_unmount; 1748 1749 root = igrab(VFS_I(mp->m_rootip)); 1750 if (!root) { 1751 error = -ENOENT; 1752 goto out_unmount; 1753 } 1754 sb->s_root = d_make_root(root); 1755 if (!sb->s_root) { 1756 error = -ENOMEM; 1757 goto out_unmount; 1758 } 1759 1760 return 0; 1761 1762 out_filestream_unmount: 1763 xfs_filestream_unmount(mp); 1764 out_free_sb: 1765 xfs_freesb(mp); 1766 out_free_stats: 1767 free_percpu(mp->m_stats.xs_stats); 1768 out_destroy_counters: 1769 xfs_destroy_percpu_counters(mp); 1770 out_destroy_workqueues: 1771 xfs_destroy_mount_workqueues(mp); 1772 out_close_devices: 1773 xfs_close_devices(mp); 1774 out_free_fsname: 1775 sb->s_fs_info = NULL; 1776 xfs_free_fsname(mp); 1777 kfree(mp); 1778 out: 1779 return error; 1780 1781 out_unmount: 1782 xfs_filestream_unmount(mp); 1783 xfs_unmountfs(mp); 1784 goto out_free_sb; 1785 } 1786 1787 STATIC void 1788 xfs_fs_put_super( 1789 struct super_block *sb) 1790 { 1791 struct xfs_mount *mp = XFS_M(sb); 1792 1793 /* if ->fill_super failed, we have no mount to tear down */ 1794 if (!sb->s_fs_info) 1795 return; 1796 1797 xfs_notice(mp, "Unmounting Filesystem"); 1798 xfs_filestream_unmount(mp); 1799 xfs_unmountfs(mp); 1800 1801 xfs_freesb(mp); 1802 free_percpu(mp->m_stats.xs_stats); 1803 xfs_destroy_percpu_counters(mp); 1804 xfs_destroy_mount_workqueues(mp); 1805 xfs_close_devices(mp); 1806 1807 sb->s_fs_info = NULL; 1808 xfs_free_fsname(mp); 1809 kfree(mp); 1810 } 1811 1812 STATIC struct dentry * 1813 xfs_fs_mount( 1814 struct file_system_type *fs_type, 1815 int flags, 1816 const char *dev_name, 1817 void *data) 1818 { 1819 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 1820 } 1821 1822 static long 1823 xfs_fs_nr_cached_objects( 1824 struct super_block *sb, 1825 struct shrink_control *sc) 1826 { 1827 /* Paranoia: catch incorrect calls during mount setup or teardown */ 1828 if (WARN_ON_ONCE(!sb->s_fs_info)) 1829 return 0; 1830 return xfs_reclaim_inodes_count(XFS_M(sb)); 1831 } 1832 1833 static long 1834 xfs_fs_free_cached_objects( 1835 struct super_block *sb, 1836 struct shrink_control *sc) 1837 { 1838 return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); 1839 } 1840 1841 static const struct super_operations xfs_super_operations = { 1842 .alloc_inode = xfs_fs_alloc_inode, 1843 .destroy_inode = xfs_fs_destroy_inode, 1844 .dirty_inode = xfs_fs_dirty_inode, 1845 .drop_inode = xfs_fs_drop_inode, 1846 .put_super = xfs_fs_put_super, 1847 .sync_fs = xfs_fs_sync_fs, 1848 .freeze_fs = 
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.remount_fs		= xfs_fs_remount,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
};

static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.mount			= xfs_fs_mount,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("xfs");

STATIC int __init
xfs_init_zones(void)
{
	if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
			offsetof(struct xfs_ioend, io_inline_bio),
			BIOSET_NEED_BVECS))
		goto out;

	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
						"xfs_log_ticket");
	if (!xfs_log_ticket_zone)
		goto out_free_ioend_bioset;

	xfs_bmap_free_item_zone = kmem_zone_init(
			sizeof(struct xfs_extent_free_item),
			"xfs_bmap_free_item");
	if (!xfs_bmap_free_item_zone)
		goto out_destroy_log_ticket_zone;

	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
						"xfs_btree_cur");
	if (!xfs_btree_cur_zone)
		goto out_destroy_bmap_free_item_zone;

	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
						"xfs_da_state");
	if (!xfs_da_state_zone)
		goto out_destroy_btree_cur_zone;

	xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
	if (!xfs_ifork_zone)
		goto out_destroy_da_state_zone;

	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
	if (!xfs_trans_zone)
		goto out_destroy_ifork_zone;

	/*
	 * The size of the zone allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
					   "xfs_buf_item");
	if (!xfs_buf_item_zone)
		goto out_destroy_trans_zone;

	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
				 sizeof(xfs_extent_t))), "xfs_efd_item");
	if (!xfs_efd_zone)
		goto out_destroy_buf_item_zone;

	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
				sizeof(xfs_extent_t))), "xfs_efi_item");
	if (!xfs_efi_zone)
		goto out_destroy_efd_zone;

	xfs_inode_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
			KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
	if (!xfs_inode_zone)
		goto out_destroy_efi_zone;

	xfs_ili_zone =
		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
					KM_ZONE_SPREAD, NULL);
	if (!xfs_ili_zone)
		goto out_destroy_inode_zone;
	xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
					"xfs_icr");
	if (!xfs_icreate_zone)
		goto out_destroy_ili_zone;

	xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
			"xfs_rud_item");
	if (!xfs_rud_zone)
		goto out_destroy_icreate_zone;

	xfs_rui_zone = kmem_zone_init(
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			"xfs_rui_item");
	if (!xfs_rui_zone)
		goto out_destroy_rud_zone;

	xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
			"xfs_cud_item");
	if (!xfs_cud_zone)
		goto out_destroy_rui_zone;

	xfs_cui_zone = kmem_zone_init(
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			"xfs_cui_item");
	if (!xfs_cui_zone)
		goto out_destroy_cud_zone;

	xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
			"xfs_bud_item");
	if (!xfs_bud_zone)
		goto out_destroy_cui_zone;

	xfs_bui_zone = kmem_zone_init(
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			"xfs_bui_item");
	if (!xfs_bui_zone)
		goto out_destroy_bud_zone;

	return 0;

 out_destroy_bud_zone:
	kmem_zone_destroy(xfs_bud_zone);
 out_destroy_cui_zone:
	kmem_zone_destroy(xfs_cui_zone);
 out_destroy_cud_zone:
	kmem_zone_destroy(xfs_cud_zone);
 out_destroy_rui_zone:
	kmem_zone_destroy(xfs_rui_zone);
 out_destroy_rud_zone:
	kmem_zone_destroy(xfs_rud_zone);
 out_destroy_icreate_zone:
	kmem_zone_destroy(xfs_icreate_zone);
 out_destroy_ili_zone:
	kmem_zone_destroy(xfs_ili_zone);
 out_destroy_inode_zone:
	kmem_zone_destroy(xfs_inode_zone);
 out_destroy_efi_zone:
	kmem_zone_destroy(xfs_efi_zone);
 out_destroy_efd_zone:
	kmem_zone_destroy(xfs_efd_zone);
 out_destroy_buf_item_zone:
	kmem_zone_destroy(xfs_buf_item_zone);
 out_destroy_trans_zone:
	kmem_zone_destroy(xfs_trans_zone);
 out_destroy_ifork_zone:
	kmem_zone_destroy(xfs_ifork_zone);
 out_destroy_da_state_zone:
	kmem_zone_destroy(xfs_da_state_zone);
 out_destroy_btree_cur_zone:
	kmem_zone_destroy(xfs_btree_cur_zone);
 out_destroy_bmap_free_item_zone:
	kmem_zone_destroy(xfs_bmap_free_item_zone);
 out_destroy_log_ticket_zone:
	kmem_zone_destroy(xfs_log_ticket_zone);
 out_free_ioend_bioset:
	bioset_exit(&xfs_ioend_bioset);
 out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_zones(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_zone_destroy(xfs_bui_zone);
	kmem_zone_destroy(xfs_bud_zone);
	kmem_zone_destroy(xfs_cui_zone);
	kmem_zone_destroy(xfs_cud_zone);
	kmem_zone_destroy(xfs_rui_zone);
	kmem_zone_destroy(xfs_rud_zone);
	kmem_zone_destroy(xfs_icreate_zone);
	kmem_zone_destroy(xfs_ili_zone);
	kmem_zone_destroy(xfs_inode_zone);
	kmem_zone_destroy(xfs_efi_zone);
	kmem_zone_destroy(xfs_efd_zone);
	kmem_zone_destroy(xfs_buf_item_zone);
	kmem_zone_destroy(xfs_trans_zone);
	kmem_zone_destroy(xfs_ifork_zone);
	kmem_zone_destroy(xfs_da_state_zone);
	kmem_zone_destroy(xfs_btree_cur_zone);
	kmem_zone_destroy(xfs_bmap_free_item_zone);
	kmem_zone_destroy(xfs_log_ticket_zone);
	bioset_exit(&xfs_ioend_bioset);
}

STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			WQ_MEM_RECLAIM|WQ_FREEZABLE, 0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	xfs_discard_wq = alloc_workqueue("xfsdiscard", WQ_UNBOUND, 0);
	if (!xfs_discard_wq)
		goto out_free_alloc_wq;

	return 0;
out_free_alloc_wq:
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}

STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}

STATIC int __init
init_xfs_fs(void)
{
	int			error;

	xfs_check_ondisk_structs();

	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");

	xfs_extent_free_init_defer_op();
	xfs_rmap_update_init_defer_op();
	xfs_refcount_update_init_defer_op();
	xfs_bmap_update_init_defer_op();

	xfs_dir_startup();

	error = xfs_init_zones();
	if (error)
		goto out;

	error = xfs_init_workqueues();
	if (error)
		goto out_destroy_zones;

	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

	error = xfs_buf_init();
	if (error)
		goto out_mru_cache_uninit;

	error = xfs_init_procfs();
	if (error)
		goto out_buf_terminate;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_sysctl_unregister;
	}

	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
			       "stats");
	if (error)
		goto out_free_stats;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_stats_kobj;
#endif

	error = xfs_qm_init();
	if (error)
		goto out_remove_dbg_kobj;

	error = register_filesystem(&xfs_fs_type);
	if (error)
		goto out_qm_exit;
	return 0;

 out_qm_exit:
	xfs_qm_exit();
 out_remove_dbg_kobj:
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_stats_kobj:
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
	kset_unregister(xfs_kset);
 out_sysctl_unregister:
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
 out_buf_terminate:
	xfs_buf_terminate();
 out_mru_cache_uninit:
	xfs_mru_cache_uninit();
 out_destroy_wq:
	xfs_destroy_workqueues();
 out_destroy_zones:
	xfs_destroy_zones();
 out:
	return error;
}

STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_zones();
	xfs_uuid_table_free();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");