xref: /linux/fs/xfs/xfs_super.c (revision c148bc7535650fbfa95a1f571b9ffa2ab478ea33)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_btree.h"
16 #include "xfs_bmap.h"
17 #include "xfs_alloc.h"
18 #include "xfs_fsops.h"
19 #include "xfs_trans.h"
20 #include "xfs_buf_item.h"
21 #include "xfs_log.h"
22 #include "xfs_log_priv.h"
23 #include "xfs_dir2.h"
24 #include "xfs_extfree_item.h"
25 #include "xfs_mru_cache.h"
26 #include "xfs_inode_item.h"
27 #include "xfs_icache.h"
28 #include "xfs_trace.h"
29 #include "xfs_icreate_item.h"
30 #include "xfs_filestream.h"
31 #include "xfs_quota.h"
32 #include "xfs_sysfs.h"
33 #include "xfs_ondisk.h"
34 #include "xfs_rmap_item.h"
35 #include "xfs_refcount_item.h"
36 #include "xfs_bmap_item.h"
37 #include "xfs_reflink.h"
38 #include "xfs_pwork.h"
39 #include "xfs_ag.h"
40 #include "xfs_defer.h"
41 #include "xfs_attr_item.h"
42 #include "xfs_xattr.h"
43 #include "xfs_iunlink_item.h"
44 #include "xfs_dahash_test.h"
45 #include "xfs_rtbitmap.h"
46 #include "xfs_exchmaps_item.h"
47 #include "xfs_parent.h"
48 #include "xfs_rtalloc.h"
49 #include "xfs_zone_alloc.h"
50 #include "scrub/stats.h"
51 #include "scrub/rcbag_btree.h"
52 
53 #include <linux/magic.h>
54 #include <linux/fs_context.h>
55 #include <linux/fs_parser.h>
56 
57 static const struct super_operations xfs_super_operations;
58 
59 static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
60 static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
61 #ifdef DEBUG
62 static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
63 #endif
64 
65 enum xfs_dax_mode {
66 	XFS_DAX_INODE = 0,
67 	XFS_DAX_ALWAYS = 1,
68 	XFS_DAX_NEVER = 2,
69 };
70 
71 /* Were quota mount options provided?  Must use the upper 16 bits of qflags. */
72 #define XFS_QFLAGS_MNTOPTS	(1U << 31)
73 
74 static void
75 xfs_mount_set_dax_mode(
76 	struct xfs_mount	*mp,
77 	enum xfs_dax_mode	mode)
78 {
79 	switch (mode) {
80 	case XFS_DAX_INODE:
81 		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
82 		break;
83 	case XFS_DAX_ALWAYS:
84 		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
85 		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
86 		break;
87 	case XFS_DAX_NEVER:
88 		mp->m_features |= XFS_FEAT_DAX_NEVER;
89 		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
90 		break;
91 	}
92 }
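
/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * dax tri-state is encoded in two feature bits, so "dax=inode" is simply
 * the absence of both overrides:
 *
 *	static inline bool xfs_dax_is_inode_mode(struct xfs_mount *mp)
 *	{
 *		return !(mp->m_features &
 *			 (XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER));
 *	}
 */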
93 
94 static const struct constant_table dax_param_enums[] = {
95 	{"inode",	XFS_DAX_INODE },
96 	{"always",	XFS_DAX_ALWAYS },
97 	{"never",	XFS_DAX_NEVER },
98 	{}
99 };
100 
101 /*
102  * Table-driven mount option parser.
103  */
104 enum {
105 	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
106 	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
107 	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
108 	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
109 	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
110 	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
111 	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
112 	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
113 	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
114 	Opt_lifetime, Opt_nolifetime,
115 };
116 
117 static const struct fs_parameter_spec xfs_fs_parameters[] = {
118 	fsparam_u32("logbufs",		Opt_logbufs),
119 	fsparam_string("logbsize",	Opt_logbsize),
120 	fsparam_string("logdev",	Opt_logdev),
121 	fsparam_string("rtdev",		Opt_rtdev),
122 	fsparam_flag("wsync",		Opt_wsync),
123 	fsparam_flag("noalign",		Opt_noalign),
124 	fsparam_flag("swalloc",		Opt_swalloc),
125 	fsparam_u32("sunit",		Opt_sunit),
126 	fsparam_u32("swidth",		Opt_swidth),
127 	fsparam_flag("nouuid",		Opt_nouuid),
128 	fsparam_flag("grpid",		Opt_grpid),
129 	fsparam_flag("nogrpid",		Opt_nogrpid),
130 	fsparam_flag("bsdgroups",	Opt_bsdgroups),
131 	fsparam_flag("sysvgroups",	Opt_sysvgroups),
132 	fsparam_string("allocsize",	Opt_allocsize),
133 	fsparam_flag("norecovery",	Opt_norecovery),
134 	fsparam_flag("inode64",		Opt_inode64),
135 	fsparam_flag("inode32",		Opt_inode32),
136 	fsparam_flag("ikeep",		Opt_ikeep),
137 	fsparam_flag("noikeep",		Opt_noikeep),
138 	fsparam_flag("largeio",		Opt_largeio),
139 	fsparam_flag("nolargeio",	Opt_nolargeio),
140 	fsparam_flag("attr2",		Opt_attr2),
141 	fsparam_flag("noattr2",		Opt_noattr2),
142 	fsparam_flag("filestreams",	Opt_filestreams),
143 	fsparam_flag("quota",		Opt_quota),
144 	fsparam_flag("noquota",		Opt_noquota),
145 	fsparam_flag("usrquota",	Opt_usrquota),
146 	fsparam_flag("grpquota",	Opt_grpquota),
147 	fsparam_flag("prjquota",	Opt_prjquota),
148 	fsparam_flag("uquota",		Opt_uquota),
149 	fsparam_flag("gquota",		Opt_gquota),
150 	fsparam_flag("pquota",		Opt_pquota),
151 	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
152 	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
153 	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
154 	fsparam_flag("qnoenforce",	Opt_qnoenforce),
155 	fsparam_flag("discard",		Opt_discard),
156 	fsparam_flag("nodiscard",	Opt_nodiscard),
157 	fsparam_flag("dax",		Opt_dax),
158 	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
159 	fsparam_u32("max_open_zones",	Opt_max_open_zones),
160 	fsparam_flag("lifetime",	Opt_lifetime),
161 	fsparam_flag("nolifetime",	Opt_nolifetime),
162 	{}
163 };
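
/*
 * Example invocations the table above accepts (illustrative only):
 *
 *	mount -o logbufs=8,logbsize=256k /dev/sdb1 /mnt
 *	mount -o dax=never,noquota /dev/pmem0 /mnt
 *
 * Note that "dax" is registered twice, once as a bare flag and once as
 * an enum, so that both "-o dax" and "-o dax=always" parse.
 */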
164 
165 struct proc_xfs_info {
166 	uint64_t	flag;
167 	char		*str;
168 };
169 
170 static int
171 xfs_fs_show_options(
172 	struct seq_file		*m,
173 	struct dentry		*root)
174 {
175 	static struct proc_xfs_info xfs_info_set[] = {
176 		/* the few simple ones we can get from the mount struct */
177 		{ XFS_FEAT_IKEEP,		",ikeep" },
178 		{ XFS_FEAT_WSYNC,		",wsync" },
179 		{ XFS_FEAT_NOALIGN,		",noalign" },
180 		{ XFS_FEAT_SWALLOC,		",swalloc" },
181 		{ XFS_FEAT_NOUUID,		",nouuid" },
182 		{ XFS_FEAT_NORECOVERY,		",norecovery" },
183 		{ XFS_FEAT_ATTR2,		",attr2" },
184 		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
185 		{ XFS_FEAT_GRPID,		",grpid" },
186 		{ XFS_FEAT_DISCARD,		",discard" },
187 		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
188 		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
189 		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
190 		{ XFS_FEAT_NOLIFETIME,		",nolifetime" },
191 		{ 0, NULL }
192 	};
193 	struct xfs_mount	*mp = XFS_M(root->d_sb);
194 	struct proc_xfs_info	*xfs_infop;
195 
196 	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
197 		if (mp->m_features & xfs_infop->flag)
198 			seq_puts(m, xfs_infop->str);
199 	}
200 
201 	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
202 
203 	if (xfs_has_allocsize(mp))
204 		seq_printf(m, ",allocsize=%dk",
205 			   (1 << mp->m_allocsize_log) >> 10);
206 
207 	if (mp->m_logbufs > 0)
208 		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
209 	if (mp->m_logbsize > 0)
210 		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
211 
212 	if (mp->m_logname)
213 		seq_show_option(m, "logdev", mp->m_logname);
214 	if (mp->m_rtname)
215 		seq_show_option(m, "rtdev", mp->m_rtname);
216 
217 	if (mp->m_dalign > 0)
218 		seq_printf(m, ",sunit=%d",
219 				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
220 	if (mp->m_swidth > 0)
221 		seq_printf(m, ",swidth=%d",
222 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
223 
224 	if (mp->m_qflags & XFS_UQUOTA_ENFD)
225 		seq_puts(m, ",usrquota");
226 	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
227 		seq_puts(m, ",uqnoenforce");
228 
229 	if (mp->m_qflags & XFS_PQUOTA_ENFD)
230 		seq_puts(m, ",prjquota");
231 	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
232 		seq_puts(m, ",pqnoenforce");
233 
234 	if (mp->m_qflags & XFS_GQUOTA_ENFD)
235 		seq_puts(m, ",grpquota");
236 	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
237 		seq_puts(m, ",gqnoenforce");
238 
239 	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
240 		seq_puts(m, ",noquota");
241 
242 	if (mp->m_max_open_zones)
243 		seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
244 
245 	return 0;
246 }
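
/*
 * Illustrative /proc/mounts line for a hypothetical mount; the VFS
 * prints the leading "rw" and friends, and the function above appends
 * the XFS-specific options:
 *
 *	/dev/sdb1 /mnt xfs rw,attr2,inode64,logbufs=8,logbsize=256k,noquota 0 0
 */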
247 
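/*
 * Decide whether one AG may hold inodes under the current allocation
 * policy.  Returns true only in the inode32 case, when the highest inode
 * number this AG can produce still fits in 32 bits, so the caller can
 * count the AG towards maxagi.
 */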
248 static bool
249 xfs_set_inode_alloc_perag(
250 	struct xfs_perag	*pag,
251 	xfs_ino_t		ino,
252 	xfs_agnumber_t		max_metadata)
253 {
254 	if (!xfs_is_inode32(pag_mount(pag))) {
255 		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
256 		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
257 		return false;
258 	}
259 
260 	if (ino > XFS_MAXINUMBER_32) {
261 		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
262 		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
263 		return false;
264 	}
265 
266 	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
267 	if (pag_agno(pag) < max_metadata)
268 		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
269 	else
270 		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
271 	return true;
272 }
273 
274 /*
275  * Set parameters for inode allocation heuristics, taking into account
276  * filesystem size and inode32/inode64 mount options; i.e. specifically
277  * whether or not XFS_FEAT_SMALL_INUMS is set.
278  *
279  * Inode allocation patterns are altered only if inode32 is requested
280  * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
281  * If altered, XFS_OPSTATE_INODE32 is set as well.
282  *
283  * An agcount independent of that in the mount structure is provided
284  * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
285  * to the potentially higher ag count.
286  *
287  * Returns the maximum AG index which may contain inodes.
288  */
289 xfs_agnumber_t
290 xfs_set_inode_alloc(
291 	struct xfs_mount *mp,
292 	xfs_agnumber_t	agcount)
293 {
294 	xfs_agnumber_t	index;
295 	xfs_agnumber_t	maxagi = 0;
296 	xfs_sb_t	*sbp = &mp->m_sb;
297 	xfs_agnumber_t	max_metadata;
298 	xfs_agino_t	agino;
299 	xfs_ino_t	ino;
300 
301 	/*
302 	 * Calculate how much should be reserved for inodes to meet
303 	 * the max inode percentage.  Used only for inode32.
304 	 */
305 	if (M_IGEO(mp)->maxicount) {
306 		uint64_t	icount;
307 
308 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
309 		do_div(icount, 100);
310 		icount += sbp->sb_agblocks - 1;
311 		do_div(icount, sbp->sb_agblocks);
312 		max_metadata = icount;
313 	} else {
314 		max_metadata = agcount;
315 	}
316 
317 	/* Get the last possible inode in the filesystem */
318 	agino =	XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
319 	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
320 
321 	/*
322 	 * If user asked for no more than 32-bit inodes, and the fs is
323 	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
324 	 * the allocator to accommodate the request.
325 	 */
326 	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
327 		xfs_set_inode32(mp);
328 	else
329 		xfs_clear_inode32(mp);
330 
331 	for (index = 0; index < agcount; index++) {
332 		struct xfs_perag	*pag;
333 
334 		ino = XFS_AGINO_TO_INO(mp, index, agino);
335 
336 		pag = xfs_perag_get(mp, index);
337 		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
338 			maxagi++;
339 		xfs_perag_put(pag);
340 	}
341 
342 	return xfs_is_inode32(mp) ? maxagi : agcount;
343 }
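
/*
 * Worked example for the max_metadata calculation above (hypothetical
 * geometry): sb_dblocks = 1000000, sb_imax_pct = 25 and sb_agblocks =
 * 100000 reserve 250000 blocks for inodes, which rounds up to
 * ceil(250000 / 100000) = 3 AGs preferred for metadata.
 */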
344 
345 static int
346 xfs_setup_dax_always(
347 	struct xfs_mount	*mp)
348 {
349 	if (!mp->m_ddev_targp->bt_daxdev &&
350 	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
351 		xfs_alert(mp,
352 			"DAX unsupported by block device. Turning off DAX.");
353 		goto disable_dax;
354 	}
355 
356 	if (mp->m_super->s_blocksize != PAGE_SIZE) {
357 		xfs_alert(mp,
358 			"DAX not supported for blocksize. Turning off DAX.");
359 		goto disable_dax;
360 	}
361 
362 	if (xfs_has_reflink(mp) &&
363 	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
364 		xfs_alert(mp,
365 			"DAX and reflink cannot work with multi-partitions!");
366 		return -EINVAL;
367 	}
368 
369 	return 0;
370 
371 disable_dax:
372 	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
373 	return 0;
374 }
375 
376 STATIC int
377 xfs_blkdev_get(
378 	xfs_mount_t		*mp,
379 	const char		*name,
380 	struct file		**bdev_filep)
381 {
382 	int			error = 0;
383 
384 	*bdev_filep = bdev_file_open_by_path(name,
385 		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
386 		mp->m_super, &fs_holder_ops);
387 	if (IS_ERR(*bdev_filep)) {
388 		error = PTR_ERR(*bdev_filep);
389 		*bdev_filep = NULL;
390 		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
391 	}
392 
393 	return error;
394 }
395 
396 STATIC void
397 xfs_shutdown_devices(
398 	struct xfs_mount	*mp)
399 {
400 	/*
401 	 * Udev is triggered whenever anyone closes a block device or unmounts
402 	 * a file system on a block device.
403 	 * The default udev rules invoke blkid to read the fs super and create
404 	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
405 	 * reads through the page cache.
406 	 *
407 	 * xfs_db also uses buffered reads to examine metadata.  There is no
408 	 * coordination between xfs_db and udev, which means that they can run
409 	 * concurrently.  Note there is no coordination between the kernel and
410 	 * blkid either.
411 	 *
412 	 * On a system with 64k pages, the page cache can cache the superblock
413 	 * and the root inode (and hence the root directory) with the same 64k
414 	 * page.  If udev spawns blkid after the mkfs and the system is busy
415 	 * enough that it is still running when xfs_db starts up, they'll both
416 	 * read from the same page in the pagecache.
417 	 *
418 	 * The unmount writes updated inode metadata to disk directly.  The XFS
419 	 * buffer cache does not use the bdev pagecache, so it needs to
420 	 * invalidate that pagecache on unmount.  If the above scenario occurs,
421 	 * the pagecache no longer reflects what's on disk, xfs_db reads the
422 	 * stale metadata, and fails to find /a.  Most of the time this succeeds
423 	 * because closing a bdev invalidates the page cache, but when processes
424 	 * race, everyone loses.
425 	 */
426 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
427 		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
428 		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
429 	}
430 	if (mp->m_rtdev_targp) {
431 		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
432 		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
433 	}
434 	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
435 	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
436 }
437 
438 /*
439  * The file system configurations are:
440  *	(1) device (partition) with data and internal log
441  *	(2) logical volume with data and log subvolumes.
442  *	(3) logical volume with data, log, and realtime subvolumes.
443  *
444  * We only have to handle opening the log and realtime volumes here if
445  * they are present.  The data subvolume has already been opened by
446  * the VFS via get_tree_bdev() and is stored in sb->s_bdev.
447  */
448 STATIC int
449 xfs_open_devices(
450 	struct xfs_mount	*mp)
451 {
452 	struct super_block	*sb = mp->m_super;
453 	struct block_device	*ddev = sb->s_bdev;
454 	struct file		*logdev_file = NULL, *rtdev_file = NULL;
455 	int			error;
456 
457 	/*
458 	 * Open real time and log devices - order is important.
459 	 */
460 	if (mp->m_logname) {
461 		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
462 		if (error)
463 			return error;
464 	}
465 
466 	if (mp->m_rtname) {
467 		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
468 		if (error)
469 			goto out_close_logdev;
470 
471 		if (file_bdev(rtdev_file) == ddev ||
472 		    (logdev_file &&
473 		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
474 			xfs_warn(mp,
475 	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
476 			error = -EINVAL;
477 			goto out_close_rtdev;
478 		}
479 	}
480 
481 	/*
482 	 * Set up xfs_mount buffer target pointers
483 	 */
484 	error = -ENOMEM;
485 	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
486 	if (!mp->m_ddev_targp)
487 		goto out_close_rtdev;
488 
489 	if (rtdev_file) {
490 		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
491 		if (!mp->m_rtdev_targp)
492 			goto out_free_ddev_targ;
493 	}
494 
495 	if (logdev_file && file_bdev(logdev_file) != ddev) {
496 		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
497 		if (!mp->m_logdev_targp)
498 			goto out_free_rtdev_targ;
499 	} else {
500 		mp->m_logdev_targp = mp->m_ddev_targp;
501 		/* Handle won't be used, drop it */
502 		if (logdev_file)
503 			bdev_fput(logdev_file);
504 	}
505 
506 	return 0;
507 
508  out_free_rtdev_targ:
509 	if (mp->m_rtdev_targp)
510 		xfs_free_buftarg(mp->m_rtdev_targp);
511  out_free_ddev_targ:
512 	xfs_free_buftarg(mp->m_ddev_targp);
513  out_close_rtdev:
514 	if (rtdev_file)
515 		bdev_fput(rtdev_file);
516  out_close_logdev:
517 	if (logdev_file)
518 		bdev_fput(logdev_file);
519 	return error;
520 }
521 
522 /*
523  * Set up xfs_mount buffer target pointers based on the superblock
524  */
525 STATIC int
526 xfs_setup_devices(
527 	struct xfs_mount	*mp)
528 {
529 	int			error;
530 
531 	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
532 	if (error)
533 		return error;
534 
535 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
536 		unsigned int	log_sector_size = BBSIZE;
537 
538 		if (xfs_has_sector(mp))
539 			log_sector_size = mp->m_sb.sb_logsectsize;
540 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
541 					    log_sector_size);
542 		if (error)
543 			return error;
544 	}
545 
546 	if (mp->m_sb.sb_rtstart) {
547 		if (mp->m_rtdev_targp) {
548 			xfs_warn(mp,
549 		"can't use internal and external rtdev at the same time");
550 			return -EINVAL;
551 		}
552 		mp->m_rtdev_targp = mp->m_ddev_targp;
553 	} else if (mp->m_rtname) {
554 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
555 					    mp->m_sb.sb_sectsize);
556 		if (error)
557 			return error;
558 	}
559 
560 	return 0;
561 }
562 
563 STATIC int
564 xfs_init_mount_workqueues(
565 	struct xfs_mount	*mp)
566 {
567 	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
568 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
569 			1, mp->m_super->s_id);
570 	if (!mp->m_buf_workqueue)
571 		goto out;
572 
573 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
574 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
575 			0, mp->m_super->s_id);
576 	if (!mp->m_unwritten_workqueue)
577 		goto out_destroy_buf;
578 
579 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
580 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
581 			0, mp->m_super->s_id);
582 	if (!mp->m_reclaim_workqueue)
583 		goto out_destroy_unwritten;
584 
585 	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
586 			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
587 			0, mp->m_super->s_id);
588 	if (!mp->m_blockgc_wq)
589 		goto out_destroy_reclaim;
590 
591 	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
592 			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
593 			1, mp->m_super->s_id);
594 	if (!mp->m_inodegc_wq)
595 		goto out_destroy_blockgc;
596 
597 	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
598 			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
599 	if (!mp->m_sync_workqueue)
600 		goto out_destroy_inodegc;
601 
602 	return 0;
603 
604 out_destroy_inodegc:
605 	destroy_workqueue(mp->m_inodegc_wq);
606 out_destroy_blockgc:
607 	destroy_workqueue(mp->m_blockgc_wq);
608 out_destroy_reclaim:
609 	destroy_workqueue(mp->m_reclaim_workqueue);
610 out_destroy_unwritten:
611 	destroy_workqueue(mp->m_unwritten_workqueue);
612 out_destroy_buf:
613 	destroy_workqueue(mp->m_buf_workqueue);
614 out:
615 	return -ENOMEM;
616 }
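
/*
 * The function above uses the kernel's stacked-goto unwind idiom: each
 * failure label releases exactly what was acquired before the failing
 * step, in reverse order.  A minimal sketch of the pattern:
 *
 *	a = alloc_a();
 *	if (!a)
 *		goto out;
 *	b = alloc_b();
 *	if (!b)
 *		goto out_free_a;
 *	return 0;
 *
 *	out_free_a:
 *		free_a(a);
 *	out:
 *		return -ENOMEM;
 */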
617 
618 STATIC void
619 xfs_destroy_mount_workqueues(
620 	struct xfs_mount	*mp)
621 {
622 	destroy_workqueue(mp->m_sync_workqueue);
623 	destroy_workqueue(mp->m_blockgc_wq);
624 	destroy_workqueue(mp->m_inodegc_wq);
625 	destroy_workqueue(mp->m_reclaim_workqueue);
626 	destroy_workqueue(mp->m_unwritten_workqueue);
627 	destroy_workqueue(mp->m_buf_workqueue);
628 }
629 
630 static void
631 xfs_flush_inodes_worker(
632 	struct work_struct	*work)
633 {
634 	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
635 						   m_flush_inodes_work);
636 	struct super_block	*sb = mp->m_super;
637 
638 	if (down_read_trylock(&sb->s_umount)) {
639 		sync_inodes_sb(sb);
640 		up_read(&sb->s_umount);
641 	}
642 }
643 
644 /*
645  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
646  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
647  * for IO to complete so that we effectively throttle multiple callers to the
648  * rate at which IO is completing.
649  */
650 void
651 xfs_flush_inodes(
652 	struct xfs_mount	*mp)
653 {
654 	/*
655 	 * If flush_work() returns true then that means we waited for a flush
656 	 * which was already in progress.  Don't bother running another scan.
657 	 */
658 	if (flush_work(&mp->m_flush_inodes_work))
659 		return;
660 
661 	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
662 	flush_work(&mp->m_flush_inodes_work);
663 }
664 
665 /* Catch misguided souls that try to use this interface on XFS */
666 STATIC struct inode *
667 xfs_fs_alloc_inode(
668 	struct super_block	*sb)
669 {
670 	BUG();
671 	return NULL;
672 }
673 
674 /*
675  * Now that the generic code is guaranteed not to be accessing
676  * the linux inode, we can inactivate and reclaim the inode.
677  */
678 STATIC void
679 xfs_fs_destroy_inode(
680 	struct inode		*inode)
681 {
682 	struct xfs_inode	*ip = XFS_I(inode);
683 
684 	trace_xfs_destroy_inode(ip);
685 
686 	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
687 	XFS_STATS_INC(ip->i_mount, vn_rele);
688 	XFS_STATS_INC(ip->i_mount, vn_remove);
689 	xfs_inode_mark_reclaimable(ip);
690 }
691 
692 static void
693 xfs_fs_dirty_inode(
694 	struct inode			*inode,
695 	int				flags)
696 {
697 	struct xfs_inode		*ip = XFS_I(inode);
698 	struct xfs_mount		*mp = ip->i_mount;
699 	struct xfs_trans		*tp;
700 
701 	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
702 		return;
703 
704 	/*
705 	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
706 	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
707 	 * in flags possibly together with I_DIRTY_SYNC.
708 	 */
709 	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
710 		return;
711 
712 	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
713 		return;
714 	xfs_ilock(ip, XFS_ILOCK_EXCL);
715 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
716 	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
717 	xfs_trans_commit(tp);
718 }
719 
720 /*
721  * Slab object creation initialisation for the XFS inode.
722  * This covers only the idempotent fields in the XFS inode;
723  * all other fields need to be initialised on allocation
724  * from the slab. This avoids the need to repeatedly initialise
725  * fields in the xfs inode that are left in the initialised state
726  * when the inode is freed.
727  */
728 STATIC void
729 xfs_fs_inode_init_once(
730 	void			*inode)
731 {
732 	struct xfs_inode	*ip = inode;
733 
734 	memset(ip, 0, sizeof(struct xfs_inode));
735 
736 	/* vfs inode */
737 	inode_init_once(VFS_I(ip));
738 
739 	/* xfs inode */
740 	atomic_set(&ip->i_pincount, 0);
741 	spin_lock_init(&ip->i_flags_lock);
742 	init_rwsem(&ip->i_lock);
743 }
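
/*
 * A slab constructor such as the one above runs when an object's memory
 * is first set up in a new slab, not on every allocation.  The inode
 * cache itself is created elsewhere in XFS; a representative (assumed)
 * registration would be:
 *
 *	cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode),
 *			0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT,
 *			xfs_fs_inode_init_once);
 */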
744 
745 /*
746  * We do an unlocked check for XFS_IDONTCACHE here because we are already
747  * serialised against cache hits here via the inode->i_lock and igrab() in
748  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
749  * racing with us, and it avoids needing to grab a spinlock here for every inode
750  * we drop the final reference on.
751  */
752 STATIC int
753 xfs_fs_drop_inode(
754 	struct inode		*inode)
755 {
756 	struct xfs_inode	*ip = XFS_I(inode);
757 
758 	/*
759 	 * If this unlinked inode is in the middle of recovery, don't
760 	 * drop the inode just yet; log recovery will take care of
761 	 * that.  See the comment for this inode flag.
762 	 */
763 	if (ip->i_flags & XFS_IRECOVERY) {
764 		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
765 		return 0;
766 	}
767 
768 	return generic_drop_inode(inode);
769 }
770 
771 static void
772 xfs_mount_free(
773 	struct xfs_mount	*mp)
774 {
775 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
776 		xfs_free_buftarg(mp->m_logdev_targp);
777 	if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp)
778 		xfs_free_buftarg(mp->m_rtdev_targp);
779 	if (mp->m_ddev_targp)
780 		xfs_free_buftarg(mp->m_ddev_targp);
781 
782 	debugfs_remove(mp->m_debugfs);
783 	kfree(mp->m_rtname);
784 	kfree(mp->m_logname);
785 	kfree(mp);
786 }
787 
788 STATIC int
789 xfs_fs_sync_fs(
790 	struct super_block	*sb,
791 	int			wait)
792 {
793 	struct xfs_mount	*mp = XFS_M(sb);
794 	int			error;
795 
796 	trace_xfs_fs_sync_fs(mp, __return_address);
797 
798 	/*
799 	 * Doing anything during the async pass would be counterproductive.
800 	 */
801 	if (!wait)
802 		return 0;
803 
804 	error = xfs_log_force(mp, XFS_LOG_SYNC);
805 	if (error)
806 		return error;
807 
808 	if (laptop_mode) {
809 		/*
810 		 * The disk must be active because we're syncing.
811 		 * We schedule log work now (now that the disk is
812 		 * active) instead of later (when it might not be).
813 		 */
814 		flush_delayed_work(&mp->m_log->l_work);
815 	}
816 
817 	/*
818 	 * If we are called with page faults frozen out, it means we are about
819 	 * to freeze the transaction subsystem. Take the opportunity to shut
820 	 * down inodegc because once SB_FREEZE_FS is set it's too late to
821 	 * prevent inactivation races with freeze. The fs doesn't get called
822 	 * again by the freezing process until after SB_FREEZE_FS has been set,
823 	 * so it's now or never.  Same logic applies to speculative allocation
824 	 * garbage collection.
825 	 *
826 	 * We don't care if this is a normal syncfs call that does this or
827 	 * freeze that does this - we can run this multiple times without issue
828 	 * and we won't race with a restart because a restart can only occur
829 	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
830 	 */
831 	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
832 		xfs_inodegc_stop(mp);
833 		xfs_blockgc_stop(mp);
834 		xfs_zone_gc_stop(mp);
835 	}
836 
837 	return 0;
838 }
839 
840 static xfs_extlen_t
841 xfs_internal_log_size(
842 	struct xfs_mount	*mp)
843 {
844 	if (!mp->m_sb.sb_logstart)
845 		return 0;
846 	return mp->m_sb.sb_logblocks;
847 }
848 
849 static void
850 xfs_statfs_data(
851 	struct xfs_mount	*mp,
852 	struct kstatfs		*st)
853 {
854 	int64_t			fdblocks =
855 		xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
856 
857 	/* make sure st->f_bfree does not underflow */
858 	st->f_bfree = max(0LL,
859 		fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
860 
861 	/*
862 	 * sb_dblocks can change while a growfs is in progress, and nobody
863 	 * cares whether the old or the new value is reported in that window.
864 	 */
865 	st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp);
866 }
867 
868 /*
869  * When stat(v)fs is called on a file with the realtime bit set or a directory
870  * with the rtinherit bit, report freespace information for the RT device
871  * instead of the main data device.
872  */
873 static void
874 xfs_statfs_rt(
875 	struct xfs_mount	*mp,
876 	struct kstatfs		*st)
877 {
878 	st->f_bfree = xfs_rtbxlen_to_blen(mp,
879 			xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
880 	st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp,
881 			mp->m_free[XC_FREE_RTEXTENTS].res_total);
882 }
883 
884 static void
885 xfs_statfs_inodes(
886 	struct xfs_mount	*mp,
887 	struct kstatfs		*st)
888 {
889 	uint64_t		icount = percpu_counter_sum(&mp->m_icount);
890 	uint64_t		ifree = percpu_counter_sum(&mp->m_ifree);
891 	uint64_t		fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree);
892 
893 	st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
894 	if (M_IGEO(mp)->maxicount)
895 		st->f_files = min_t(typeof(st->f_files), st->f_files,
896 					M_IGEO(mp)->maxicount);
897 
898 	/* If sb_icount overshot maxicount, report actual allocation */
899 	st->f_files = max_t(typeof(st->f_files), st->f_files,
900 			mp->m_sb.sb_icount);
901 
902 	/* Make sure st->f_ffree does not underflow */
903 	st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree));
904 }
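
/*
 * Worked example for fakeinos above (hypothetical geometry): with 4 KiB
 * blocks and 512-byte inodes, one free block can hold 8 new inodes, so
 * f_bfree = 1000 contributes fakeinos = 8000 potential inodes to
 * f_files before the maxicount clamp is applied.
 */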
905 
906 STATIC int
907 xfs_fs_statfs(
908 	struct dentry		*dentry,
909 	struct kstatfs		*st)
910 {
911 	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
912 	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
913 
914 	/*
915 	 * Expedite background inodegc but don't wait. We do not want to block
916 	 * here waiting hours for a billion extent file to be truncated.
917 	 */
918 	xfs_inodegc_push(mp);
919 
920 	st->f_type = XFS_SUPER_MAGIC;
921 	st->f_namelen = MAXNAMELEN - 1;
922 	st->f_bsize = mp->m_sb.sb_blocksize;
923 	st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev));
924 
925 	xfs_statfs_data(mp, st);
926 	xfs_statfs_inodes(mp, st);
927 
928 	if (XFS_IS_REALTIME_MOUNT(mp) &&
929 	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
930 		xfs_statfs_rt(mp, st);
931 
932 	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
933 	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
934 			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
935 		xfs_qm_statvfs(ip, st);
936 
937 	/*
938 	 * XFS does not distinguish between blocks available to privileged and
939 	 * unprivileged users.
940 	 */
941 	st->f_bavail = st->f_bfree;
942 	return 0;
943 }
944 
945 STATIC void
946 xfs_save_resvblks(
947 	struct xfs_mount	*mp)
948 {
949 	enum xfs_free_counter	i;
950 
951 	for (i = 0; i < XC_FREE_NR; i++) {
952 		mp->m_free[i].res_saved = mp->m_free[i].res_total;
953 		xfs_reserve_blocks(mp, i, 0);
954 	}
955 }
956 
957 STATIC void
958 xfs_restore_resvblks(
959 	struct xfs_mount	*mp)
960 {
961 	uint64_t		resblks;
962 	enum xfs_free_counter	i;
963 
964 	for (i = 0; i < XC_FREE_NR; i++) {
965 		if (mp->m_free[i].res_saved) {
966 			resblks = mp->m_free[i].res_saved;
967 			mp->m_free[i].res_saved = 0;
968 		} else
969 			resblks = xfs_default_resblks(mp, i);
970 		xfs_reserve_blocks(mp, i, resblks);
971 	}
972 }
973 
974 /*
975  * Second stage of a freeze. The data is already frozen so we only
976  * need to take care of the metadata. Once that's done sync the superblock
977  * to the log to dirty it in case of a crash while frozen. This ensures that we
978  * will recover the unlinked inode lists on the next mount.
979  */
980 STATIC int
981 xfs_fs_freeze(
982 	struct super_block	*sb)
983 {
984 	struct xfs_mount	*mp = XFS_M(sb);
985 	unsigned int		flags;
986 	int			ret;
987 
988 	/*
989 	 * The filesystem is now frozen far enough that memory reclaim
990 	 * cannot safely operate on the filesystem. Hence we need to
991 	 * set a GFP_NOFS context here to avoid recursion deadlocks.
992 	 */
993 	flags = memalloc_nofs_save();
994 	xfs_save_resvblks(mp);
995 	ret = xfs_log_quiesce(mp);
996 	memalloc_nofs_restore(flags);
997 
998 	/*
999 	 * For read-write filesystems, we need to restart the inodegc on error
1000 	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
1001 	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
1002 	 * here, so we can restart safely without racing with a stop in
1003 	 * xfs_fs_sync_fs().
1004 	 */
1005 	if (ret && !xfs_is_readonly(mp)) {
1006 		xfs_blockgc_start(mp);
1007 		xfs_inodegc_start(mp);
1008 		xfs_zone_gc_start(mp);
1009 	}
1010 
1011 	return ret;
1012 }
1013 
1014 STATIC int
1015 xfs_fs_unfreeze(
1016 	struct super_block	*sb)
1017 {
1018 	struct xfs_mount	*mp = XFS_M(sb);
1019 
1020 	xfs_restore_resvblks(mp);
1021 	xfs_log_work_queue(mp);
1022 
1023 	/*
1024 	 * Don't reactivate the inodegc worker on a readonly filesystem because
1025 	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
1026 	 * worker because there are no speculative preallocations on a readonly
1027 	 * filesystem.
1028 	 */
1029 	if (!xfs_is_readonly(mp)) {
1030 		xfs_zone_gc_start(mp);
1031 		xfs_blockgc_start(mp);
1032 		xfs_inodegc_start(mp);
1033 	}
1034 
1035 	return 0;
1036 }
1037 
1038 /*
1039  * This function fills in xfs_mount_t fields based on mount args.
1040  * Note: the superblock _has_ now been read in.
1041  */
1042 STATIC int
1043 xfs_finish_flags(
1044 	struct xfs_mount	*mp)
1045 {
1046 	/* Fail a mount where the logbuf is smaller than the log stripe */
1047 	if (xfs_has_logv2(mp)) {
1048 		if (mp->m_logbsize <= 0 &&
1049 		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1050 			mp->m_logbsize = mp->m_sb.sb_logsunit;
1051 		} else if (mp->m_logbsize > 0 &&
1052 			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
1053 			xfs_warn(mp,
1054 		"logbuf size must be greater than or equal to log stripe size");
1055 			return -EINVAL;
1056 		}
1057 	} else {
1058 		/* Fail a mount if the logbuf is larger than 32K */
1059 		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1060 			xfs_warn(mp,
1061 		"logbuf size for version 1 logs must be 16K or 32K");
1062 			return -EINVAL;
1063 		}
1064 	}
1065 
1066 	/*
1067 	 * V5 filesystems always use attr2 format for attributes.
1068 	 */
1069 	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
1070 		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1071 			     "attr2 is always enabled for V5 filesystems.");
1072 		return -EINVAL;
1073 	}
1074 
1075 	/*
1076 	 * prohibit r/w mounts of read-only filesystems
1077 	 */
1078 	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
1079 		xfs_warn(mp,
1080 			"cannot mount a read-only filesystem as read-write");
1081 		return -EROFS;
1082 	}
1083 
1084 	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
1085 	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
1086 	    !xfs_has_pquotino(mp)) {
1087 		xfs_warn(mp,
1088 		  "Super block does not support project and group quota together");
1089 		return -EINVAL;
1090 	}
1091 
1092 	if (!xfs_has_zoned(mp)) {
1093 		if (mp->m_max_open_zones) {
1094 			xfs_warn(mp,
1095 "max_open_zones mount option only supported on zoned file systems.");
1096 			return -EINVAL;
1097 		}
1098 		if (mp->m_features & XFS_FEAT_NOLIFETIME) {
1099 			xfs_warn(mp,
1100 "nolifetime mount option only supported on zoned file systems.");
1101 			return -EINVAL;
1102 		}
1103 	}
1104 
1105 	return 0;
1106 }
1107 
1108 static int
1109 xfs_init_percpu_counters(
1110 	struct xfs_mount	*mp)
1111 {
1112 	int			error;
1113 	int			i;
1114 
1115 	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1116 	if (error)
1117 		return -ENOMEM;
1118 
1119 	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1120 	if (error)
1121 		goto free_icount;
1122 
1123 	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
1124 	if (error)
1125 		goto free_ifree;
1126 
1127 	error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
1128 	if (error)
1129 		goto free_delalloc;
1130 
1131 	for (i = 0; i < XC_FREE_NR; i++) {
1132 		error = percpu_counter_init(&mp->m_free[i].count, 0,
1133 				GFP_KERNEL);
1134 		if (error)
1135 			goto free_freecounters;
1136 	}
1137 
1138 	return 0;
1139 
1140 free_freecounters:
1141 	while (--i >= 0)
1142 		percpu_counter_destroy(&mp->m_free[i].count);
1143 	percpu_counter_destroy(&mp->m_delalloc_rtextents);
1144 free_delalloc:
1145 	percpu_counter_destroy(&mp->m_delalloc_blks);
1146 free_ifree:
1147 	percpu_counter_destroy(&mp->m_ifree);
1148 free_icount:
1149 	percpu_counter_destroy(&mp->m_icount);
1150 	return -ENOMEM;
1151 }
1152 
1153 void
1154 xfs_reinit_percpu_counters(
1155 	struct xfs_mount	*mp)
1156 {
1157 	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1158 	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1159 	xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
1160 	if (!xfs_has_zoned(mp))
1161 		xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
1162 				mp->m_sb.sb_frextents);
1163 }
1164 
1165 static void
1166 xfs_destroy_percpu_counters(
1167 	struct xfs_mount	*mp)
1168 {
1169 	enum xfs_free_counter	i;
1170 
1171 	for (i = 0; i < XC_FREE_NR; i++)
1172 		percpu_counter_destroy(&mp->m_free[i].count);
1173 	percpu_counter_destroy(&mp->m_icount);
1174 	percpu_counter_destroy(&mp->m_ifree);
1175 	ASSERT(xfs_is_shutdown(mp) ||
1176 	       percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
1177 	percpu_counter_destroy(&mp->m_delalloc_rtextents);
1178 	ASSERT(xfs_is_shutdown(mp) ||
1179 	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
1180 	percpu_counter_destroy(&mp->m_delalloc_blks);
1181 }
1182 
1183 static int
1184 xfs_inodegc_init_percpu(
1185 	struct xfs_mount	*mp)
1186 {
1187 	struct xfs_inodegc	*gc;
1188 	int			cpu;
1189 
1190 	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
1191 	if (!mp->m_inodegc)
1192 		return -ENOMEM;
1193 
1194 	for_each_possible_cpu(cpu) {
1195 		gc = per_cpu_ptr(mp->m_inodegc, cpu);
1196 		gc->cpu = cpu;
1197 		gc->mp = mp;
1198 		init_llist_head(&gc->list);
1199 		gc->items = 0;
1200 		gc->error = 0;
1201 		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
1202 	}
1203 	return 0;
1204 }
1205 
1206 static void
1207 xfs_inodegc_free_percpu(
1208 	struct xfs_mount	*mp)
1209 {
1210 	if (!mp->m_inodegc)
1211 		return;
1212 	free_percpu(mp->m_inodegc);
1213 }
1214 
1215 static void
1216 xfs_fs_put_super(
1217 	struct super_block	*sb)
1218 {
1219 	struct xfs_mount	*mp = XFS_M(sb);
1220 
1221 	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
1222 	xfs_filestream_unmount(mp);
1223 	xfs_unmountfs(mp);
1224 
1225 	xfs_rtmount_freesb(mp);
1226 	xfs_freesb(mp);
1227 	xchk_mount_stats_free(mp);
1228 	free_percpu(mp->m_stats.xs_stats);
1229 	xfs_inodegc_free_percpu(mp);
1230 	xfs_destroy_percpu_counters(mp);
1231 	xfs_destroy_mount_workqueues(mp);
1232 	xfs_shutdown_devices(mp);
1233 }
1234 
1235 static long
1236 xfs_fs_nr_cached_objects(
1237 	struct super_block	*sb,
1238 	struct shrink_control	*sc)
1239 {
1240 	/* Paranoia: catch incorrect calls during mount setup or teardown */
1241 	if (WARN_ON_ONCE(!sb->s_fs_info))
1242 		return 0;
1243 	return xfs_reclaim_inodes_count(XFS_M(sb));
1244 }
1245 
1246 static long
1247 xfs_fs_free_cached_objects(
1248 	struct super_block	*sb,
1249 	struct shrink_control	*sc)
1250 {
1251 	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1252 }
1253 
1254 static void
1255 xfs_fs_shutdown(
1256 	struct super_block	*sb)
1257 {
1258 	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
1259 }
1260 
1261 static int
1262 xfs_fs_show_stats(
1263 	struct seq_file		*m,
1264 	struct dentry		*root)
1265 {
1266 	struct xfs_mount	*mp = XFS_M(root->d_sb);
1267 
1268 	if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT))
1269 		xfs_zoned_show_stats(m, mp);
1270 	return 0;
1271 }
1272 
1273 static const struct super_operations xfs_super_operations = {
1274 	.alloc_inode		= xfs_fs_alloc_inode,
1275 	.destroy_inode		= xfs_fs_destroy_inode,
1276 	.dirty_inode		= xfs_fs_dirty_inode,
1277 	.drop_inode		= xfs_fs_drop_inode,
1278 	.put_super		= xfs_fs_put_super,
1279 	.sync_fs		= xfs_fs_sync_fs,
1280 	.freeze_fs		= xfs_fs_freeze,
1281 	.unfreeze_fs		= xfs_fs_unfreeze,
1282 	.statfs			= xfs_fs_statfs,
1283 	.show_options		= xfs_fs_show_options,
1284 	.nr_cached_objects	= xfs_fs_nr_cached_objects,
1285 	.free_cached_objects	= xfs_fs_free_cached_objects,
1286 	.shutdown		= xfs_fs_shutdown,
1287 	.show_stats		= xfs_fs_show_stats,
1288 };
1289 
1290 static int
1291 suffix_kstrtoint(
1292 	const char	*s,
1293 	unsigned int	base,
1294 	int		*res)
1295 {
1296 	int		last, shift_left_factor = 0, _res = 0;
1297 	char		*value;
1298 	int		ret = 0;
1299 
1300 	value = kstrdup(s, GFP_KERNEL);
1301 	if (!value)
1302 		return -ENOMEM;
1303 
1304 	last = strlen(value) - 1;
1305 	if (value[last] == 'K' || value[last] == 'k') {
1306 		shift_left_factor = 10;
1307 		value[last] = '\0';
1308 	}
1309 	if (value[last] == 'M' || value[last] == 'm') {
1310 		shift_left_factor = 20;
1311 		value[last] = '\0';
1312 	}
1313 	if (value[last] == 'G' || value[last] == 'g') {
1314 		shift_left_factor = 30;
1315 		value[last] = '\0';
1316 	}
1317 
1318 	if (kstrtoint(value, base, &_res))
1319 		ret = -EINVAL;
1320 	kfree(value);
1321 	*res = _res << shift_left_factor;
1322 	return ret;
1323 }
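
/*
 * Examples: "32k" parses to 32 << 10 == 32768 and "1g" to 1 << 30, while
 * a bare "32768" is returned unchanged.  The suffix checks are
 * sequential ifs, but each match overwrites the suffix byte with NUL, so
 * at most one shift ever applies.
 */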
1324 
1325 static inline void
1326 xfs_fs_warn_deprecated(
1327 	struct fs_context	*fc,
1328 	struct fs_parameter	*param,
1329 	uint64_t		flag,
1330 	bool			value)
1331 {
1332 	/*
1333 	 * Don't warn on reconfigure if the mount already had this flag set.
1334 	 */
1335 	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
1336 	    !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
1337 		return;
1338 	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
1339 }
1340 
1341 /*
1342  * Set mount state from a mount option.
1343  *
1344  * NOTE: mp->m_super is NULL here!
1345  */
1346 static int
1347 xfs_fs_parse_param(
1348 	struct fs_context	*fc,
1349 	struct fs_parameter	*param)
1350 {
1351 	struct xfs_mount	*parsing_mp = fc->s_fs_info;
1352 	struct fs_parse_result	result;
1353 	int			size = 0;
1354 	int			opt;
1355 
1356 	BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);
1357 
1358 	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
1359 	if (opt < 0)
1360 		return opt;
1361 
1362 	switch (opt) {
1363 	case Opt_logbufs:
1364 		parsing_mp->m_logbufs = result.uint_32;
1365 		return 0;
1366 	case Opt_logbsize:
1367 		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
1368 			return -EINVAL;
1369 		return 0;
1370 	case Opt_logdev:
1371 		kfree(parsing_mp->m_logname);
1372 		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
1373 		if (!parsing_mp->m_logname)
1374 			return -ENOMEM;
1375 		return 0;
1376 	case Opt_rtdev:
1377 		kfree(parsing_mp->m_rtname);
1378 		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
1379 		if (!parsing_mp->m_rtname)
1380 			return -ENOMEM;
1381 		return 0;
1382 	case Opt_allocsize:
1383 		if (suffix_kstrtoint(param->string, 10, &size))
1384 			return -EINVAL;
1385 		parsing_mp->m_allocsize_log = ffs(size) - 1;
1386 		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
1387 		return 0;
1388 	case Opt_grpid:
1389 	case Opt_bsdgroups:
1390 		parsing_mp->m_features |= XFS_FEAT_GRPID;
1391 		return 0;
1392 	case Opt_nogrpid:
1393 	case Opt_sysvgroups:
1394 		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
1395 		return 0;
1396 	case Opt_wsync:
1397 		parsing_mp->m_features |= XFS_FEAT_WSYNC;
1398 		return 0;
1399 	case Opt_norecovery:
1400 		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
1401 		return 0;
1402 	case Opt_noalign:
1403 		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
1404 		return 0;
1405 	case Opt_swalloc:
1406 		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
1407 		return 0;
1408 	case Opt_sunit:
1409 		parsing_mp->m_dalign = result.uint_32;
1410 		return 0;
1411 	case Opt_swidth:
1412 		parsing_mp->m_swidth = result.uint_32;
1413 		return 0;
1414 	case Opt_inode32:
1415 		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
1416 		return 0;
1417 	case Opt_inode64:
1418 		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1419 		return 0;
1420 	case Opt_nouuid:
1421 		parsing_mp->m_features |= XFS_FEAT_NOUUID;
1422 		return 0;
1423 	case Opt_largeio:
1424 		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
1425 		return 0;
1426 	case Opt_nolargeio:
1427 		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
1428 		return 0;
1429 	case Opt_filestreams:
1430 		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
1431 		return 0;
1432 	case Opt_noquota:
1433 		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
1434 		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
1435 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1436 		return 0;
1437 	case Opt_quota:
1438 	case Opt_uquota:
1439 	case Opt_usrquota:
1440 		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
1441 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1442 		return 0;
1443 	case Opt_qnoenforce:
1444 	case Opt_uqnoenforce:
1445 		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
1446 		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
1447 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1448 		return 0;
1449 	case Opt_pquota:
1450 	case Opt_prjquota:
1451 		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
1452 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1453 		return 0;
1454 	case Opt_pqnoenforce:
1455 		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
1456 		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
1457 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1458 		return 0;
1459 	case Opt_gquota:
1460 	case Opt_grpquota:
1461 		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
1462 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1463 		return 0;
1464 	case Opt_gqnoenforce:
1465 		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
1466 		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
1467 		parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
1468 		return 0;
1469 	case Opt_discard:
1470 		parsing_mp->m_features |= XFS_FEAT_DISCARD;
1471 		return 0;
1472 	case Opt_nodiscard:
1473 		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
1474 		return 0;
1475 #ifdef CONFIG_FS_DAX
1476 	case Opt_dax:
1477 		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
1478 		return 0;
1479 	case Opt_dax_enum:
1480 		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
1481 		return 0;
1482 #endif
1483 	/* Following mount options will be removed in September 2025 */
1484 	case Opt_ikeep:
1485 		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
1486 		parsing_mp->m_features |= XFS_FEAT_IKEEP;
1487 		return 0;
1488 	case Opt_noikeep:
1489 		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
1490 		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
1491 		return 0;
1492 	case Opt_attr2:
1493 		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
1494 		parsing_mp->m_features |= XFS_FEAT_ATTR2;
1495 		return 0;
1496 	case Opt_noattr2:
1497 		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
1498 		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
1499 		return 0;
1500 	case Opt_max_open_zones:
1501 		parsing_mp->m_max_open_zones = result.uint_32;
1502 		return 0;
1503 	case Opt_lifetime:
1504 		parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
1505 		return 0;
1506 	case Opt_nolifetime:
1507 		parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
1508 		return 0;
1509 	default:
1510 		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
1511 		return -EINVAL;
1512 	}
1513 
1514 	return 0;
1515 }
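
/*
 * Each option above arrives as a single fs_parameter.  An illustrative
 * new mount API sequence from userspace (assumed typical usage):
 *
 *	fd = fsopen("xfs", FSOPEN_CLOEXEC);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "logbsize", "256k", 0);
 *	fsconfig(fd, FSCONFIG_SET_FLAG, "discard", NULL, 0);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "/dev/sdb1", 0);
 *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	mfd = fsmount(fd, FSMOUNT_CLOEXEC, 0);
 */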
1516 
1517 static int
1518 xfs_fs_validate_params(
1519 	struct xfs_mount	*mp)
1520 {
1521 	/* No recovery flag requires a read-only mount */
1522 	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
1523 		xfs_warn(mp, "no-recovery mounts must be read-only.");
1524 		return -EINVAL;
1525 	}
1526 
1527 	/*
1528 	 * We have not read the superblock at this point, so only the attr2
1529 	 * mount option can set the attr2 feature by this stage.
1530 	 */
1531 	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
1532 		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
1533 		return -EINVAL;
1534 	}
1535 
1536 
1537 	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
1538 		xfs_warn(mp,
1539 	"sunit and swidth options incompatible with the noalign option");
1540 		return -EINVAL;
1541 	}
1542 
1543 	if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
1544 	    (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
1545 		xfs_warn(mp, "quota support not available in this kernel.");
1546 		return -EINVAL;
1547 	}
1548 
1549 	if ((mp->m_dalign && !mp->m_swidth) ||
1550 	    (!mp->m_dalign && mp->m_swidth)) {
1551 		xfs_warn(mp, "sunit and swidth must be specified together");
1552 		return -EINVAL;
1553 	}
1554 
1555 	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
1556 		xfs_warn(mp,
1557 	"stripe width (%d) must be a multiple of the stripe unit (%d)",
1558 			mp->m_swidth, mp->m_dalign);
1559 		return -EINVAL;
1560 	}
1561 
1562 	if (mp->m_logbufs != -1 &&
1563 	    mp->m_logbufs != 0 &&
1564 	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
1565 	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
1566 		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
1567 			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1568 		return -EINVAL;
1569 	}
1570 
1571 	if (mp->m_logbsize != -1 &&
1572 	    mp->m_logbsize !=  0 &&
1573 	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
1574 	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
1575 	     !is_power_of_2(mp->m_logbsize))) {
1576 		xfs_warn(mp,
1577 			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1578 			mp->m_logbsize);
1579 		return -EINVAL;
1580 	}
1581 
1582 	if (xfs_has_allocsize(mp) &&
1583 	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
1584 	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
1585 		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
1586 			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
1587 		return -EINVAL;
1588 	}
1589 
1590 	return 0;
1591 }
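
/*
 * Examples for the stripe checks above (hypothetical values; sunit and
 * swidth are given in 512-byte units at this stage): sunit=128,swidth=512
 * describes a four-wide stripe and passes; sunit=128,swidth=100 fails
 * because 100 is not a multiple of 128; sunit=128 alone fails the
 * "specified together" rule.
 */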
1592 
1593 struct dentry *
1594 xfs_debugfs_mkdir(
1595 	const char	*name,
1596 	struct dentry	*parent)
1597 {
1598 	struct dentry	*child;
1599 
1600 	/* Apparently we're expected to ignore error returns?? */
1601 	child = debugfs_create_dir(name, parent);
1602 	if (IS_ERR(child))
1603 		return NULL;
1604 
1605 	return child;
1606 }
1607 
1608 static int
1609 xfs_fs_fill_super(
1610 	struct super_block	*sb,
1611 	struct fs_context	*fc)
1612 {
1613 	struct xfs_mount	*mp = sb->s_fs_info;
1614 	struct inode		*root;
1615 	int			flags = 0, error;
1616 
1617 	mp->m_super = sb;
1618 
1619 	/*
1620 	 * Copy VFS mount flags from the context now that all parameter parsing
1621 	 * is guaranteed to have been completed by either the old mount API or
1622 	 * the newer fsopen/fsconfig API.
1623 	 */
1624 	if (fc->sb_flags & SB_RDONLY)
1625 		xfs_set_readonly(mp);
1626 	if (fc->sb_flags & SB_DIRSYNC)
1627 		mp->m_features |= XFS_FEAT_DIRSYNC;
1628 	if (fc->sb_flags & SB_SYNCHRONOUS)
1629 		mp->m_features |= XFS_FEAT_WSYNC;
1630 
1631 	error = xfs_fs_validate_params(mp);
1632 	if (error)
1633 		return error;
1634 
1635 	sb_min_blocksize(sb, BBSIZE);
1636 	sb->s_xattr = xfs_xattr_handlers;
1637 	sb->s_export_op = &xfs_export_operations;
1638 #ifdef CONFIG_XFS_QUOTA
1639 	sb->s_qcop = &xfs_quotactl_operations;
1640 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
1641 #endif
1642 	sb->s_op = &xfs_super_operations;
1643 
1644 	/*
1645 	 * Delay mount work if the debug hook is set. This is debug
1646 	 * instrumentation to coordinate simulation of xfs mount failures
1647 	 * with VFS superblock operations.
1648 	 */
1649 	if (xfs_globals.mount_delay) {
1650 		xfs_notice(mp, "Delaying mount for %d seconds.",
1651 			xfs_globals.mount_delay);
1652 		msleep(xfs_globals.mount_delay * 1000);
1653 	}
1654 
1655 	if (fc->sb_flags & SB_SILENT)
1656 		flags |= XFS_MFSI_QUIET;
1657 
1658 	error = xfs_open_devices(mp);
1659 	if (error)
1660 		return error;
1661 
1662 	if (xfs_debugfs) {
1663 		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
1664 						  xfs_debugfs);
1665 	} else {
1666 		mp->m_debugfs = NULL;
1667 	}
1668 
1669 	error = xfs_init_mount_workqueues(mp);
1670 	if (error)
1671 		goto out_shutdown_devices;
1672 
1673 	error = xfs_init_percpu_counters(mp);
1674 	if (error)
1675 		goto out_destroy_workqueues;
1676 
1677 	error = xfs_inodegc_init_percpu(mp);
1678 	if (error)
1679 		goto out_destroy_counters;
1680 
1681 	/* Allocate stats memory before we do operations that might use it */
1682 	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1683 	if (!mp->m_stats.xs_stats) {
1684 		error = -ENOMEM;
1685 		goto out_destroy_inodegc;
1686 	}
1687 
1688 	error = xchk_mount_stats_alloc(mp);
1689 	if (error)
1690 		goto out_free_stats;
1691 
1692 	error = xfs_readsb(mp, flags);
1693 	if (error)
1694 		goto out_free_scrub_stats;
1695 
1696 	error = xfs_finish_flags(mp);
1697 	if (error)
1698 		goto out_free_sb;
1699 
1700 	error = xfs_setup_devices(mp);
1701 	if (error)
1702 		goto out_free_sb;
1703 
1704 	/*
1705 	 * V4 support is undergoing deprecation.
1706 	 *
1707 	 * Note: this has to use an open coded m_features check as xfs_has_crc
1708 	 * always returns false for !CONFIG_XFS_SUPPORT_V4.
1709 	 */
1710 	if (!(mp->m_features & XFS_FEAT_CRC)) {
1711 		if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
1712 			xfs_warn(mp,
1713 	"Deprecated V4 format (crc=0) not supported by kernel.");
1714 			error = -EINVAL;
1715 			goto out_free_sb;
1716 		}
1717 		xfs_warn_once(mp,
1718 	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
1719 	}
1720 
1721 	/* ASCII case insensitivity is undergoing deprecation. */
1722 	if (xfs_has_asciici(mp)) {
1723 #ifdef CONFIG_XFS_SUPPORT_ASCII_CI
1724 		xfs_warn_once(mp,
1725 	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
1726 #else
1727 		xfs_warn(mp,
1728 	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
1729 		error = -EINVAL;
1730 		goto out_free_sb;
1731 #endif
1732 	}
1733 
1734 	/*
1735 	 * Filesystem claims it needs repair, so refuse the mount unless
1736 	 * norecovery is also specified, in which case the filesystem can
1737 	 * be mounted with no risk of further damage.
1738 	 */
1739 	if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) {
1740 		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
1741 		error = -EFSCORRUPTED;
1742 		goto out_free_sb;
1743 	}
1744 
1745 	/*
1746 	 * Don't touch the filesystem if a user tool thinks it owns the primary
1747 	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
1748 	 * we don't check them at all.
1749 	 */
1750 	if (mp->m_sb.sb_inprogress) {
1751 		xfs_warn(mp, "Offline file system operation in progress!");
1752 		error = -EFSCORRUPTED;
1753 		goto out_free_sb;
1754 	}
1755 
1756 	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
1757 		size_t max_folio_size = mapping_max_folio_size_supported();
1758 
1759 		if (!xfs_has_crc(mp)) {
1760 			xfs_warn(mp,
1761 "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
1762 				mp->m_sb.sb_blocksize, PAGE_SIZE);
1763 			error = -ENOSYS;
1764 			goto out_free_sb;
1765 		}
1766 
1767 		if (mp->m_sb.sb_blocksize > max_folio_size) {
1768 			xfs_warn(mp,
1769 "block size (%u bytes) not supported; Only block size (%zu) or less is supported",
1770 				mp->m_sb.sb_blocksize, max_folio_size);
1771 			error = -ENOSYS;
1772 			goto out_free_sb;
1773 		}
1774 
1775 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
1776 	}
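
	/*
	 * Worked example (illustrative, not from this file): with
	 * CONFIG_TRANSPARENT_HUGEPAGE and 4k pages,
	 * mapping_max_folio_size_supported() is PAGE_SIZE << MAX_PAGECACHE_ORDER
	 * (1MiB at order 8), so a V5 filesystem with 64k blocks passes the
	 * check above; without large folio support the helper caps at
	 * PAGE_SIZE and any blocksize larger than a page is rejected with
	 * -ENOSYS.
	 */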
1777 
1778 	/* Ensure this filesystem fits in the page cache limits */
1779 	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
1780 	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
1781 		xfs_warn(mp,
1782 		"file system too large to be mounted on this system.");
1783 		error = -EFBIG;
1784 		goto out_free_sb;
1785 	}
1786 
1787 	/*
1788 	 * XFS block mappings use 54 bits to store the logical block offset.
1789 	 * This should suffice to handle the maximum file size that the VFS
1790 	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
1791 	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
1792 	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
1793 	 * to check this assertion.
1794 	 *
1795 	 * Avoid integer overflow by comparing the maximum bmbt offset to the
1796 	 * maximum pagecache offset in units of fs blocks.
1797 	 */
1798 	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
1799 		xfs_warn(mp,
1800 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
1801 			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
1802 			 XFS_MAX_FILEOFF);
1803 		error = -EINVAL;
1804 		goto out_free_sb;
1805 	}
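
	/*
	 * Worked check (illustrative, assuming a 64-bit kernel where
	 * MAX_LFS_FILESIZE == LLONG_MAX): with the minimum 512-byte block
	 * size, XFS_B_TO_FSBT(mp, 2^63 - 1) == (2^63 - 1) >> 9 == 2^54 - 1,
	 * which is exactly XFS_MAX_FILEOFF, so the assertion above holds with
	 * no slack; every larger block size only shrinks the maximum offset.
	 */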
1806 
1807 	error = xfs_rtmount_readsb(mp);
1808 	if (error)
1809 		goto out_free_sb;
1810 
1811 	error = xfs_filestream_mount(mp);
1812 	if (error)
1813 		goto out_free_rtsb;
1814 
1815 	/*
1816 	 * We must configure the block size in the superblock before we run the
1817 	 * full mount process, as the mount process can look up and cache inodes.
1818 	 */
1819 	sb->s_magic = XFS_SUPER_MAGIC;
1820 	sb->s_blocksize = mp->m_sb.sb_blocksize;
1821 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1822 	sb->s_maxbytes = MAX_LFS_FILESIZE;
1823 	sb->s_max_links = XFS_MAXLINK;
1824 	sb->s_time_gran = 1;
1825 	if (xfs_has_bigtime(mp)) {
1826 		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
1827 		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
1828 	} else {
1829 		sb->s_time_min = XFS_LEGACY_TIME_MIN;
1830 		sb->s_time_max = XFS_LEGACY_TIME_MAX;
1831 	}
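
	/*
	 * Background note (illustrative): without bigtime the on-disk
	 * timestamps are 32-bit signed seconds, i.e. 1901-12-13 through
	 * 2038-01-19; the bigtime encoding widens them to an unsigned 64-bit
	 * nanosecond counter based at the old minimum, extending the maximum
	 * to July 2486.
	 */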
1832 	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
1833 	sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
1834 
1835 	set_posix_acl_flag(sb);
1836 
1837 	/* version 5 superblocks support inode version counters. */
1838 	if (xfs_has_crc(mp))
1839 		sb->s_flags |= SB_I_VERSION;
1840 
1841 	if (xfs_has_dax_always(mp)) {
1842 		error = xfs_setup_dax_always(mp);
1843 		if (error)
1844 			goto out_filestream_unmount;
1845 	}
1846 
1847 	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
1848 		xfs_warn(mp,
1849 	"mounting with \"discard\" option, but the device does not support discard");
1850 		mp->m_features &= ~XFS_FEAT_DISCARD;
1851 	}
1852 
1853 	if (xfs_has_zoned(mp)) {
1854 		if (!xfs_has_metadir(mp)) {
1855 			xfs_alert(mp,
1856 		"metadir feature required for zoned realtime devices.");
1857 			error = -EINVAL;
1858 			goto out_filestream_unmount;
1859 		}
1860 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED);
1861 	} else if (xfs_has_metadir(mp)) {
1862 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
1863 	}
1864 
1865 	if (xfs_has_reflink(mp)) {
1866 		if (xfs_has_realtime(mp) &&
1867 		    !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) {
1868 			xfs_alert(mp,
1869 	"reflink not compatible with realtime extent size %u!",
1870 					mp->m_sb.sb_rextsize);
1871 			error = -EINVAL;
1872 			goto out_filestream_unmount;
1873 		}
1874 
1875 		if (xfs_has_zoned(mp)) {
1876 			xfs_alert(mp,
1877 	"reflink not compatible with zoned RT device!");
1878 			error = -EINVAL;
1879 			goto out_filestream_unmount;
1880 		}
1881 
1882 		if (xfs_globals.always_cow) {
1883 			xfs_info(mp, "using DEBUG-only always_cow mode.");
1884 			mp->m_always_cow = true;
1885 		}
1886 	}
1887 
1888 
1889 	if (xfs_has_exchange_range(mp))
1890 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
1891 
1892 	if (xfs_has_parent(mp))
1893 		xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
1894 
1895 	/*
1896 	 * If no quota mount options were provided, try to pick up the quota
1897 	 * accounting and enforcement flags from the ondisk superblock.
1898 	 */
1899 	if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
1900 		xfs_set_resuming_quotaon(mp);
1901 	mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
1902 
1903 	error = xfs_mountfs(mp);
1904 	if (error)
1905 		goto out_filestream_unmount;
1906 
1907 	root = igrab(VFS_I(mp->m_rootip));
1908 	if (!root) {
1909 		error = -ENOENT;
1910 		goto out_unmount;
1911 	}
1912 	sb->s_root = d_make_root(root);
1913 	if (!sb->s_root) {
1914 		error = -ENOMEM;
1915 		goto out_unmount;
1916 	}
1917 
1918 	return 0;
1919 
1920  out_filestream_unmount:
1921 	xfs_filestream_unmount(mp);
1922  out_free_rtsb:
1923 	xfs_rtmount_freesb(mp);
1924  out_free_sb:
1925 	xfs_freesb(mp);
1926  out_free_scrub_stats:
1927 	xchk_mount_stats_free(mp);
1928  out_free_stats:
1929 	free_percpu(mp->m_stats.xs_stats);
1930  out_destroy_inodegc:
1931 	xfs_inodegc_free_percpu(mp);
1932  out_destroy_counters:
1933 	xfs_destroy_percpu_counters(mp);
1934  out_destroy_workqueues:
1935 	xfs_destroy_mount_workqueues(mp);
1936  out_shutdown_devices:
1937 	xfs_shutdown_devices(mp);
1938 	return error;
1939 
1940  out_unmount:
1941 	xfs_filestream_unmount(mp);
1942 	xfs_unmountfs(mp);
1943 	goto out_free_rtsb;
1944 }
1945 
1946 static int
1947 xfs_fs_get_tree(
1948 	struct fs_context	*fc)
1949 {
1950 	return get_tree_bdev(fc, xfs_fs_fill_super);
1951 }
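
/*
 * Illustrative sketch (not part of this file): the fs_context path above is
 * driven from userspace by the new mount API syscalls, roughly:
 *
 *	int fsfd = fsopen("xfs", FSOPEN_CLOEXEC);
 *	fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "/dev/sdb1", 0);
 *	fsconfig(fsfd, FSCONFIG_SET_STRING, "logbufs", "8", 0);
 *	fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
 *	move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 *
 * fsopen() reaches xfs_init_fs_context(), each FSCONFIG_SET_STRING lands in
 * xfs_fs_parse_param(), and FSCONFIG_CMD_CREATE ends up here in
 * xfs_fs_get_tree(); "/dev/sdb1" and "logbufs=8" are example values only.
 */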
1952 
1953 static int
1954 xfs_remount_rw(
1955 	struct xfs_mount	*mp)
1956 {
1957 	struct xfs_sb		*sbp = &mp->m_sb;
1958 	int error;
1959 
1960 	if (xfs_has_norecovery(mp)) {
1961 		xfs_warn(mp,
1962 			"ro->rw transition prohibited on norecovery mount");
1963 		return -EINVAL;
1964 	}
1965 
1966 	if (xfs_sb_is_v5(sbp) &&
1967 	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1968 		xfs_warn(mp,
1969 	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1970 			(sbp->sb_features_ro_compat &
1971 				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1972 		return -EINVAL;
1973 	}
1974 
1975 	xfs_clear_readonly(mp);
1976 
1977 	/*
1978 	 * If this is the first remount to writeable state we might have some
1979 	 * superblock changes to update.
1980 	 */
1981 	if (mp->m_update_sb) {
1982 		error = xfs_sync_sb(mp, false);
1983 		if (error) {
1984 			xfs_warn(mp, "failed to write sb changes");
1985 			return error;
1986 		}
1987 		mp->m_update_sb = false;
1988 	}
1989 
1990 	/*
1991 	 * Fill out the reserve pool if it is empty. Use the stashed value if
1992 	 * it is non-zero, otherwise go with the default.
1993 	 */
1994 	xfs_restore_resvblks(mp);
1995 	xfs_log_work_queue(mp);
1996 	xfs_blockgc_start(mp);
1997 
1998 	/* Create the per-AG metadata reservation pool. */
1999 	error = xfs_fs_reserve_ag_blocks(mp);
2000 	if (error && error != -ENOSPC)
2001 		return error;
2002 
2003 	/* Re-enable the background inode inactivation worker. */
2004 	xfs_inodegc_start(mp);
2005 
2006 	/* Restart zone reclaim */
2007 	xfs_zone_gc_start(mp);
2008 
2009 	return 0;
2010 }
2011 
2012 static int
2013 xfs_remount_ro(
2014 	struct xfs_mount	*mp)
2015 {
2016 	struct xfs_icwalk	icw = {
2017 		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
2018 	};
2019 	int			error;
2020 
2021 	/* Flush all the dirty data to disk. */
2022 	error = sync_filesystem(mp->m_super);
2023 	if (error)
2024 		return error;
2025 
2026 	/*
2027 	 * Cancel background eofb scanning so it cannot race with the final
2028 	 * log force+buftarg wait and deadlock the remount.
2029 	 */
2030 	xfs_blockgc_stop(mp);
2031 
2032 	/*
2033 	 * Clear out all remaining COW staging extents and speculative post-EOF
2034 	 * preallocations so that we don't leave inodes requiring inactivation
2035 	 * cleanups during reclaim on a read-only mount.  We must process every
2036 	 * cached inode, so this requires a synchronous cache scan.
2037 	 */
2038 	error = xfs_blockgc_free_space(mp, &icw);
2039 	if (error) {
2040 		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2041 		return error;
2042 	}
2043 
2044 	/*
2045 	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
2046 	 * flushed all pending inodegc work when it sync'd the filesystem.
2047 	 * The VFS holds s_umount, so we know that inodes cannot enter
2048 	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
2049 	 * we send inodes straight to reclaim, so no inodes will be queued.
2050 	 */
2051 	xfs_inodegc_stop(mp);
2052 
2053 	/* Stop zone reclaim */
2054 	xfs_zone_gc_stop(mp);
2055 
2056 	/* Free the per-AG metadata reservation pool. */
2057 	xfs_fs_unreserve_ag_blocks(mp);
2058 
2059 	/*
2060 	 * Before we sync the metadata, we need to free up the reserve block
2061 	 * pool so that the used block count in the superblock on disk is
2062 	 * correct at the end of the remount. Stash the current reserve pool
2063 	 * size so that if we get remounted rw, we can return it to the same
2064 	 * size.
2065 	 */
2066 	xfs_save_resvblks(mp);
2067 
2068 	xfs_log_clean(mp);
2069 	xfs_set_readonly(mp);
2070 
2071 	return 0;
2072 }
2073 
2074 /*
2075  * Logically we would return an error here to prevent users from believing
2076  * they have changed mount options via remount that in fact cannot be changed.
2077  *
2078  * Unfortunately mount(8) sometimes adds all options from mtab and fstab to
2079  * the mount arguments, so we can't blindly reject options; we would have to
2080  * check each specified option against the currently set value and reject it
2081  * only if it actually differs.
2082  *
2083  * Until that is implemented we return success for every remount request, and
2084  * silently ignore all options that we can't actually change.
2085  */
2086 static int
2087 xfs_fs_reconfigure(
2088 	struct fs_context *fc)
2089 {
2090 	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
2091 	struct xfs_mount	*new_mp = fc->s_fs_info;
2092 	int			flags = fc->sb_flags;
2093 	int			error;
2094 
2095 	new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
2096 
2097 	/* version 5 superblocks always support version counters. */
2098 	if (xfs_has_crc(mp))
2099 		fc->sb_flags |= SB_I_VERSION;
2100 
2101 	error = xfs_fs_validate_params(new_mp);
2102 	if (error)
2103 		return error;
2104 
2105 	/* inode32 -> inode64 */
2106 	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
2107 		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
2108 		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
2109 	}
2110 
2111 	/* inode64 -> inode32 */
2112 	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
2113 		mp->m_features |= XFS_FEAT_SMALL_INUMS;
2114 		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
2115 	}
2116 
2117 	/* ro -> rw */
2118 	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
2119 		error = xfs_remount_rw(mp);
2120 		if (error)
2121 			return error;
2122 	}
2123 
2124 	/* rw -> ro */
2125 	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
2126 		error = xfs_remount_ro(mp);
2127 		if (error)
2128 			return error;
2129 	}
2130 
2131 	return 0;
2132 }
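
/*
 * Illustrative sketch (not part of this file): a plain
 * "mount -o remount,ro /mnt" arrives here with SB_RDONLY set in
 * fc->sb_flags, so the rw->ro branch above calls xfs_remount_ro(); the
 * same transition via the new mount API would look roughly like:
 *
 *	int fsfd = fspick(AT_FDCWD, "/mnt", FSPICK_CLOEXEC);
 *	fsconfig(fsfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
 *	fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
 *
 * fspick() builds a reconfiguration context for the mounted superblock and
 * FSCONFIG_CMD_RECONFIGURE invokes xfs_fs_reconfigure().
 */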
2133 
2134 static void
2135 xfs_fs_free(
2136 	struct fs_context	*fc)
2137 {
2138 	struct xfs_mount	*mp = fc->s_fs_info;
2139 
2140 	/*
2141 	 * mp is stored in the fs_context when it is initialized.
2142 	 * mp is transferred to the superblock on a successful mount,
2143 	 * but if an error occurs before the transfer we have to free
2144 	 * it here.
2145 	 */
2146 	if (mp)
2147 		xfs_mount_free(mp);
2148 }
2149 
2150 static const struct fs_context_operations xfs_context_ops = {
2151 	.parse_param = xfs_fs_parse_param,
2152 	.get_tree    = xfs_fs_get_tree,
2153 	.reconfigure = xfs_fs_reconfigure,
2154 	.free        = xfs_fs_free,
2155 };
2156 
2157 /*
2158  * WARNING: do not initialise any parameters in this function that depend on
2159  * mount option parsing having already been performed, as this can be called
2160  * from fsopen() before any parameters have been set.
2161  */
2162 static int
2163 xfs_init_fs_context(
2164 	struct fs_context	*fc)
2165 {
2166 	struct xfs_mount	*mp;
2167 	int			i;
2168 
2169 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
2170 	if (!mp)
2171 		return -ENOMEM;
2172 
2173 	spin_lock_init(&mp->m_sb_lock);
2174 	for (i = 0; i < XG_TYPE_MAX; i++)
2175 		xa_init(&mp->m_groups[i].xa);
2176 	mutex_init(&mp->m_growlock);
2177 	mutex_init(&mp->m_metafile_resv_lock);
2178 	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
2179 	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
2180 	mp->m_kobj.kobject.kset = xfs_kset;
2181 	/*
2182 	 * We don't create the finobt per-ag space reservation until after log
2183 	 * recovery, so we must set this to true so that an ifree transaction
2184 	 * started during log recovery will not depend on space reservations
2185 	 * for finobt expansion.
2186 	 */
2187 	mp->m_finobt_nores = true;
2188 
2189 	/*
2190 	 * These can be overridden by the mount option parsing.
2191 	 */
2192 	mp->m_logbufs = -1;
2193 	mp->m_logbsize = -1;
2194 	mp->m_allocsize_log = 16; /* 64k */
2195 
2196 	xfs_hooks_init(&mp->m_dir_update_hooks);
2197 
2198 	fc->s_fs_info = mp;
2199 	fc->ops = &xfs_context_ops;
2200 
2201 	return 0;
2202 }
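
/*
 * Worked example (illustrative): m_allocsize_log == 16 above makes
 * speculative preallocation default to 1 << 16 == 64k; mounting with
 * "allocsize=1m" would have the option parser store ffs(1048576) - 1 == 20
 * here instead, since the allocsize option is kept as log2 of its byte
 * value.
 */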
2203 
2204 static void
2205 xfs_kill_sb(
2206 	struct super_block		*sb)
2207 {
2208 	kill_block_super(sb);
2209 	xfs_mount_free(XFS_M(sb));
2210 }
2211 
2212 static struct file_system_type xfs_fs_type = {
2213 	.owner			= THIS_MODULE,
2214 	.name			= "xfs",
2215 	.init_fs_context	= xfs_init_fs_context,
2216 	.parameters		= xfs_fs_parameters,
2217 	.kill_sb		= xfs_kill_sb,
2218 	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
2219 				  FS_LBS,
2220 };
2221 MODULE_ALIAS_FS("xfs");
2222 
2223 STATIC int __init
2224 xfs_init_caches(void)
2225 {
2226 	int		error;
2227 
2228 	xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
2229 					 SLAB_HWCACHE_ALIGN |
2230 					 SLAB_RECLAIM_ACCOUNT,
2231 					 NULL);
2232 	if (!xfs_buf_cache)
2233 		goto out;
2234 
2235 	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
2236 						sizeof(struct xlog_ticket),
2237 						0, 0, NULL);
2238 	if (!xfs_log_ticket_cache)
2239 		goto out_destroy_buf_cache;
2240 
2241 	error = xfs_btree_init_cur_caches();
2242 	if (error)
2243 		goto out_destroy_log_ticket_cache;
2244 
2245 	error = rcbagbt_init_cur_cache();
2246 	if (error)
2247 		goto out_destroy_btree_cur_cache;
2248 
2249 	error = xfs_defer_init_item_caches();
2250 	if (error)
2251 		goto out_destroy_rcbagbt_cur_cache;
2252 
2253 	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
2254 					      sizeof(struct xfs_da_state),
2255 					      0, 0, NULL);
2256 	if (!xfs_da_state_cache)
2257 		goto out_destroy_defer_item_cache;
2258 
2259 	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
2260 					   sizeof(struct xfs_ifork),
2261 					   0, 0, NULL);
2262 	if (!xfs_ifork_cache)
2263 		goto out_destroy_da_state_cache;
2264 
2265 	xfs_trans_cache = kmem_cache_create("xfs_trans",
2266 					   sizeof(struct xfs_trans),
2267 					   0, 0, NULL);
2268 	if (!xfs_trans_cache)
2269 		goto out_destroy_ifork_cache;
2270 
2271 
2272 	/*
2273 	 * The size of the cache-allocated buf log item is the maximum
2274 	 * size possible under XFS.  This wastes a little bit of memory,
2275 	 * but it is much faster.
2276 	 */
2277 	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
2278 					      sizeof(struct xfs_buf_log_item),
2279 					      0, 0, NULL);
2280 	if (!xfs_buf_item_cache)
2281 		goto out_destroy_trans_cache;
2282 
2283 	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
2284 			xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
2285 			0, 0, NULL);
2286 	if (!xfs_efd_cache)
2287 		goto out_destroy_buf_item_cache;
2288 
2289 	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
2290 			xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
2291 			0, 0, NULL);
2292 	if (!xfs_efi_cache)
2293 		goto out_destroy_efd_cache;
2294 
2295 	xfs_inode_cache = kmem_cache_create("xfs_inode",
2296 					   sizeof(struct xfs_inode), 0,
2297 					   (SLAB_HWCACHE_ALIGN |
2298 					    SLAB_RECLAIM_ACCOUNT |
2299 					    SLAB_ACCOUNT),
2300 					   xfs_fs_inode_init_once);
2301 	if (!xfs_inode_cache)
2302 		goto out_destroy_efi_cache;
2303 
2304 	xfs_ili_cache = kmem_cache_create("xfs_ili",
2305 					 sizeof(struct xfs_inode_log_item), 0,
2306 					 SLAB_RECLAIM_ACCOUNT,
2307 					 NULL);
2308 	if (!xfs_ili_cache)
2309 		goto out_destroy_inode_cache;
2310 
2311 	xfs_icreate_cache = kmem_cache_create("xfs_icr",
2312 					     sizeof(struct xfs_icreate_item),
2313 					     0, 0, NULL);
2314 	if (!xfs_icreate_cache)
2315 		goto out_destroy_ili_cache;
2316 
2317 	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
2318 					 sizeof(struct xfs_rud_log_item),
2319 					 0, 0, NULL);
2320 	if (!xfs_rud_cache)
2321 		goto out_destroy_icreate_cache;
2322 
2323 	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
2324 			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
2325 			0, 0, NULL);
2326 	if (!xfs_rui_cache)
2327 		goto out_destroy_rud_cache;
2328 
2329 	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
2330 					 sizeof(struct xfs_cud_log_item),
2331 					 0, 0, NULL);
2332 	if (!xfs_cud_cache)
2333 		goto out_destroy_rui_cache;
2334 
2335 	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
2336 			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
2337 			0, 0, NULL);
2338 	if (!xfs_cui_cache)
2339 		goto out_destroy_cud_cache;
2340 
2341 	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
2342 					 sizeof(struct xfs_bud_log_item),
2343 					 0, 0, NULL);
2344 	if (!xfs_bud_cache)
2345 		goto out_destroy_cui_cache;
2346 
2347 	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
2348 			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
2349 			0, 0, NULL);
2350 	if (!xfs_bui_cache)
2351 		goto out_destroy_bud_cache;
2352 
2353 	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
2354 					    sizeof(struct xfs_attrd_log_item),
2355 					    0, 0, NULL);
2356 	if (!xfs_attrd_cache)
2357 		goto out_destroy_bui_cache;
2358 
2359 	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
2360 					    sizeof(struct xfs_attri_log_item),
2361 					    0, 0, NULL);
2362 	if (!xfs_attri_cache)
2363 		goto out_destroy_attrd_cache;
2364 
2365 	xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
2366 					     sizeof(struct xfs_iunlink_item),
2367 					     0, 0, NULL);
2368 	if (!xfs_iunlink_cache)
2369 		goto out_destroy_attri_cache;
2370 
2371 	xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
2372 					 sizeof(struct xfs_xmd_log_item),
2373 					 0, 0, NULL);
2374 	if (!xfs_xmd_cache)
2375 		goto out_destroy_iul_cache;
2376 
2377 	xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
2378 					 sizeof(struct xfs_xmi_log_item),
2379 					 0, 0, NULL);
2380 	if (!xfs_xmi_cache)
2381 		goto out_destroy_xmd_cache;
2382 
2383 	xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
2384 					     sizeof(struct xfs_parent_args),
2385 					     0, 0, NULL);
2386 	if (!xfs_parent_args_cache)
2387 		goto out_destroy_xmi_cache;
2388 
2389 	return 0;
2390 
2391  out_destroy_xmi_cache:
2392 	kmem_cache_destroy(xfs_xmi_cache);
2393  out_destroy_xmd_cache:
2394 	kmem_cache_destroy(xfs_xmd_cache);
2395  out_destroy_iul_cache:
2396 	kmem_cache_destroy(xfs_iunlink_cache);
2397  out_destroy_attri_cache:
2398 	kmem_cache_destroy(xfs_attri_cache);
2399  out_destroy_attrd_cache:
2400 	kmem_cache_destroy(xfs_attrd_cache);
2401  out_destroy_bui_cache:
2402 	kmem_cache_destroy(xfs_bui_cache);
2403  out_destroy_bud_cache:
2404 	kmem_cache_destroy(xfs_bud_cache);
2405  out_destroy_cui_cache:
2406 	kmem_cache_destroy(xfs_cui_cache);
2407  out_destroy_cud_cache:
2408 	kmem_cache_destroy(xfs_cud_cache);
2409  out_destroy_rui_cache:
2410 	kmem_cache_destroy(xfs_rui_cache);
2411  out_destroy_rud_cache:
2412 	kmem_cache_destroy(xfs_rud_cache);
2413  out_destroy_icreate_cache:
2414 	kmem_cache_destroy(xfs_icreate_cache);
2415  out_destroy_ili_cache:
2416 	kmem_cache_destroy(xfs_ili_cache);
2417  out_destroy_inode_cache:
2418 	kmem_cache_destroy(xfs_inode_cache);
2419  out_destroy_efi_cache:
2420 	kmem_cache_destroy(xfs_efi_cache);
2421  out_destroy_efd_cache:
2422 	kmem_cache_destroy(xfs_efd_cache);
2423  out_destroy_buf_item_cache:
2424 	kmem_cache_destroy(xfs_buf_item_cache);
2425  out_destroy_trans_cache:
2426 	kmem_cache_destroy(xfs_trans_cache);
2427  out_destroy_ifork_cache:
2428 	kmem_cache_destroy(xfs_ifork_cache);
2429  out_destroy_da_state_cache:
2430 	kmem_cache_destroy(xfs_da_state_cache);
2431  out_destroy_defer_item_cache:
2432 	xfs_defer_destroy_item_caches();
2433  out_destroy_rcbagbt_cur_cache:
2434 	rcbagbt_destroy_cur_cache();
2435  out_destroy_btree_cur_cache:
2436 	xfs_btree_destroy_cur_caches();
2437  out_destroy_log_ticket_cache:
2438 	kmem_cache_destroy(xfs_log_ticket_cache);
2439  out_destroy_buf_cache:
2440 	kmem_cache_destroy(xfs_buf_cache);
2441  out:
2442 	return -ENOMEM;
2443 }
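
/*
 * Illustrative sketch (not from this file): objects are carved out of these
 * caches with the usual slab helpers; transaction allocation elsewhere in
 * XFS does roughly:
 *
 *	struct xfs_trans *tp;
 *
 *	tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
 *	...
 *	kmem_cache_free(xfs_trans_cache, tp);
 *
 * kmem_cache_zalloc() returns a zeroed object of the size fixed at cache
 * creation time (sizeof(struct xfs_trans) above).
 */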
2444 
2445 STATIC void
2446 xfs_destroy_caches(void)
2447 {
2448 	/*
2449 	 * Make sure all delayed rcu free are flushed before we
2450 	 * destroy caches.
2451 	 */
2452 	rcu_barrier();
2453 	kmem_cache_destroy(xfs_parent_args_cache);
2454 	kmem_cache_destroy(xfs_xmd_cache);
2455 	kmem_cache_destroy(xfs_xmi_cache);
2456 	kmem_cache_destroy(xfs_iunlink_cache);
2457 	kmem_cache_destroy(xfs_attri_cache);
2458 	kmem_cache_destroy(xfs_attrd_cache);
2459 	kmem_cache_destroy(xfs_bui_cache);
2460 	kmem_cache_destroy(xfs_bud_cache);
2461 	kmem_cache_destroy(xfs_cui_cache);
2462 	kmem_cache_destroy(xfs_cud_cache);
2463 	kmem_cache_destroy(xfs_rui_cache);
2464 	kmem_cache_destroy(xfs_rud_cache);
2465 	kmem_cache_destroy(xfs_icreate_cache);
2466 	kmem_cache_destroy(xfs_ili_cache);
2467 	kmem_cache_destroy(xfs_inode_cache);
2468 	kmem_cache_destroy(xfs_efi_cache);
2469 	kmem_cache_destroy(xfs_efd_cache);
2470 	kmem_cache_destroy(xfs_buf_item_cache);
2471 	kmem_cache_destroy(xfs_trans_cache);
2472 	kmem_cache_destroy(xfs_ifork_cache);
2473 	kmem_cache_destroy(xfs_da_state_cache);
2474 	xfs_defer_destroy_item_caches();
2475 	rcbagbt_destroy_cur_cache();
2476 	xfs_btree_destroy_cur_caches();
2477 	kmem_cache_destroy(xfs_log_ticket_cache);
2478 	kmem_cache_destroy(xfs_buf_cache);
2479 }
2480 
2481 STATIC int __init
2482 xfs_init_workqueues(void)
2483 {
2484 	/*
2485 	 * The allocation workqueue can be used in memory reclaim situations
2486 	 * (writepage path), and parallelism is only limited by the number of
2487 	 * AGs in all the filesystems mounted. Hence use the default large
2488 	 * max_active value for this workqueue.
2489 	 */
2490 	xfs_alloc_wq = alloc_workqueue("xfsalloc",
2491 			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
2492 	if (!xfs_alloc_wq)
2493 		return -ENOMEM;
2494 
2495 	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
2496 			0);
2497 	if (!xfs_discard_wq)
2498 		goto out_free_alloc_wq;
2499 
2500 	return 0;
2501 out_free_alloc_wq:
2502 	destroy_workqueue(xfs_alloc_wq);
2503 	return -ENOMEM;
2504 }
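
/*
 * Background note: WQ_MEM_RECLAIM guarantees xfsalloc a rescuer thread so
 * allocation work can make forward progress even when the system is too
 * short on memory to spawn new workers, which matters because this queue
 * is used from the writeback path; xfsdiscard is never required for
 * reclaim, so it runs as a plain unbound queue.
 */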
2505 
2506 STATIC void
2507 xfs_destroy_workqueues(void)
2508 {
2509 	destroy_workqueue(xfs_discard_wq);
2510 	destroy_workqueue(xfs_alloc_wq);
2511 }
2512 
2513 STATIC int __init
2514 init_xfs_fs(void)
2515 {
2516 	int			error;
2517 
2518 	xfs_check_ondisk_structs();
2519 
2520 	error = xfs_dahash_test();
2521 	if (error)
2522 		return error;
2523 
2524 	printk(KERN_INFO XFS_VERSION_STRING " with "
2525 			 XFS_BUILD_OPTIONS " enabled\n");
2526 
2527 	xfs_dir_startup();
2528 
2529 	error = xfs_init_caches();
2530 	if (error)
2531 		goto out;
2532 
2533 	error = xfs_init_workqueues();
2534 	if (error)
2535 		goto out_destroy_caches;
2536 
2537 	error = xfs_mru_cache_init();
2538 	if (error)
2539 		goto out_destroy_wq;
2540 
2541 	error = xfs_init_procfs();
2542 	if (error)
2543 		goto out_mru_cache_uninit;
2544 
2545 	error = xfs_sysctl_register();
2546 	if (error)
2547 		goto out_cleanup_procfs;
2548 
2549 	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);
2550 
2551 	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2552 	if (!xfs_kset) {
2553 		error = -ENOMEM;
2554 		goto out_debugfs_unregister;
2555 	}
2556 
2557 	xfsstats.xs_kobj.kobject.kset = xfs_kset;
2558 
2559 	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2560 	if (!xfsstats.xs_stats) {
2561 		error = -ENOMEM;
2562 		goto out_kset_unregister;
2563 	}
2564 
2565 	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2566 			       "stats");
2567 	if (error)
2568 		goto out_free_stats;
2569 
2570 	error = xchk_global_stats_setup(xfs_debugfs);
2571 	if (error)
2572 		goto out_remove_stats_kobj;
2573 
2574 #ifdef DEBUG
2575 	xfs_dbg_kobj.kobject.kset = xfs_kset;
2576 	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2577 	if (error)
2578 		goto out_remove_scrub_stats;
2579 #endif
2580 
2581 	error = xfs_qm_init();
2582 	if (error)
2583 		goto out_remove_dbg_kobj;
2584 
2585 	error = register_filesystem(&xfs_fs_type);
2586 	if (error)
2587 		goto out_qm_exit;
2588 	return 0;
2589 
2590  out_qm_exit:
2591 	xfs_qm_exit();
2592  out_remove_dbg_kobj:
2593 #ifdef DEBUG
2594 	xfs_sysfs_del(&xfs_dbg_kobj);
2595  out_remove_scrub_stats:
2596 #endif
2597 	xchk_global_stats_teardown();
2598  out_remove_stats_kobj:
2599 	xfs_sysfs_del(&xfsstats.xs_kobj);
2600  out_free_stats:
2601 	free_percpu(xfsstats.xs_stats);
2602  out_kset_unregister:
2603 	kset_unregister(xfs_kset);
2604  out_debugfs_unregister:
2605 	debugfs_remove(xfs_debugfs);
2606 	xfs_sysctl_unregister();
2607  out_cleanup_procfs:
2608 	xfs_cleanup_procfs();
2609  out_mru_cache_uninit:
2610 	xfs_mru_cache_uninit();
2611  out_destroy_wq:
2612 	xfs_destroy_workqueues();
2613  out_destroy_caches:
2614 	xfs_destroy_caches();
2615  out:
2616 	return error;
2617 }
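
/*
 * Background note: the error labels above unwind strictly in reverse order
 * of initialization, so a failure at any step tears down exactly the state
 * set up before it; exit_xfs_fs() below mirrors the same sequence for
 * module unload.
 */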
2618 
2619 STATIC void __exit
2620 exit_xfs_fs(void)
2621 {
2622 	xfs_qm_exit();
2623 	unregister_filesystem(&xfs_fs_type);
2624 #ifdef DEBUG
2625 	xfs_sysfs_del(&xfs_dbg_kobj);
2626 #endif
2627 	xchk_global_stats_teardown();
2628 	xfs_sysfs_del(&xfsstats.xs_kobj);
2629 	free_percpu(xfsstats.xs_stats);
2630 	kset_unregister(xfs_kset);
2631 	debugfs_remove(xfs_debugfs);
2632 	xfs_sysctl_unregister();
2633 	xfs_cleanup_procfs();
2634 	xfs_mru_cache_uninit();
2635 	xfs_destroy_workqueues();
2636 	xfs_destroy_caches();
2637 	xfs_uuid_table_free();
2638 }
2639 
2640 module_init(init_xfs_fs);
2641 module_exit(exit_xfs_fs);
2642 
2643 MODULE_AUTHOR("Silicon Graphics, Inc.");
2644 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2645 MODULE_LICENSE("GPL");
2646