xref: /linux/fs/xfs/xfs_mount.c (revision 776cfebb430c7b22c208b1b17add97f354d97cab)
1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 #include "xfs.h"
34 #include "xfs_macros.h"
35 #include "xfs_types.h"
36 #include "xfs_inum.h"
37 #include "xfs_log.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_dmapi.h"
44 #include "xfs_mount.h"
45 #include "xfs_alloc_btree.h"
46 #include "xfs_bmap_btree.h"
47 #include "xfs_ialloc_btree.h"
48 #include "xfs_btree.h"
49 #include "xfs_ialloc.h"
50 #include "xfs_attr_sf.h"
51 #include "xfs_dir_sf.h"
52 #include "xfs_dir2_sf.h"
53 #include "xfs_dinode.h"
54 #include "xfs_inode.h"
55 #include "xfs_alloc.h"
56 #include "xfs_rtalloc.h"
57 #include "xfs_bmap.h"
58 #include "xfs_error.h"
59 #include "xfs_bit.h"
60 #include "xfs_rw.h"
61 #include "xfs_quota.h"
62 #include "xfs_fsops.h"
63 
64 STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
65 STATIC int	xfs_uuid_mount(xfs_mount_t *);
66 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
67 
68 static struct {
69     short offset;
70     short type;     /* 0 = integer
71 		* 1 = binary / string (no translation)
72 		*/
73 } xfs_sb_info[] = {
74     { offsetof(xfs_sb_t, sb_magicnum),   0 },
75     { offsetof(xfs_sb_t, sb_blocksize),  0 },
76     { offsetof(xfs_sb_t, sb_dblocks),    0 },
77     { offsetof(xfs_sb_t, sb_rblocks),    0 },
78     { offsetof(xfs_sb_t, sb_rextents),   0 },
79     { offsetof(xfs_sb_t, sb_uuid),       1 },
80     { offsetof(xfs_sb_t, sb_logstart),   0 },
81     { offsetof(xfs_sb_t, sb_rootino),    0 },
82     { offsetof(xfs_sb_t, sb_rbmino),     0 },
83     { offsetof(xfs_sb_t, sb_rsumino),    0 },
84     { offsetof(xfs_sb_t, sb_rextsize),   0 },
85     { offsetof(xfs_sb_t, sb_agblocks),   0 },
86     { offsetof(xfs_sb_t, sb_agcount),    0 },
87     { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
88     { offsetof(xfs_sb_t, sb_logblocks),  0 },
89     { offsetof(xfs_sb_t, sb_versionnum), 0 },
90     { offsetof(xfs_sb_t, sb_sectsize),   0 },
91     { offsetof(xfs_sb_t, sb_inodesize),  0 },
92     { offsetof(xfs_sb_t, sb_inopblock),  0 },
93     { offsetof(xfs_sb_t, sb_fname[0]),   1 },
94     { offsetof(xfs_sb_t, sb_blocklog),   0 },
95     { offsetof(xfs_sb_t, sb_sectlog),    0 },
96     { offsetof(xfs_sb_t, sb_inodelog),   0 },
97     { offsetof(xfs_sb_t, sb_inopblog),   0 },
98     { offsetof(xfs_sb_t, sb_agblklog),   0 },
99     { offsetof(xfs_sb_t, sb_rextslog),   0 },
100     { offsetof(xfs_sb_t, sb_inprogress), 0 },
101     { offsetof(xfs_sb_t, sb_imax_pct),   0 },
102     { offsetof(xfs_sb_t, sb_icount),     0 },
103     { offsetof(xfs_sb_t, sb_ifree),      0 },
104     { offsetof(xfs_sb_t, sb_fdblocks),   0 },
105     { offsetof(xfs_sb_t, sb_frextents),  0 },
106     { offsetof(xfs_sb_t, sb_uquotino),   0 },
107     { offsetof(xfs_sb_t, sb_gquotino),   0 },
108     { offsetof(xfs_sb_t, sb_qflags),     0 },
109     { offsetof(xfs_sb_t, sb_flags),      0 },
110     { offsetof(xfs_sb_t, sb_shared_vn),  0 },
111     { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
112     { offsetof(xfs_sb_t, sb_unit),	 0 },
113     { offsetof(xfs_sb_t, sb_width),	 0 },
114     { offsetof(xfs_sb_t, sb_dirblklog),	 0 },
115     { offsetof(xfs_sb_t, sb_logsectlog), 0 },
116     { offsetof(xfs_sb_t, sb_logsectsize),0 },
117     { offsetof(xfs_sb_t, sb_logsunit),	 0 },
118     { offsetof(xfs_sb_t, sb_features2),	 0 },
119     { sizeof(xfs_sb_t),			 0 }
120 };
121 
122 /*
123  * Return a pointer to an initialized xfs_mount structure.
124  */
125 xfs_mount_t *
126 xfs_mount_init(void)
127 {
128 	xfs_mount_t *mp;
129 
130 	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
131 
132 	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
133 	spinlock_init(&mp->m_sb_lock, "xfs_sb");
134 	mutex_init(&mp->m_ilock, MUTEX_DEFAULT, "xfs_ilock");
135 	initnsema(&mp->m_growlock, 1, "xfs_grow");
136 	/*
137 	 * Initialize the AIL.
138 	 */
139 	xfs_trans_ail_init(mp);
140 
141 	atomic_set(&mp->m_active_trans, 0);
142 
143 	return mp;
144 }
145 
146 /*
147  * Free up the resources associated with a mount structure.  Assume that
148  * the structure was initially zeroed, so we can tell which fields got
149  * initialized.
150  */
151 void
152 xfs_mount_free(
153 	xfs_mount_t *mp,
154 	int	    remove_bhv)
155 {
156 	if (mp->m_ihash)
157 		xfs_ihash_free(mp);
158 	if (mp->m_chash)
159 		xfs_chash_free(mp);
160 
161 	if (mp->m_perag) {
162 		int	agno;
163 
164 		for (agno = 0; agno < mp->m_maxagi; agno++)
165 			if (mp->m_perag[agno].pagb_list)
166 				kmem_free(mp->m_perag[agno].pagb_list,
167 						sizeof(xfs_perag_busy_t) *
168 							XFS_PAGB_NUM_SLOTS);
169 		kmem_free(mp->m_perag,
170 			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
171 	}
172 
173 	AIL_LOCK_DESTROY(&mp->m_ail_lock);
174 	spinlock_destroy(&mp->m_sb_lock);
175 	mutex_destroy(&mp->m_ilock);
176 	freesema(&mp->m_growlock);
177 	if (mp->m_quotainfo)
178 		XFS_QM_DONE(mp);
179 
180 	if (mp->m_fsname != NULL)
181 		kmem_free(mp->m_fsname, mp->m_fsname_len);
182 
183 	if (remove_bhv) {
184 		struct vfs	*vfsp = XFS_MTOVFS(mp);
185 
186 		bhv_remove_all_vfsops(vfsp, 0);
187 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
188 	}
189 
190 	kmem_free(mp, sizeof(xfs_mount_t));
191 }
192 
193 
194 /*
195  * Check the validity of the SB found.
196  */
197 STATIC int
198 xfs_mount_validate_sb(
199 	xfs_mount_t	*mp,
200 	xfs_sb_t	*sbp)
201 {
202 	/*
203 	 * If the log device and data device have the
204 	 * same device number, the log is internal.
205 	 * Consequently, the sb_logstart should be non-zero.  If
206 	 * we have a zero sb_logstart in this case, we may be trying to mount
207 	 * a volume filesystem in a non-volume manner.
208 	 */
209 	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
210 		cmn_err(CE_WARN, "XFS: bad magic number");
211 		return XFS_ERROR(EWRONGFS);
212 	}
213 
214 	if (!XFS_SB_GOOD_VERSION(sbp)) {
215 		cmn_err(CE_WARN, "XFS: bad version");
216 		return XFS_ERROR(EWRONGFS);
217 	}
218 
219 	if (unlikely(
220 	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
221 		cmn_err(CE_WARN,
222 	"XFS: filesystem is marked as having an external log; "
223 	"specify logdev on the\nmount command line.");
224 		XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(1)",
225 				     XFS_ERRLEVEL_HIGH, mp, sbp);
226 		return XFS_ERROR(EFSCORRUPTED);
227 	}
228 
229 	if (unlikely(
230 	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
231 		cmn_err(CE_WARN,
232 	"XFS: filesystem is marked as having an internal log; "
233 	"don't specify logdev on\nthe mount command line.");
234 		XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(2)",
235 				     XFS_ERRLEVEL_HIGH, mp, sbp);
236 		return XFS_ERROR(EFSCORRUPTED);
237 	}
238 
239 	/*
240 	 * More sanity checking. These were stolen directly from
241 	 * xfs_repair.
242 	 */
243 	if (unlikely(
244 	    sbp->sb_agcount <= 0					||
245 	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
246 	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
247 	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
248 	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
249 	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
250 	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
251 	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
252 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
253 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
254 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
255 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
256 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
257 	    sbp->sb_imax_pct > 100)) {
258 		cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
259 		XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
260 				     XFS_ERRLEVEL_LOW, mp, sbp);
261 		return XFS_ERROR(EFSCORRUPTED);
262 	}
263 
264 	/*
265 	 * Sanity check AG count, size fields against data size field
266 	 */
267 	if (unlikely(
268 	    sbp->sb_dblocks == 0 ||
269 	    sbp->sb_dblocks >
270 	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
271 	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
272 			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
273 		cmn_err(CE_WARN, "XFS: SB sanity check 2 failed");
274 		XFS_ERROR_REPORT("xfs_mount_validate_sb(4)",
275 				 XFS_ERRLEVEL_LOW, mp);
276 		return XFS_ERROR(EFSCORRUPTED);
277 	}
278 
279 	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
280 	ASSERT(sbp->sb_blocklog >= BBSHIFT);
281 
282 #if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
283 	if (unlikely(
284 	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
285 	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
286 #else                  /* Limited by UINT_MAX of sectors */
287 	if (unlikely(
288 	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
289 	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
290 #endif
291 		cmn_err(CE_WARN,
292 	"XFS: File system is too large to be mounted on this system.");
293 		return XFS_ERROR(E2BIG);
294 	}
295 
296 	if (unlikely(sbp->sb_inprogress)) {
297 		cmn_err(CE_WARN, "XFS: file system busy");
298 		XFS_ERROR_REPORT("xfs_mount_validate_sb(5)",
299 				 XFS_ERRLEVEL_LOW, mp);
300 		return XFS_ERROR(EFSCORRUPTED);
301 	}
302 
303 	/*
304 	 * Version 1 directory format has never worked on Linux.
305 	 */
306 	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
307 		cmn_err(CE_WARN,
308 	"XFS: Attempted to mount file system using version 1 directory format");
309 		return XFS_ERROR(ENOSYS);
310 	}
311 
312 	/*
313 	 * Until this is fixed only page-sized or smaller data blocks work.
314 	 */
315 	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
316 		cmn_err(CE_WARN,
317 		"XFS: Attempted to mount file system with blocksize %d bytes",
318 			sbp->sb_blocksize);
319 		cmn_err(CE_WARN,
320 		"XFS: Only page-sized (%d) or less blocksizes currently work.",
321 			PAGE_SIZE);
322 		return XFS_ERROR(ENOSYS);
323 	}
324 
325 	return 0;
326 }
327 
328 xfs_agnumber_t
329 xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount)
330 {
331 	xfs_agnumber_t	index, max_metadata;
332 	xfs_perag_t	*pag;
333 	xfs_agino_t	agino;
334 	xfs_ino_t	ino;
335 	xfs_sb_t	*sbp = &mp->m_sb;
336 	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;
337 
338 	/* Check to see if the filesystem can overflow 32 bit inodes */
339 	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
340 	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
341 
342 	/* Clear the mount flag if no inode can overflow 32 bits
343 	 * on this filesystem, or if specifically requested..
344 	 */
345 	if ((mp->m_flags & XFS_MOUNT_32BITINOOPT) && ino > max_inum) {
346 		mp->m_flags |= XFS_MOUNT_32BITINODES;
347 	} else {
348 		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
349 	}
350 
351 	/* If we can overflow then setup the ag headers accordingly */
352 	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
353 		/* Calculate how much should be reserved for inodes to
354 		 * meet the max inode percentage.
355 		 */
356 		if (mp->m_maxicount) {
357 			__uint64_t	icount;
358 
359 			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
360 			do_div(icount, 100);
361 			icount += sbp->sb_agblocks - 1;
362 			do_div(icount, mp->m_ialloc_blks);
363 			max_metadata = icount;
364 		} else {
365 			max_metadata = agcount;
366 		}
367 		for (index = 0; index < agcount; index++) {
368 			ino = XFS_AGINO_TO_INO(mp, index, agino);
369 			if (ino > max_inum) {
370 				index++;
371 				break;
372 			}
373 
374 			/* This ag is prefered for inodes */
375 			pag = &mp->m_perag[index];
376 			pag->pagi_inodeok = 1;
377 			if (index < max_metadata)
378 				pag->pagf_metadata = 1;
379 		}
380 	} else {
381 		/* Setup default behavior for smaller filesystems */
382 		for (index = 0; index < agcount; index++) {
383 			pag = &mp->m_perag[index];
384 			pag->pagi_inodeok = 1;
385 		}
386 	}
387 	return index;
388 }
389 
390 /*
391  * xfs_xlatesb
392  *
393  *     data       - on disk version of sb
394  *     sb         - a superblock
395  *     dir        - conversion direction: <0 - convert sb to buf
396  *                                        >0 - convert buf to sb
397  *     fields     - which fields to copy (bitmask)
398  */
399 void
400 xfs_xlatesb(
401 	void		*data,
402 	xfs_sb_t	*sb,
403 	int		dir,
404 	__int64_t	fields)
405 {
406 	xfs_caddr_t	buf_ptr;
407 	xfs_caddr_t	mem_ptr;
408 	xfs_sb_field_t	f;
409 	int		first;
410 	int		size;
411 
412 	ASSERT(dir);
413 	ASSERT(fields);
414 
415 	if (!fields)
416 		return;
417 
418 	buf_ptr = (xfs_caddr_t)data;
419 	mem_ptr = (xfs_caddr_t)sb;
420 
421 	while (fields) {
422 		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
423 		first = xfs_sb_info[f].offset;
424 		size = xfs_sb_info[f + 1].offset - first;
425 
426 		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
427 
428 		if (size == 1 || xfs_sb_info[f].type == 1) {
429 			if (dir > 0) {
430 				memcpy(mem_ptr + first, buf_ptr + first, size);
431 			} else {
432 				memcpy(buf_ptr + first, mem_ptr + first, size);
433 			}
434 		} else {
435 			switch (size) {
436 			case 2:
437 				INT_XLATE(*(__uint16_t*)(buf_ptr+first),
438 					  *(__uint16_t*)(mem_ptr+first),
439 					  dir, ARCH_CONVERT);
440 				break;
441 			case 4:
442 				INT_XLATE(*(__uint32_t*)(buf_ptr+first),
443 					  *(__uint32_t*)(mem_ptr+first),
444 					  dir, ARCH_CONVERT);
445 				break;
446 			case 8:
447 				INT_XLATE(*(__uint64_t*)(buf_ptr+first),
448 					  *(__uint64_t*)(mem_ptr+first), dir, ARCH_CONVERT);
449 				break;
450 			default:
451 				ASSERT(0);
452 			}
453 		}
454 
455 		fields &= ~(1LL << f);
456 	}
457 }
458 
459 /*
460  * xfs_readsb
461  *
462  * Does the initial read of the superblock.
463  */
464 int
465 xfs_readsb(xfs_mount_t *mp)
466 {
467 	unsigned int	sector_size;
468 	unsigned int	extra_flags;
469 	xfs_buf_t	*bp;
470 	xfs_sb_t	*sbp;
471 	int		error;
472 
473 	ASSERT(mp->m_sb_bp == NULL);
474 	ASSERT(mp->m_ddev_targp != NULL);
475 
476 	/*
477 	 * Allocate a (locked) buffer to hold the superblock.
478 	 * This will be kept around at all times to optimize
479 	 * access to the superblock.
480 	 */
481 	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
482 	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
483 
484 	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
485 				BTOBB(sector_size), extra_flags);
486 	if (!bp || XFS_BUF_ISERROR(bp)) {
487 		cmn_err(CE_WARN, "XFS: SB read failed");
488 		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
489 		goto fail;
490 	}
491 	ASSERT(XFS_BUF_ISBUSY(bp));
492 	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
493 
494 	/*
495 	 * Initialize the mount structure from the superblock.
496 	 * But first do some basic consistency checking.
497 	 */
498 	sbp = XFS_BUF_TO_SBP(bp);
499 	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
500 
501 	error = xfs_mount_validate_sb(mp, &(mp->m_sb));
502 	if (error) {
503 		cmn_err(CE_WARN, "XFS: SB validate failed");
504 		goto fail;
505 	}
506 
507 	/*
508 	 * We must be able to do sector-sized and sector-aligned IO.
509 	 */
510 	if (sector_size > mp->m_sb.sb_sectsize) {
511 		cmn_err(CE_WARN,
512 			"XFS: device supports only %u byte sectors (not %u)",
513 			sector_size, mp->m_sb.sb_sectsize);
514 		error = ENOSYS;
515 		goto fail;
516 	}
517 
518 	/*
519 	 * If device sector size is smaller than the superblock size,
520 	 * re-read the superblock so the buffer is correctly sized.
521 	 */
522 	if (sector_size < mp->m_sb.sb_sectsize) {
523 		XFS_BUF_UNMANAGE(bp);
524 		xfs_buf_relse(bp);
525 		sector_size = mp->m_sb.sb_sectsize;
526 		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
527 					BTOBB(sector_size), extra_flags);
528 		if (!bp || XFS_BUF_ISERROR(bp)) {
529 			cmn_err(CE_WARN, "XFS: SB re-read failed");
530 			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
531 			goto fail;
532 		}
533 		ASSERT(XFS_BUF_ISBUSY(bp));
534 		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
535 	}
536 
537 	mp->m_sb_bp = bp;
538 	xfs_buf_relse(bp);
539 	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
540 	return 0;
541 
542  fail:
543 	if (bp) {
544 		XFS_BUF_UNMANAGE(bp);
545 		xfs_buf_relse(bp);
546 	}
547 	return error;
548 }
549 
550 
551 /*
552  * xfs_mount_common
553  *
554  * Mount initialization code establishing various mount
555  * fields from the superblock associated with the given
556  * mount structure
557  */
558 void
559 xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
560 {
561 	int	i;
562 
563 	mp->m_agfrotor = mp->m_agirotor = 0;
564 	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
565 	mp->m_maxagi = mp->m_sb.sb_agcount;
566 	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
567 	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
568 	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
569 	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
570 	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
571 	mp->m_litino = sbp->sb_inodesize -
572 		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
573 	mp->m_blockmask = sbp->sb_blocksize - 1;
574 	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
575 	mp->m_blockwmask = mp->m_blockwsize - 1;
576 	INIT_LIST_HEAD(&mp->m_del_inodes);
577 
578 	/*
579 	 * Setup for attributes, in case they get created.
580 	 * This value is for inodes getting attributes for the first time,
581 	 * the per-inode value is for old attribute values.
582 	 */
583 	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
584 	switch (sbp->sb_inodesize) {
585 	case 256:
586 		mp->m_attroffset = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(2);
587 		break;
588 	case 512:
589 	case 1024:
590 	case 2048:
591 		mp->m_attroffset = XFS_BMDR_SPACE_CALC(12);
592 		break;
593 	default:
594 		ASSERT(0);
595 	}
596 	ASSERT(mp->m_attroffset < XFS_LITINO(mp));
597 
598 	for (i = 0; i < 2; i++) {
599 		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
600 			xfs_alloc, i == 0);
601 		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
602 			xfs_alloc, i == 0);
603 	}
604 	for (i = 0; i < 2; i++) {
605 		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
606 			xfs_bmbt, i == 0);
607 		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
608 			xfs_bmbt, i == 0);
609 	}
610 	for (i = 0; i < 2; i++) {
611 		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
612 			xfs_inobt, i == 0);
613 		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
614 			xfs_inobt, i == 0);
615 	}
616 
617 	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
618 	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
619 					sbp->sb_inopblock);
620 	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
621 }
622 /*
623  * xfs_mountfs
624  *
625  * This function does the following on an initial mount of a file system:
626  *	- reads the superblock from disk and init the mount struct
627  *	- if we're a 32-bit kernel, do a size check on the superblock
628  *		so we don't mount terabyte filesystems
629  *	- init mount struct realtime fields
630  *	- allocate inode hash table for fs
631  *	- init directory manager
632  *	- perform recovery and init the log manager
633  */
634 int
635 xfs_mountfs(
636 	vfs_t		*vfsp,
637 	xfs_mount_t	*mp,
638 	int		mfsi_flags)
639 {
640 	xfs_buf_t	*bp;
641 	xfs_sb_t	*sbp = &(mp->m_sb);
642 	xfs_inode_t	*rip;
643 	vnode_t		*rvp = NULL;
644 	int		readio_log, writeio_log;
645 	xfs_daddr_t	d;
646 	__uint64_t	ret64;
647 	__int64_t	update_flags;
648 	uint		quotamount, quotaflags;
649 	int		agno;
650 	int		uuid_mounted = 0;
651 	int		error = 0;
652 
653 	if (mp->m_sb_bp == NULL) {
654 		if ((error = xfs_readsb(mp))) {
655 			return (error);
656 		}
657 	}
658 	xfs_mount_common(mp, sbp);
659 
660 	/*
661 	 * Check if sb_agblocks is aligned at stripe boundary
662 	 * If sb_agblocks is NOT aligned turn off m_dalign since
663 	 * allocator alignment is within an ag, therefore ag has
664 	 * to be aligned at stripe boundary.
665 	 */
666 	update_flags = 0LL;
667 	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
668 		/*
669 		 * If stripe unit and stripe width are not multiples
670 		 * of the fs blocksize turn off alignment.
671 		 */
672 		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
673 		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
674 			if (mp->m_flags & XFS_MOUNT_RETERR) {
675 				cmn_err(CE_WARN,
676 					"XFS: alignment check 1 failed");
677 				error = XFS_ERROR(EINVAL);
678 				goto error1;
679 			}
680 			mp->m_dalign = mp->m_swidth = 0;
681 		} else {
682 			/*
683 			 * Convert the stripe unit and width to FSBs.
684 			 */
685 			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
686 			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
687 				if (mp->m_flags & XFS_MOUNT_RETERR) {
688 					error = XFS_ERROR(EINVAL);
689 					goto error1;
690 				}
691 				xfs_fs_cmn_err(CE_WARN, mp,
692 "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
693 					mp->m_dalign, mp->m_swidth,
694 					sbp->sb_agblocks);
695 
696 				mp->m_dalign = 0;
697 				mp->m_swidth = 0;
698 			} else if (mp->m_dalign) {
699 				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
700 			} else {
701 				if (mp->m_flags & XFS_MOUNT_RETERR) {
702 					xfs_fs_cmn_err(CE_WARN, mp,
703 "stripe alignment turned off: sunit(%d) less than bsize(%d)",
704                                         	mp->m_dalign,
705 						mp->m_blockmask +1);
706 					error = XFS_ERROR(EINVAL);
707 					goto error1;
708 				}
709 				mp->m_swidth = 0;
710 			}
711 		}
712 
713 		/*
714 		 * Update superblock with new values
715 		 * and log changes
716 		 */
717 		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
718 			if (sbp->sb_unit != mp->m_dalign) {
719 				sbp->sb_unit = mp->m_dalign;
720 				update_flags |= XFS_SB_UNIT;
721 			}
722 			if (sbp->sb_width != mp->m_swidth) {
723 				sbp->sb_width = mp->m_swidth;
724 				update_flags |= XFS_SB_WIDTH;
725 			}
726 		}
727 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
728 		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
729 			mp->m_dalign = sbp->sb_unit;
730 			mp->m_swidth = sbp->sb_width;
731 	}
732 
733 	xfs_alloc_compute_maxlevels(mp);
734 	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
735 	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
736 	xfs_ialloc_compute_maxlevels(mp);
737 
738 	if (sbp->sb_imax_pct) {
739 		__uint64_t	icount;
740 
741 		/* Make sure the maximum inode count is a multiple of the
742 		 * units we allocate inodes in.
743 		 */
744 
745 		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
746 		do_div(icount, 100);
747 		do_div(icount, mp->m_ialloc_blks);
748 		mp->m_maxicount = (icount * mp->m_ialloc_blks)  <<
749 				   sbp->sb_inopblog;
750 	} else
751 		mp->m_maxicount = 0;
752 
753 	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
754 
755 	/*
756 	 * XFS uses the uuid from the superblock as the unique
757 	 * identifier for fsid.  We can not use the uuid from the volume
758 	 * since a single partition filesystem is identical to a single
759 	 * partition volume/filesystem.
760 	 */
761 	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
762 	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
763 		if (xfs_uuid_mount(mp)) {
764 			error = XFS_ERROR(EINVAL);
765 			goto error1;
766 		}
767 		uuid_mounted=1;
768 		ret64 = uuid_hash64(&sbp->sb_uuid);
769 		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
770 	}
771 
772 	/*
773 	 * Set the default minimum read and write sizes unless
774 	 * already specified in a mount option.
775 	 * We use smaller I/O sizes when the file system
776 	 * is being used for NFS service (wsync mount option).
777 	 */
778 	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
779 		if (mp->m_flags & XFS_MOUNT_WSYNC) {
780 			readio_log = XFS_WSYNC_READIO_LOG;
781 			writeio_log = XFS_WSYNC_WRITEIO_LOG;
782 		} else {
783 			readio_log = XFS_READIO_LOG_LARGE;
784 			writeio_log = XFS_WRITEIO_LOG_LARGE;
785 		}
786 	} else {
787 		readio_log = mp->m_readio_log;
788 		writeio_log = mp->m_writeio_log;
789 	}
790 
791 	/*
792 	 * Set the number of readahead buffers to use based on
793 	 * physical memory size.
794 	 */
795 	if (xfs_physmem <= 4096)		/* <= 16MB */
796 		mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
797 	else if (xfs_physmem <= 8192)	/* <= 32MB */
798 		mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
799 	else
800 		mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
801 	if (sbp->sb_blocklog > readio_log) {
802 		mp->m_readio_log = sbp->sb_blocklog;
803 	} else {
804 		mp->m_readio_log = readio_log;
805 	}
806 	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
807 	if (sbp->sb_blocklog > writeio_log) {
808 		mp->m_writeio_log = sbp->sb_blocklog;
809 	} else {
810 		mp->m_writeio_log = writeio_log;
811 	}
812 	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
813 
814 	/*
815 	 * Set the inode cluster size based on the physical memory
816 	 * size.  This may still be overridden by the file system
817 	 * block size if it is larger than the chosen cluster size.
818 	 */
819 	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
820 		mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
821 	} else {
822 		mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
823 	}
824 	/*
825 	 * Set whether we're using inode alignment.
826 	 */
827 	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
828 	    mp->m_sb.sb_inoalignmt >=
829 	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
830 		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
831 	else
832 		mp->m_inoalign_mask = 0;
833 	/*
834 	 * If we are using stripe alignment, check whether
835 	 * the stripe unit is a multiple of the inode alignment
836 	 */
837 	if (mp->m_dalign && mp->m_inoalign_mask &&
838 	    !(mp->m_dalign & mp->m_inoalign_mask))
839 		mp->m_sinoalign = mp->m_dalign;
840 	else
841 		mp->m_sinoalign = 0;
842 	/*
843 	 * Check that the data (and log if separate) are an ok size.
844 	 */
845 	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
846 	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
847 		cmn_err(CE_WARN, "XFS: size check 1 failed");
848 		error = XFS_ERROR(E2BIG);
849 		goto error1;
850 	}
851 	error = xfs_read_buf(mp, mp->m_ddev_targp,
852 			     d - XFS_FSS_TO_BB(mp, 1),
853 			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
854 	if (!error) {
855 		xfs_buf_relse(bp);
856 	} else {
857 		cmn_err(CE_WARN, "XFS: size check 2 failed");
858 		if (error == ENOSPC) {
859 			error = XFS_ERROR(E2BIG);
860 		}
861 		goto error1;
862 	}
863 
864 	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
865 	    mp->m_logdev_targp != mp->m_ddev_targp) {
866 		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
867 		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
868 			cmn_err(CE_WARN, "XFS: size check 3 failed");
869 			error = XFS_ERROR(E2BIG);
870 			goto error1;
871 		}
872 		error = xfs_read_buf(mp, mp->m_logdev_targp,
873 				     d - XFS_FSB_TO_BB(mp, 1),
874 				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
875 		if (!error) {
876 			xfs_buf_relse(bp);
877 		} else {
878 			cmn_err(CE_WARN, "XFS: size check 3 failed");
879 			if (error == ENOSPC) {
880 				error = XFS_ERROR(E2BIG);
881 			}
882 			goto error1;
883 		}
884 	}
885 
886 	/*
887 	 * Initialize realtime fields in the mount structure
888 	 */
889 	if ((error = xfs_rtmount_init(mp))) {
890 		cmn_err(CE_WARN, "XFS: RT mount failed");
891 		goto error1;
892 	}
893 
894 	/*
895 	 * For client case we are done now
896 	 */
897 	if (mfsi_flags & XFS_MFSI_CLIENT) {
898 		return(0);
899 	}
900 
901 	/*
902 	 *  Copies the low order bits of the timestamp and the randomly
903 	 *  set "sequence" number out of a UUID.
904 	 */
905 	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);
906 
907 	/*
908 	 *  The vfs structure needs to have a file system independent
909 	 *  way of checking for the invariant file system ID.  Since it
910 	 *  can't look at mount structures it has a pointer to the data
911 	 *  in the mount structure.
912 	 *
913 	 *  File systems that don't support user level file handles (i.e.
914 	 *  all of them except for XFS) will leave vfs_altfsid as NULL.
915 	 */
916 	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
917 	mp->m_dmevmask = 0;	/* not persistent; set after each mount */
918 
919 	/*
920 	 * Select the right directory manager.
921 	 */
922 	mp->m_dirops =
923 		XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
924 			xfsv2_dirops :
925 			xfsv1_dirops;
926 
927 	/*
928 	 * Initialize directory manager's entries.
929 	 */
930 	XFS_DIR_MOUNT(mp);
931 
932 	/*
933 	 * Initialize the attribute manager's entries.
934 	 */
935 	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;
936 
937 	/*
938 	 * Initialize the precomputed transaction reservations values.
939 	 */
940 	xfs_trans_init(mp);
941 
942 	/*
943 	 * Allocate and initialize the inode hash table for this
944 	 * file system.
945 	 */
946 	xfs_ihash_init(mp);
947 	xfs_chash_init(mp);
948 
949 	/*
950 	 * Allocate and initialize the per-ag data.
951 	 */
952 	init_rwsem(&mp->m_peraglock);
953 	mp->m_perag =
954 		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);
955 
956 	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
957 
958 	/*
959 	 * log's mount-time initialization. Perform 1st part recovery if needed
960 	 */
961 	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
962 		error = xfs_log_mount(mp, mp->m_logdev_targp,
963 				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
964 				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
965 		if (error) {
966 			cmn_err(CE_WARN, "XFS: log mount failed");
967 			goto error2;
968 		}
969 	} else {	/* No log has been defined */
970 		cmn_err(CE_WARN, "XFS: no log defined");
971 		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
972 		error = XFS_ERROR(EFSCORRUPTED);
973 		goto error2;
974 	}
975 
976 	/*
977 	 * Get and sanity-check the root inode.
978 	 * Save the pointer to it in the mount structure.
979 	 */
980 	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
981 	if (error) {
982 		cmn_err(CE_WARN, "XFS: failed to read root inode");
983 		goto error3;
984 	}
985 
986 	ASSERT(rip != NULL);
987 	rvp = XFS_ITOV(rip);
988 
989 	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
990 		cmn_err(CE_WARN, "XFS: corrupted root inode");
991 		prdev("Root inode %llu is not a directory",
992 		      mp->m_ddev_targp, (unsigned long long)rip->i_ino);
993 		xfs_iunlock(rip, XFS_ILOCK_EXCL);
994 		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
995 				 mp);
996 		error = XFS_ERROR(EFSCORRUPTED);
997 		goto error4;
998 	}
999 	mp->m_rootip = rip;	/* save it */
1000 
1001 	xfs_iunlock(rip, XFS_ILOCK_EXCL);
1002 
1003 	/*
1004 	 * Initialize realtime inode pointers in the mount structure
1005 	 */
1006 	if ((error = xfs_rtmount_inodes(mp))) {
1007 		/*
1008 		 * Free up the root inode.
1009 		 */
1010 		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
1011 		goto error4;
1012 	}
1013 
1014 	/*
1015 	 * If fs is not mounted readonly, then update the superblock
1016 	 * unit and width changes.
1017 	 */
1018 	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
1019 		xfs_mount_log_sbunit(mp, update_flags);
1020 
1021 	/*
1022 	 * Initialise the XFS quota management subsystem for this mount
1023 	 */
1024 	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
1025 		goto error4;
1026 
1027 	/*
1028 	 * Finish recovering the file system.  This part needed to be
1029 	 * delayed until after the root and real-time bitmap inodes
1030 	 * were consistently read in.
1031 	 */
1032 	error = xfs_log_mount_finish(mp, mfsi_flags);
1033 	if (error) {
1034 		cmn_err(CE_WARN, "XFS: log mount finish failed");
1035 		goto error4;
1036 	}
1037 
1038 	/*
1039 	 * Complete the quota initialisation, post-log-replay component.
1040 	 */
1041 	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
1042 		goto error4;
1043 
1044 	return 0;
1045 
1046  error4:
1047 	/*
1048 	 * Free up the root inode.
1049 	 */
1050 	VN_RELE(rvp);
1051  error3:
1052 	xfs_log_unmount_dealloc(mp);
1053  error2:
1054 	xfs_ihash_free(mp);
1055 	xfs_chash_free(mp);
1056 	for (agno = 0; agno < sbp->sb_agcount; agno++)
1057 		if (mp->m_perag[agno].pagb_list)
1058 			kmem_free(mp->m_perag[agno].pagb_list,
1059 			  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
1060 	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
1061 	mp->m_perag = NULL;
1062 	/* FALLTHROUGH */
1063  error1:
1064 	if (uuid_mounted)
1065 		xfs_uuid_unmount(mp);
1066 	xfs_freesb(mp);
1067 	return error;
1068 }
1069 
1070 /*
1071  * xfs_unmountfs
1072  *
1073  * This flushes out the inodes,dquots and the superblock, unmounts the
1074  * log and makes sure that incore structures are freed.
1075  */
1076 int
1077 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1078 {
1079 	struct vfs	*vfsp = XFS_MTOVFS(mp);
1080 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1081 	int64_t		fsid;
1082 #endif
1083 
1084 	xfs_iflush_all(mp, XFS_FLUSH_ALL);
1085 
1086 	XFS_QM_DQPURGEALL(mp,
1087 		XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA | XFS_QMOPT_UMOUNTING);
1088 
1089 	/*
1090 	 * Flush out the log synchronously so that we know for sure
1091 	 * that nothing is pinned.  This is important because bflush()
1092 	 * will skip pinned buffers.
1093 	 */
1094 	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
1095 
1096 	xfs_binval(mp->m_ddev_targp);
1097 	if (mp->m_rtdev_targp) {
1098 		xfs_binval(mp->m_rtdev_targp);
1099 	}
1100 
1101 	xfs_unmountfs_writesb(mp);
1102 
1103 	xfs_unmountfs_wait(mp); 		/* wait for async bufs */
1104 
1105 	xfs_log_unmount(mp);			/* Done! No more fs ops. */
1106 
1107 	xfs_freesb(mp);
1108 
1109 	/*
1110 	 * All inodes from this mount point should be freed.
1111 	 */
1112 	ASSERT(mp->m_inodes == NULL);
1113 
1114 	/*
1115 	 * We may have bufs that are in the process of getting written still.
1116 	 * We must wait for the I/O completion of those. The sync flag here
1117 	 * does a two pass iteration thru the bufcache.
1118 	 */
1119 	if (XFS_FORCED_SHUTDOWN(mp)) {
1120 		xfs_incore_relse(mp->m_ddev_targp, 0, 1); /* synchronous */
1121 	}
1122 
1123 	xfs_unmountfs_close(mp, cr);
1124 	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1125 		xfs_uuid_unmount(mp);
1126 
1127 #if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1128 	/*
1129 	 * clear all error tags on this filesystem
1130 	 */
1131 	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
1132 	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
1133 #endif
1134 	XFS_IODONE(vfsp);
1135 	xfs_mount_free(mp, 1);
1136 	return 0;
1137 }
1138 
1139 void
1140 xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
1141 {
1142 	if (mp->m_logdev_targp != mp->m_ddev_targp)
1143 		xfs_free_buftarg(mp->m_logdev_targp, 1);
1144 	if (mp->m_rtdev_targp)
1145 		xfs_free_buftarg(mp->m_rtdev_targp, 1);
1146 	xfs_free_buftarg(mp->m_ddev_targp, 0);
1147 }
1148 
1149 void
1150 xfs_unmountfs_wait(xfs_mount_t *mp)
1151 {
1152 	if (mp->m_logdev_targp != mp->m_ddev_targp)
1153 		xfs_wait_buftarg(mp->m_logdev_targp);
1154 	if (mp->m_rtdev_targp)
1155 		xfs_wait_buftarg(mp->m_rtdev_targp);
1156 	xfs_wait_buftarg(mp->m_ddev_targp);
1157 }
1158 
1159 int
1160 xfs_unmountfs_writesb(xfs_mount_t *mp)
1161 {
1162 	xfs_buf_t	*sbp;
1163 	xfs_sb_t	*sb;
1164 	int		error = 0;
1165 
1166 	/*
1167 	 * skip superblock write if fs is read-only, or
1168 	 * if we are doing a forced umount.
1169 	 */
1170 	sbp = xfs_getsb(mp, 0);
1171 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
1172 		XFS_FORCED_SHUTDOWN(mp))) {
1173 		/*
1174 		 * mark shared-readonly if desired
1175 		 */
1176 		sb = XFS_BUF_TO_SBP(sbp);
1177 		if (mp->m_mk_sharedro) {
1178 			if (!(sb->sb_flags & XFS_SBF_READONLY))
1179 				sb->sb_flags |= XFS_SBF_READONLY;
1180 			if (!XFS_SB_VERSION_HASSHARED(sb))
1181 				XFS_SB_VERSION_ADDSHARED(sb);
1182 			xfs_fs_cmn_err(CE_NOTE, mp,
1183 				"Unmounting, marking shared read-only");
1184 		}
1185 		XFS_BUF_UNDONE(sbp);
1186 		XFS_BUF_UNREAD(sbp);
1187 		XFS_BUF_UNDELAYWRITE(sbp);
1188 		XFS_BUF_WRITE(sbp);
1189 		XFS_BUF_UNASYNC(sbp);
1190 		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
1191 		xfsbdstrat(mp, sbp);
1192 		/* Nevermind errors we might get here. */
1193 		error = xfs_iowait(sbp);
1194 		if (error)
1195 			xfs_ioerror_alert("xfs_unmountfs_writesb",
1196 					  mp, sbp, XFS_BUF_ADDR(sbp));
1197 		if (error && mp->m_mk_sharedro)
1198 			xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
1199 	}
1200 	xfs_buf_relse(sbp);
1201 	return (error);
1202 }
1203 
1204 /*
1205  * xfs_mod_sb() can be used to copy arbitrary changes to the
1206  * in-core superblock into the superblock buffer to be logged.
1207  * It does not provide the higher level of locking that is
1208  * needed to protect the in-core superblock from concurrent
1209  * access.
1210  */
1211 void
1212 xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1213 {
1214 	xfs_buf_t	*bp;
1215 	int		first;
1216 	int		last;
1217 	xfs_mount_t	*mp;
1218 	xfs_sb_t	*sbp;
1219 	xfs_sb_field_t	f;
1220 
1221 	ASSERT(fields);
1222 	if (!fields)
1223 		return;
1224 	mp = tp->t_mountp;
1225 	bp = xfs_trans_getsb(tp, mp, 0);
1226 	sbp = XFS_BUF_TO_SBP(bp);
1227 	first = sizeof(xfs_sb_t);
1228 	last = 0;
1229 
1230 	/* translate/copy */
1231 
1232 	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);
1233 
1234 	/* find modified range */
1235 
1236 	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1237 	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1238 	first = xfs_sb_info[f].offset;
1239 
1240 	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1241 	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1242 	last = xfs_sb_info[f + 1].offset - 1;
1243 
1244 	xfs_trans_log_buf(tp, bp, first, last);
1245 }
1246 
1247 /*
1248  * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
1249  * a delta to a specified field in the in-core superblock.  Simply
1250  * switch on the field indicated and apply the delta to that field.
1251  * Fields are not allowed to dip below zero, so if the delta would
1252  * do this do not apply it and return EINVAL.
1253  *
1254  * The SB_LOCK must be held when this routine is called.
1255  */
1256 STATIC int
1257 xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1258 			int delta, int rsvd)
1259 {
1260 	int		scounter;	/* short counter for 32 bit fields */
1261 	long long	lcounter;	/* long counter for 64 bit fields */
1262 	long long	res_used, rem;
1263 
1264 	/*
1265 	 * With the in-core superblock spin lock held, switch
1266 	 * on the indicated field.  Apply the delta to the
1267 	 * proper field.  If the fields value would dip below
1268 	 * 0, then do not apply the delta and return EINVAL.
1269 	 */
1270 	switch (field) {
1271 	case XFS_SBS_ICOUNT:
1272 		lcounter = (long long)mp->m_sb.sb_icount;
1273 		lcounter += delta;
1274 		if (lcounter < 0) {
1275 			ASSERT(0);
1276 			return (XFS_ERROR(EINVAL));
1277 		}
1278 		mp->m_sb.sb_icount = lcounter;
1279 		return (0);
1280 	case XFS_SBS_IFREE:
1281 		lcounter = (long long)mp->m_sb.sb_ifree;
1282 		lcounter += delta;
1283 		if (lcounter < 0) {
1284 			ASSERT(0);
1285 			return (XFS_ERROR(EINVAL));
1286 		}
1287 		mp->m_sb.sb_ifree = lcounter;
1288 		return (0);
1289 	case XFS_SBS_FDBLOCKS:
1290 
1291 		lcounter = (long long)mp->m_sb.sb_fdblocks;
1292 		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1293 
1294 		if (delta > 0) {		/* Putting blocks back */
1295 			if (res_used > delta) {
1296 				mp->m_resblks_avail += delta;
1297 			} else {
1298 				rem = delta - res_used;
1299 				mp->m_resblks_avail = mp->m_resblks;
1300 				lcounter += rem;
1301 			}
1302 		} else {				/* Taking blocks away */
1303 
1304 			lcounter += delta;
1305 
1306 		/*
1307 		 * If were out of blocks, use any available reserved blocks if
1308 		 * were allowed to.
1309 		 */
1310 
1311 			if (lcounter < 0) {
1312 				if (rsvd) {
1313 					lcounter = (long long)mp->m_resblks_avail + delta;
1314 					if (lcounter < 0) {
1315 						return (XFS_ERROR(ENOSPC));
1316 					}
1317 					mp->m_resblks_avail = lcounter;
1318 					return (0);
1319 				} else {	/* not reserved */
1320 					return (XFS_ERROR(ENOSPC));
1321 				}
1322 			}
1323 		}
1324 
1325 		mp->m_sb.sb_fdblocks = lcounter;
1326 		return (0);
1327 	case XFS_SBS_FREXTENTS:
1328 		lcounter = (long long)mp->m_sb.sb_frextents;
1329 		lcounter += delta;
1330 		if (lcounter < 0) {
1331 			return (XFS_ERROR(ENOSPC));
1332 		}
1333 		mp->m_sb.sb_frextents = lcounter;
1334 		return (0);
1335 	case XFS_SBS_DBLOCKS:
1336 		lcounter = (long long)mp->m_sb.sb_dblocks;
1337 		lcounter += delta;
1338 		if (lcounter < 0) {
1339 			ASSERT(0);
1340 			return (XFS_ERROR(EINVAL));
1341 		}
1342 		mp->m_sb.sb_dblocks = lcounter;
1343 		return (0);
1344 	case XFS_SBS_AGCOUNT:
1345 		scounter = mp->m_sb.sb_agcount;
1346 		scounter += delta;
1347 		if (scounter < 0) {
1348 			ASSERT(0);
1349 			return (XFS_ERROR(EINVAL));
1350 		}
1351 		mp->m_sb.sb_agcount = scounter;
1352 		return (0);
1353 	case XFS_SBS_IMAX_PCT:
1354 		scounter = mp->m_sb.sb_imax_pct;
1355 		scounter += delta;
1356 		if (scounter < 0) {
1357 			ASSERT(0);
1358 			return (XFS_ERROR(EINVAL));
1359 		}
1360 		mp->m_sb.sb_imax_pct = scounter;
1361 		return (0);
1362 	case XFS_SBS_REXTSIZE:
1363 		scounter = mp->m_sb.sb_rextsize;
1364 		scounter += delta;
1365 		if (scounter < 0) {
1366 			ASSERT(0);
1367 			return (XFS_ERROR(EINVAL));
1368 		}
1369 		mp->m_sb.sb_rextsize = scounter;
1370 		return (0);
1371 	case XFS_SBS_RBMBLOCKS:
1372 		scounter = mp->m_sb.sb_rbmblocks;
1373 		scounter += delta;
1374 		if (scounter < 0) {
1375 			ASSERT(0);
1376 			return (XFS_ERROR(EINVAL));
1377 		}
1378 		mp->m_sb.sb_rbmblocks = scounter;
1379 		return (0);
1380 	case XFS_SBS_RBLOCKS:
1381 		lcounter = (long long)mp->m_sb.sb_rblocks;
1382 		lcounter += delta;
1383 		if (lcounter < 0) {
1384 			ASSERT(0);
1385 			return (XFS_ERROR(EINVAL));
1386 		}
1387 		mp->m_sb.sb_rblocks = lcounter;
1388 		return (0);
1389 	case XFS_SBS_REXTENTS:
1390 		lcounter = (long long)mp->m_sb.sb_rextents;
1391 		lcounter += delta;
1392 		if (lcounter < 0) {
1393 			ASSERT(0);
1394 			return (XFS_ERROR(EINVAL));
1395 		}
1396 		mp->m_sb.sb_rextents = lcounter;
1397 		return (0);
1398 	case XFS_SBS_REXTSLOG:
1399 		scounter = mp->m_sb.sb_rextslog;
1400 		scounter += delta;
1401 		if (scounter < 0) {
1402 			ASSERT(0);
1403 			return (XFS_ERROR(EINVAL));
1404 		}
1405 		mp->m_sb.sb_rextslog = scounter;
1406 		return (0);
1407 	default:
1408 		ASSERT(0);
1409 		return (XFS_ERROR(EINVAL));
1410 	}
1411 }
1412 
1413 /*
1414  * xfs_mod_incore_sb() is used to change a field in the in-core
1415  * superblock structure by the specified delta.  This modification
1416  * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
1417  * routine to do the work.
1418  */
1419 int
1420 xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
1421 {
1422 	unsigned long	s;
1423 	int	status;
1424 
1425 	s = XFS_SB_LOCK(mp);
1426 	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1427 	XFS_SB_UNLOCK(mp, s);
1428 	return (status);
1429 }
1430 
1431 /*
1432  * xfs_mod_incore_sb_batch() is used to change more than one field
1433  * in the in-core superblock structure at a time.  This modification
1434  * is protected by a lock internal to this module.  The fields and
1435  * changes to those fields are specified in the array of xfs_mod_sb
1436  * structures passed in.
1437  *
1438  * Either all of the specified deltas will be applied or none of
1439  * them will.  If any modified field dips below 0, then all modifications
1440  * will be backed out and EINVAL will be returned.
1441  */
1442 int
1443 xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
1444 {
1445 	unsigned long	s;
1446 	int		status=0;
1447 	xfs_mod_sb_t	*msbp;
1448 
1449 	/*
1450 	 * Loop through the array of mod structures and apply each
1451 	 * individually.  If any fail, then back out all those
1452 	 * which have already been applied.  Do all of this within
1453 	 * the scope of the SB_LOCK so that all of the changes will
1454 	 * be atomic.
1455 	 */
1456 	s = XFS_SB_LOCK(mp);
1457 	msbp = &msb[0];
1458 	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
1459 		/*
1460 		 * Apply the delta at index n.  If it fails, break
1461 		 * from the loop so we'll fall into the undo loop
1462 		 * below.
1463 		 */
1464 		status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
1465 						    msbp->msb_delta, rsvd);
1466 		if (status != 0) {
1467 			break;
1468 		}
1469 	}
1470 
1471 	/*
1472 	 * If we didn't complete the loop above, then back out
1473 	 * any changes made to the superblock.  If you add code
1474 	 * between the loop above and here, make sure that you
1475 	 * preserve the value of status. Loop back until
1476 	 * we step below the beginning of the array.  Make sure
1477 	 * we don't touch anything back there.
1478 	 */
1479 	if (status != 0) {
1480 		msbp--;
1481 		while (msbp >= msb) {
1482 			status = xfs_mod_incore_sb_unlocked(mp,
1483 				    msbp->msb_field, -(msbp->msb_delta), rsvd);
1484 			ASSERT(status == 0);
1485 			msbp--;
1486 		}
1487 	}
1488 	XFS_SB_UNLOCK(mp, s);
1489 	return (status);
1490 }
1491 
1492 /*
1493  * xfs_getsb() is called to obtain the buffer for the superblock.
1494  * The buffer is returned locked and read in from disk.
1495  * The buffer should be released with a call to xfs_brelse().
1496  *
1497  * If the flags parameter is BUF_TRYLOCK, then we'll only return
1498  * the superblock buffer if it can be locked without sleeping.
1499  * If it can't then we'll return NULL.
1500  */
1501 xfs_buf_t *
1502 xfs_getsb(
1503 	xfs_mount_t	*mp,
1504 	int		flags)
1505 {
1506 	xfs_buf_t	*bp;
1507 
1508 	ASSERT(mp->m_sb_bp != NULL);
1509 	bp = mp->m_sb_bp;
1510 	if (flags & XFS_BUF_TRYLOCK) {
1511 		if (!XFS_BUF_CPSEMA(bp)) {
1512 			return NULL;
1513 		}
1514 	} else {
1515 		XFS_BUF_PSEMA(bp, PRIBIO);
1516 	}
1517 	XFS_BUF_HOLD(bp);
1518 	ASSERT(XFS_BUF_ISDONE(bp));
1519 	return (bp);
1520 }
1521 
1522 /*
1523  * Used to free the superblock along various error paths.
1524  */
1525 void
1526 xfs_freesb(
1527 	xfs_mount_t	*mp)
1528 {
1529 	xfs_buf_t	*bp;
1530 
1531 	/*
1532 	 * Use xfs_getsb() so that the buffer will be locked
1533 	 * when we call xfs_buf_relse().
1534 	 */
1535 	bp = xfs_getsb(mp, 0);
1536 	XFS_BUF_UNMANAGE(bp);
1537 	xfs_buf_relse(bp);
1538 	mp->m_sb_bp = NULL;
1539 }
1540 
1541 /*
1542  * See if the UUID is unique among mounted XFS filesystems.
1543  * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
1544  */
1545 STATIC int
1546 xfs_uuid_mount(
1547 	xfs_mount_t	*mp)
1548 {
1549 	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
1550 		cmn_err(CE_WARN,
1551 			"XFS: Filesystem %s has nil UUID - can't mount",
1552 			mp->m_fsname);
1553 		return -1;
1554 	}
1555 	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
1556 		cmn_err(CE_WARN,
1557 			"XFS: Filesystem %s has duplicate UUID - can't mount",
1558 			mp->m_fsname);
1559 		return -1;
1560 	}
1561 	return 0;
1562 }
1563 
1564 /*
1565  * Remove filesystem from the UUID table.
1566  */
1567 STATIC void
1568 xfs_uuid_unmount(
1569 	xfs_mount_t	*mp)
1570 {
1571 	uuid_table_remove(&mp->m_sb.sb_uuid);
1572 }
1573 
1574 /*
1575  * Used to log changes to the superblock unit and width fields which could
1576  * be altered by the mount options. Only the first superblock is updated.
1577  */
1578 STATIC void
1579 xfs_mount_log_sbunit(
1580 	xfs_mount_t	*mp,
1581 	__int64_t	fields)
1582 {
1583 	xfs_trans_t	*tp;
1584 
1585 	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));
1586 
1587 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1588 	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1589 				XFS_DEFAULT_LOG_COUNT)) {
1590 		xfs_trans_cancel(tp, 0);
1591 		return;
1592 	}
1593 	xfs_mod_sb(tp, fields);
1594 	xfs_trans_commit(tp, 0, NULL);
1595 }
1596