xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 4ed925457ab06e83238a5db33e89ccc94b99a713)
1 /*-
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_quota.h"
36 #include "opt_ufs.h"
37 #include "opt_ffs.h"
38 #include "opt_ddb.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 
55 #include <security/mac/mac_framework.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/gjournal.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66 
67 #include <vm/vm.h>
68 #include <vm/uma.h>
69 #include <vm/vm_page.h>
70 
71 #include <geom/geom.h>
72 #include <geom/geom_vfs.h>
73 
74 #include <ddb/ddb.h>
75 
/*
 * Allocation zones for struct inode, struct ufs1_dinode and
 * struct ufs2_dinode objects.  They are created by ffs_mount() the
 * first time it runs (uma_inode == NULL is used as the "not yet
 * created" test).
 */
static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

/*
 * Forward declarations for the file-local helpers and for the VFS
 * operations referenced by the ufs_vfsops vector.
 */
static int	ffs_reload(struct mount *, struct thread *);
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
static vfs_cmount_t ffs_cmount;
static vfs_unmount_t ffs_unmount;
static vfs_mount_t ffs_mount;
static vfs_statfs_t ffs_statfs;
static vfs_fhtovp_t ffs_fhtovp;
static vfs_sync_t ffs_sync;
93 
/*
 * VFS operations vector for this filesystem.  It is registered under
 * the type name "ufs" by the VFS_SET() invocation below.
 */
static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_susp_clean =	process_deferred_inactive,
};

VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);
112 
static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

/*
 * Buffer operations that ffs_mountfs() installs on the device vnode's
 * buffer object (devvp->v_bufobj.bo_ops).  The bdflush hook is the
 * generic bufbdflush when the kernel is built with NO_FFS_SNAPSHOT and
 * ffs_bdflush otherwise.
 */
static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
};
127 
/*
 * NULL-terminated list of mount option names handed to vfs_filteropt()
 * by ffs_mount(); a non-zero result from that check fails the mount
 * with EINVAL.
 */
static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "multilabel",
    "snapshot", "nosuid", "suiddir", "nosymfollow", "sync",
    "union", "nfsv4acls", NULL };
132 
/*
 * Mount entry point (ufs_vfsops.vfs_mount).
 *
 * Handles both a fresh mount and an update of an existing mount
 * (MNT_UPDATE set in mp->mnt_flag):
 *  - the new option list is validated against ffs_opts and the "acls",
 *    "snapshot" and "nfsv4acls" options are folded into mp->mnt_flag;
 *  - for an update: downgrade read/write -> read-only, optional reload
 *    (MNT_RELOAD), upgrade read-only -> read/write, and snapshot
 *    creation (MNT_SNAPSHOT);
 *  - the device named by the "from" option is looked up and access to
 *    it is checked;
 *  - for a new mount, ffs_mountfs() does the actual work.
 *
 * Returns 0 on success or an errno value.
 */
static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = 0;
	struct fs *fs;
	int error, flags;
	u_int mntorflags;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	/* First mount since boot/load: create the inode allocation zones. */
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	/* The "from" option (device path) is mandatory. */
	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	/* Collect option-derived flags; they are OR-ed into mnt_flag below. */
	mntorflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
			printf("WARNING: \"acls\" and \"nfsv4acls\" "
			    "options are mutually exclusive\n");
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

	MNT_ILOCK(mp);
	mp->mnt_flag |= mntorflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		/* Downgrade: currently read/write and "ro" was requested. */
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			/*
			 * Drop our write reference, try to suspend, and
			 * retry until MNTK_SUSPENDED is observed.
			 */
			for (;;) {
				vn_finished_write(mp);
				if ((error = vfs_write_suspend(mp)) != 0)
					return (error);
				MNT_ILOCK(mp);
				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
					/*
					 * Allow the secondary writes
					 * to proceed.
					 */
					mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
					    MNTK_SUSPEND2);
					wakeup(&mp->mnt_flag);
					MNT_IUNLOCK(mp);
					/*
					 * Allow the curthread to
					 * ignore the suspension to
					 * synchronize on-disk state.
					 */
					td->td_pflags |= TDP_IGNSUSP;
					break;
				}
				MNT_IUNLOCK(mp);
				vn_start_write(NULL, &mp, V_WAIT);
			}
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mp->mnt_flag & MNT_SOFTDEP) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp);
				return (error);
			}
			/*
			 * Pending counts should be zero after the flush;
			 * report and clear them if they are not.
			 */
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("%s: %s: blocks %jd files %d\n",
				    fs->fs_fsmnt, "update error",
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				/*
				 * Superblock write failed: take back the
				 * clean marking and stay read/write.
				 */
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp);
				return (error);
			}
			/* Give up the write access count on the provider. */
			DROP_GIANT();
			g_topology_lock();
			g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td)) != 0)
			return (error);
		/* Upgrade: currently read-only and "ro" was not requested. */
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			/* A failed access check can be overridden by privilege. */
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			/*
			 * Recompute FS_UNCLEAN from fs_clean.  An unclean
			 * filesystem may only go read/write when forced, or
			 * when it uses soft updates and is not marked as
			 * needing fsck.
			 */
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not %s\n",
					   fs->fs_fsmnt, "properly dismounted");
				} else {
					printf(
"WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
					    fs->fs_fsmnt);
					return (EPERM);
				}
			}
			DROP_GIANT();
			g_topology_lock();
			/*
			 * If we're the root device, we may not have an E count
			 * yet, get it now.
			 */
			if (ump->um_cp->ace == 0)
				error = g_access(ump->um_cp, 0, 1, 1);
			else
				error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);
			/*
			 * NOTE(review): the access counts obtained just above
			 * are not dropped if vn_start_write() fails here -
			 * confirm whether that is intended.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/* Mark the on-disk superblock as in use (not clean). */
			fs->fs_clean = 0;
			/*
			 * NOTE(review): on failure below the filesystem is
			 * left flagged read/write (fs_ronly == 0, MNT_RDONLY
			 * cleared) - confirm this is the desired state.
			 */
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (mp->mnt_flag & MNT_SOFTDEP) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Likewise keep MNT_NFS4ACLS if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)) != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	/* LOCKLEAF: devvp comes back locked, hence vput() on the error paths. */
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		/* The looked-up vnode was only needed for the comparison. */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			/*
			 * ffs_mountfs() unlocks devvp itself, so only the
			 * reference is dropped here.
			 */
			vrele(devvp);
			return (error);
		}
	}
	vfs_mountedfrom(mp, fspec);
	return (0);
}
444 
445 /*
446  * Compatibility with old mount system call.
447  */
448 
449 static int
450 ffs_cmount(struct mntarg *ma, void *data, int flags)
451 {
452 	struct ufs_args args;
453 	int error;
454 
455 	if (data == NULL)
456 		return (EINVAL);
457 	error = copyin(data, &args, sizeof args);
458 	if (error)
459 		return (error);
460 
461 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
462 	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
463 	error = kernel_mount(ma, flags);
464 
465 	return (error);
466 }
467 
468 /*
469  * Reload all incore data for a filesystem (used after running fsck on
470  * the root filesystem and finding things to fix). The filesystem must
471  * be mounted read-only.
472  *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all cached file data.
 *	5) re-read inode data for all active vnodes.
 * Vnodes that are already doomed are skipped.
 */
481 static int
482 ffs_reload(struct mount *mp, struct thread *td)
483 {
484 	struct vnode *vp, *mvp, *devvp;
485 	struct inode *ip;
486 	void *space;
487 	struct buf *bp;
488 	struct fs *fs, *newfs;
489 	struct ufsmount *ump;
490 	ufs2_daddr_t sblockloc;
491 	int i, blks, size, error;
492 	int32_t *lp;
493 
494 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
495 		return (EINVAL);
496 	ump = VFSTOUFS(mp);
497 	/*
498 	 * Step 1: invalidate all cached meta-data.
499 	 */
500 	devvp = VFSTOUFS(mp)->um_devvp;
501 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
502 	if (vinvalbuf(devvp, 0, 0, 0) != 0)
503 		panic("ffs_reload: dirty1");
504 	VOP_UNLOCK(devvp, 0);
505 
506 	/*
507 	 * Step 2: re-read superblock from disk.
508 	 */
509 	fs = VFSTOUFS(mp)->um_fs;
510 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
511 	    NOCRED, &bp)) != 0)
512 		return (error);
513 	newfs = (struct fs *)bp->b_data;
514 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
515 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
516 	    newfs->fs_bsize > MAXBSIZE ||
517 	    newfs->fs_bsize < sizeof(struct fs)) {
518 			brelse(bp);
519 			return (EIO);		/* XXX needs translation */
520 	}
521 	/*
522 	 * Copy pointer fields back into superblock before copying in	XXX
523 	 * new superblock. These should really be in the ufsmount.	XXX
524 	 * Note that important parameters (eg fs_ncg) are unchanged.
525 	 */
526 	newfs->fs_csp = fs->fs_csp;
527 	newfs->fs_maxcluster = fs->fs_maxcluster;
528 	newfs->fs_contigdirs = fs->fs_contigdirs;
529 	newfs->fs_active = fs->fs_active;
530 	/* The file system is still read-only. */
531 	newfs->fs_ronly = 1;
532 	sblockloc = fs->fs_sblockloc;
533 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
534 	brelse(bp);
535 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
536 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
537 	UFS_LOCK(ump);
538 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
539 		printf("%s: reload pending error: blocks %jd files %d\n",
540 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
541 		    fs->fs_pendinginodes);
542 		fs->fs_pendingblocks = 0;
543 		fs->fs_pendinginodes = 0;
544 	}
545 	UFS_UNLOCK(ump);
546 
547 	/*
548 	 * Step 3: re-read summary information from disk.
549 	 */
550 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
551 	space = fs->fs_csp;
552 	for (i = 0; i < blks; i += fs->fs_frag) {
553 		size = fs->fs_bsize;
554 		if (i + fs->fs_frag > blks)
555 			size = (blks - i) * fs->fs_fsize;
556 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
557 		    NOCRED, &bp);
558 		if (error)
559 			return (error);
560 		bcopy(bp->b_data, space, (u_int)size);
561 		space = (char *)space + size;
562 		brelse(bp);
563 	}
564 	/*
565 	 * We no longer know anything about clusters per cylinder group.
566 	 */
567 	if (fs->fs_contigsumsize > 0) {
568 		lp = fs->fs_maxcluster;
569 		for (i = 0; i < fs->fs_ncg; i++)
570 			*lp++ = fs->fs_contigsumsize;
571 	}
572 
573 loop:
574 	MNT_ILOCK(mp);
575 	MNT_VNODE_FOREACH(vp, mp, mvp) {
576 		VI_LOCK(vp);
577 		if (vp->v_iflag & VI_DOOMED) {
578 			VI_UNLOCK(vp);
579 			continue;
580 		}
581 		MNT_IUNLOCK(mp);
582 		/*
583 		 * Step 4: invalidate all cached file data.
584 		 */
585 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
586 			MNT_VNODE_FOREACH_ABORT(mp, mvp);
587 			goto loop;
588 		}
589 		if (vinvalbuf(vp, 0, 0, 0))
590 			panic("ffs_reload: dirty2");
591 		/*
592 		 * Step 5: re-read inode data for all active vnodes.
593 		 */
594 		ip = VTOI(vp);
595 		error =
596 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
597 		    (int)fs->fs_bsize, NOCRED, &bp);
598 		if (error) {
599 			VOP_UNLOCK(vp, 0);
600 			vrele(vp);
601 			MNT_VNODE_FOREACH_ABORT(mp, mvp);
602 			return (error);
603 		}
604 		ffs_load_inode(bp, ip, fs, ip->i_number);
605 		ip->i_effnlink = ip->i_nlink;
606 		brelse(bp);
607 		VOP_UNLOCK(vp, 0);
608 		vrele(vp);
609 		MNT_ILOCK(mp);
610 	}
611 	MNT_IUNLOCK(mp);
612 	return (0);
613 }
614 
615 /*
616  * Possible superblock locations ordered from most to least likely.
617  */
/* Terminated by -1; ffs_mountfs() probes the offsets in array order. */
static int sblock_try[] = SBLOCKSEARCH;
619 
620 /*
621  * Common code for mount and mountroot
622  */
623 static int
624 ffs_mountfs(devvp, mp, td)
625 	struct vnode *devvp;
626 	struct mount *mp;
627 	struct thread *td;
628 {
629 	struct ufsmount *ump;
630 	struct buf *bp;
631 	struct fs *fs;
632 	struct cdev *dev;
633 	void *space;
634 	ufs2_daddr_t sblockloc;
635 	int error, i, blks, size, ronly;
636 	int32_t *lp;
637 	struct ucred *cred;
638 	struct g_consumer *cp;
639 	struct mount *nmp;
640 
641 	bp = NULL;
642 	ump = NULL;
643 	cred = td ? td->td_ucred : NOCRED;
644 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
645 
646 	dev = devvp->v_rdev;
647 	dev_ref(dev);
648 	DROP_GIANT();
649 	g_topology_lock();
650 	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
651 
652 	/*
653 	 * If we are a root mount, drop the E flag so fsck can do its magic.
654 	 * We will pick it up again when we remount R/W.
655 	 */
656 	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
657 		error = g_access(cp, 0, 0, -1);
658 	g_topology_unlock();
659 	PICKUP_GIANT();
660 	VOP_UNLOCK(devvp, 0);
661 	if (error)
662 		goto out;
663 	if (devvp->v_rdev->si_iosize_max != 0)
664 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
665 	if (mp->mnt_iosize_max > MAXPHYS)
666 		mp->mnt_iosize_max = MAXPHYS;
667 
668 	devvp->v_bufobj.bo_private = cp;
669 	devvp->v_bufobj.bo_ops = &ffs_ops;
670 
671 	fs = NULL;
672 	sblockloc = 0;
673 	/*
674 	 * Try reading the superblock in each of its possible locations.
675 	 */
676 	for (i = 0; sblock_try[i] != -1; i++) {
677 		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
678 			error = EINVAL;
679 			vfs_mount_error(mp,
680 			    "Invalid sectorsize %d for superblock size %d",
681 			    cp->provider->sectorsize, SBLOCKSIZE);
682 			goto out;
683 		}
684 		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
685 		    cred, &bp)) != 0)
686 			goto out;
687 		fs = (struct fs *)bp->b_data;
688 		sblockloc = sblock_try[i];
689 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
690 		     (fs->fs_magic == FS_UFS2_MAGIC &&
691 		      (fs->fs_sblockloc == sblockloc ||
692 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
693 		    fs->fs_bsize <= MAXBSIZE &&
694 		    fs->fs_bsize >= sizeof(struct fs))
695 			break;
696 		brelse(bp);
697 		bp = NULL;
698 	}
699 	if (sblock_try[i] == -1) {
700 		error = EINVAL;		/* XXX needs translation */
701 		goto out;
702 	}
703 	fs->fs_fmod = 0;
704 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
705 	fs->fs_flags &= ~FS_UNCLEAN;
706 	if (fs->fs_clean == 0) {
707 		fs->fs_flags |= FS_UNCLEAN;
708 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
709 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
710 		     (fs->fs_flags & FS_DOSOFTDEP))) {
711 			printf(
712 "WARNING: %s was not properly dismounted\n",
713 			    fs->fs_fsmnt);
714 		} else {
715 			printf(
716 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
717 			    fs->fs_fsmnt);
718 			error = EPERM;
719 			goto out;
720 		}
721 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
722 		    (mp->mnt_flag & MNT_FORCE)) {
723 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
724 			    (intmax_t)fs->fs_pendingblocks,
725 			    fs->fs_pendinginodes);
726 			fs->fs_pendingblocks = 0;
727 			fs->fs_pendinginodes = 0;
728 		}
729 	}
730 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
731 		printf("%s: mount pending error: blocks %jd files %d\n",
732 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
733 		    fs->fs_pendinginodes);
734 		fs->fs_pendingblocks = 0;
735 		fs->fs_pendinginodes = 0;
736 	}
737 	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
738 #ifdef UFS_GJOURNAL
739 		/*
740 		 * Get journal provider name.
741 		 */
742 		size = 1024;
743 		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
744 		if (g_io_getattr("GJOURNAL::provider", cp, &size,
745 		    mp->mnt_gjprovider) == 0) {
746 			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
747 			    M_UFSMNT, M_WAITOK);
748 			MNT_ILOCK(mp);
749 			mp->mnt_flag |= MNT_GJOURNAL;
750 			MNT_IUNLOCK(mp);
751 		} else {
752 			printf(
753 "WARNING: %s: GJOURNAL flag on fs but no gjournal provider below\n",
754 			    mp->mnt_stat.f_mntonname);
755 			free(mp->mnt_gjprovider, M_UFSMNT);
756 			mp->mnt_gjprovider = NULL;
757 		}
758 #else
759 		printf(
760 "WARNING: %s: GJOURNAL flag on fs but no UFS_GJOURNAL support\n",
761 		    mp->mnt_stat.f_mntonname);
762 #endif
763 	} else {
764 		mp->mnt_gjprovider = NULL;
765 	}
766 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
767 	ump->um_cp = cp;
768 	ump->um_bo = &devvp->v_bufobj;
769 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
770 	if (fs->fs_magic == FS_UFS1_MAGIC) {
771 		ump->um_fstype = UFS1;
772 		ump->um_balloc = ffs_balloc_ufs1;
773 	} else {
774 		ump->um_fstype = UFS2;
775 		ump->um_balloc = ffs_balloc_ufs2;
776 	}
777 	ump->um_blkatoff = ffs_blkatoff;
778 	ump->um_truncate = ffs_truncate;
779 	ump->um_update = ffs_update;
780 	ump->um_valloc = ffs_valloc;
781 	ump->um_vfree = ffs_vfree;
782 	ump->um_ifree = ffs_ifree;
783 	ump->um_rdonly = ffs_rdonly;
784 	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
785 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
786 	if (fs->fs_sbsize < SBLOCKSIZE)
787 		bp->b_flags |= B_INVAL | B_NOCACHE;
788 	brelse(bp);
789 	bp = NULL;
790 	fs = ump->um_fs;
791 	ffs_oldfscompat_read(fs, ump, sblockloc);
792 	fs->fs_ronly = ronly;
793 	size = fs->fs_cssize;
794 	blks = howmany(size, fs->fs_fsize);
795 	if (fs->fs_contigsumsize > 0)
796 		size += fs->fs_ncg * sizeof(int32_t);
797 	size += fs->fs_ncg * sizeof(u_int8_t);
798 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
799 	fs->fs_csp = space;
800 	for (i = 0; i < blks; i += fs->fs_frag) {
801 		size = fs->fs_bsize;
802 		if (i + fs->fs_frag > blks)
803 			size = (blks - i) * fs->fs_fsize;
804 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
805 		    cred, &bp)) != 0) {
806 			free(fs->fs_csp, M_UFSMNT);
807 			goto out;
808 		}
809 		bcopy(bp->b_data, space, (u_int)size);
810 		space = (char *)space + size;
811 		brelse(bp);
812 		bp = NULL;
813 	}
814 	if (fs->fs_contigsumsize > 0) {
815 		fs->fs_maxcluster = lp = space;
816 		for (i = 0; i < fs->fs_ncg; i++)
817 			*lp++ = fs->fs_contigsumsize;
818 		space = lp;
819 	}
820 	size = fs->fs_ncg * sizeof(u_int8_t);
821 	fs->fs_contigdirs = (u_int8_t *)space;
822 	bzero(fs->fs_contigdirs, size);
823 	fs->fs_active = NULL;
824 	mp->mnt_data = ump;
825 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
826 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
827 	nmp = NULL;
828 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
829 	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
830 		if (nmp)
831 			vfs_rel(nmp);
832 		vfs_getnewfsid(mp);
833 	}
834 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
835 	MNT_ILOCK(mp);
836 	mp->mnt_flag |= MNT_LOCAL;
837 	MNT_IUNLOCK(mp);
838 	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
839 #ifdef MAC
840 		MNT_ILOCK(mp);
841 		mp->mnt_flag |= MNT_MULTILABEL;
842 		MNT_IUNLOCK(mp);
843 #else
844 		printf(
845 "WARNING: %s: multilabel flag on fs but no MAC support\n",
846 		    mp->mnt_stat.f_mntonname);
847 #endif
848 	}
849 	if ((fs->fs_flags & FS_ACLS) != 0) {
850 #ifdef UFS_ACL
851 		MNT_ILOCK(mp);
852 
853 		if (mp->mnt_flag & MNT_NFS4ACLS)
854 			printf("WARNING: ACLs flag on fs conflicts with "
855 			    "\"nfsv4acls\" mount option; option ignored\n");
856 		mp->mnt_flag &= ~MNT_NFS4ACLS;
857 		mp->mnt_flag |= MNT_ACLS;
858 
859 		MNT_IUNLOCK(mp);
860 #else
861 		printf(
862 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
863 		    mp->mnt_stat.f_mntonname);
864 #endif
865 	}
866 	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
867 #ifdef UFS_ACL
868 		MNT_ILOCK(mp);
869 
870 		if (mp->mnt_flag & MNT_ACLS)
871 			printf("WARNING: NFSv4 ACLs flag on fs conflicts with "
872 			    "\"acls\" mount option; option ignored\n");
873 		mp->mnt_flag &= ~MNT_ACLS;
874 		mp->mnt_flag |= MNT_NFS4ACLS;
875 
876 		MNT_IUNLOCK(mp);
877 #else
878 		printf(
879 "WARNING: %s: NFSv4 ACLs flag on fs but no ACLs support\n",
880 		    mp->mnt_stat.f_mntonname);
881 #endif
882 	}
883 
884 	ump->um_mountp = mp;
885 	ump->um_dev = dev;
886 	ump->um_devvp = devvp;
887 	ump->um_nindir = fs->fs_nindir;
888 	ump->um_bptrtodb = fs->fs_fsbtodb;
889 	ump->um_seqinc = fs->fs_frag;
890 	for (i = 0; i < MAXQUOTAS; i++)
891 		ump->um_quotas[i] = NULLVP;
892 #ifdef UFS_EXTATTR
893 	ufs_extattr_uepm_init(&ump->um_extattr);
894 #endif
895 	/*
896 	 * Set FS local "last mounted on" information (NULL pad)
897 	 */
898 	bzero(fs->fs_fsmnt, MAXMNTLEN);
899 	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
900 
901 	if( mp->mnt_flag & MNT_ROOTFS) {
902 		/*
903 		 * Root mount; update timestamp in mount structure.
904 		 * this will be used by the common root mount code
905 		 * to update the system clock.
906 		 */
907 		mp->mnt_time = fs->fs_time;
908 	}
909 
910 	if (ronly == 0) {
911 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
912 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
913 			free(fs->fs_csp, M_UFSMNT);
914 			goto out;
915 		}
916 		if (fs->fs_snapinum[0] != 0)
917 			ffs_snapshot_mount(mp);
918 		fs->fs_fmod = 1;
919 		fs->fs_clean = 0;
920 		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
921 	}
922 	/*
923 	 * Initialize filesystem stat information in mount struct.
924 	 */
925 	MNT_ILOCK(mp);
926 	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
927 	    MNTK_EXTENDED_SHARED;
928 	MNT_IUNLOCK(mp);
929 #ifdef UFS_EXTATTR
930 #ifdef UFS_EXTATTR_AUTOSTART
931 	/*
932 	 *
933 	 * Auto-starting does the following:
934 	 *	- check for /.attribute in the fs, and extattr_start if so
935 	 *	- for each file in .attribute, enable that file with
936 	 * 	  an attribute of the same name.
937 	 * Not clear how to report errors -- probably eat them.
938 	 * This would all happen while the filesystem was busy/not
939 	 * available, so would effectively be "atomic".
940 	 */
941 	mp->mnt_stat.f_iosize = fs->fs_bsize;
942 	(void) ufs_extattr_autostart(mp, td);
943 #endif /* !UFS_EXTATTR_AUTOSTART */
944 #endif /* !UFS_EXTATTR */
945 	return (0);
946 out:
947 	if (bp)
948 		brelse(bp);
949 	if (cp != NULL) {
950 		DROP_GIANT();
951 		g_topology_lock();
952 		g_vfs_close(cp);
953 		g_topology_unlock();
954 		PICKUP_GIANT();
955 	}
956 	if (ump) {
957 		mtx_destroy(UFS_MTX(ump));
958 		if (mp->mnt_gjprovider != NULL) {
959 			free(mp->mnt_gjprovider, M_UFSMNT);
960 			mp->mnt_gjprovider = NULL;
961 		}
962 		free(ump->um_fs, M_UFSMNT);
963 		free(ump, M_UFSMNT);
964 		mp->mnt_data = NULL;
965 	}
966 	dev_rel(dev);
967 	return (error);
968 }
969 
970 #include <sys/sysctl.h>
/*
 * debug.bigcgs knob.  When non-zero, ffs_oldfscompat_read() saves
 * fs_cgsize in fs_save_cgsize and substitutes fs_bsize for it, and
 * ffs_oldfscompat_write() restores the saved value.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
973 
974 /*
975  * Sanity checks for loading old filesystem superblocks.
976  * See ffs_oldfscompat_write below for unwound actions.
977  *
978  * XXX - Parts get retired eventually.
979  * Unfortunately new bits get added.
980  */
981 static void
982 ffs_oldfscompat_read(fs, ump, sblockloc)
983 	struct fs *fs;
984 	struct ufsmount *ump;
985 	ufs2_daddr_t sblockloc;
986 {
987 	off_t maxfilesize;
988 
989 	/*
990 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
991 	 */
992 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
993 		fs->fs_flags = fs->fs_old_flags;
994 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
995 		fs->fs_sblockloc = sblockloc;
996 	}
997 	/*
998 	 * If not yet done, update UFS1 superblock with new wider fields.
999 	 */
1000 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
1001 		fs->fs_maxbsize = fs->fs_bsize;
1002 		fs->fs_time = fs->fs_old_time;
1003 		fs->fs_size = fs->fs_old_size;
1004 		fs->fs_dsize = fs->fs_old_dsize;
1005 		fs->fs_csaddr = fs->fs_old_csaddr;
1006 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1007 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1008 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1009 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1010 	}
1011 	if (fs->fs_magic == FS_UFS1_MAGIC &&
1012 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
1013 		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
1014 		fs->fs_qbmask = ~fs->fs_bmask;
1015 		fs->fs_qfmask = ~fs->fs_fmask;
1016 	}
1017 	if (fs->fs_magic == FS_UFS1_MAGIC) {
1018 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
1019 		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
1020 		if (fs->fs_maxfilesize > maxfilesize)
1021 			fs->fs_maxfilesize = maxfilesize;
1022 	}
1023 	/* Compatibility for old filesystems */
1024 	if (fs->fs_avgfilesize <= 0)
1025 		fs->fs_avgfilesize = AVFILESIZ;
1026 	if (fs->fs_avgfpdir <= 0)
1027 		fs->fs_avgfpdir = AFPDIR;
1028 	if (bigcgs) {
1029 		fs->fs_save_cgsize = fs->fs_cgsize;
1030 		fs->fs_cgsize = fs->fs_bsize;
1031 	}
1032 }
1033 
1034 /*
1035  * Unwinding superblock updates for old filesystems.
1036  * See ffs_oldfscompat_read above for details.
1037  *
1038  * XXX - Parts get retired eventually.
1039  * Unfortunately new bits get added.
1040  */
1041 static void
1042 ffs_oldfscompat_write(fs, ump)
1043 	struct fs *fs;
1044 	struct ufsmount *ump;
1045 {
1046 
1047 	/*
1048 	 * Copy back UFS2 updated fields that UFS1 inspects.
1049 	 */
1050 	if (fs->fs_magic == FS_UFS1_MAGIC) {
1051 		fs->fs_old_time = fs->fs_time;
1052 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1053 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1054 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1055 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1056 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1057 	}
1058 	if (bigcgs) {
1059 		fs->fs_cgsize = fs->fs_save_cgsize;
1060 		fs->fs_save_cgsize = 0;
1061 	}
1062 }
1063 
1064 /*
1065  * unmount system call
1066  */
1067 static int
1068 ffs_unmount(mp, mntflags)
1069 	struct mount *mp;
1070 	int mntflags;
1071 {
1072 	struct thread *td;
1073 	struct ufsmount *ump = VFSTOUFS(mp);
1074 	struct fs *fs;
1075 	int error, flags, susp;
1076 #ifdef UFS_EXTATTR
1077 	int e_restart;
1078 #endif
1079 
1080 	flags = 0;
1081 	td = curthread;
1082 	fs = ump->um_fs;
1083 	if (mntflags & MNT_FORCE) {
1084 		flags |= FORCECLOSE;
1085 		susp = fs->fs_ronly != 0;
1086 	} else
1087 		susp = 0;
1088 #ifdef UFS_EXTATTR
1089 	if ((error = ufs_extattr_stop(mp, td))) {
1090 		if (error != EOPNOTSUPP)
1091 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
1092 			    error);
1093 		e_restart = 0;
1094 	} else {
1095 		ufs_extattr_uepm_destroy(&ump->um_extattr);
1096 		e_restart = 1;
1097 	}
1098 #endif
1099 	if (susp) {
1100 		/*
1101 		 * dounmount already called vn_start_write().
1102 		 */
1103 		for (;;) {
1104 			vn_finished_write(mp);
1105 			if ((error = vfs_write_suspend(mp)) != 0)
1106 				return (error);
1107 			MNT_ILOCK(mp);
1108 			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1109 				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1110 				    MNTK_SUSPEND2);
1111 				wakeup(&mp->mnt_flag);
1112 				MNT_IUNLOCK(mp);
1113 				td->td_pflags |= TDP_IGNSUSP;
1114 				break;
1115 			}
1116 			MNT_IUNLOCK(mp);
1117 			vn_start_write(NULL, &mp, V_WAIT);
1118 		}
1119 	}
1120 	if (mp->mnt_flag & MNT_SOFTDEP)
1121 		error = softdep_flushfiles(mp, flags, td);
1122 	else
1123 		error = ffs_flushfiles(mp, flags, td);
1124 	if (error != 0 && error != ENXIO)
1125 		goto fail;
1126 
1127 	UFS_LOCK(ump);
1128 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1129 		printf("%s: unmount pending error: blocks %jd files %d\n",
1130 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1131 		    fs->fs_pendinginodes);
1132 		fs->fs_pendingblocks = 0;
1133 		fs->fs_pendinginodes = 0;
1134 	}
1135 	UFS_UNLOCK(ump);
1136 	if (fs->fs_ronly == 0) {
1137 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1138 		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1139 		if (error && error != ENXIO) {
1140 			fs->fs_clean = 0;
1141 			goto fail;
1142 		}
1143 	}
1144 	if (susp) {
1145 		vfs_write_resume(mp);
1146 		vn_start_write(NULL, &mp, V_WAIT);
1147 	}
1148 	DROP_GIANT();
1149 	g_topology_lock();
1150 	g_vfs_close(ump->um_cp);
1151 	g_topology_unlock();
1152 	PICKUP_GIANT();
1153 	vrele(ump->um_devvp);
1154 	dev_rel(ump->um_dev);
1155 	mtx_destroy(UFS_MTX(ump));
1156 	if (mp->mnt_gjprovider != NULL) {
1157 		free(mp->mnt_gjprovider, M_UFSMNT);
1158 		mp->mnt_gjprovider = NULL;
1159 	}
1160 	free(fs->fs_csp, M_UFSMNT);
1161 	free(fs, M_UFSMNT);
1162 	free(ump, M_UFSMNT);
1163 	mp->mnt_data = NULL;
1164 	MNT_ILOCK(mp);
1165 	mp->mnt_flag &= ~MNT_LOCAL;
1166 	MNT_IUNLOCK(mp);
1167 	return (error);
1168 
1169 fail:
1170 	if (susp) {
1171 		vfs_write_resume(mp);
1172 		vn_start_write(NULL, &mp, V_WAIT);
1173 	}
1174 #ifdef UFS_EXTATTR
1175 	if (e_restart) {
1176 		ufs_extattr_uepm_init(&ump->um_extattr);
1177 #ifdef UFS_EXTATTR_AUTOSTART
1178 		(void) ufs_extattr_autostart(mp, td);
1179 #endif
1180 	}
1181 #endif
1182 
1183 	return (error);
1184 }
1185 
1186 /*
1187  * Flush out all the files in a filesystem.
1188  */
1189 int
1190 ffs_flushfiles(mp, flags, td)
1191 	struct mount *mp;
1192 	int flags;
1193 	struct thread *td;
1194 {
1195 	struct ufsmount *ump;
1196 	int error;
1197 
1198 	ump = VFSTOUFS(mp);
1199 #ifdef QUOTA
1200 	if (mp->mnt_flag & MNT_QUOTA) {
1201 		int i;
1202 		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1203 		if (error)
1204 			return (error);
1205 		for (i = 0; i < MAXQUOTAS; i++) {
1206 			quotaoff(td, mp, i);
1207 		}
1208 		/*
1209 		 * Here we fall through to vflush again to ensure
1210 		 * that we have gotten rid of all the system vnodes.
1211 		 */
1212 	}
1213 #endif
1214 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1215 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1216 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1217 			return (error);
1218 		ffs_snapshot_unmount(mp);
1219 		flags |= FORCECLOSE;
1220 		/*
1221 		 * Here we fall through to vflush again to ensure
1222 		 * that we have gotten rid of all the system vnodes.
1223 		 */
1224 	}
1225         /*
1226 	 * Flush all the files.
1227 	 */
1228 	if ((error = vflush(mp, 0, flags, td)) != 0)
1229 		return (error);
1230 	/*
1231 	 * Flush filesystem metadata.
1232 	 */
1233 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1234 	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1235 	VOP_UNLOCK(ump->um_devvp, 0);
1236 	return (error);
1237 }
1238 
1239 /*
1240  * Get filesystem statistics.
1241  */
1242 static int
1243 ffs_statfs(mp, sbp)
1244 	struct mount *mp;
1245 	struct statfs *sbp;
1246 {
1247 	struct ufsmount *ump;
1248 	struct fs *fs;
1249 
1250 	ump = VFSTOUFS(mp);
1251 	fs = ump->um_fs;
1252 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1253 		panic("ffs_statfs");
1254 	sbp->f_version = STATFS_VERSION;
1255 	sbp->f_bsize = fs->fs_fsize;
1256 	sbp->f_iosize = fs->fs_bsize;
1257 	sbp->f_blocks = fs->fs_dsize;
1258 	UFS_LOCK(ump);
1259 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1260 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1261 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1262 	    dbtofsb(fs, fs->fs_pendingblocks);
1263 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1264 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1265 	UFS_UNLOCK(ump);
1266 	sbp->f_namemax = NAME_MAX;
1267 	return (0);
1268 }
1269 
1270 /*
1271  * Go through the disk queues to initiate sandbagged IO;
1272  * go through the inodes to write those that have been modified;
1273  * initiate the writing of the super block if it has been modified.
1274  *
1275  * Note: we are always called with the filesystem marked `MPBUSY'.
      *
      * waitfor selects the mode.  MNT_WAIT locks each vnode with a blocking
      * request and rescans until nothing new turns up; any other value uses
      * LK_NOWAIT.  MNT_SUSPEND is handled as MNT_WAIT and, once the device
      * has no outstanding work, additionally marks the mount suspended.
      * MNT_LAZY skips the fsync of the device vnode.
      *
      * Returns 0, or the most recent error recorded in allerror.  Panics if
      * the superblock is marked modified on a read-only filesystem.
1276  */
1277 static int
1278 ffs_sync(mp, waitfor)
1279 	struct mount *mp;
1280 	int waitfor;
1281 {
1282 	struct vnode *mvp, *vp, *devvp;
1283 	struct thread *td;
1284 	struct inode *ip;
1285 	struct ufsmount *ump = VFSTOUFS(mp);
1286 	struct fs *fs;
1287 	int error, count, wait, lockreq, allerror = 0;
1288 	int suspend;
1289 	int suspended;
1290 	int secondary_writes;
1291 	int secondary_accwrites;
1292 	int softdep_deps;
1293 	int softdep_accdeps;
1294 	struct bufobj *bo;
1295
1296 	td = curthread;
1297 	fs = ump->um_fs;
1298 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1299 		printf("fs = %s\n", fs->fs_fsmnt);
1300 		panic("ffs_sync: rofs mod");
1301 	}
1302 	/*
1303 	 * Write back each (modified) inode.
1304 	 */
1305 	wait = 0;
1306 	suspend = 0;
1307 	suspended = 0;
     	/* Default: do not sleep on a busy vnode lock. */
1308 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1309 	if (waitfor == MNT_SUSPEND) {
1310 		suspend = 1;
1311 		waitfor = MNT_WAIT;
1312 	}
1313 	if (waitfor == MNT_WAIT) {
1314 		wait = 1;
1315 		lockreq = LK_EXCLUSIVE;
1316 	}
     	/* vget() is entered with the vnode interlock held (VI_LOCK below). */
1317 	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
1318 	MNT_ILOCK(mp);
     	/* Every jump to `loop' arrives with the mount interlock held. */
1319 loop:
1320 	/* Grab snapshot of secondary write counts */
1321 	secondary_writes = mp->mnt_secondary_writes;
1322 	secondary_accwrites = mp->mnt_secondary_accwrites;
1323
1324 	/* Grab snapshot of softdep dependency counts */
1325 	MNT_IUNLOCK(mp);
1326 	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
1327 	MNT_ILOCK(mp);
1328
1329 	MNT_VNODE_FOREACH(vp, mp, mvp) {
1330 		/*
1331 		 * Depend on the mntvnode_slock to keep things stable enough
1332 		 * for a quick test.  Since there might be hundreds of
1333 		 * thousands of vnodes, we cannot afford even a subroutine
1334 		 * call unless there's a good chance that we have work to do.
1335 		 */
1336 		VI_LOCK(vp);
1337 		if (vp->v_iflag & VI_DOOMED) {
1338 			VI_UNLOCK(vp);
1339 			continue;
1340 		}
1341 		ip = VTOI(vp);
     		/* Skip vnodes with no pending inode flags and no dirty buffers. */
1342 		if (vp->v_type == VNON || ((ip->i_flag &
1343 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1344 		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1345 			VI_UNLOCK(vp);
1346 			continue;
1347 		}
1348 		MNT_IUNLOCK(mp);
1349 		if ((error = vget(vp, lockreq, td)) != 0) {
1350 			MNT_ILOCK(mp);
     			/*
     			 * ENOENT/ENOLCK: abandon this pass over the vnode
     			 * list and start over; any other error just skips
     			 * this vnode.
     			 */
1351 			if (error == ENOENT || error == ENOLCK) {
1352 				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
1353 				goto loop;
1354 			}
1355 			continue;
1356 		}
1357 		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1358 			allerror = error;
1359 		vput(vp);
1360 		MNT_ILOCK(mp);
1361 	}
1362 	MNT_IUNLOCK(mp);
1363 	/*
1364 	 * Force stale filesystem control information to be flushed.
1365 	 */
1366 	if (waitfor == MNT_WAIT) {
1367 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1368 			allerror = error;
1369 		/* Flushed work items may create new vnodes to clean */
1370 		if (allerror == 0 && count) {
1371 			MNT_ILOCK(mp);
1372 			goto loop;
1373 		}
1374 	}
1375 #ifdef QUOTA
1376 	qsync(mp);
1377 #endif
1378 	devvp = ump->um_devvp;
1379 	bo = &devvp->v_bufobj;
1380 	BO_LOCK(bo);
     	/*
     	 * Unless lazy, fsync the device vnode while it still has writes in
     	 * flight or dirty buffers; a clean MNT_WAIT pass then rescans.
     	 */
1381 	if (waitfor != MNT_LAZY &&
1382 	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1383 		BO_UNLOCK(bo);
1384 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
1385 		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1386 			allerror = error;
1387 		VOP_UNLOCK(devvp, 0);
1388 		if (allerror == 0 && waitfor == MNT_WAIT) {
1389 			MNT_ILOCK(mp);
1390 			goto loop;
1391 		}
1392 	} else if (suspend != 0) {
     		/*
     		 * NOTE(review): softdep_check_suspend() is entered with the
     		 * bufobj lock held and this function never unlocks it on
     		 * this branch, so it presumably drops that lock and returns
     		 * with the mount interlock held (the mtx_assert and the
     		 * bare `goto loop' below both rely on that) - confirm.
     		 */
1393 		if (softdep_check_suspend(mp,
1394 					  devvp,
1395 					  softdep_deps,
1396 					  softdep_accdeps,
1397 					  secondary_writes,
1398 					  secondary_accwrites) != 0)
1399 			goto loop;	/* More work needed */
1400 		mtx_assert(MNT_MTX(mp), MA_OWNED);
1401 		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
1402 		MNT_IUNLOCK(mp);
1403 		suspended = 1;
1404 	} else
1405 		BO_UNLOCK(bo);
1406 	/*
1407 	 * Write back modified superblock.
     	 * `suspended' is passed through so that ffs_sbupdate() can flag
     	 * its buffers B_VALIDSUSPWRT.
1408 	 */
1409 	if (fs->fs_fmod != 0 &&
1410 	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
1411 		allerror = error;
1412 	return (allerror);
1413 }
1414 
1415 int
1416 ffs_vget(mp, ino, flags, vpp)
1417 	struct mount *mp;
1418 	ino_t ino;
1419 	int flags;
1420 	struct vnode **vpp;
1421 {
1422 	return (ffs_vgetf(mp, ino, flags, vpp, 0));
1423 }
1424 
/*
 * Return in *vpp the vnode for inode number `ino' on mount `mp'.
 *
 * The vnode hash is consulted first; on a miss a new vnode and in-core
 * inode are allocated, inserted on the mount's vnode list and into the
 * hash, and filled from the on-disk inode block.
 *
 * flags	lock flags handed to vfs_hash_get()/vfs_hash_insert(); a
 *		shared request is promoted to exclusive before creation.
 * ffs_flags	FFSV_FORCEINSMQ sets VV_FORCEINSMQ on the new vnode for
 *		the duration of the insmntque() call.
 *
 * Returns 0 with *vpp set on success, otherwise an error.  The paths that
 * fail inside this function set *vpp to NULL; the two early returns after
 * the vfs_hash calls leave *vpp as those calls set it.
 */
1425 int
1426 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
1427 	struct mount *mp;
1428 	ino_t ino;
1429 	int flags;
1430 	struct vnode **vpp;
1431 	int ffs_flags;
1432 {
1433 	struct fs *fs;
1434 	struct inode *ip;
1435 	struct ufsmount *ump;
1436 	struct buf *bp;
1437 	struct vnode *vp;
1438 	struct cdev *dev;
1439 	int error;
1440
     	/* Fast path: the vnode is already in the hash (or the lookup failed). */
1441 	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
1442 	if (error || *vpp != NULL)
1443 		return (error);
1444
1445 	/*
1446 	 * We must promote to an exclusive lock for vnode creation.  This
1447 	 * can happen if lookup is passed LOCKSHARED.
1448  	 */
1449 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
1450 		flags &= ~LK_TYPE_MASK;
1451 		flags |= LK_EXCLUSIVE;
1452 	}
1453
1454 	/*
1455 	 * We do not lock vnode creation as it is believed to be too
1456 	 * expensive for such rare case as simultaneous creation of vnode
1457 	 * for same ino by different processes. We just allow them to race
1458 	 * and check later to decide who wins. Let the race begin!
1459 	 */
1460
1461 	ump = VFSTOUFS(mp);
1462 	dev = ump->um_dev;
1463 	fs = ump->um_fs;
1464
1465 	/*
1466 	 * If this malloc() is performed after the getnewvnode()
1467 	 * it might block, leaving a vnode with a NULL v_data to be
1468 	 * found by ffs_sync() if a sync happens to fire right then,
1469 	 * which will cause a panic because ffs_sync() blindly
1470 	 * dereferences vp->v_data (as well it should).
1471 	 */
1472 	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
1473
1474 	/* Allocate a new vnode/inode. */
1475 	if (fs->fs_magic == FS_UFS1_MAGIC)
1476 		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1477 	else
1478 		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1479 	if (error) {
1480 		*vpp = NULL;
1481 		uma_zfree(uma_inode, ip);
1482 		return (error);
1483 	}
1484 	/*
1485 	 * FFS supports recursive locking.
1486 	 */
1487 	VN_LOCK_AREC(vp);
     	/* Cross-link vnode and inode and record where the inode lives. */
1488 	vp->v_data = ip;
1489 	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1490 	ip->i_vnode = vp;
1491 	ip->i_ump = ump;
1492 	ip->i_fs = fs;
1493 	ip->i_dev = dev;
1494 	ip->i_number = ino;
1495 	ip->i_ea_refs = 0;
1496 #ifdef QUOTA
1497 	{
1498 		int i;
1499 		for (i = 0; i < MAXQUOTAS; i++)
1500 			ip->i_dquot[i] = NODQUOT;
1501 	}
1502 #endif
1503
     	/* Lock the new vnode exclusively before it becomes visible. */
1504 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
1505 	if (ffs_flags & FFSV_FORCEINSMQ)
1506 		vp->v_vflag |= VV_FORCEINSMQ;
1507 	error = insmntque(vp, mp);
1508 	if (error != 0) {
     		/*
     		 * Only the inode is released here.
     		 * NOTE(review): vp is presumably disposed of by insmntque()
     		 * itself on failure - confirm.
     		 */
1509 		uma_zfree(uma_inode, ip);
1510 		*vpp = NULL;
1511 		return (error);
1512 	}
1513 	vp->v_vflag &= ~VV_FORCEINSMQ;
     	/*
     	 * Publish the vnode in the hash.  A non-NULL *vpp here means the
     	 * insert handed back a vnode (presumably the winner of the creation
     	 * race described above - confirm); that result is returned as is.
     	 */
1514 	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
1515 	if (error || *vpp != NULL)
1516 		return (error);
1517
1518 	/* Read in the disk contents for the inode, copy into the inode. */
1519 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1520 	    (int)fs->fs_bsize, NOCRED, &bp);
1521 	if (error) {
1522 		/*
1523 		 * The inode does not contain anything useful, so it would
1524 		 * be misleading to leave it on its hash chain. With mode
1525 		 * still zero, it will be unlinked and returned to the free
1526 		 * list by vput().
1527 		 */
1528 		brelse(bp);
1529 		vput(vp);
1530 		*vpp = NULL;
1531 		return (error);
1532 	}
     	/* Allocate the dinode copy matching the filesystem format. */
1533 	if (ip->i_ump->um_fstype == UFS1)
1534 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1535 	else
1536 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1537 	ffs_load_inode(bp, ip, fs, ino);
     	/* Without soft updates the effective link count is the on-disk one. */
1538 	if (DOINGSOFTDEP(vp))
1539 		softdep_load_inodeblock(ip);
1540 	else
1541 		ip->i_effnlink = ip->i_nlink;
1542 	bqrelse(bp);
1543
1544 	/*
1545 	 * Initialize the vnode from the inode, check for aliases.
1546 	 * Note that the underlying vnode may have changed.
1547 	 */
1548 	if (ip->i_ump->um_fstype == UFS1)
1549 		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1550 	else
1551 		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1552 	if (error) {
1553 		vput(vp);
1554 		*vpp = NULL;
1555 		return (error);
1556 	}
1557
1558 	/*
1559 	 * Finish inode initialization.
1560 	 */
1561 	if (vp->v_type != VFIFO) {
1562 		/* FFS supports shared locking for all files except fifos. */
1563 		VN_LOCK_ASHARE(vp);
1564 	}
1565
1566 	/*
1567 	 * Set up a generation number for this inode if it does not
1568 	 * already have one. This should only happen on old filesystems.
     	 * The new number is only pushed to the dinode copy (and the inode
     	 * marked modified) when the mount is not read-only.
1569 	 */
1570 	if (ip->i_gen == 0) {
1571 		ip->i_gen = arc4random() / 2 + 1;
1572 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1573 			ip->i_flag |= IN_MODIFIED;
1574 			DIP_SET(ip, i_gen, ip->i_gen);
1575 		}
1576 	}
1577 	/*
1578 	 * Ensure that uid and gid are correct. This is a temporary
1579 	 * fix until fsck has been changed to do the update.
1580 	 */
1581 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1582 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1583 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1584 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1585 	}						/* XXX */
1586
1587 #ifdef MAC
1588 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1589 		/*
1590 		 * If this vnode is already allocated, and we're running
1591 		 * multi-label, attempt to perform a label association
1592 		 * from the extended attributes on the inode.
1593 		 */
1594 		error = mac_vnode_associate_extattr(mp, vp);
1595 		if (error) {
1596 			/* ufs_inactive will release ip->i_devvp ref. */
1597 			vput(vp);
1598 			*vpp = NULL;
1599 			return (error);
1600 		}
1601 	}
1602 #endif
1603
1604 	*vpp = vp;
1605 	return (0);
1606 }
1607 
1608 /*
1609  * File handle to vnode
1610  *
1611  * Have to be really careful about stale file handles:
1612  * - check that the inode number is valid
1613  * - call ffs_vget() to get the locked inode
1614  * - check for an unallocated inode (i_mode == 0)
1615  * - check that the given client host has export rights and return
1616  *   those rights via. exflagsp and credanonp
1617  */
1618 static int
1619 ffs_fhtovp(mp, fhp, vpp)
1620 	struct mount *mp;
1621 	struct fid *fhp;
1622 	struct vnode **vpp;
1623 {
1624 	struct ufid *ufhp;
1625 	struct fs *fs;
1626 
1627 	ufhp = (struct ufid *)fhp;
1628 	fs = VFSTOUFS(mp)->um_fs;
1629 	if (ufhp->ufid_ino < ROOTINO ||
1630 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1631 		return (ESTALE);
1632 	return (ufs_fhtovp(mp, ufhp, vpp));
1633 }
1634 
/*
 * Initialize the filesystem: set up soft updates first, then run the
 * generic UFS initialization and return its result.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1646 
/*
 * Undo the work of ffs_init(): tear down in the reverse order, UFS
 * first and soft updates second.  The value returned is the one from
 * ufs_uninit().
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1660 
1661 /*
1662  * Write a superblock and associated information back to disk.
      *
      * mp		the ufsmount whose in-core superblock is written.
      * waitfor	MNT_WAIT issues synchronous writes (bwrite) and collects
      *		their errors; any other value issues asynchronous writes
      *		(bawrite).
      * suspended	when non-zero every buffer written is flagged
      *		B_VALIDSUSPWRT.
      *
      * Returns 0 or the last error seen.  If writing the summary area
      * failed, the superblock itself is not written.  Panics when asked to
      * write a read-only filesystem that is not in the middle of an
      * MNT_RDONLY | MNT_UPDATE mount operation.
1663  */
1664 int
1665 ffs_sbupdate(mp, waitfor, suspended)
1666 	struct ufsmount *mp;
1667 	int waitfor;
1668 	int suspended;
1669 {
1670 	struct fs *fs = mp->um_fs;
1671 	struct buf *sbbp;
1672 	struct buf *bp;
1673 	int blks;
1674 	void *space;
1675 	int i, size, error, allerror = 0;
1676
1677 	if (fs->fs_ronly == 1 &&
1678 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1679 	    (MNT_RDONLY | MNT_UPDATE))
1680 		panic("ffs_sbupdate: write read-only filesystem");
1681 	/*
1682 	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1683 	 */
1684 	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1685 	    0, 0, 0);
1686 	/*
1687 	 * First write back the summary information.
     	 * It is copied out of fs_csp one filesystem block at a time; the
     	 * last chunk may be a partial block of whole fragments.
1688 	 */
1689 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1690 	space = fs->fs_csp;
1691 	for (i = 0; i < blks; i += fs->fs_frag) {
1692 		size = fs->fs_bsize;
1693 		if (i + fs->fs_frag > blks)
1694 			size = (blks - i) * fs->fs_fsize;
1695 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1696 		    size, 0, 0, 0);
1697 		bcopy(space, bp->b_data, (u_int)size);
1698 		space = (char *)space + size;
1699 		if (suspended)
1700 			bp->b_flags |= B_VALIDSUSPWRT;
1701 		if (waitfor != MNT_WAIT)
1702 			bawrite(bp);
1703 		else if ((error = bwrite(bp)) != 0)
1704 			allerror = error;
1705 	}
1706 	/*
1707 	 * Now write back the superblock itself. If any errors occurred
1708 	 * up to this point, then fail so that the superblock avoids
1709 	 * being written out as clean.
1710 	 */
1711 	if (allerror) {
1712 		brelse(sbbp);
1713 		return (allerror);
1714 	}
1715 	bp = sbbp;
     	/*
     	 * Force fs_sblockloc to the standard location for the format when
     	 * it differs and FS_FLAGS_UPDATED is not set in fs_flags.
     	 */
1716 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1717 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1718 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1719 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1720 		fs->fs_sblockloc = SBLOCK_UFS1;
1721 	}
1722 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1723 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1724 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1725 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1726 		fs->fs_sblockloc = SBLOCK_UFS2;
1727 	}
     	/* Clear the modified flag and stamp the time before taking the copy. */
1728 	fs->fs_fmod = 0;
1729 	fs->fs_time = time_second;
1730 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
     	/* The compat unwinding is applied to the buffer copy, not to `fs'. */
1731 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1732 	if (suspended)
1733 		bp->b_flags |= B_VALIDSUSPWRT;
1734 	if (waitfor != MNT_WAIT)
1735 		bawrite(bp);
1736 	else if ((error = bwrite(bp)) != 0)
1737 		allerror = error;
1738 	return (allerror);
1739 }
1740 
/*
 * Extended attribute control.  Dispatches to the UFS implementation when
 * the kernel is built with UFS_EXTATTR and to the VFS default otherwise;
 * the callee's return value is passed back unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    int attrnamespace, const char *attrname)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname);
#endif
	return (error);
}
1754 
1755 static void
1756 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1757 {
1758 
1759 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1760 		uma_zfree(uma_ufs1, ip->i_din1);
1761 	else if (ip->i_din2 != NULL)
1762 		uma_zfree(uma_ufs2, ip->i_din2);
1763 	uma_zfree(uma_inode, ip);
1764 }
1765 
/*
 * Run-time switch (debug.dobkgrdwrite, read/write, default 1) consulted by
 * ffs_bufwrite(): when zero the background-copy write path is never taken.
 */
1766 static int dobkgrdwrite = 1;
1767 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1768     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1769 
1770 /*
1771  * Complete a background write started from bwrite.
      *
      * Installed by ffs_bufwrite() as the b_iodone handler of the copy
      * buffer.  `bp' is that copy; the original buffer is found again via
      * its bufobj and logical block number.  Remaining dependencies are
      * handed back to the original, the copy is discarded, and anyone
      * sleeping on the original for the background write is woken.
      * Panics if the original buffer cannot be found.
1772  */
1773 static void
1774 ffs_backgroundwritedone(struct buf *bp)
1775 {
1776 	struct bufobj *bufobj;
1777 	struct buf *origbp;
1778
1779 	/*
1780 	 * Find the original buffer that we are writing.
1781 	 */
1782 	bufobj = bp->b_bufobj;
1783 	BO_LOCK(bufobj);
1784 	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1785 		panic("backgroundwritedone: lost buffer");
1786 	/* Grab an extra reference to be dropped by the bufdone() below. */
1787 	bufobj_wrefl(bufobj);
1788 	BO_UNLOCK(bufobj);
1789 	/*
1790 	 * Process dependencies then return any unfinished ones.
1791 	 */
1792 	if (!LIST_EMPTY(&bp->b_dep))
1793 		buf_complete(bp);
1794 #ifdef SOFTUPDATES
     	/* Whatever buf_complete() left behind goes back to the original. */
1795 	if (!LIST_EMPTY(&bp->b_dep))
1796 		softdep_move_dependencies(bp, origbp);
1797 #endif
1798 	/*
1799 	 * This buffer is marked B_NOCACHE so when it is released
1800 	 * by biodone it will be tossed.
1801 	 */
1802 	bp->b_flags |= B_NOCACHE;
1803 	bp->b_flags &= ~B_CACHE;
1804 	bufdone(bp);
1805 	BO_LOCK(bufobj);
1806 	/*
1807 	 * Clear the BV_BKGRDINPROG flag in the original buffer
1808 	 * and awaken it if it is waiting for the write to complete.
1809 	 * If BV_BKGRDINPROG is not set in the original buffer it must
1810 	 * have been released and re-instantiated - which is not legal.
1811 	 */
1812 	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1813 	    ("backgroundwritedone: lost buffer2"));
1814 	origbp->b_vflags &= ~BV_BKGRDINPROG;
     	/* The wakeup channel matches the msleep() in ffs_bufwrite(). */
1815 	if (origbp->b_vflags & BV_BKGRDWAIT) {
1816 		origbp->b_vflags &= ~BV_BKGRDWAIT;
1817 		wakeup(&origbp->b_xflags);
1818 	}
1819 	BO_UNLOCK(bufobj);
1820 }
1821 
1822 
1823 /*
1824  * Write, release buffer on completion.  (Done by iodone
1825  * if async).  Do not bother writing anything if the buffer
1826  * is invalid.
1827  *
1828  * Note that we set B_CACHE here, indicating that buffer is
1829  * fully valid and thus cacheable.  This is true even of NFS
1830  * now so we set it generally.  This could be set either here
1831  * or in biodone() since the I/O is synchronous.  We put it
1832  * here.
      *
      * FFS adds background writes in front of the generic bufwrite(): an
      * asynchronous write of a buffer flagged BX_BKGRDWRITE is performed on
      * a private copy so the original stays usable while the I/O runs.
      *
      * Returns 0 when the buffer was invalid (released unwritten) or when
      * the write was deferred with bdwrite() because a background write of
      * the same buffer is still in progress; otherwise the result of
      * bufwrite().  Panics if the buffer is not locked on entry.
1833  */
1834 static int
1835 ffs_bufwrite(struct buf *bp)
1836 {
1837 	int oldflags, s;
1838 	struct buf *newbp;
1839
1840 	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1841 	if (bp->b_flags & B_INVAL) {
1842 		brelse(bp);
1843 		return (0);
1844 	}
1845
     	/* NOTE(review): oldflags is assigned but never read in this function. */
1846 	oldflags = bp->b_flags;
1847
1848 	if (!BUF_ISLOCKED(bp))
1849 		panic("bufwrite: buffer is not busy???");
     	/*
     	 * NOTE(review): the matching splx(s) only appears on the early
     	 * bdwrite() return below - confirm whether that is intentional.
     	 */
1850 	s = splbio();
1851 	/*
1852 	 * If a background write is already in progress, delay
1853 	 * writing this block if it is asynchronous. Otherwise
1854 	 * wait for the background write to complete.
1855 	 */
1856 	BO_LOCK(bp->b_bufobj);
1857 	if (bp->b_vflags & BV_BKGRDINPROG) {
1858 		if (bp->b_flags & B_ASYNC) {
1859 			BO_UNLOCK(bp->b_bufobj);
1860 			splx(s);
1861 			bdwrite(bp);
1862 			return (0);
1863 		}
     		/* Sleep until ffs_backgroundwritedone() issues the wakeup. */
1864 		bp->b_vflags |= BV_BKGRDWAIT;
1865 		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1866 		if (bp->b_vflags & BV_BKGRDINPROG)
1867 			panic("bufwrite: still writing");
1868 	}
1869 	BO_UNLOCK(bp->b_bufobj);
1870
1871 	/* Mark the buffer clean */
1872 	bundirty(bp);
1873
1874 	/*
1875 	 * If this buffer is marked for background writing and we
1876 	 * do not have to wait for it, make a copy and write the
1877 	 * copy so as to leave this buffer ready for further use.
1878 	 *
1879 	 * This optimization eats a lot of memory.  If we have a page
1880 	 * or buffer shortfall we can't do it.
1881 	 */
1882 	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1883 	    (bp->b_flags & B_ASYNC) &&
1884 	    !vm_page_count_severe() &&
1885 	    !buf_dirty_count_severe()) {
1886 		KASSERT(bp->b_iodone == NULL,
1887 		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1888
1889 		/* get a new block */
1890 		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
     		/* No buffer available: write the original directly instead. */
1891 		if (newbp == NULL)
1892 			goto normal_write;
1893
1894 		/*
1895 		 * set it to be identical to the old block.  We have to
1896 		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
1897 		 * to avoid confusing the splay tree and gbincore().
1898 		 */
1899 		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1900 		newbp->b_lblkno = bp->b_lblkno;
1901 		newbp->b_xflags |= BX_BKGRDMARKER;
1902 		BO_LOCK(bp->b_bufobj);
1903 		bp->b_vflags |= BV_BKGRDINPROG;
1904 		bgetvp(bp->b_vp, newbp);
1905 		BO_UNLOCK(bp->b_bufobj);
1906 		newbp->b_bufobj = &bp->b_vp->v_bufobj;
1907 		newbp->b_blkno = bp->b_blkno;
1908 		newbp->b_offset = bp->b_offset;
     		/* Completion of the copy is handled by the FFS callback. */
1909 		newbp->b_iodone = ffs_backgroundwritedone;
1910 		newbp->b_flags |= B_ASYNC;
1911 		newbp->b_flags &= ~B_INVAL;
1912
1913 #ifdef SOFTUPDATES
1914 		/* move over the dependencies */
1915 		if (!LIST_EMPTY(&bp->b_dep))
1916 			softdep_move_dependencies(bp, newbp);
1917 #endif
1918
1919 		/*
1920 		 * Initiate write on the copy, release the original to
1921 		 * the B_LOCKED queue so that it cannot go away until
1922 		 * the background write completes. If not locked it could go
1923 		 * away and then be reconstituted while it was being written.
1924 		 * If the reconstituted buffer were written, we could end up
1925 		 * with two background copies being written at the same time.
1926 		 */
1927 		bqrelse(bp);
1928 		bp = newbp;
1929 	}
1930
1931 	/* Let the normal bufwrite do the rest for us */
1932 normal_write:
1933 	return (bufwrite(bp));
1934 }
1935 
1936 
/*
 * Strategy routine for buffers on bufobj `bo'.  Reads go straight to
 * g_vfs_strategy().  Writes are first checked and prepared:
 *  - a write without B_VALIDSUSPWRT whose vnode belongs to a mount that
 *    is MNTK_SUSPENDED panics; the flag is then cleared on the buffer;
 *  - if the bufobj's vnode has VV_COPYONWRITE set and snapshot data is
 *    attached to its device, ffs_copyonwrite() is run for the buffer (or
 *    for every member of a cluster buffer); an error other than
 *    EOPNOTSUPP fails the I/O through bufdone() and nothing is issued;
 *  - with SOFTUPDATES, buf_start() is called for each buffer that
 *    carries dependencies.
 */
1937 static void
1938 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1939 {
1940 	struct vnode *vp;
1941 	int error;
1942 	struct buf *tbp;
1943
1944 	vp = bo->__bo_vnode;
1945 	if (bp->b_iocmd == BIO_WRITE) {
1946 		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
1947 		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
1948 		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
1949 			panic("ffs_geom_strategy: bad I/O");
1950 		bp->b_flags &= ~B_VALIDSUSPWRT;
1951 		if ((vp->v_vflag & VV_COPYONWRITE) &&
1952 		    vp->v_rdev->si_snapdata != NULL) {
1953 			if ((bp->b_flags & B_CLUSTER) != 0) {
     				/*
     				 * The running-buffer accounting for bp is
     				 * dropped while the cluster members are
     				 * processed and added back afterwards.
     				 */
1954 				runningbufwakeup(bp);
1955 				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1956 					      b_cluster.cluster_entry) {
1957 					error = ffs_copyonwrite(vp, tbp);
1958 					if (error != 0 &&
1959 					    error != EOPNOTSUPP) {
1960 						bp->b_error = error;
1961 						bp->b_ioflags |= BIO_ERROR;
1962 						bufdone(bp);
1963 						return;
1964 					}
1965 				}
1966 				bp->b_runningbufspace = bp->b_bufsize;
1967 				atomic_add_long(&runningbufspace,
1968 					       bp->b_runningbufspace);
1969 			} else {
1970 				error = ffs_copyonwrite(vp, bp);
1971 				if (error != 0 && error != EOPNOTSUPP) {
1972 					bp->b_error = error;
1973 					bp->b_ioflags |= BIO_ERROR;
1974 					bufdone(bp);
1975 					return;
1976 				}
1977 			}
1978 		}
1979 #ifdef SOFTUPDATES
1980 		if ((bp->b_flags & B_CLUSTER) != 0) {
1981 			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
1982 				      b_cluster.cluster_entry) {
1983 				if (!LIST_EMPTY(&tbp->b_dep))
1984 					buf_start(tbp);
1985 			}
1986 		} else {
1987 			if (!LIST_EMPTY(&bp->b_dep))
1988 				buf_start(bp);
1989 		}
1990
1991 #endif
1992 	}
     	/* Hand the (possibly prepared) buffer on to GEOM. */
1993 	g_vfs_strategy(bo, bp);
1994 }
1995 
1996 #ifdef	DDB
1997 
1998 static void
1999 db_print_ffs(struct ufsmount *ump)
2000 {
2001 	db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d "
2002 		  "su_req %d\n",
2003 	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
2004 	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
2005 	    ump->softdep_on_worklist_inprogress, ump->softdep_deps,
2006 	    ump->softdep_req);
2007 }
2008 
2009 DB_SHOW_COMMAND(ffs, db_show_ffs)
2010 {
2011 	struct mount *mp;
2012 	struct ufsmount *ump;
2013 
2014 	if (have_addr) {
2015 		ump = VFSTOUFS((struct mount *)addr);
2016 		db_print_ffs(ump);
2017 		return;
2018 	}
2019 
2020 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2021 		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2022 			db_print_ffs(VFSTOUFS(mp));
2023 	}
2024 }
2025 
2026 #endif	/* DDB */
2027