xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision b52b9d56d4e96089873a75f9e29062eec19fabba)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_quota.h"
38 #include "opt_ufs.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/stdint.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/disk.h>
53 #include <sys/malloc.h>
54 #include <sys/mutex.h>
55 
56 #include <ufs/ufs/extattr.h>
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/ufsmount.h>
59 #include <ufs/ufs/inode.h>
60 #include <ufs/ufs/ufs_extern.h>
61 
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_page.h>
67 
/*
 * Malloc type for FFS in-core inodes.  ffs_mount() passes it to
 * ffs_mountfs() as the malloctype argument, which records it in
 * ump->um_malloctype.
 */
68 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
69 
/*
 * Prototypes for the file-local helpers and for ffs_reload(), which is
 * the only routine here declared without "static".
 */
70 static int	ffs_sbupdate(struct ufsmount *, int);
71        int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
73 		    ufs2_daddr_t);
74 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
75 static int	ffs_init(struct vfsconf *);
76 static int	ffs_uninit(struct vfsconf *);
77 
/*
 * Filesystem operations vector.  The initializer is positional, so the
 * entries must stay in the member order of struct vfsops (declared
 * outside this file).  The final slot is ufs_extattrctl when the kernel
 * is built with UFS_EXTATTR and vfs_stdextattrctl otherwise.
 */
78 static struct vfsops ufs_vfsops = {
79 	ffs_mount,
80 	ufs_start,
81 	ffs_unmount,
82 	ufs_root,
83 	ufs_quotactl,
84 	ffs_statfs,
85 	ffs_sync,
86 	ffs_vget,
87 	ffs_fhtovp,
88 	vfs_stdcheckexp,
89 	ffs_vptofh,
90 	ffs_init,
91 	ffs_uninit,
92 #ifdef UFS_EXTATTR
93 	ufs_extattrctl,
94 #else
95 	vfs_stdextattrctl,
96 #endif
97 };
98 
/*
 * NOTE(review): presumably registers ufs_vfsops under the filesystem
 * name "ufs"; VFS_SET is defined outside this file -- confirm there.
 */
99 VFS_SET(ufs_vfsops, ufs, 0);
100 
101 /*
102  * ffs_mount
103  *
104  * Called when mounting local physical media
105  *
106  * PARAMETERS:
107  *		mountroot
108  *			mp	mount point structure
109  *			path	NULL (flag for root mount!!!)
110  *			data	<unused>
111  *			ndp	<unused>
112  *			p	process (user credentials check [statfs])
113  *
114  *		mount
115  *			mp	mount point structure
116  *			path	path to mount point
117  *			data	pointer to argument struct in user space
118  *			ndp	mount point namei() return (used for
119  *				credentials on reload), reused to look
120  *				up block device.
121  *			p	process (user credentials check)
122  *
123  * RETURNS:	0	Success
124  *		!0	error number (errno.h)
125  *
126  * LOCK STATE:
127  *
128  *		ENTRY
129  *			mount point is locked
130  *		EXIT
131  *			mount point is locked
132  *
133  * NOTES:
134  *		A NULL path can be used for a flag since the mount
135  *		system call will fail with EFAULT in copyinstr in
136  *		namei() if it is a genuine NULL from the user.
137  */
138 int
139 ffs_mount(mp, path, data, ndp, td)
140         struct mount		*mp;	/* mount struct pointer*/
141         char			*path;	/* path to mount point*/
142         caddr_t			data;	/* arguments to FS specific mount*/
143         struct nameidata	*ndp;	/* mount point credentials*/
144         struct thread		*td;	/* process requesting mount*/
145 {
146 	size_t size;
147 	struct vnode *devvp;
148 	struct ufs_args args;
149 	struct ufsmount *ump = 0;
150 	struct fs *fs;
151 	int error, flags;
152 	mode_t accessmode;
153 
154 	/*
155 	 * Use NULL path to indicate we are mounting the root filesystem.
156 	 */
157 	if (path == NULL) {
158 		if ((error = bdevvp(rootdev, &rootvp))) {
159 			printf("ffs_mountroot: can't find rootvp\n");
160 			return (error);
161 		}
162 
163 		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
164 			return (error);
165 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
166 		return (0);
167 	}
168 
169 	/*
170 	 * Mounting non-root filesystem or updating a filesystem
171 	 */
172 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
173 		return (error);
174 
175 	/*
176 	 * If updating, check whether changing from read-only to
177 	 * read/write; if there is no device name, that's all we do.
178 	 */
179 	if (mp->mnt_flag & MNT_UPDATE) {
180 		ump = VFSTOUFS(mp);
181 		fs = ump->um_fs;
182 		devvp = ump->um_devvp;
183 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
184 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
185 				return (error);
186 			/*
187 			 * Flush any dirty data.
188 			 */
189 			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
190 			/*
191 			 * Check for and optionally get rid of files open
192 			 * for writing.
193 			 */
194 			flags = WRITECLOSE;
195 			if (mp->mnt_flag & MNT_FORCE)
196 				flags |= FORCECLOSE;
197 			if (mp->mnt_flag & MNT_SOFTDEP) {
198 				error = softdep_flushfiles(mp, flags, td);
199 			} else {
200 				error = ffs_flushfiles(mp, flags, td);
201 			}
202 			if (error) {
203 				vn_finished_write(mp);
204 				return (error);
205 			}
206 			if (fs->fs_pendingblocks != 0 ||
207 			    fs->fs_pendinginodes != 0) {
208 				printf("%s: %s: blocks %jd files %d\n",
209 				    fs->fs_fsmnt, "update error",
210 				    (intmax_t)fs->fs_pendingblocks,
211 				    fs->fs_pendinginodes);
212 				fs->fs_pendingblocks = 0;
213 				fs->fs_pendinginodes = 0;
214 			}
215 			fs->fs_ronly = 1;
216 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
217 				fs->fs_clean = 1;
218 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
219 				fs->fs_ronly = 0;
220 				fs->fs_clean = 0;
221 				vn_finished_write(mp);
222 				return (error);
223 			}
224 			vn_finished_write(mp);
225 		}
226 		if ((mp->mnt_flag & MNT_RELOAD) &&
227 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
228 			return (error);
229 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
230 			/*
231 			 * If upgrade to read-write by non-root, then verify
232 			 * that user has necessary permissions on the device.
233 			 */
234 			if (suser(td)) {
235 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
236 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
237 				    td->td_ucred, td)) != 0) {
238 					VOP_UNLOCK(devvp, 0, td);
239 					return (error);
240 				}
241 				VOP_UNLOCK(devvp, 0, td);
242 			}
243 			fs->fs_flags &= ~FS_UNCLEAN;
244 			if (fs->fs_clean == 0) {
245 				fs->fs_flags |= FS_UNCLEAN;
246 				if ((mp->mnt_flag & MNT_FORCE) ||
247 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
248 				     (fs->fs_flags & FS_DOSOFTDEP))) {
249 					printf("WARNING: %s was not %s\n",
250 					   fs->fs_fsmnt, "properly dismounted");
251 				} else {
252 					printf(
253 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
254 					    fs->fs_fsmnt);
255 					return (EPERM);
256 				}
257 			}
258 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
259 				return (error);
260 			fs->fs_ronly = 0;
261 			fs->fs_clean = 0;
262 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
263 				vn_finished_write(mp);
264 				return (error);
265 			}
266 			/* check to see if we need to start softdep */
267 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
268 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
269 				vn_finished_write(mp);
270 				return (error);
271 			}
272 			if (fs->fs_snapinum[0] != 0)
273 				ffs_snapshot_mount(mp);
274 			vn_finished_write(mp);
275 		}
276 		/*
277 		 * Soft updates is incompatible with "async",
278 		 * so if we are doing softupdates stop the user
279 		 * from setting the async flag in an update.
280 		 * Softdep_mount() clears it in an initial mount
281 		 * or ro->rw remount.
282 		 */
283 		if (mp->mnt_flag & MNT_SOFTDEP)
284 			mp->mnt_flag &= ~MNT_ASYNC;
285 		/*
286 		 * If not updating name, process export requests.
287 		 */
288 		if (args.fspec == 0)
289 			return (vfs_export(mp, &args.export));
290 		/*
291 		 * If this is a snapshot request, take the snapshot.
292 		 */
293 		if (mp->mnt_flag & MNT_SNAPSHOT)
294 			return (ffs_snapshot(mp, args.fspec));
295 	}
296 
297 	/*
298 	 * Not an update, or updating the name: look up the name
299 	 * and verify that it refers to a sensible block device.
300 	 */
301 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
302 	if ((error = namei(ndp)) != 0)
303 		return (error);
304 	NDFREE(ndp, NDF_ONLY_PNBUF);
305 	devvp = ndp->ni_vp;
306 	if (!vn_isdisk(devvp, &error)) {
307 		vrele(devvp);
308 		return (error);
309 	}
310 
311 	/*
312 	 * If mount by non-root, then verify that user has necessary
313 	 * permissions on the device.
314 	 */
315 	if (suser(td)) {
316 		accessmode = VREAD;
317 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
318 			accessmode |= VWRITE;
319 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
320 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
321 			vput(devvp);
322 			return (error);
323 		}
324 		VOP_UNLOCK(devvp, 0, td);
325 	}
326 
327 	if (mp->mnt_flag & MNT_UPDATE) {
328 		/*
329 		 * Update only
330 		 *
331 		 * If it's not the same vnode, or at least the same device
332 		 * then it's not correct.
333 		 */
334 
335 		if (devvp != ump->um_devvp &&
336 		    devvp->v_rdev != ump->um_devvp->v_rdev)
337 			error = EINVAL;	/* needs translation */
338 		vrele(devvp);
339 		if (error)
340 			return (error);
341 	} else {
342 		/*
343 		 * New mount
344 		 *
345 		 * We need the name for the mount point (also used for
346 		 * "last mounted on") copied in. If an error occurs,
347 		 * the mount point is discarded by the upper level code.
348 		 * Note that vfs_mount() populates f_mntonname for us.
349 		 */
350 		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
351 			vrele(devvp);
352 			return (error);
353 		}
354 	}
355 	/*
356 	 * Save "mounted from" device name info for mount point (NULL pad).
357 	 */
358 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
359 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
360 	/*
361 	 * Initialize filesystem stat information in mount struct.
362 	 */
363 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
364 	return (0);
365 }
366 
367 /*
368  * Reload all incore data for a filesystem (used after running fsck on
369  * the root filesystem and finding things to fix). The filesystem must
370  * be mounted read-only.
371  *
372  * Things to do to update the mount:
373  *	1) invalidate all cached meta-data.
374  *	2) re-read superblock from disk.
375  *	3) re-read summary information from disk.
376  *	4) invalidate all inactive vnodes.
377  *	5) invalidate all cached file data.
378  *	6) re-read inode data for all active vnodes.
379  */
380 int
381 ffs_reload(mp, cred, td)
382 	struct mount *mp;
383 	struct ucred *cred;
384 	struct thread *td;
385 {
386 	struct vnode *vp, *nvp, *devvp;
387 	struct inode *ip;
388 	void *space;
389 	struct buf *bp;
390 	struct fs *fs, *newfs;
391 	dev_t dev;
392 	ufs2_daddr_t sblockloc;
393 	int i, blks, size, error;
394 	int32_t *lp;
395 
396 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
397 		return (EINVAL);
398 	/*
399 	 * Step 1: invalidate all cached meta-data.
400 	 */
401 	devvp = VFSTOUFS(mp)->um_devvp;
402 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
403 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
404 	VOP_UNLOCK(devvp, 0, td);
405 	if (error)
406 		panic("ffs_reload: dirty1");
407 
408 	dev = devvp->v_rdev;
409 
410 	/*
411 	 * Only VMIO the backing device if the backing device is a real
412 	 * block device.
413 	 */
414 	if (vn_isdisk(devvp, NULL)) {
415 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
416 		vfs_object_create(devvp, td, td->td_ucred);
417 		mtx_lock(&devvp->v_interlock);
418 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
419 	}
420 
421 	/*
422 	 * Step 2: re-read superblock from disk.
423 	 */
424 	fs = VFSTOUFS(mp)->um_fs;
425 	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
426 	    NOCRED, &bp)) != 0)
427 		return (error);
428 	newfs = (struct fs *)bp->b_data;
429 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
430 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
431 	    newfs->fs_bsize > MAXBSIZE ||
432 	    newfs->fs_bsize < sizeof(struct fs)) {
433 			brelse(bp);
434 			return (EIO);		/* XXX needs translation */
435 	}
436 	/*
437 	 * Copy pointer fields back into superblock before copying in	XXX
438 	 * new superblock. These should really be in the ufsmount.	XXX
439 	 * Note that important parameters (eg fs_ncg) are unchanged.
440 	 */
441 	newfs->fs_csp = fs->fs_csp;
442 	newfs->fs_maxcluster = fs->fs_maxcluster;
443 	newfs->fs_contigdirs = fs->fs_contigdirs;
444 	newfs->fs_active = fs->fs_active;
445 	sblockloc = fs->fs_sblockloc;
446 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
447 	brelse(bp);
448 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
449 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
450 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
451 		printf("%s: reload pending error: blocks %jd files %d\n",
452 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
453 		    fs->fs_pendinginodes);
454 		fs->fs_pendingblocks = 0;
455 		fs->fs_pendinginodes = 0;
456 	}
457 
458 	/*
459 	 * Step 3: re-read summary information from disk.
460 	 */
461 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
462 	space = fs->fs_csp;
463 	for (i = 0; i < blks; i += fs->fs_frag) {
464 		size = fs->fs_bsize;
465 		if (i + fs->fs_frag > blks)
466 			size = (blks - i) * fs->fs_fsize;
467 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
468 		    NOCRED, &bp);
469 		if (error)
470 			return (error);
471 		bcopy(bp->b_data, space, (u_int)size);
472 		space = (char *)space + size;
473 		brelse(bp);
474 	}
475 	/*
476 	 * We no longer know anything about clusters per cylinder group.
477 	 */
478 	if (fs->fs_contigsumsize > 0) {
479 		lp = fs->fs_maxcluster;
480 		for (i = 0; i < fs->fs_ncg; i++)
481 			*lp++ = fs->fs_contigsumsize;
482 	}
483 
484 loop:
485 	mtx_lock(&mntvnode_mtx);
486 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
487 		if (vp->v_mount != mp) {
488 			mtx_unlock(&mntvnode_mtx);
489 			goto loop;
490 		}
491 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
492 		mtx_unlock(&mntvnode_mtx);
493 		/*
494 		 * Step 4: invalidate all inactive vnodes.
495 		 */
496 		if (vrecycle(vp, NULL, td))
497 			goto loop;
498 		/*
499 		 * Step 5: invalidate all cached file data.
500 		 */
501 		mtx_lock(&vp->v_interlock);
502 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
503 			goto loop;
504 		}
505 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
506 			panic("ffs_reload: dirty2");
507 		/*
508 		 * Step 6: re-read inode data for all active vnodes.
509 		 */
510 		ip = VTOI(vp);
511 		error =
512 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
513 		    (int)fs->fs_bsize, NOCRED, &bp);
514 		if (error) {
515 			vput(vp);
516 			return (error);
517 		}
518 		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
519 		ip->i_effnlink = ip->i_nlink;
520 		brelse(bp);
521 		vput(vp);
522 		mtx_lock(&mntvnode_mtx);
523 	}
524 	mtx_unlock(&mntvnode_mtx);
525 	return (0);
526 }
527 
528 /*
529  * Possible superblock locations ordered from most to least likely.
 *
 * The values come from SBLOCKSEARCH (defined outside this file).  The
 * list ends with -1, which is where ffs_mountfs() stops scanning.
530  */
531 static int sblock_try[] = SBLOCKSEARCH;
532 
533 /*
534  * Common code for mount and mountroot
 *
 * Opens devvp, finds and checks the superblock, allocates the ufsmount
 * and the in-core copy of the superblock, reads the summary information
 * and attaches the result to mp.  When the mount is read-write it also
 * starts soft updates and snapshots if the superblock asks for them and
 * writes the superblock back marked not clean.
 *
 * Returns 0 on success or an errno value.  Failures before the device
 * is opened return directly; later failures unwind through "out".
535  */
536 int
537 ffs_mountfs(devvp, mp, td, malloctype)
538 	struct vnode *devvp;
539 	struct mount *mp;
540 	struct thread *td;
541 	struct malloc_type *malloctype;
542 {
543 	struct ufsmount *ump;
544 	struct buf *bp;
545 	struct fs *fs;
546 	dev_t dev;
547 	void *space;
548 	ufs2_daddr_t sblockloc;
549 	int error, i, blks, size, ronly;
550 	int32_t *lp;
551 	struct ucred *cred;
552 	size_t strsize;
553 	int ncount;
554 	u_int sectorsize;
555 
556 	dev = devvp->v_rdev;
557 	cred = td ? td->td_ucred : NOCRED;
558 	/*
559 	 * Disallow multiple mounts of the same device.
560 	 * Disallow mounting of a device that is currently in use
561 	 * (except for root, which might share swap device for miniroot).
562 	 * Flush out any old buffers remaining from a previous use.
563 	 */
564 	error = vfs_mountedon(devvp);
565 	if (error)
566 		return (error);
567 	ncount = vcount(devvp);
568 
569 	if (ncount > 1 && devvp != rootvp)
570 		return (EBUSY);
571 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
572 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
573 	VOP_UNLOCK(devvp, 0, td);
574 	if (error)
575 		return (error);
576 
577 	/*
578 	 * Only VMIO the backing device if the backing device is a real
579 	 * block device.
580 	 * Note that it is optional that the backing device be VMIOed.  This
581 	 * increases the opportunity for metadata caching.
582 	 */
583 	if (vn_isdisk(devvp, NULL)) {
584 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
585 		vfs_object_create(devvp, td, cred);
586 		mtx_lock(&devvp->v_interlock);
587 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
588 	}
589 
590 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
591 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
592 	/*
593 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
594 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
595 	 * XXX: start to avoid getting trashed later on.
596 	 */
597 #ifdef notyet
598 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
599 #else
600 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
601 #endif
602 	VOP_UNLOCK(devvp, 0, td);
603 	if (error)
604 		return (error);
	/*
	 * Take the I/O size limit from the device when it advertises
	 * one, and never let it exceed MAXPHYS.
	 */
605 	if (devvp->v_rdev->si_iosize_max != 0)
606 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
607 	if (mp->mnt_iosize_max > MAXPHYS)
608 		mp->mnt_iosize_max = MAXPHYS;
609 
	/*
	 * Ask the device for its sector size and fall back to DEV_BSIZE
	 * if the ioctl fails.  The result divides each sblock_try[]
	 * entry to form the block number handed to bread() below.
	 */
610 	if (VOP_IOCTL(devvp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
611 	    FREAD, cred, td) != 0)
612 		size = DEV_BSIZE;
613 	else
614 		size = sectorsize;
615 
616 	bp = NULL;
617 	ump = NULL;
618 	fs = NULL;
619 	sblockloc = 0;
620 	/*
621 	 * Try reading the superblock in each of its possible locations.
	 * A candidate is accepted when its magic number is UFS1, or UFS2
	 * with a recorded location matching where it was read from, and
	 * its block size lies between sizeof(struct fs) and MAXBSIZE.
622 	 */
623 	for (i = 0; sblock_try[i] != -1; i++) {
624 		if ((error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE,
625 		    cred, &bp)) != 0)
626 			goto out;
627 		fs = (struct fs *)bp->b_data;
628 		sblockloc = numfrags(fs, sblock_try[i]);
629 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
630 		     (fs->fs_magic == FS_UFS2_MAGIC &&
631 		      fs->fs_sblockloc == sblockloc)) &&
632 		    fs->fs_bsize <= MAXBSIZE &&
633 		    fs->fs_bsize >= sizeof(struct fs))
634 			break;
635 		brelse(bp);
636 		bp = NULL;
637 	}
638 	if (sblock_try[i] == -1) {
639 		error = EINVAL;		/* XXX needs translation */
640 		goto out;
641 	}
642 	fs->fs_fmod = 0;
643 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
644 	fs->fs_flags &= ~FS_UNCLEAN;
	/*
	 * An unclean filesystem may still be mounted read-only, with
	 * MNT_FORCE, or when it uses soft updates and is not flagged as
	 * needing fsck; any other read-write mount is refused with EPERM.
	 */
645 	if (fs->fs_clean == 0) {
646 		fs->fs_flags |= FS_UNCLEAN;
647 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
648 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
649 		     (fs->fs_flags & FS_DOSOFTDEP))) {
650 			printf(
651 "WARNING: %s was not properly dismounted\n",
652 			    fs->fs_fsmnt);
653 		} else {
654 			printf(
655 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
656 			    fs->fs_fsmnt);
657 			error = EPERM;
658 			goto out;
659 		}
660 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
661 		    (mp->mnt_flag & MNT_FORCE)) {
662 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
663 			    (intmax_t)fs->fs_pendingblocks,
664 			    fs->fs_pendinginodes);
665 			fs->fs_pendingblocks = 0;
666 			fs->fs_pendinginodes = 0;
667 		}
668 	}
669 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
670 		printf("%s: mount pending error: blocks %jd files %d\n",
671 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
672 		    fs->fs_pendinginodes);
673 		fs->fs_pendingblocks = 0;
674 		fs->fs_pendinginodes = 0;
675 	}
	/*
	 * Allocate the ufsmount and a private copy of the superblock.
	 * "fs" still points into the buffer until the copy is made.
	 */
676 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
677 	ump->um_malloctype = malloctype;
678 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
679 	    M_WAITOK);
680 	if (fs->fs_magic == FS_UFS1_MAGIC) {
681 		ump->um_fstype = UFS1;
682 		ump->um_balloc = ffs_balloc_ufs1;
683 	} else {
684 		ump->um_fstype = UFS2;
685 		ump->um_balloc = ffs_balloc_ufs2;
686 	}
687 	ump->um_blkatoff = ffs_blkatoff;
688 	ump->um_truncate = ffs_truncate;
689 	ump->um_update = ffs_update;
690 	ump->um_valloc = ffs_valloc;
691 	ump->um_vfree = ffs_vfree;
692 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
693 	if (fs->fs_sbsize < SBLOCKSIZE)
694 		bp->b_flags |= B_INVAL | B_NOCACHE;
695 	brelse(bp);
696 	bp = NULL;
697 	fs = ump->um_fs;	/* from here on use the private copy */
698 	ffs_oldfscompat_read(fs, ump, sblockloc);
699 	fs->fs_ronly = ronly;
	/*
	 * One allocation, based at fs_csp, holds the summary information
	 * followed by the fs_maxcluster array (only when
	 * fs_contigsumsize > 0) and then the fs_contigdirs array.
	 */
700 	size = fs->fs_cssize;
701 	blks = howmany(size, fs->fs_fsize);
702 	if (fs->fs_contigsumsize > 0)
703 		size += fs->fs_ncg * sizeof(int32_t);
704 	size += fs->fs_ncg * sizeof(u_int8_t);
705 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
706 	fs->fs_csp = space;
707 	for (i = 0; i < blks; i += fs->fs_frag) {
708 		size = fs->fs_bsize;
709 		if (i + fs->fs_frag > blks)
710 			size = (blks - i) * fs->fs_fsize;
711 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
712 		    cred, &bp)) != 0) {
			/* "out" does not free fs_csp, so do it here. */
713 			free(fs->fs_csp, M_UFSMNT);
714 			goto out;
715 		}
716 		bcopy(bp->b_data, space, (u_int)size);
717 		space = (char *)space + size;
718 		brelse(bp);
719 		bp = NULL;
720 	}
721 	if (fs->fs_contigsumsize > 0) {
722 		fs->fs_maxcluster = lp = space;
723 		for (i = 0; i < fs->fs_ncg; i++)
724 			*lp++ = fs->fs_contigsumsize;
725 		space = lp;
726 	}
727 	size = fs->fs_ncg * sizeof(u_int8_t);
728 	fs->fs_contigdirs = (u_int8_t *)space;
729 	bzero(fs->fs_contigdirs, size);
730 	fs->fs_active = NULL;
731 	mp->mnt_data = (qaddr_t)ump;
	/*
	 * Use the filesystem id stored in the superblock unless either
	 * word is zero or the id already belongs to a mounted
	 * filesystem; in those cases have a new one generated.
	 */
732 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
733 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
734 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
735 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
736 		vfs_getnewfsid(mp);
737 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
738 	mp->mnt_flag |= MNT_LOCAL;
739 	ump->um_mountp = mp;
740 	ump->um_dev = dev;
741 	ump->um_devvp = devvp;
742 	ump->um_nindir = fs->fs_nindir;
743 	ump->um_bptrtodb = fs->fs_fsbtodb;
744 	ump->um_seqinc = fs->fs_frag;
745 	for (i = 0; i < MAXQUOTAS; i++)
746 		ump->um_quotas[i] = NULLVP;
747 #ifdef UFS_EXTATTR
748 	ufs_extattr_uepm_init(&ump->um_extattr);
749 #endif
750 	devvp->v_rdev->si_mountpoint = mp;
751 
752 	/*
753 	 * Set FS local "last mounted on" information (NULL pad)
754 	 */
755 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
756 			fs->fs_fsmnt,			/* copy area*/
757 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
758 			&strsize);			/* real size*/
759 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
760 
761 	if( mp->mnt_flag & MNT_ROOTFS) {
762 		/*
763 		 * Root mount; update timestamp in mount structure.
764 		 * this will be used by the common root mount code
765 		 * to update the system clock.
766 		 */
767 		mp->mnt_time = fs->fs_time;
768 	}
769 
770 	if (ronly == 0) {
771 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
772 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
773 			free(fs->fs_csp, M_UFSMNT);
774 			goto out;
775 		}
776 		if (fs->fs_snapinum[0] != 0)
777 			ffs_snapshot_mount(mp);
		/*
		 * Mark the filesystem modified and not clean, then
		 * write the superblock; the result is ignored.
		 */
778 		fs->fs_fmod = 1;
779 		fs->fs_clean = 0;
780 		(void) ffs_sbupdate(ump, MNT_WAIT);
781 	}
782 #ifdef UFS_EXTATTR
783 #ifdef UFS_EXTATTR_AUTOSTART
784 	/*
785 	 *
786 	 * Auto-starting does the following:
787 	 *	- check for /.attribute in the fs, and extattr_start if so
788 	 *	- for each file in .attribute, enable that file with
789 	 * 	  an attribute of the same name.
790 	 * Not clear how to report errors -- probably eat them.
791 	 * This would all happen while the filesystem was busy/not
792 	 * available, so would effectively be "atomic".
793 	 */
794 	(void) ufs_extattr_autostart(mp, td);
795 #endif /* !UFS_EXTATTR_AUTOSTART */
796 #endif /* !UFS_EXTATTR */
797 	return (0);
/*
 * Error unwind: detach the mount point from the device, release any
 * buffer still held, close the device, and free the ufsmount together
 * with its superblock copy.  fs_csp is not freed here; the paths that
 * allocated it free it before jumping to this label.
 */
798 out:
799 	devvp->v_rdev->si_mountpoint = NULL;
800 	if (bp)
801 		brelse(bp);
802 	/* XXX: see comment above VOP_OPEN */
803 #ifdef notyet
804 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
805 #else
806 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
807 #endif
808 	if (ump) {
809 		free(ump->um_fs, M_UFSMNT);
810 		free(ump, M_UFSMNT);
811 		mp->mnt_data = (qaddr_t)0;
812 	}
813 	return (error);
814 }
815 
816 #include <sys/sysctl.h>
/*
 * Debugging switch, declared read/write (CTLFLAG_RW) under the _debug
 * sysctl node.  While it is non-zero, ffs_oldfscompat_read() saves
 * fs_cgsize in fs_save_cgsize and replaces it with fs_bsize, and
 * ffs_oldfscompat_write() puts the saved value back.
 */
817 int bigcgs = 0;
818 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
819 
820 /*
821  * Sanity checks for loading old filesystem superblocks.
822  * See ffs_oldfscompat_write below for unwound actions.
823  *
824  * XXX - Parts get retired eventually.
825  * Unfortunately new bits get added.
826  */
827 static void
828 ffs_oldfscompat_read(fs, ump, sblockloc)
829 	struct fs *fs;
830 	struct ufsmount *ump;
831 	ufs2_daddr_t sblockloc;
832 {
833 	off_t maxfilesize;
834 
835 	/*
836 	 * If not yet done, update UFS1 superblock with new wider fields.
837 	 */
838 	if (fs->fs_magic == FS_UFS1_MAGIC &&
839 	    fs->fs_sblockloc != sblockloc) {
840 		fs->fs_maxbsize = fs->fs_bsize;
841 		fs->fs_sblockloc = sblockloc;
842 		fs->fs_time = fs->fs_old_time;
843 		fs->fs_size = fs->fs_old_size;
844 		fs->fs_dsize = fs->fs_old_dsize;
845 		fs->fs_csaddr = fs->fs_old_csaddr;
846 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
847 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
848 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
849 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
850 	}
851 	if (fs->fs_magic == FS_UFS1_MAGIC &&
852 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
853 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
854 		fs->fs_qbmask = ~fs->fs_bmask;
855 		fs->fs_qfmask = ~fs->fs_fmask;
856 	}
857 	if (fs->fs_magic == FS_UFS1_MAGIC) {
858 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
859 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
860 		if (fs->fs_maxfilesize > maxfilesize)
861 			fs->fs_maxfilesize = maxfilesize;
862 	}
863 	/* Compatibility for old filesystems */
864 	if (fs->fs_avgfilesize <= 0)
865 		fs->fs_avgfilesize = AVFILESIZ;
866 	if (fs->fs_avgfpdir <= 0)
867 		fs->fs_avgfpdir = AFPDIR;
868 	if (bigcgs) {
869 		fs->fs_save_cgsize = fs->fs_cgsize;
870 		fs->fs_cgsize = fs->fs_bsize;
871 	}
872 }
873 
874 /*
875  * Unwinding superblock updates for old filesystems.
876  * See ffs_oldfscompat_read above for details.
877  *
878  * XXX - Parts get retired eventually.
879  * Unfortunately new bits get added.
880  */
881 static void
882 ffs_oldfscompat_write(fs, ump)
883 	struct fs *fs;
884 	struct ufsmount *ump;
885 {
886 
887 	/*
888 	 * Copy back UFS2 updated fields that UFS1 inspects.
889 	 */
890 	if (fs->fs_magic == FS_UFS1_MAGIC) {
891 		fs->fs_old_time = fs->fs_time;
892 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
893 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
894 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
895 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
896 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
897 	}
898 	if (bigcgs) {
899 		fs->fs_cgsize = fs->fs_save_cgsize;
900 		fs->fs_save_cgsize = 0;
901 	}
902 }
903 
904 /*
905  * unmount system call
906  */
907 int
908 ffs_unmount(mp, mntflags, td)
909 	struct mount *mp;
910 	int mntflags;
911 	struct thread *td;
912 {
913 	struct ufsmount *ump = VFSTOUFS(mp);
914 	struct fs *fs;
915 	int error, flags;
916 
917 	flags = 0;
918 	if (mntflags & MNT_FORCE) {
919 		flags |= FORCECLOSE;
920 	}
921 #ifdef UFS_EXTATTR
922 	if ((error = ufs_extattr_stop(mp, td))) {
923 		if (error != EOPNOTSUPP)
924 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
925 			    error);
926 	} else {
927 		ufs_extattr_uepm_destroy(&ump->um_extattr);
928 	}
929 #endif
930 	if (mp->mnt_flag & MNT_SOFTDEP) {
931 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
932 			return (error);
933 	} else {
934 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
935 			return (error);
936 	}
937 	fs = ump->um_fs;
938 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
939 		printf("%s: unmount pending error: blocks %jd files %d\n",
940 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
941 		    fs->fs_pendinginodes);
942 		fs->fs_pendingblocks = 0;
943 		fs->fs_pendinginodes = 0;
944 	}
945 	if (fs->fs_ronly == 0) {
946 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
947 		error = ffs_sbupdate(ump, MNT_WAIT);
948 		if (error) {
949 			fs->fs_clean = 0;
950 			return (error);
951 		}
952 	}
953 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
954 
955 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
956 	/* XXX: see comment above VOP_OPEN */
957 #ifdef notyet
958 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
959 		NOCRED, td);
960 #else
961 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
962 #endif
963 
964 	vrele(ump->um_devvp);
965 
966 	free(fs->fs_csp, M_UFSMNT);
967 	free(fs, M_UFSMNT);
968 	free(ump, M_UFSMNT);
969 	mp->mnt_data = (qaddr_t)0;
970 	mp->mnt_flag &= ~MNT_LOCAL;
971 	return (error);
972 }
973 
974 /*
975  * Flush out all the files in a filesystem.
976  */
977 int
978 ffs_flushfiles(mp, flags, td)
979 	struct mount *mp;
980 	int flags;
981 	struct thread *td;
982 {
983 	struct ufsmount *ump;
984 	int error;
985 
986 	ump = VFSTOUFS(mp);
987 #ifdef QUOTA
988 	if (mp->mnt_flag & MNT_QUOTA) {
989 		int i;
990 		error = vflush(mp, 0, SKIPSYSTEM|flags);
991 		if (error)
992 			return (error);
993 		for (i = 0; i < MAXQUOTAS; i++) {
994 			if (ump->um_quotas[i] == NULLVP)
995 				continue;
996 			quotaoff(td, mp, i);
997 		}
998 		/*
999 		 * Here we fall through to vflush again to ensure
1000 		 * that we have gotten rid of all the system vnodes.
1001 		 */
1002 	}
1003 #endif
1004 	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
1005 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1006 			return (error);
1007 		ffs_snapshot_unmount(mp);
1008 		/*
1009 		 * Here we fall through to vflush again to ensure
1010 		 * that we have gotten rid of all the system vnodes.
1011 		 */
1012 	}
1013         /*
1014 	 * Flush all the files.
1015 	 */
1016 	if ((error = vflush(mp, 0, flags)) != 0)
1017 		return (error);
1018 	/*
1019 	 * Flush filesystem metadata.
1020 	 */
1021 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1022 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1023 	VOP_UNLOCK(ump->um_devvp, 0, td);
1024 	return (error);
1025 }
1026 
1027 /*
1028  * Get filesystem statistics.
1029  */
1030 int
1031 ffs_statfs(mp, sbp, td)
1032 	struct mount *mp;
1033 	struct statfs *sbp;
1034 	struct thread *td;
1035 {
1036 	struct ufsmount *ump;
1037 	struct fs *fs;
1038 
1039 	ump = VFSTOUFS(mp);
1040 	fs = ump->um_fs;
1041 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1042 		panic("ffs_statfs");
1043 	sbp->f_bsize = fs->fs_fsize;
1044 	sbp->f_iosize = fs->fs_bsize;
1045 	sbp->f_blocks = fs->fs_dsize;
1046 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1047 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1048 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1049 	    dbtofsb(fs, fs->fs_pendingblocks);
1050 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1051 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1052 	if (sbp != &mp->mnt_stat) {
1053 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1054 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1055 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1056 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1057 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1058 	}
1059 	return (0);
1060 }
1061 
1062 /*
1063  * Go through the disk queues to initiate sandbagged IO;
1064  * go through the inodes to write those that have been modified;
1065  * initiate the writing of the super block if it has been modified.
1066  *
1067  * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * mp is the mount to sync; cred and td are handed on to VOP_FSYNC().
 * When waitfor is MNT_WAIT, vnode locks are requested without LK_NOWAIT,
 * the softdep worklist is flushed, and the vnode scan is restarted
 * whenever that flush reports work done or the device vnode had to be
 * fsynced.  For any other waitfor value vnode locks are only tried
 * (LK_NOWAIT).  Returns 0, or the most recently recorded error from
 * VOP_FSYNC(), softdep_flushworklist() or ffs_sbupdate().
1068  */
1069 int
1070 ffs_sync(mp, waitfor, cred, td)
1071 	struct mount *mp;
1072 	int waitfor;
1073 	struct ucred *cred;
1074 	struct thread *td;
1075 {
1076 	struct vnode *nvp, *vp, *devvp;
1077 	struct inode *ip;
1078 	struct ufsmount *ump = VFSTOUFS(mp);
1079 	struct fs *fs;
1080 	int error, count, wait, lockreq, allerror = 0;
1081 
1082 	fs = ump->um_fs;
1083 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1084 		printf("fs = %s\n", fs->fs_fsmnt);
1085 		panic("ffs_sync: rofs mod");
1086 	}
1087 	/*
1088 	 * Write back each (modified) inode.
1089 	 */
1090 	wait = 0;
1091 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1092 	if (waitfor == MNT_WAIT) {
1093 		wait = 1;
1094 		lockreq = LK_EXCLUSIVE;
1095 	}
1096 	mtx_lock(&mntvnode_mtx);
1097 loop:
1098 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1099 		/*
1100 		 * If the vnode that we are about to sync is no longer
1101 		 * associated with this mount point, start over.
1102 		 */
1103 		if (vp->v_mount != mp)
1104 			goto loop;
1105 
1106 		/*
1107 		 * Depend on the mntvnode_mtx to keep things stable enough
1108 		 * for a quick test.  Since there might be hundreds of
1109 		 * thousands of vnodes, we cannot afford even a subroutine
1110 		 * call unless there's a good chance that we have work to do.
1111 		 */
1112 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1113 		ip = VTOI(vp);
1114 		if (vp->v_type == VNON || ((ip->i_flag &
1115 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1116 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1117 			continue;
1118 		}
		/*
		 * mntvnode_mtx is dropped around the actual sync work and
		 * retaken before the list is touched again.  Non-VCHR
		 * vnodes are locked through vget() and fsynced; VCHR vnodes
		 * only get UFS_UPDATE().
		 */
1119 		if (vp->v_type != VCHR) {
1120 			mtx_unlock(&mntvnode_mtx);
1121 			if ((error = vget(vp, lockreq, td)) != 0) {
1122 				mtx_lock(&mntvnode_mtx);
				/*
				 * ENOENT restarts the scan; any other vget()
				 * failure just leaves this vnode unsynced.
				 */
1123 				if (error == ENOENT)
1124 					goto loop;
1125 			} else {
1126 				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1127 					allerror = error;
1128 				VOP_UNLOCK(vp, 0, td);
1129 				vrele(vp);
1130 				mtx_lock(&mntvnode_mtx);
1131 			}
1132 		} else {
1133 			mtx_unlock(&mntvnode_mtx);
1134 			UFS_UPDATE(vp, wait);
1135 			mtx_lock(&mntvnode_mtx);
1136 		}
		/*
		 * The mutex was released above; if vp's successor is no
		 * longer the one saved in nvp, restart from the list head.
		 */
1137 		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1138 			goto loop;
1139 	}
1140 	mtx_unlock(&mntvnode_mtx);
1141 	/*
1142 	 * Force stale filesystem control information to be flushed.
1143 	 */
1144 	if (waitfor == MNT_WAIT) {
1145 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1146 			allerror = error;
1147 		/* Flushed work items may create new vnodes to clean */
1148 		if (count) {
1149 			mtx_lock(&mntvnode_mtx);
1150 			goto loop;
1151 		}
1152 	}
1153 #ifdef QUOTA
1154 	qsync(mp);
1155 #endif
	/*
	 * Unless this is a lazy sync, fsync the device vnode when it has
	 * writes in progress or dirty buffers; v_interlock covers the test.
	 * In MNT_WAIT mode the vnode scan is then repeated.
	 */
1156 	devvp = ump->um_devvp;
1157 	mtx_lock(&devvp->v_interlock);
1158 	if (waitfor != MNT_LAZY &&
1159 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1160 		mtx_unlock(&devvp->v_interlock);
1161 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1162 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1163 			allerror = error;
1164 		VOP_UNLOCK(devvp, 0, td);
1165 		if (waitfor == MNT_WAIT) {
1166 			mtx_lock(&mntvnode_mtx);
1167 			goto loop;
1168 		}
1169 	} else
1170 		mtx_unlock(&devvp->v_interlock);
1171 	/*
1172 	 * Write back modified superblock.
1173 	 */
1174 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1175 		allerror = error;
1176 	return (allerror);
1177 }
1178 
/*
 * Return in *vpp the vnode for inode number ino on mount mp.
 *
 * The inode hash is consulted first; on a miss a new vnode/inode pair is
 * allocated, locked exclusively, inserted into the hash and filled in
 * from the on-disk inode.  flags is passed through to ufs_ihashget() and
 * ufs_ihashins().  Returns 0 on success or an error number; every failure
 * path after the initial hash lookup sets *vpp to NULL.  The exclusive
 * lock taken on a newly created vnode is not released on the success
 * path of this function.
 */
1179 int
1180 ffs_vget(mp, ino, flags, vpp)
1181 	struct mount *mp;
1182 	ino_t ino;
1183 	int flags;
1184 	struct vnode **vpp;
1185 {
1186 	struct thread *td = curthread; 		/* XXX */
1187 	struct fs *fs;
1188 	struct inode *ip;
1189 	struct ufsmount *ump;
1190 	struct buf *bp;
1191 	struct vnode *vp;
1192 	dev_t dev;
1193 	int error;
1194 
1195 	ump = VFSTOUFS(mp);
1196 	dev = ump->um_dev;
1197 
1198 	/*
1199 	 * We do not lock vnode creation as it is believed to be too
1200 	 * expensive for such rare case as simultaneous creation of vnode
1201 	 * for same ino by different processes. We just allow them to race
1202 	 * and check later to decide who wins. Let the race begin!
1203 	 */
1204 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1205 		return (error);
1206 	if (*vpp != NULL)
1207 		return (0);
1208 
1209 	/*
1210 	 * If this MALLOC() is performed after the getnewvnode()
1211 	 * it might block, leaving a vnode with a NULL v_data to be
1212 	 * found by ffs_sync() if a sync happens to fire right then,
1213 	 * which will cause a panic because ffs_sync() blindly
1214 	 * dereferences vp->v_data (as well it should).
1215 	 */
1216 	MALLOC(ip, struct inode *, sizeof(struct inode),
1217 	    ump->um_malloctype, M_WAITOK);
1218 
1219 	/* Allocate a new vnode/inode. */
1220 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1221 	if (error) {
1222 		*vpp = NULL;
1223 		FREE(ip, ump->um_malloctype);
1224 		return (error);
1225 	}
1226 	bzero((caddr_t)ip, sizeof(struct inode));
1227 	/*
1228 	 * FFS supports lock sharing in the stack of vnodes
1229 	 */
1230 	vp->v_vnlock = &vp->v_lock;
1231 	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
	/* Cross-link the vnode and the in-core inode and record identity. */
1232 	vp->v_data = ip;
1233 	ip->i_vnode = vp;
1234 	ip->i_ump = ump;
1235 	ip->i_fs = fs = ump->um_fs;
1236 	ip->i_dev = dev;
1237 	ip->i_number = ino;
1238 #ifdef QUOTA
1239 	{
1240 		int i;
1241 		for (i = 0; i < MAXQUOTAS; i++)
1242 			ip->i_dquot[i] = NODQUOT;
1243 	}
1244 #endif
1245 	/*
1246 	 * Exclusively lock the vnode before adding to hash. Note, that we
1247 	 * must not release nor downgrade the lock (despite flags argument
1248 	 * says) till it is fully initialized.
1249 	 */
1250 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1251 
1252 	/*
1253 	 * Atomically (in terms of ufs_hash operations) check the hash for
1254 	 * duplicate of vnode being created and add it to the hash. If a
1255 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1256 	 */
1257 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1258 		vput(vp);
1259 		*vpp = NULL;
1260 		return (error);
1261 	}
1262 
1263 	/* We lost the race, then throw away our vnode and return existing */
1264 	if (*vpp != NULL) {
1265 		vput(vp);
1266 		return (0);
1267 	}
1268 
1269 	/* Read in the disk contents for the inode, copy into the inode. */
1270 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1271 	    (int)fs->fs_bsize, NOCRED, &bp);
1272 	if (error) {
1273 		/*
1274 		 * The inode does not contain anything useful, so it would
1275 		 * be misleading to leave it on its hash chain. With mode
1276 		 * still zero, it will be unlinked and returned to the free
1277 		 * list by vput().
1278 		 */
1279 		brelse(bp);
1280 		vput(vp);
1281 		*vpp = NULL;
1282 		return (error);
1283 	}
1284 	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
	/*
	 * With soft updates the effective link count is set up by
	 * softdep_load_inodeblock() (presumably -- its body is not visible
	 * here); otherwise it simply mirrors i_nlink.
	 */
1285 	if (DOINGSOFTDEP(vp))
1286 		softdep_load_inodeblock(ip);
1287 	else
1288 		ip->i_effnlink = ip->i_nlink;
1289 	bqrelse(bp);
1290 
1291 	/*
1292 	 * Initialize the vnode from the inode, check for aliases.
1293 	 * Note that the underlying vnode may have changed.
1294 	 */
1295 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1296 	if (error) {
1297 		vput(vp);
1298 		*vpp = NULL;
1299 		return (error);
1300 	}
1301 	/*
1302 	 * Finish inode initialization now that aliasing has been resolved.
1303 	 */
1304 	ip->i_devvp = ump->um_devvp;
	/* The inode takes its own reference on the device vnode. */
1305 	VREF(ip->i_devvp);
1306 	/*
1307 	 * Set up a generation number for this inode if it does not
1308 	 * already have one. This should only happen on old filesystems.
1309 	 */
1310 	if (ip->i_gen == 0) {
		/* "+ 1" keeps the result nonzero, assuming random() >= 0. */
1311 		ip->i_gen = random() / 2 + 1;
		/*
		 * Only on a writable mount is the inode marked modified
		 * and the new number stored through DIP().
		 */
1312 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1313 			ip->i_flag |= IN_MODIFIED;
1314 			DIP(ip, i_gen) = ip->i_gen;
1315 		}
1316 	}
1317 	/*
1318 	 * Ensure that uid and gid are correct. This is a temporary
1319 	 * fix until fsck has been changed to do the update.
1320 	 */
1321 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1322 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1323 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1324 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1325 	}						/* XXX */
1326 
1327 	*vpp = vp;
1328 	return (0);
1329 }
1330 
1331 /*
1332  * File handle to vnode
1333  *
1334  * Have to be really careful about stale file handles:
1335  * - check that the inode number is valid
1336  * - call ffs_vget() to get the locked inode
1337  * - check for an unallocated inode (i_mode == 0)
1338  * - check that the given client host has export rights and return
1339  *   those rights via. exflagsp and credanonp
1340  */
1341 int
1342 ffs_fhtovp(mp, fhp, vpp)
1343 	struct mount *mp;
1344 	struct fid *fhp;
1345 	struct vnode **vpp;
1346 {
1347 	struct ufid *ufhp;
1348 	struct fs *fs;
1349 
1350 	ufhp = (struct ufid *)fhp;
1351 	fs = VFSTOUFS(mp)->um_fs;
1352 	if (ufhp->ufid_ino < ROOTINO ||
1353 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1354 		return (ESTALE);
1355 	return (ufs_fhtovp(mp, ufhp, vpp));
1356 }
1357 
1358 /*
1359  * Vnode pointer to File handle
1360  */
1361 /* ARGSUSED */
1362 int
1363 ffs_vptofh(vp, fhp)
1364 	struct vnode *vp;
1365 	struct fid *fhp;
1366 {
1367 	struct inode *ip;
1368 	struct ufid *ufhp;
1369 
1370 	ip = VTOI(vp);
1371 	ufhp = (struct ufid *)fhp;
1372 	ufhp->ufid_len = sizeof(struct ufid);
1373 	ufhp->ufid_ino = ip->i_number;
1374 	ufhp->ufid_gen = ip->i_gen;
1375 	return (0);
1376 }
1377 
/*
 * Filesystem initialization hook: run softdep_initialize() first, then
 * ufs_init(), and return the latter's result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1389 
/*
 * Counterpart of ffs_init(): run ufs_uninit() first, then
 * softdep_uninitialize(), and return the result of ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1403 
1404 /*
1405  * Write a superblock and associated information back to disk.
1406  */
1407 static int
1408 ffs_sbupdate(mp, waitfor)
1409 	struct ufsmount *mp;
1410 	int waitfor;
1411 {
1412 	struct fs *fs = mp->um_fs;
1413 	struct buf *bp;
1414 	int blks;
1415 	void *space;
1416 	int i, size, error, allerror = 0;
1417 
1418 	/*
1419 	 * First write back the summary information.
1420 	 */
1421 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1422 	space = fs->fs_csp;
1423 	for (i = 0; i < blks; i += fs->fs_frag) {
1424 		size = fs->fs_bsize;
1425 		if (i + fs->fs_frag > blks)
1426 			size = (blks - i) * fs->fs_fsize;
1427 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1428 		    size, 0, 0);
1429 		bcopy(space, bp->b_data, (u_int)size);
1430 		space = (char *)space + size;
1431 		if (waitfor != MNT_WAIT)
1432 			bawrite(bp);
1433 		else if ((error = bwrite(bp)) != 0)
1434 			allerror = error;
1435 	}
1436 	/*
1437 	 * Now write back the superblock itself. If any errors occurred
1438 	 * up to this point, then fail so that the superblock avoids
1439 	 * being written out as clean.
1440 	 */
1441 	if (allerror)
1442 		return (allerror);
1443 	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1444 	    (int)fs->fs_sbsize, 0, 0);
1445 	fs->fs_fmod = 0;
1446 	fs->fs_time = time_second;
1447 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1448 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1449 	if (waitfor != MNT_WAIT)
1450 		bawrite(bp);
1451 	else if ((error = bwrite(bp)) != 0)
1452 		allerror = error;
1453 	return (allerror);
1454 }
1455