xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 09e8dea79366f1e5b3a73e8a271b26e4b6bf2e6a)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_quota.h"
38 #include "opt_ufs.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/stdint.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/disk.h>
53 #include <sys/malloc.h>
54 #include <sys/mutex.h>
55 
56 #include <ufs/ufs/extattr.h>
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/ufsmount.h>
59 #include <ufs/ufs/inode.h>
60 #include <ufs/ufs/ufs_extern.h>
61 
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_page.h>
67 
/*
 * Malloc type handed to ffs_mountfs() by ffs_mount(); per its description
 * string it accounts for the private part of FFS vnodes.
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/*
 * Prototypes for routines defined in this file.
 * NOTE(review): ffs_reload is the only one declared without `static';
 * presumably it is referenced from outside this file -- confirm.
 */
static int	ffs_sbupdate(struct ufsmount *, int);
       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static int	ffs_init(struct vfsconf *);

/*
 * Filesystem operations vector.  The initializer is positional, so the
 * order of the entries must match the member order of struct vfsops.
 * Entries named ufs_* or vfs_std* are not implemented in this file.
 * The last slot depends on whether the kernel is built with UFS_EXTATTR.
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	vfs_stdcheckexp,
	ffs_vptofh,
	ffs_init,
	vfs_stduninit,
#ifdef UFS_EXTATTR
	ufs_extattrctl,
#else
	vfs_stdextattrctl,
#endif
};

/* Hook the vector above into the VFS layer under the name "ufs". */
VFS_SET(ufs_vfsops, ufs, 0);
99 
100 /*
101  * ffs_mount
102  *
103  * Called when mounting local physical media
104  *
105  * PARAMETERS:
106  *		mountroot
107  *			mp	mount point structure
108  *			path	NULL (flag for root mount!!!)
109  *			data	<unused>
110  *			ndp	<unused>
111  *			p	process (user credentials check [statfs])
112  *
113  *		mount
114  *			mp	mount point structure
115  *			path	path to mount point
116  *			data	pointer to argument struct in user space
117  *			ndp	mount point namei() return (used for
118  *				credentials on reload), reused to look
119  *				up block device.
120  *			p	process (user credentials check)
121  *
122  * RETURNS:	0	Success
123  *		!0	error number (errno.h)
124  *
125  * LOCK STATE:
126  *
127  *		ENTRY
128  *			mount point is locked
129  *		EXIT
130  *			mount point is locked
131  *
132  * NOTES:
133  *		A NULL path can be used for a flag since the mount
134  *		system call will fail with EFAULT in copyinstr in
135  *		namei() if it is a genuine NULL from the user.
136  */
137 int
138 ffs_mount(mp, path, data, ndp, td)
139         struct mount		*mp;	/* mount struct pointer*/
140         char			*path;	/* path to mount point*/
141         caddr_t			data;	/* arguments to FS specific mount*/
142         struct nameidata	*ndp;	/* mount point credentials*/
143         struct thread		*td;	/* process requesting mount*/
144 {
145 	size_t size;
146 	struct vnode *devvp;
147 	struct ufs_args args;
148 	struct ufsmount *ump = 0;
149 	struct fs *fs;
150 	int error, flags;
151 	mode_t accessmode;
152 
153 	/*
154 	 * Use NULL path to indicate we are mounting the root filesystem.
155 	 */
156 	if (path == NULL) {
157 		if ((error = bdevvp(rootdev, &rootvp))) {
158 			printf("ffs_mountroot: can't find rootvp\n");
159 			return (error);
160 		}
161 
162 		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
163 			return (error);
164 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
165 		return (0);
166 	}
167 
168 	/*
169 	 * Mounting non-root filesystem or updating a filesystem
170 	 */
171 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
172 		return (error);
173 
174 	/*
175 	 * If updating, check whether changing from read-only to
176 	 * read/write; if there is no device name, that's all we do.
177 	 */
178 	if (mp->mnt_flag & MNT_UPDATE) {
179 		ump = VFSTOUFS(mp);
180 		fs = ump->um_fs;
181 		devvp = ump->um_devvp;
182 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
183 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
184 				return (error);
185 			/*
186 			 * Flush any dirty data.
187 			 */
188 			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
189 			/*
190 			 * Check for and optionally get rid of files open
191 			 * for writing.
192 			 */
193 			flags = WRITECLOSE;
194 			if (mp->mnt_flag & MNT_FORCE)
195 				flags |= FORCECLOSE;
196 			if (mp->mnt_flag & MNT_SOFTDEP) {
197 				error = softdep_flushfiles(mp, flags, td);
198 			} else {
199 				error = ffs_flushfiles(mp, flags, td);
200 			}
201 			if (error) {
202 				vn_finished_write(mp);
203 				return (error);
204 			}
205 			if (fs->fs_pendingblocks != 0 ||
206 			    fs->fs_pendinginodes != 0) {
207 				printf("%s: %s: blocks %jd files %d\n",
208 				    fs->fs_fsmnt, "update error",
209 				    (intmax_t)fs->fs_pendingblocks,
210 				    fs->fs_pendinginodes);
211 				fs->fs_pendingblocks = 0;
212 				fs->fs_pendinginodes = 0;
213 			}
214 			fs->fs_ronly = 1;
215 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
216 				fs->fs_clean = 1;
217 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
218 				fs->fs_ronly = 0;
219 				fs->fs_clean = 0;
220 				vn_finished_write(mp);
221 				return (error);
222 			}
223 			vn_finished_write(mp);
224 		}
225 		if ((mp->mnt_flag & MNT_RELOAD) &&
226 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
227 			return (error);
228 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
229 			/*
230 			 * If upgrade to read-write by non-root, then verify
231 			 * that user has necessary permissions on the device.
232 			 */
233 			if (suser(td)) {
234 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
235 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
236 				    td->td_ucred, td)) != 0) {
237 					VOP_UNLOCK(devvp, 0, td);
238 					return (error);
239 				}
240 				VOP_UNLOCK(devvp, 0, td);
241 			}
242 			fs->fs_flags &= ~FS_UNCLEAN;
243 			if (fs->fs_clean == 0) {
244 				fs->fs_flags |= FS_UNCLEAN;
245 				if ((mp->mnt_flag & MNT_FORCE) ||
246 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
247 				     (fs->fs_flags & FS_DOSOFTDEP))) {
248 					printf("WARNING: %s was not %s\n",
249 					   fs->fs_fsmnt, "properly dismounted");
250 				} else {
251 					printf(
252 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
253 					    fs->fs_fsmnt);
254 					return (EPERM);
255 				}
256 			}
257 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
258 				return (error);
259 			fs->fs_ronly = 0;
260 			fs->fs_clean = 0;
261 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
262 				vn_finished_write(mp);
263 				return (error);
264 			}
265 			/* check to see if we need to start softdep */
266 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
267 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
268 				vn_finished_write(mp);
269 				return (error);
270 			}
271 			if (fs->fs_snapinum[0] != 0)
272 				ffs_snapshot_mount(mp);
273 			vn_finished_write(mp);
274 		}
275 		/*
276 		 * Soft updates is incompatible with "async",
277 		 * so if we are doing softupdates stop the user
278 		 * from setting the async flag in an update.
279 		 * Softdep_mount() clears it in an initial mount
280 		 * or ro->rw remount.
281 		 */
282 		if (mp->mnt_flag & MNT_SOFTDEP)
283 			mp->mnt_flag &= ~MNT_ASYNC;
284 		/*
285 		 * If not updating name, process export requests.
286 		 */
287 		if (args.fspec == 0)
288 			return (vfs_export(mp, &args.export));
289 		/*
290 		 * If this is a snapshot request, take the snapshot.
291 		 */
292 		if (mp->mnt_flag & MNT_SNAPSHOT)
293 			return (ffs_snapshot(mp, args.fspec));
294 	}
295 
296 	/*
297 	 * Not an update, or updating the name: look up the name
298 	 * and verify that it refers to a sensible block device.
299 	 */
300 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
301 	if ((error = namei(ndp)) != 0)
302 		return (error);
303 	NDFREE(ndp, NDF_ONLY_PNBUF);
304 	devvp = ndp->ni_vp;
305 	if (!vn_isdisk(devvp, &error)) {
306 		vrele(devvp);
307 		return (error);
308 	}
309 
310 	/*
311 	 * If mount by non-root, then verify that user has necessary
312 	 * permissions on the device.
313 	 */
314 	if (suser(td)) {
315 		accessmode = VREAD;
316 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
317 			accessmode |= VWRITE;
318 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
319 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
320 			vput(devvp);
321 			return (error);
322 		}
323 		VOP_UNLOCK(devvp, 0, td);
324 	}
325 
326 	if (mp->mnt_flag & MNT_UPDATE) {
327 		/*
328 		 * Update only
329 		 *
330 		 * If it's not the same vnode, or at least the same device
331 		 * then it's not correct.
332 		 */
333 
334 		if (devvp != ump->um_devvp &&
335 		    devvp->v_rdev != ump->um_devvp->v_rdev)
336 			error = EINVAL;	/* needs translation */
337 		vrele(devvp);
338 		if (error)
339 			return (error);
340 	} else {
341 		/*
342 		 * New mount
343 		 *
344 		 * We need the name for the mount point (also used for
345 		 * "last mounted on") copied in. If an error occurs,
346 		 * the mount point is discarded by the upper level code.
347 		 * Note that vfs_mount() populates f_mntonname for us.
348 		 */
349 		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
350 			vrele(devvp);
351 			return (error);
352 		}
353 	}
354 	/*
355 	 * Save "mounted from" device name info for mount point (NULL pad).
356 	 */
357 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
358 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
359 	/*
360 	 * Initialize filesystem stat information in mount struct.
361 	 */
362 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
363 	return (0);
364 }
365 
366 /*
367  * Reload all incore data for a filesystem (used after running fsck on
368  * the root filesystem and finding things to fix). The filesystem must
369  * be mounted read-only.
370  *
371  * Things to do to update the mount:
372  *	1) invalidate all cached meta-data.
373  *	2) re-read superblock from disk.
374  *	3) re-read summary information from disk.
375  *	4) invalidate all inactive vnodes.
376  *	5) invalidate all cached file data.
377  *	6) re-read inode data for all active vnodes.
378  */
379 int
380 ffs_reload(mp, cred, td)
381 	struct mount *mp;
382 	struct ucred *cred;
383 	struct thread *td;
384 {
385 	struct vnode *vp, *nvp, *devvp;
386 	struct inode *ip;
387 	void *space;
388 	struct buf *bp;
389 	struct fs *fs, *newfs;
390 	dev_t dev;
391 	ufs2_daddr_t sblockloc;
392 	int i, blks, size, error;
393 	int32_t *lp;
394 
395 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
396 		return (EINVAL);
397 	/*
398 	 * Step 1: invalidate all cached meta-data.
399 	 */
400 	devvp = VFSTOUFS(mp)->um_devvp;
401 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
402 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
403 	VOP_UNLOCK(devvp, 0, td);
404 	if (error)
405 		panic("ffs_reload: dirty1");
406 
407 	dev = devvp->v_rdev;
408 
409 	/*
410 	 * Only VMIO the backing device if the backing device is a real
411 	 * block device.
412 	 */
413 	if (vn_isdisk(devvp, NULL)) {
414 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
415 		vfs_object_create(devvp, td, td->td_ucred);
416 		mtx_lock(&devvp->v_interlock);
417 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
418 	}
419 
420 	/*
421 	 * Step 2: re-read superblock from disk.
422 	 */
423 	fs = VFSTOUFS(mp)->um_fs;
424 	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
425 	    NOCRED, &bp)) != 0)
426 		return (error);
427 	newfs = (struct fs *)bp->b_data;
428 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
429 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
430 	    newfs->fs_bsize > MAXBSIZE ||
431 	    newfs->fs_bsize < sizeof(struct fs)) {
432 			brelse(bp);
433 			return (EIO);		/* XXX needs translation */
434 	}
435 	/*
436 	 * Copy pointer fields back into superblock before copying in	XXX
437 	 * new superblock. These should really be in the ufsmount.	XXX
438 	 * Note that important parameters (eg fs_ncg) are unchanged.
439 	 */
440 	newfs->fs_csp = fs->fs_csp;
441 	newfs->fs_maxcluster = fs->fs_maxcluster;
442 	newfs->fs_contigdirs = fs->fs_contigdirs;
443 	newfs->fs_active = fs->fs_active;
444 	sblockloc = fs->fs_sblockloc;
445 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
446 	brelse(bp);
447 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
448 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
449 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
450 		printf("%s: reload pending error: blocks %jd files %d\n",
451 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
452 		    fs->fs_pendinginodes);
453 		fs->fs_pendingblocks = 0;
454 		fs->fs_pendinginodes = 0;
455 	}
456 
457 	/*
458 	 * Step 3: re-read summary information from disk.
459 	 */
460 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
461 	space = fs->fs_csp;
462 	for (i = 0; i < blks; i += fs->fs_frag) {
463 		size = fs->fs_bsize;
464 		if (i + fs->fs_frag > blks)
465 			size = (blks - i) * fs->fs_fsize;
466 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
467 		    NOCRED, &bp);
468 		if (error)
469 			return (error);
470 		bcopy(bp->b_data, space, (u_int)size);
471 		space = (char *)space + size;
472 		brelse(bp);
473 	}
474 	/*
475 	 * We no longer know anything about clusters per cylinder group.
476 	 */
477 	if (fs->fs_contigsumsize > 0) {
478 		lp = fs->fs_maxcluster;
479 		for (i = 0; i < fs->fs_ncg; i++)
480 			*lp++ = fs->fs_contigsumsize;
481 	}
482 
483 loop:
484 	mtx_lock(&mntvnode_mtx);
485 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
486 		if (vp->v_mount != mp) {
487 			mtx_unlock(&mntvnode_mtx);
488 			goto loop;
489 		}
490 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
491 		mtx_unlock(&mntvnode_mtx);
492 		/*
493 		 * Step 4: invalidate all inactive vnodes.
494 		 */
495 		if (vrecycle(vp, NULL, td))
496 			goto loop;
497 		/*
498 		 * Step 5: invalidate all cached file data.
499 		 */
500 		mtx_lock(&vp->v_interlock);
501 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
502 			goto loop;
503 		}
504 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
505 			panic("ffs_reload: dirty2");
506 		/*
507 		 * Step 6: re-read inode data for all active vnodes.
508 		 */
509 		ip = VTOI(vp);
510 		error =
511 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
512 		    (int)fs->fs_bsize, NOCRED, &bp);
513 		if (error) {
514 			vput(vp);
515 			return (error);
516 		}
517 		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
518 		ip->i_effnlink = ip->i_nlink;
519 		brelse(bp);
520 		vput(vp);
521 		mtx_lock(&mntvnode_mtx);
522 	}
523 	mtx_unlock(&mntvnode_mtx);
524 	return (0);
525 }
526 
/*
 * Possible superblock locations ordered from most to least likely.
 * The list comes from SBLOCKSEARCH; ffs_mountfs() walks it until it
 * reaches an entry equal to -1, which therefore acts as the terminator.
 * Each entry is divided by the device sector size to form the block
 * number passed to bread().
 */
static int sblock_try[] = SBLOCKSEARCH;
531 
532 /*
533  * Common code for mount and mountroot
534  */
535 int
536 ffs_mountfs(devvp, mp, td, malloctype)
537 	struct vnode *devvp;
538 	struct mount *mp;
539 	struct thread *td;
540 	struct malloc_type *malloctype;
541 {
542 	struct ufsmount *ump;
543 	struct buf *bp;
544 	struct fs *fs;
545 	dev_t dev;
546 	void *space;
547 	ufs2_daddr_t sblockloc;
548 	int error, i, blks, size, ronly;
549 	int32_t *lp;
550 	struct ucred *cred;
551 	size_t strsize;
552 	int ncount;
553 	u_int sectorsize;
554 
555 	dev = devvp->v_rdev;
556 	cred = td ? td->td_ucred : NOCRED;
557 	/*
558 	 * Disallow multiple mounts of the same device.
559 	 * Disallow mounting of a device that is currently in use
560 	 * (except for root, which might share swap device for miniroot).
561 	 * Flush out any old buffers remaining from a previous use.
562 	 */
563 	error = vfs_mountedon(devvp);
564 	if (error)
565 		return (error);
566 	ncount = vcount(devvp);
567 
568 	if (ncount > 1 && devvp != rootvp)
569 		return (EBUSY);
570 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
571 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
572 	VOP_UNLOCK(devvp, 0, td);
573 	if (error)
574 		return (error);
575 
576 	/*
577 	 * Only VMIO the backing device if the backing device is a real
578 	 * block device.
579 	 * Note that it is optional that the backing device be VMIOed.  This
580 	 * increases the opportunity for metadata caching.
581 	 */
582 	if (vn_isdisk(devvp, NULL)) {
583 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
584 		vfs_object_create(devvp, td, cred);
585 		mtx_lock(&devvp->v_interlock);
586 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
587 	}
588 
589 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
590 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
591 	/*
592 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
593 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
594 	 * XXX: start to avoid getting trashed later on.
595 	 */
596 #ifdef notyet
597 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
598 #else
599 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
600 #endif
601 	VOP_UNLOCK(devvp, 0, td);
602 	if (error)
603 		return (error);
604 	if (devvp->v_rdev->si_iosize_max != 0)
605 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
606 	if (mp->mnt_iosize_max > MAXPHYS)
607 		mp->mnt_iosize_max = MAXPHYS;
608 
609 	if (VOP_IOCTL(devvp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
610 	    FREAD, cred, td) != 0)
611 		size = DEV_BSIZE;
612 	else
613 		size = sectorsize;
614 
615 	bp = NULL;
616 	ump = NULL;
617 	fs = NULL;
618 	sblockloc = 0;
619 	/*
620 	 * Try reading the superblock in each of its possible locations.
621 	 */
622 	for (i = 0; sblock_try[i] != -1; i++) {
623 		if ((error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE,
624 		    cred, &bp)) != 0)
625 			goto out;
626 		fs = (struct fs *)bp->b_data;
627 		sblockloc = numfrags(fs, sblock_try[i]);
628 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
629 		     (fs->fs_magic == FS_UFS2_MAGIC &&
630 		      fs->fs_sblockloc == sblockloc)) &&
631 		    fs->fs_bsize <= MAXBSIZE &&
632 		    fs->fs_bsize >= sizeof(struct fs))
633 			break;
634 		brelse(bp);
635 		bp = NULL;
636 	}
637 	if (sblock_try[i] == -1) {
638 		error = EINVAL;		/* XXX needs translation */
639 		goto out;
640 	}
641 	fs->fs_fmod = 0;
642 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
643 	fs->fs_flags &= ~FS_UNCLEAN;
644 	if (fs->fs_clean == 0) {
645 		fs->fs_flags |= FS_UNCLEAN;
646 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
647 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
648 		     (fs->fs_flags & FS_DOSOFTDEP))) {
649 			printf(
650 "WARNING: %s was not properly dismounted\n",
651 			    fs->fs_fsmnt);
652 		} else {
653 			printf(
654 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
655 			    fs->fs_fsmnt);
656 			error = EPERM;
657 			goto out;
658 		}
659 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
660 		    (mp->mnt_flag & MNT_FORCE)) {
661 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
662 			    (intmax_t)fs->fs_pendingblocks,
663 			    fs->fs_pendinginodes);
664 			fs->fs_pendingblocks = 0;
665 			fs->fs_pendinginodes = 0;
666 		}
667 	}
668 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
669 		printf("%s: mount pending error: blocks %jd files %d\n",
670 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
671 		    fs->fs_pendinginodes);
672 		fs->fs_pendingblocks = 0;
673 		fs->fs_pendinginodes = 0;
674 	}
675 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
676 	ump->um_malloctype = malloctype;
677 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
678 	    M_WAITOK);
679 	if (fs->fs_magic == FS_UFS1_MAGIC) {
680 		ump->um_fstype = UFS1;
681 		ump->um_balloc = ffs_balloc_ufs1;
682 	} else {
683 		ump->um_fstype = UFS2;
684 		ump->um_balloc = ffs_balloc_ufs2;
685 	}
686 	ump->um_blkatoff = ffs_blkatoff;
687 	ump->um_truncate = ffs_truncate;
688 	ump->um_update = ffs_update;
689 	ump->um_valloc = ffs_valloc;
690 	ump->um_vfree = ffs_vfree;
691 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
692 	if (fs->fs_sbsize < SBLOCKSIZE)
693 		bp->b_flags |= B_INVAL | B_NOCACHE;
694 	brelse(bp);
695 	bp = NULL;
696 	fs = ump->um_fs;
697 	ffs_oldfscompat_read(fs, ump, sblockloc);
698 	fs->fs_ronly = ronly;
699 	size = fs->fs_cssize;
700 	blks = howmany(size, fs->fs_fsize);
701 	if (fs->fs_contigsumsize > 0)
702 		size += fs->fs_ncg * sizeof(int32_t);
703 	size += fs->fs_ncg * sizeof(u_int8_t);
704 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
705 	fs->fs_csp = space;
706 	for (i = 0; i < blks; i += fs->fs_frag) {
707 		size = fs->fs_bsize;
708 		if (i + fs->fs_frag > blks)
709 			size = (blks - i) * fs->fs_fsize;
710 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
711 		    cred, &bp)) != 0) {
712 			free(fs->fs_csp, M_UFSMNT);
713 			goto out;
714 		}
715 		bcopy(bp->b_data, space, (u_int)size);
716 		space = (char *)space + size;
717 		brelse(bp);
718 		bp = NULL;
719 	}
720 	if (fs->fs_contigsumsize > 0) {
721 		fs->fs_maxcluster = lp = space;
722 		for (i = 0; i < fs->fs_ncg; i++)
723 			*lp++ = fs->fs_contigsumsize;
724 		space = lp;
725 	}
726 	size = fs->fs_ncg * sizeof(u_int8_t);
727 	fs->fs_contigdirs = (u_int8_t *)space;
728 	bzero(fs->fs_contigdirs, size);
729 	fs->fs_active = NULL;
730 	mp->mnt_data = (qaddr_t)ump;
731 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
732 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
733 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
734 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
735 		vfs_getnewfsid(mp);
736 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
737 	mp->mnt_flag |= MNT_LOCAL;
738 	ump->um_mountp = mp;
739 	ump->um_dev = dev;
740 	ump->um_devvp = devvp;
741 	ump->um_nindir = fs->fs_nindir;
742 	ump->um_bptrtodb = fs->fs_fsbtodb;
743 	ump->um_seqinc = fs->fs_frag;
744 	for (i = 0; i < MAXQUOTAS; i++)
745 		ump->um_quotas[i] = NULLVP;
746 #ifdef UFS_EXTATTR
747 	ufs_extattr_uepm_init(&ump->um_extattr);
748 #endif
749 	devvp->v_rdev->si_mountpoint = mp;
750 
751 	/*
752 	 * Set FS local "last mounted on" information (NULL pad)
753 	 */
754 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
755 			fs->fs_fsmnt,			/* copy area*/
756 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
757 			&strsize);			/* real size*/
758 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
759 
760 	if( mp->mnt_flag & MNT_ROOTFS) {
761 		/*
762 		 * Root mount; update timestamp in mount structure.
763 		 * this will be used by the common root mount code
764 		 * to update the system clock.
765 		 */
766 		mp->mnt_time = fs->fs_time;
767 	}
768 
769 	if (ronly == 0) {
770 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
771 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
772 			free(fs->fs_csp, M_UFSMNT);
773 			goto out;
774 		}
775 		if (fs->fs_snapinum[0] != 0)
776 			ffs_snapshot_mount(mp);
777 		fs->fs_fmod = 1;
778 		fs->fs_clean = 0;
779 		(void) ffs_sbupdate(ump, MNT_WAIT);
780 	}
781 #ifdef UFS_EXTATTR
782 #ifdef UFS_EXTATTR_AUTOSTART
783 	/*
784 	 *
785 	 * Auto-starting does the following:
786 	 *	- check for /.attribute in the fs, and extattr_start if so
787 	 *	- for each file in .attribute, enable that file with
788 	 * 	  an attribute of the same name.
789 	 * Not clear how to report errors -- probably eat them.
790 	 * This would all happen while the filesystem was busy/not
791 	 * available, so would effectively be "atomic".
792 	 */
793 	(void) ufs_extattr_autostart(mp, td);
794 #endif /* !UFS_EXTATTR_AUTOSTART */
795 #endif /* !UFS_EXTATTR */
796 	return (0);
797 out:
798 	devvp->v_rdev->si_mountpoint = NULL;
799 	if (bp)
800 		brelse(bp);
801 	/* XXX: see comment above VOP_OPEN */
802 #ifdef notyet
803 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
804 #else
805 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
806 #endif
807 	if (ump) {
808 		free(ump->um_fs, M_UFSMNT);
809 		free(ump, M_UFSMNT);
810 		mp->mnt_data = (qaddr_t)0;
811 	}
812 	return (error);
813 }
814 
#include <sys/sysctl.h>
/*
 * Read/write integer knob registered under the _debug sysctl node,
 * initially 0.  While it is non-zero, ffs_oldfscompat_read() saves
 * fs_cgsize in fs_save_cgsize and replaces it with fs_bsize, and
 * ffs_oldfscompat_write() puts the saved value back.
 * NOTE(review): the knob is tested independently on the read and the
 * write side, so flipping it while a filesystem is mounted looks like it
 * would unbalance that save/restore -- confirm.
 */
int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
818 
819 /*
820  * Sanity checks for loading old filesystem superblocks.
821  * See ffs_oldfscompat_write below for unwound actions.
822  *
823  * XXX - Parts get retired eventually.
824  * Unfortunately new bits get added.
825  */
826 static void
827 ffs_oldfscompat_read(fs, ump, sblockloc)
828 	struct fs *fs;
829 	struct ufsmount *ump;
830 	ufs2_daddr_t sblockloc;
831 {
832 	off_t maxfilesize;
833 
834 	/*
835 	 * If not yet done, update UFS1 superblock with new wider fields.
836 	 */
837 	if (fs->fs_magic == FS_UFS1_MAGIC &&
838 	    fs->fs_sblockloc != sblockloc) {
839 		fs->fs_maxbsize = fs->fs_bsize;
840 		fs->fs_sblockloc = sblockloc;
841 		fs->fs_time = fs->fs_old_time;
842 		fs->fs_size = fs->fs_old_size;
843 		fs->fs_dsize = fs->fs_old_dsize;
844 		fs->fs_csaddr = fs->fs_old_csaddr;
845 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
846 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
847 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
848 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
849 	}
850 	if (fs->fs_magic == FS_UFS1_MAGIC &&
851 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
852 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
853 		fs->fs_qbmask = ~fs->fs_bmask;
854 		fs->fs_qfmask = ~fs->fs_fmask;
855 	}
856 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;
857 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
858 	if (fs->fs_maxfilesize > maxfilesize)
859 		fs->fs_maxfilesize = maxfilesize;
860 	/* Compatibility for old filesystems */
861 	if (fs->fs_avgfilesize <= 0)
862 		fs->fs_avgfilesize = AVFILESIZ;
863 	if (fs->fs_avgfpdir <= 0)
864 		fs->fs_avgfpdir = AFPDIR;
865 	if (bigcgs) {
866 		fs->fs_save_cgsize = fs->fs_cgsize;
867 		fs->fs_cgsize = fs->fs_bsize;
868 	}
869 }
870 
871 /*
872  * Unwinding superblock updates for old filesystems.
873  * See ffs_oldfscompat_read above for details.
874  *
875  * XXX - Parts get retired eventually.
876  * Unfortunately new bits get added.
877  */
878 static void
879 ffs_oldfscompat_write(fs, ump)
880 	struct fs *fs;
881 	struct ufsmount *ump;
882 {
883 
884 	/*
885 	 * Copy back UFS2 updated fields that UFS1 inspects.
886 	 */
887 	if (fs->fs_magic == FS_UFS1_MAGIC) {
888 		fs->fs_old_time = fs->fs_time;
889 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
890 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
891 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
892 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
893 	}
894 	fs->fs_maxfilesize = ump->um_savedmaxfilesize;
895 	if (bigcgs) {
896 		fs->fs_cgsize = fs->fs_save_cgsize;
897 		fs->fs_save_cgsize = 0;
898 	}
899 }
900 
901 /*
902  * unmount system call
903  */
904 int
905 ffs_unmount(mp, mntflags, td)
906 	struct mount *mp;
907 	int mntflags;
908 	struct thread *td;
909 {
910 	struct ufsmount *ump = VFSTOUFS(mp);
911 	struct fs *fs;
912 	int error, flags;
913 
914 	flags = 0;
915 	if (mntflags & MNT_FORCE) {
916 		flags |= FORCECLOSE;
917 	}
918 #ifdef UFS_EXTATTR
919 	if ((error = ufs_extattr_stop(mp, td))) {
920 		if (error != EOPNOTSUPP)
921 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
922 			    error);
923 	} else {
924 		ufs_extattr_uepm_destroy(&ump->um_extattr);
925 	}
926 #endif
927 	if (mp->mnt_flag & MNT_SOFTDEP) {
928 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
929 			return (error);
930 	} else {
931 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
932 			return (error);
933 	}
934 	fs = ump->um_fs;
935 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
936 		printf("%s: unmount pending error: blocks %jd files %d\n",
937 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
938 		    fs->fs_pendinginodes);
939 		fs->fs_pendingblocks = 0;
940 		fs->fs_pendinginodes = 0;
941 	}
942 	if (fs->fs_ronly == 0) {
943 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
944 		error = ffs_sbupdate(ump, MNT_WAIT);
945 		if (error) {
946 			fs->fs_clean = 0;
947 			return (error);
948 		}
949 	}
950 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
951 
952 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
953 	/* XXX: see comment above VOP_OPEN */
954 #ifdef notyet
955 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
956 		NOCRED, td);
957 #else
958 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
959 #endif
960 
961 	vrele(ump->um_devvp);
962 
963 	free(fs->fs_csp, M_UFSMNT);
964 	free(fs, M_UFSMNT);
965 	free(ump, M_UFSMNT);
966 	mp->mnt_data = (qaddr_t)0;
967 	mp->mnt_flag &= ~MNT_LOCAL;
968 	return (error);
969 }
970 
971 /*
972  * Flush out all the files in a filesystem.
973  */
974 int
975 ffs_flushfiles(mp, flags, td)
976 	struct mount *mp;
977 	int flags;
978 	struct thread *td;
979 {
980 	struct ufsmount *ump;
981 	int error;
982 
983 	ump = VFSTOUFS(mp);
984 #ifdef QUOTA
985 	if (mp->mnt_flag & MNT_QUOTA) {
986 		int i;
987 		error = vflush(mp, 0, SKIPSYSTEM|flags);
988 		if (error)
989 			return (error);
990 		for (i = 0; i < MAXQUOTAS; i++) {
991 			if (ump->um_quotas[i] == NULLVP)
992 				continue;
993 			quotaoff(td, mp, i);
994 		}
995 		/*
996 		 * Here we fall through to vflush again to ensure
997 		 * that we have gotten rid of all the system vnodes.
998 		 */
999 	}
1000 #endif
1001 	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
1002 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1003 			return (error);
1004 		ffs_snapshot_unmount(mp);
1005 		/*
1006 		 * Here we fall through to vflush again to ensure
1007 		 * that we have gotten rid of all the system vnodes.
1008 		 */
1009 	}
1010         /*
1011 	 * Flush all the files.
1012 	 */
1013 	if ((error = vflush(mp, 0, flags)) != 0)
1014 		return (error);
1015 	/*
1016 	 * Flush filesystem metadata.
1017 	 */
1018 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1019 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1020 	VOP_UNLOCK(ump->um_devvp, 0, td);
1021 	return (error);
1022 }
1023 
1024 /*
1025  * Get filesystem statistics.
1026  */
1027 int
1028 ffs_statfs(mp, sbp, td)
1029 	struct mount *mp;
1030 	struct statfs *sbp;
1031 	struct thread *td;
1032 {
1033 	struct ufsmount *ump;
1034 	struct fs *fs;
1035 
1036 	ump = VFSTOUFS(mp);
1037 	fs = ump->um_fs;
1038 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1039 		panic("ffs_statfs");
1040 	sbp->f_bsize = fs->fs_fsize;
1041 	sbp->f_iosize = fs->fs_bsize;
1042 	sbp->f_blocks = fs->fs_dsize;
1043 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1044 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1045 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1046 	    dbtofsb(fs, fs->fs_pendingblocks);
1047 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1048 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1049 	if (sbp != &mp->mnt_stat) {
1050 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1051 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1052 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1053 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1054 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1055 	}
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Go through the disk queues to initiate sandbagged IO;
1061  * go through the inodes to write those that have been modified;
1062  * initiate the writing of the super block if it has been modified.
1063  *
1064  * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * Arguments:
 *	mp	mount point to sync
 *	waitfor	MNT_WAIT selects a synchronous pass: vnode locks are
 *		requested without LK_NOWAIT and the vnode list is
 *		rescanned after the softdep worklist or the device vnode
 *		has been flushed.  MNT_LAZY skips the fsync of the
 *		device vnode.
 *	cred	credentials handed to VOP_FSYNC()
 *	td	calling thread
 *
 * Returns 0, or the most recent error recorded from VOP_FSYNC(),
 * softdep_flushworklist() or ffs_sbupdate(); an earlier error is
 * overwritten by a later one, and vget() failures are not recorded.
1065  */
1066 int
1067 ffs_sync(mp, waitfor, cred, td)
1068 	struct mount *mp;
1069 	int waitfor;
1070 	struct ucred *cred;
1071 	struct thread *td;
1072 {
1073 	struct vnode *nvp, *vp, *devvp;
1074 	struct inode *ip;
1075 	struct ufsmount *ump = VFSTOUFS(mp);
1076 	struct fs *fs;
1077 	int error, count, wait, lockreq, allerror = 0;
1078 
1079 	fs = ump->um_fs;
	/* A read-only filesystem with a modified superblock is fatal. */
1080 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1081 		printf("fs = %s\n", fs->fs_fsmnt);
1082 		panic("ffs_sync: rofs mod");
1083 	}
1084 	/*
1085 	 * Write back each (modified) inode.
	 *
	 * Unless the caller asked for MNT_WAIT, vnode locks are requested
	 * with LK_NOWAIT and UFS_UPDATE() is called with wait == 0.
1086 	 */
1087 	wait = 0;
1088 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1089 	if (waitfor == MNT_WAIT) {
1090 		wait = 1;
1091 		lockreq = LK_EXCLUSIVE;
1092 	}
1093 	mtx_lock(&mntvnode_mtx);
	/* Every jump to `loop' is made with mntvnode_mtx held. */
1094 loop:
1095 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1096 		/*
1097 		 * If the vnode that we are about to sync is no longer
1098 		 * associated with this mount point, start over.
1099 		 */
1100 		if (vp->v_mount != mp)
1101 			goto loop;
1102 
1103 		/*
1104 		 * Depend on mntvnode_mtx (held here) to keep things stable enough
1105 		 * for a quick test.  Since there might be hundreds of
1106 		 * thousands of vnodes, we cannot afford even a subroutine
1107 		 * call unless there's a good chance that we have work to do.
		 *
		 * The successor is sampled now so that it can be compared
		 * again once the mutex has been dropped and retaken below.
1108 		 */
1109 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1110 		ip = VTOI(vp);
		/*
		 * Skip VNON vnodes, and vnodes with none of the inode
		 * access/change/modified/update flags set and no dirty
		 * buffers.
		 */
1111 		if (vp->v_type == VNON || ((ip->i_flag &
1112 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1113 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1114 			continue;
1115 		}
1116 		if (vp->v_type != VCHR) {
			/* The mutex is not held across vget()/VOP_FSYNC(). */
1117 			mtx_unlock(&mntvnode_mtx);
1118 			if ((error = vget(vp, lockreq, td)) != 0) {
1119 				mtx_lock(&mntvnode_mtx);
				/*
				 * ENOENT restarts the scan; any other
				 * vget() error just skips this vnode and
				 * is not recorded in allerror.
				 */
1120 				if (error == ENOENT)
1121 					goto loop;
1122 			} else {
1123 				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1124 					allerror = error;
1125 				VOP_UNLOCK(vp, 0, td);
1126 				vrele(vp);
1127 				mtx_lock(&mntvnode_mtx);
1128 			}
1129 		} else {
			/*
			 * VCHR vnodes are not fsync'ed here; only
			 * UFS_UPDATE() is called on them, again with the
			 * mutex dropped.
			 */
1130 			mtx_unlock(&mntvnode_mtx);
1131 			UFS_UPDATE(vp, wait);
1132 			mtx_lock(&mntvnode_mtx);
1133 		}
		/*
		 * The mutex was released above; if vp's successor is no
		 * longer the one sampled earlier, restart from the head.
		 */
1134 		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1135 			goto loop;
1136 	}
1137 	mtx_unlock(&mntvnode_mtx);
1138 	/*
1139 	 * Force stale filesystem control information to be flushed.
1140 	 */
1141 	if (waitfor == MNT_WAIT) {
1142 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1143 			allerror = error;
1144 		/* Flushed work items may create new vnodes to clean */
1145 		if (count) {
1146 			mtx_lock(&mntvnode_mtx);
1147 			goto loop;
1148 		}
1149 	}
1150 #ifdef QUOTA
1151 	qsync(mp);
1152 #endif
1153 	devvp = ump->um_devvp;
	/*
	 * Under the vnode interlock, check whether the device vnode has
	 * writes in progress or dirty buffers; if so (and the request is
	 * not MNT_LAZY) fsync it with the vnode lock held exclusively.
	 */
1154 	mtx_lock(&devvp->v_interlock);
1155 	if (waitfor != MNT_LAZY &&
1156 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1157 		mtx_unlock(&devvp->v_interlock);
1158 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1159 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1160 			allerror = error;
1161 		VOP_UNLOCK(devvp, 0, td);
		/* A synchronous pass rescans the vnode list afterwards. */
1162 		if (waitfor == MNT_WAIT) {
1163 			mtx_lock(&mntvnode_mtx);
1164 			goto loop;
1165 		}
1166 	} else
1167 		mtx_unlock(&devvp->v_interlock);
1168 	/*
1169 	 * Write back modified superblock.
1170 	 */
1171 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1172 		allerror = error;
1173 	return (allerror);
1174 }
1175 
/*
 * Return in *vpp the vnode for inode number `ino' on mount `mp'.
 *
 * The inode hash is consulted first; if it yields a vnode, that vnode is
 * returned.  Otherwise a new vnode/inode pair is allocated, locked
 * exclusively, entered into the hash, filled in from the on-disk inode
 * and initialized with ufs_vinit().
 *
 * `flags' is passed through to ufs_ihashget() and ufs_ihashins()
 * (presumably lock flags for the vnode those routines return -- confirm
 * against the ufs_ihash code).
 *
 * Returns 0 with *vpp set on success.  On failure an error number is
 * returned; every failure path after the initial hash lookup also sets
 * *vpp to NULL.
 */
1176 int
1177 ffs_vget(mp, ino, flags, vpp)
1178 	struct mount *mp;
1179 	ino_t ino;
1180 	int flags;
1181 	struct vnode **vpp;
1182 {
1183 	struct thread *td = curthread; 		/* XXX */
1184 	struct fs *fs;
1185 	struct inode *ip;
1186 	struct ufsmount *ump;
1187 	struct buf *bp;
1188 	struct vnode *vp;
1189 	dev_t dev;
1190 	int error;
1191 
1192 	ump = VFSTOUFS(mp);
1193 	dev = ump->um_dev;
1194 
1195 	/*
1196 	 * We do not lock vnode creation as it is believed to be too
1197 	 * expensive for such rare case as simultaneous creation of vnode
1198 	 * for same ino by different processes. We just allow them to race
1199 	 * and check later to decide who wins. Let the race begin!
1200 	 */
1201 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1202 		return (error);
	/* Found in the hash: nothing more to do. */
1203 	if (*vpp != NULL)
1204 		return (0);
1205 
1206 	/*
1207 	 * If this MALLOC() is performed after the getnewvnode()
1208 	 * it might block, leaving a vnode with a NULL v_data to be
1209 	 * found by ffs_sync() if a sync happens to fire right then,
1210 	 * which will cause a panic because ffs_sync() blindly
1211 	 * dereferences vp->v_data (as well it should).
1212 	 */
1213 	MALLOC(ip, struct inode *, sizeof(struct inode),
1214 	    ump->um_malloctype, M_WAITOK);
1215 
1216 	/* Allocate a new vnode/inode. */
1217 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1218 	if (error) {
		/* No vnode was obtained, so the inode is freed by hand. */
1219 		*vpp = NULL;
1220 		FREE(ip, ump->um_malloctype);
1221 		return (error);
1222 	}
1223 	bzero((caddr_t)ip, sizeof(struct inode));
1224 	/*
1225 	 * FFS supports lock sharing in the stack of vnodes
1226 	 */
1227 	vp->v_vnlock = &vp->v_lock;
1228 	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
	/* Cross-link vnode and inode and record where the inode lives. */
1229 	vp->v_data = ip;
1230 	ip->i_vnode = vp;
1231 	ip->i_ump = ump;
1232 	ip->i_fs = fs = ump->um_fs;
1233 	ip->i_dev = dev;
1234 	ip->i_number = ino;
1235 #ifdef QUOTA
1236 	{
1237 		int i;
1238 		for (i = 0; i < MAXQUOTAS; i++)
1239 			ip->i_dquot[i] = NODQUOT;
1240 	}
1241 #endif
1242 	/*
1243 	 * Exclusively lock the vnode before adding to hash. Note, that we
1244 	 * must not release nor downgrade the lock (despite flags argument
1245 	 * says) till it is fully initialized.
1246 	 */
1247 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1248 
1249 	/*
1250 	 * Atomically (in terms of ufs_hash operations) check the hash for
1251 	 * duplicate of vnode being created and add it to the hash. If a
1252 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1253 	 */
1254 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1255 		vput(vp);
1256 		*vpp = NULL;
1257 		return (error);
1258 	}
1259 
1260 	/* We lost the race, then throw away our vnode and return existing */
1261 	if (*vpp != NULL) {
1262 		vput(vp);
1263 		return (0);
1264 	}
1265 
1266 	/* Read in the disk contents for the inode, copy into the inode. */
1267 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1268 	    (int)fs->fs_bsize, NOCRED, &bp);
1269 	if (error) {
1270 		/*
1271 		 * The inode does not contain anything useful, so it would
1272 		 * be misleading to leave it on its hash chain. With mode
1273 		 * still zero, it will be unlinked and returned to the free
1274 		 * list by vput().
1275 		 */
1276 		brelse(bp);
1277 		vput(vp);
1278 		*vpp = NULL;
1279 		return (error);
1280 	}
1281 	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
	/*
	 * With soft updates the inode block is handed to
	 * softdep_load_inodeblock(); otherwise i_effnlink simply starts
	 * out equal to the link count just loaded.
	 */
1282 	if (DOINGSOFTDEP(vp))
1283 		softdep_load_inodeblock(ip);
1284 	else
1285 		ip->i_effnlink = ip->i_nlink;
1286 	bqrelse(bp);
1287 
1288 	/*
1289 	 * Initialize the vnode from the inode, check for aliases.
1290 	 * Note that the underlying vnode may have changed.
1291 	 */
1292 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1293 	if (error) {
1294 		vput(vp);
1295 		*vpp = NULL;
1296 		return (error);
1297 	}
1298 	/*
1299 	 * Finish inode initialization now that aliasing has been resolved.
	 * The inode takes its own reference on the device vnode.
1300 	 */
1301 	ip->i_devvp = ump->um_devvp;
1302 	VREF(ip->i_devvp);
1303 	/*
1304 	 * Set up a generation number for this inode if it does not
1305 	 * already have one. This should only happen on old filesystems.
	 * The new number is copied into the dinode, and the inode marked
	 * modified, only when the mount is not read-only.
1306 	 */
1307 	if (ip->i_gen == 0) {
1308 		ip->i_gen = random() / 2 + 1;
1309 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1310 			ip->i_flag |= IN_MODIFIED;
1311 			DIP(ip, i_gen) = ip->i_gen;
1312 		}
1313 	}
1314 	/*
1315 	 * Ensure that uid and gid are correct. This is a temporary
1316 	 * fix until fsck has been changed to do the update.
	 * Applies only to UFS1 filesystems whose inode format predates
	 * FS_44INODEFMT: the ids are taken from di_ouid/di_ogid.
1317 	 */
1318 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1319 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1320 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1321 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1322 	}						/* XXX */
1323 
1324 	*vpp = vp;
1325 	return (0);
1326 }
1327 
1328 /*
1329  * File handle to vnode
1330  *
1331  * Have to be really careful about stale file handles:
1332  * - check that the inode number is valid
1333  * - call ffs_vget() to get the locked inode
1334  * - check for an unallocated inode (i_mode == 0)
1335  * - check that the given client host has export rights and return
1336  *   those rights via. exflagsp and credanonp
1337  */
1338 int
1339 ffs_fhtovp(mp, fhp, vpp)
1340 	struct mount *mp;
1341 	struct fid *fhp;
1342 	struct vnode **vpp;
1343 {
1344 	struct ufid *ufhp;
1345 	struct fs *fs;
1346 
1347 	ufhp = (struct ufid *)fhp;
1348 	fs = VFSTOUFS(mp)->um_fs;
1349 	if (ufhp->ufid_ino < ROOTINO ||
1350 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1351 		return (ESTALE);
1352 	return (ufs_fhtovp(mp, ufhp, vpp));
1353 }
1354 
1355 /*
1356  * Vnode pointer to File handle
1357  */
1358 /* ARGSUSED */
1359 int
1360 ffs_vptofh(vp, fhp)
1361 	struct vnode *vp;
1362 	struct fid *fhp;
1363 {
1364 	struct inode *ip;
1365 	struct ufid *ufhp;
1366 
1367 	ip = VTOI(vp);
1368 	ufhp = (struct ufid *)fhp;
1369 	ufhp->ufid_len = sizeof(struct ufid);
1370 	ufhp->ufid_ino = ip->i_number;
1371 	ufhp->ufid_gen = ip->i_gen;
1372 	return (0);
1373 }
1374 
/*
 * Filesystem initialization: run softdep_initialize(), then let
 * ufs_init() do the rest and hand back its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1386 
1387 /*
1388  * Write a superblock and associated information back to disk.
1389  */
1390 static int
1391 ffs_sbupdate(mp, waitfor)
1392 	struct ufsmount *mp;
1393 	int waitfor;
1394 {
1395 	struct fs *fs = mp->um_fs;
1396 	struct buf *bp;
1397 	int blks;
1398 	void *space;
1399 	int i, size, error, allerror = 0;
1400 
1401 	/*
1402 	 * First write back the summary information.
1403 	 */
1404 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1405 	space = fs->fs_csp;
1406 	for (i = 0; i < blks; i += fs->fs_frag) {
1407 		size = fs->fs_bsize;
1408 		if (i + fs->fs_frag > blks)
1409 			size = (blks - i) * fs->fs_fsize;
1410 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1411 		    size, 0, 0);
1412 		bcopy(space, bp->b_data, (u_int)size);
1413 		space = (char *)space + size;
1414 		if (waitfor != MNT_WAIT)
1415 			bawrite(bp);
1416 		else if ((error = bwrite(bp)) != 0)
1417 			allerror = error;
1418 	}
1419 	/*
1420 	 * Now write back the superblock itself. If any errors occurred
1421 	 * up to this point, then fail so that the superblock avoids
1422 	 * being written out as clean.
1423 	 */
1424 	if (allerror)
1425 		return (allerror);
1426 	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1427 	    (int)fs->fs_sbsize, 0, 0);
1428 	fs->fs_fmod = 0;
1429 	fs->fs_time = time_second;
1430 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1431 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1432 	if (waitfor != MNT_WAIT)
1433 		bawrite(bp);
1434 	else if ((error = bwrite(bp)) != 0)
1435 		allerror = error;
1436 	return (allerror);
1437 }
1438