xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_mac.h"
38 #include "opt_quota.h"
39 #include "opt_ufs.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/stdint.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/fcntl.h>
53 #include <sys/disk.h>
54 #include <sys/malloc.h>
55 #include <sys/mutex.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 
63 #include <ufs/ffs/fs.h>
64 #include <ufs/ffs/ffs_extern.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_page.h>
68 
/*
 * Malloc type handed to ffs_mountfs() by ffs_mount(); ffs_mountfs() records
 * it in the ufsmount (um_malloctype).
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/*
 * Prototypes for routines used by this file.  ffs_reload() is the only
 * one declared here without internal linkage.
 */
static int	ffs_sbupdate(struct ufsmount *, int);
       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;

/*
 * Filesystem operations vector for UFS/FFS.
 *
 * This is a positional initializer: the order of the entries below must
 * match the member order of struct vfsops, which is declared outside
 * this file.  Do not reorder.
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	vfs_stdcheckexp,
	ffs_vptofh,
	ffs_init,
	ffs_uninit,
	ffs_extattrctl,
};

/* Register the operations vector above under the filesystem name "ufs". */
VFS_SET(ufs_vfsops, ufs, 0);
98 
99 /*
100  * ffs_mount
101  *
102  * Called when mounting local physical media
103  *
104  * PARAMETERS:
105  *		mountroot
106  *			mp	mount point structure
107  *			path	NULL (flag for root mount!!!)
108  *			data	<unused>
109  *			ndp	<unused>
110  *			p	process (user credentials check [statfs])
111  *
112  *		mount
113  *			mp	mount point structure
114  *			path	path to mount point
115  *			data	pointer to argument struct in user space
116  *			ndp	mount point namei() return (used for
117  *				credentials on reload), reused to look
118  *				up block device.
119  *			p	process (user credentials check)
120  *
121  * RETURNS:	0	Success
122  *		!0	error number (errno.h)
123  *
124  * LOCK STATE:
125  *
126  *		ENTRY
127  *			mount point is locked
128  *		EXIT
129  *			mount point is locked
130  *
131  * NOTES:
132  *		A NULL path can be used for a flag since the mount
133  *		system call will fail with EFAULT in copyinstr in
134  *		namei() if it is a genuine NULL from the user.
135  */
136 int
137 ffs_mount(mp, path, data, ndp, td)
138         struct mount		*mp;	/* mount struct pointer*/
139         char			*path;	/* path to mount point*/
140         caddr_t			data;	/* arguments to FS specific mount*/
141         struct nameidata	*ndp;	/* mount point credentials*/
142         struct thread		*td;	/* process requesting mount*/
143 {
144 	size_t size;
145 	struct vnode *devvp;
146 	struct ufs_args args;
147 	struct ufsmount *ump = 0;
148 	struct fs *fs;
149 	int error, flags;
150 	mode_t accessmode;
151 
152 	/*
153 	 * Use NULL path to indicate we are mounting the root filesystem.
154 	 */
155 	if (path == NULL) {
156 		if ((error = bdevvp(rootdev, &rootvp))) {
157 			printf("ffs_mountroot: can't find rootvp\n");
158 			return (error);
159 		}
160 
161 		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
162 			return (error);
163 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
164 		return (0);
165 	}
166 
167 	/*
168 	 * Mounting non-root filesystem or updating a filesystem
169 	 */
170 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
171 		return (error);
172 
173 	/*
174 	 * If updating, check whether changing from read-only to
175 	 * read/write; if there is no device name, that's all we do.
176 	 */
177 	if (mp->mnt_flag & MNT_UPDATE) {
178 		ump = VFSTOUFS(mp);
179 		fs = ump->um_fs;
180 		devvp = ump->um_devvp;
181 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
182 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
183 				return (error);
184 			/*
185 			 * Flush any dirty data.
186 			 */
187 			if ((error = VFS_SYNC(mp, MNT_WAIT,
188 			    td->td_proc->p_ucred, td)) != 0) {
189 				vn_finished_write(mp);
190 				return (error);
191 			}
192 			/*
193 			 * Check for and optionally get rid of files open
194 			 * for writing.
195 			 */
196 			flags = WRITECLOSE;
197 			if (mp->mnt_flag & MNT_FORCE)
198 				flags |= FORCECLOSE;
199 			if (mp->mnt_flag & MNT_SOFTDEP) {
200 				error = softdep_flushfiles(mp, flags, td);
201 			} else {
202 				error = ffs_flushfiles(mp, flags, td);
203 			}
204 			if (error) {
205 				vn_finished_write(mp);
206 				return (error);
207 			}
208 			if (fs->fs_pendingblocks != 0 ||
209 			    fs->fs_pendinginodes != 0) {
210 				printf("%s: %s: blocks %jd files %d\n",
211 				    fs->fs_fsmnt, "update error",
212 				    (intmax_t)fs->fs_pendingblocks,
213 				    fs->fs_pendinginodes);
214 				fs->fs_pendingblocks = 0;
215 				fs->fs_pendinginodes = 0;
216 			}
217 			fs->fs_ronly = 1;
218 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
219 				fs->fs_clean = 1;
220 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
221 				fs->fs_ronly = 0;
222 				fs->fs_clean = 0;
223 				vn_finished_write(mp);
224 				return (error);
225 			}
226 			vn_finished_write(mp);
227 		}
228 		if ((mp->mnt_flag & MNT_RELOAD) &&
229 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
230 			return (error);
231 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
232 			/*
233 			 * If upgrade to read-write by non-root, then verify
234 			 * that user has necessary permissions on the device.
235 			 */
236 			if (suser(td)) {
237 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
238 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
239 				    td->td_ucred, td)) != 0) {
240 					VOP_UNLOCK(devvp, 0, td);
241 					return (error);
242 				}
243 				VOP_UNLOCK(devvp, 0, td);
244 			}
245 			fs->fs_flags &= ~FS_UNCLEAN;
246 			if (fs->fs_clean == 0) {
247 				fs->fs_flags |= FS_UNCLEAN;
248 				if ((mp->mnt_flag & MNT_FORCE) ||
249 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
250 				     (fs->fs_flags & FS_DOSOFTDEP))) {
251 					printf("WARNING: %s was not %s\n",
252 					   fs->fs_fsmnt, "properly dismounted");
253 				} else {
254 					printf(
255 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
256 					    fs->fs_fsmnt);
257 					return (EPERM);
258 				}
259 			}
260 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
261 				return (error);
262 			fs->fs_ronly = 0;
263 			fs->fs_clean = 0;
264 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
265 				vn_finished_write(mp);
266 				return (error);
267 			}
268 			/* check to see if we need to start softdep */
269 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
270 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
271 				vn_finished_write(mp);
272 				return (error);
273 			}
274 			if (fs->fs_snapinum[0] != 0)
275 				ffs_snapshot_mount(mp);
276 			vn_finished_write(mp);
277 		}
278 		/*
279 		 * Soft updates is incompatible with "async",
280 		 * so if we are doing softupdates stop the user
281 		 * from setting the async flag in an update.
282 		 * Softdep_mount() clears it in an initial mount
283 		 * or ro->rw remount.
284 		 */
285 		if (mp->mnt_flag & MNT_SOFTDEP)
286 			mp->mnt_flag &= ~MNT_ASYNC;
287 		/*
288 		 * If not updating name, process export requests.
289 		 */
290 		if (args.fspec == 0)
291 			return (vfs_export(mp, &args.export));
292 		/*
293 		 * If this is a snapshot request, take the snapshot.
294 		 */
295 		if (mp->mnt_flag & MNT_SNAPSHOT)
296 			return (ffs_snapshot(mp, args.fspec));
297 	}
298 
299 	/*
300 	 * Not an update, or updating the name: look up the name
301 	 * and verify that it refers to a sensible block device.
302 	 */
303 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
304 	if ((error = namei(ndp)) != 0)
305 		return (error);
306 	NDFREE(ndp, NDF_ONLY_PNBUF);
307 	devvp = ndp->ni_vp;
308 	if (!vn_isdisk(devvp, &error)) {
309 		vrele(devvp);
310 		return (error);
311 	}
312 
313 	/*
314 	 * If mount by non-root, then verify that user has necessary
315 	 * permissions on the device.
316 	 */
317 	if (suser(td)) {
318 		accessmode = VREAD;
319 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
320 			accessmode |= VWRITE;
321 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
322 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
323 			vput(devvp);
324 			return (error);
325 		}
326 		VOP_UNLOCK(devvp, 0, td);
327 	}
328 
329 	if (mp->mnt_flag & MNT_UPDATE) {
330 		/*
331 		 * Update only
332 		 *
333 		 * If it's not the same vnode, or at least the same device
334 		 * then it's not correct.
335 		 */
336 
337 		if (devvp != ump->um_devvp &&
338 		    devvp->v_rdev != ump->um_devvp->v_rdev)
339 			error = EINVAL;	/* needs translation */
340 		vrele(devvp);
341 		if (error)
342 			return (error);
343 	} else {
344 		/*
345 		 * New mount
346 		 *
347 		 * We need the name for the mount point (also used for
348 		 * "last mounted on") copied in. If an error occurs,
349 		 * the mount point is discarded by the upper level code.
350 		 * Note that vfs_mount() populates f_mntonname for us.
351 		 */
352 		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
353 			vrele(devvp);
354 			return (error);
355 		}
356 	}
357 	/*
358 	 * Save "mounted from" device name info for mount point (NULL pad).
359 	 */
360 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
361 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
362 	/*
363 	 * Initialize filesystem stat information in mount struct.
364 	 */
365 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
366 	return (0);
367 }
368 
369 /*
370  * Reload all incore data for a filesystem (used after running fsck on
371  * the root filesystem and finding things to fix). The filesystem must
372  * be mounted read-only.
373  *
374  * Things to do to update the mount:
375  *	1) invalidate all cached meta-data.
376  *	2) re-read superblock from disk.
377  *	3) re-read summary information from disk.
378  *	4) invalidate all inactive vnodes.
379  *	5) invalidate all cached file data.
380  *	6) re-read inode data for all active vnodes.
381  */
382 int
383 ffs_reload(mp, cred, td)
384 	struct mount *mp;
385 	struct ucred *cred;
386 	struct thread *td;
387 {
388 	struct vnode *vp, *nvp, *devvp;
389 	struct inode *ip;
390 	void *space;
391 	struct buf *bp;
392 	struct fs *fs, *newfs;
393 	dev_t dev;
394 	ufs2_daddr_t sblockloc;
395 	int i, blks, size, error;
396 	int32_t *lp;
397 
398 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
399 		return (EINVAL);
400 	/*
401 	 * Step 1: invalidate all cached meta-data.
402 	 */
403 	devvp = VFSTOUFS(mp)->um_devvp;
404 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
405 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
406 	VOP_UNLOCK(devvp, 0, td);
407 	if (error)
408 		panic("ffs_reload: dirty1");
409 
410 	dev = devvp->v_rdev;
411 
412 	/*
413 	 * Only VMIO the backing device if the backing device is a real
414 	 * block device.
415 	 */
416 	if (vn_isdisk(devvp, NULL)) {
417 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
418 		vfs_object_create(devvp, td, td->td_ucred);
419 		/* XXX Why lock only to release immediately?? */
420 		mtx_lock(&devvp->v_interlock);
421 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
422 	}
423 
424 	/*
425 	 * Step 2: re-read superblock from disk.
426 	 */
427 	fs = VFSTOUFS(mp)->um_fs;
428 	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
429 	    NOCRED, &bp)) != 0)
430 		return (error);
431 	newfs = (struct fs *)bp->b_data;
432 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
433 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
434 	    newfs->fs_bsize > MAXBSIZE ||
435 	    newfs->fs_bsize < sizeof(struct fs)) {
436 			brelse(bp);
437 			return (EIO);		/* XXX needs translation */
438 	}
439 	/*
440 	 * Copy pointer fields back into superblock before copying in	XXX
441 	 * new superblock. These should really be in the ufsmount.	XXX
442 	 * Note that important parameters (eg fs_ncg) are unchanged.
443 	 */
444 	newfs->fs_csp = fs->fs_csp;
445 	newfs->fs_maxcluster = fs->fs_maxcluster;
446 	newfs->fs_contigdirs = fs->fs_contigdirs;
447 	newfs->fs_active = fs->fs_active;
448 	sblockloc = fs->fs_sblockloc;
449 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
450 	brelse(bp);
451 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
452 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
453 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
454 		printf("%s: reload pending error: blocks %jd files %d\n",
455 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
456 		    fs->fs_pendinginodes);
457 		fs->fs_pendingblocks = 0;
458 		fs->fs_pendinginodes = 0;
459 	}
460 
461 	/*
462 	 * Step 3: re-read summary information from disk.
463 	 */
464 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
465 	space = fs->fs_csp;
466 	for (i = 0; i < blks; i += fs->fs_frag) {
467 		size = fs->fs_bsize;
468 		if (i + fs->fs_frag > blks)
469 			size = (blks - i) * fs->fs_fsize;
470 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
471 		    NOCRED, &bp);
472 		if (error)
473 			return (error);
474 		bcopy(bp->b_data, space, (u_int)size);
475 		space = (char *)space + size;
476 		brelse(bp);
477 	}
478 	/*
479 	 * We no longer know anything about clusters per cylinder group.
480 	 */
481 	if (fs->fs_contigsumsize > 0) {
482 		lp = fs->fs_maxcluster;
483 		for (i = 0; i < fs->fs_ncg; i++)
484 			*lp++ = fs->fs_contigsumsize;
485 	}
486 
487 loop:
488 	mtx_lock(&mntvnode_mtx);
489 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
490 		if (vp->v_mount != mp) {
491 			mtx_unlock(&mntvnode_mtx);
492 			goto loop;
493 		}
494 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
495 		mtx_unlock(&mntvnode_mtx);
496 		/*
497 		 * Step 4: invalidate all inactive vnodes.
498 		 */
499 		if (vrecycle(vp, NULL, td))
500 			goto loop;
501 		/*
502 		 * Step 5: invalidate all cached file data.
503 		 */
504 		/* XXX Why lock only to release immediately? */
505 		mtx_lock(&vp->v_interlock);
506 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
507 			goto loop;
508 		}
509 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
510 			panic("ffs_reload: dirty2");
511 		/*
512 		 * Step 6: re-read inode data for all active vnodes.
513 		 */
514 		ip = VTOI(vp);
515 		error =
516 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
517 		    (int)fs->fs_bsize, NOCRED, &bp);
518 		if (error) {
519 			vput(vp);
520 			return (error);
521 		}
522 		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
523 		ip->i_effnlink = ip->i_nlink;
524 		brelse(bp);
525 		vput(vp);
526 		mtx_lock(&mntvnode_mtx);
527 	}
528 	mtx_unlock(&mntvnode_mtx);
529 	return (0);
530 }
531 
/*
 * Possible superblock locations ordered from most to least likely.
 *
 * ffs_mountfs() walks this table until it reaches a -1 entry, and divides
 * each entry by DEV_BSIZE to form the block number passed to bread().
 */
static int sblock_try[] = SBLOCKSEARCH;
536 
537 /*
538  * Common code for mount and mountroot
539  */
540 int
541 ffs_mountfs(devvp, mp, td, malloctype)
542 	struct vnode *devvp;
543 	struct mount *mp;
544 	struct thread *td;
545 	struct malloc_type *malloctype;
546 {
547 	struct ufsmount *ump;
548 	struct buf *bp;
549 	struct fs *fs;
550 	dev_t dev;
551 	void *space;
552 	ufs2_daddr_t sblockloc;
553 	int error, i, blks, size, ronly;
554 	int32_t *lp;
555 	struct ucred *cred;
556 	size_t strsize;
557 	int ncount;
558 
559 	dev = devvp->v_rdev;
560 	cred = td ? td->td_ucred : NOCRED;
561 	/*
562 	 * Disallow multiple mounts of the same device.
563 	 * Disallow mounting of a device that is currently in use
564 	 * (except for root, which might share swap device for miniroot).
565 	 * Flush out any old buffers remaining from a previous use.
566 	 */
567 	error = vfs_mountedon(devvp);
568 	if (error)
569 		return (error);
570 	ncount = vcount(devvp);
571 
572 	if (ncount > 1 && devvp != rootvp)
573 		return (EBUSY);
574 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
575 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
576 	VOP_UNLOCK(devvp, 0, td);
577 	if (error)
578 		return (error);
579 
580 	/*
581 	 * Only VMIO the backing device if the backing device is a real
582 	 * block device.
583 	 * Note that it is optional that the backing device be VMIOed.  This
584 	 * increases the opportunity for metadata caching.
585 	 */
586 	if (vn_isdisk(devvp, NULL)) {
587 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
588 		vfs_object_create(devvp, td, cred);
589 		/* XXX Why lock only to release immediately?? */
590 		mtx_lock(&devvp->v_interlock);
591 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
592 	}
593 
594 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
595 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
596 	/*
597 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
598 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
599 	 * XXX: start to avoid getting trashed later on.
600 	 */
601 #ifdef notyet
602 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
603 #else
604 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
605 #endif
606 	VOP_UNLOCK(devvp, 0, td);
607 	if (error)
608 		return (error);
609 	if (devvp->v_rdev->si_iosize_max != 0)
610 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
611 	if (mp->mnt_iosize_max > MAXPHYS)
612 		mp->mnt_iosize_max = MAXPHYS;
613 
614 	bp = NULL;
615 	ump = NULL;
616 	fs = NULL;
617 	sblockloc = 0;
618 	/*
619 	 * Try reading the superblock in each of its possible locations.
620 	 */
621 	for (i = 0; sblock_try[i] != -1; i++) {
622 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
623 		    cred, &bp)) != 0)
624 			goto out;
625 		fs = (struct fs *)bp->b_data;
626 		sblockloc = numfrags(fs, sblock_try[i]);
627 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
628 		     (fs->fs_magic == FS_UFS2_MAGIC &&
629 		      fs->fs_sblockloc == sblockloc)) &&
630 		    fs->fs_bsize <= MAXBSIZE &&
631 		    fs->fs_bsize >= sizeof(struct fs))
632 			break;
633 		brelse(bp);
634 		bp = NULL;
635 	}
636 	if (sblock_try[i] == -1) {
637 		error = EINVAL;		/* XXX needs translation */
638 		goto out;
639 	}
640 	fs->fs_fmod = 0;
641 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
642 	fs->fs_flags &= ~FS_UNCLEAN;
643 	if (fs->fs_clean == 0) {
644 		fs->fs_flags |= FS_UNCLEAN;
645 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
646 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
647 		     (fs->fs_flags & FS_DOSOFTDEP))) {
648 			printf(
649 "WARNING: %s was not properly dismounted\n",
650 			    fs->fs_fsmnt);
651 		} else {
652 			printf(
653 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
654 			    fs->fs_fsmnt);
655 			error = EPERM;
656 			goto out;
657 		}
658 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
659 		    (mp->mnt_flag & MNT_FORCE)) {
660 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
661 			    (intmax_t)fs->fs_pendingblocks,
662 			    fs->fs_pendinginodes);
663 			fs->fs_pendingblocks = 0;
664 			fs->fs_pendinginodes = 0;
665 		}
666 	}
667 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
668 		printf("%s: mount pending error: blocks %jd files %d\n",
669 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
670 		    fs->fs_pendinginodes);
671 		fs->fs_pendingblocks = 0;
672 		fs->fs_pendinginodes = 0;
673 	}
674 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
675 	ump->um_malloctype = malloctype;
676 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
677 	    M_WAITOK);
678 	if (fs->fs_magic == FS_UFS1_MAGIC) {
679 		ump->um_fstype = UFS1;
680 		ump->um_balloc = ffs_balloc_ufs1;
681 	} else {
682 		ump->um_fstype = UFS2;
683 		ump->um_balloc = ffs_balloc_ufs2;
684 	}
685 	ump->um_blkatoff = ffs_blkatoff;
686 	ump->um_truncate = ffs_truncate;
687 	ump->um_update = ffs_update;
688 	ump->um_valloc = ffs_valloc;
689 	ump->um_vfree = ffs_vfree;
690 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
691 	if (fs->fs_sbsize < SBLOCKSIZE)
692 		bp->b_flags |= B_INVAL | B_NOCACHE;
693 	brelse(bp);
694 	bp = NULL;
695 	fs = ump->um_fs;
696 	ffs_oldfscompat_read(fs, ump, sblockloc);
697 	fs->fs_ronly = ronly;
698 	size = fs->fs_cssize;
699 	blks = howmany(size, fs->fs_fsize);
700 	if (fs->fs_contigsumsize > 0)
701 		size += fs->fs_ncg * sizeof(int32_t);
702 	size += fs->fs_ncg * sizeof(u_int8_t);
703 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
704 	fs->fs_csp = space;
705 	for (i = 0; i < blks; i += fs->fs_frag) {
706 		size = fs->fs_bsize;
707 		if (i + fs->fs_frag > blks)
708 			size = (blks - i) * fs->fs_fsize;
709 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
710 		    cred, &bp)) != 0) {
711 			free(fs->fs_csp, M_UFSMNT);
712 			goto out;
713 		}
714 		bcopy(bp->b_data, space, (u_int)size);
715 		space = (char *)space + size;
716 		brelse(bp);
717 		bp = NULL;
718 	}
719 	if (fs->fs_contigsumsize > 0) {
720 		fs->fs_maxcluster = lp = space;
721 		for (i = 0; i < fs->fs_ncg; i++)
722 			*lp++ = fs->fs_contigsumsize;
723 		space = lp;
724 	}
725 	size = fs->fs_ncg * sizeof(u_int8_t);
726 	fs->fs_contigdirs = (u_int8_t *)space;
727 	bzero(fs->fs_contigdirs, size);
728 	fs->fs_active = NULL;
729 	mp->mnt_data = (qaddr_t)ump;
730 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
731 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
732 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
733 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
734 		vfs_getnewfsid(mp);
735 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
736 	mp->mnt_flag |= MNT_LOCAL;
737 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
738 #ifdef MAC
739 		mp->mnt_flag |= MNT_MULTILABEL;
740 #else
741 		printf(
742 "WARNING: %s: multilabel flag on fs but no MAC support\n",
743 		    fs->fs_fsmnt);
744 #endif
745 	if ((fs->fs_flags & FS_ACLS) != 0)
746 #ifdef UFS_ACL
747 		mp->mnt_flag |= MNT_ACLS;
748 #else
749 		printf(
750 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
751 		    fs->fs_fsmnt);
752 #endif
753 	ump->um_mountp = mp;
754 	ump->um_dev = dev;
755 	ump->um_devvp = devvp;
756 	ump->um_nindir = fs->fs_nindir;
757 	ump->um_bptrtodb = fs->fs_fsbtodb;
758 	ump->um_seqinc = fs->fs_frag;
759 	for (i = 0; i < MAXQUOTAS; i++)
760 		ump->um_quotas[i] = NULLVP;
761 #ifdef UFS_EXTATTR
762 	ufs_extattr_uepm_init(&ump->um_extattr);
763 #endif
764 	devvp->v_rdev->si_mountpoint = mp;
765 
766 	/*
767 	 * Set FS local "last mounted on" information (NULL pad)
768 	 */
769 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
770 			fs->fs_fsmnt,			/* copy area*/
771 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
772 			&strsize);			/* real size*/
773 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
774 
775 	if( mp->mnt_flag & MNT_ROOTFS) {
776 		/*
777 		 * Root mount; update timestamp in mount structure.
778 		 * this will be used by the common root mount code
779 		 * to update the system clock.
780 		 */
781 		mp->mnt_time = fs->fs_time;
782 	}
783 
784 	if (ronly == 0) {
785 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
786 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
787 			free(fs->fs_csp, M_UFSMNT);
788 			goto out;
789 		}
790 		if (fs->fs_snapinum[0] != 0)
791 			ffs_snapshot_mount(mp);
792 		fs->fs_fmod = 1;
793 		fs->fs_clean = 0;
794 		(void) ffs_sbupdate(ump, MNT_WAIT);
795 	}
796 #ifdef UFS_EXTATTR
797 #ifdef UFS_EXTATTR_AUTOSTART
798 	/*
799 	 *
800 	 * Auto-starting does the following:
801 	 *	- check for /.attribute in the fs, and extattr_start if so
802 	 *	- for each file in .attribute, enable that file with
803 	 * 	  an attribute of the same name.
804 	 * Not clear how to report errors -- probably eat them.
805 	 * This would all happen while the filesystem was busy/not
806 	 * available, so would effectively be "atomic".
807 	 */
808 	(void) ufs_extattr_autostart(mp, td);
809 #endif /* !UFS_EXTATTR_AUTOSTART */
810 #endif /* !UFS_EXTATTR */
811 	return (0);
812 out:
813 	devvp->v_rdev->si_mountpoint = NULL;
814 	if (bp)
815 		brelse(bp);
816 	/* XXX: see comment above VOP_OPEN */
817 #ifdef notyet
818 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
819 #else
820 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
821 #endif
822 	if (ump) {
823 		free(ump->um_fs, M_UFSMNT);
824 		free(ump, M_UFSMNT);
825 		mp->mnt_data = (qaddr_t)0;
826 	}
827 	return (error);
828 }
829 
#include <sys/sysctl.h>
/*
 * Read/write integer knob registered under the _debug sysctl node.
 * When non-zero, ffs_oldfscompat_read() saves fs_cgsize in fs_save_cgsize
 * and replaces it with fs_bsize; ffs_oldfscompat_write() puts the saved
 * value back.
 */
int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
833 
834 /*
835  * Sanity checks for loading old filesystem superblocks.
836  * See ffs_oldfscompat_write below for unwound actions.
837  *
838  * XXX - Parts get retired eventually.
839  * Unfortunately new bits get added.
840  */
841 static void
842 ffs_oldfscompat_read(fs, ump, sblockloc)
843 	struct fs *fs;
844 	struct ufsmount *ump;
845 	ufs2_daddr_t sblockloc;
846 {
847 	off_t maxfilesize;
848 
849 	/*
850 	 * If not yet done, update UFS1 superblock with new wider fields.
851 	 */
852 	if (fs->fs_magic == FS_UFS1_MAGIC &&
853 	    fs->fs_sblockloc != sblockloc) {
854 		fs->fs_maxbsize = fs->fs_bsize;
855 		fs->fs_sblockloc = sblockloc;
856 		fs->fs_time = fs->fs_old_time;
857 		fs->fs_size = fs->fs_old_size;
858 		fs->fs_dsize = fs->fs_old_dsize;
859 		fs->fs_csaddr = fs->fs_old_csaddr;
860 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
861 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
862 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
863 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
864 	}
865 	if (fs->fs_magic == FS_UFS1_MAGIC &&
866 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
867 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
868 		fs->fs_qbmask = ~fs->fs_bmask;
869 		fs->fs_qfmask = ~fs->fs_fmask;
870 	}
871 	if (fs->fs_magic == FS_UFS1_MAGIC) {
872 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
873 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
874 		if (fs->fs_maxfilesize > maxfilesize)
875 			fs->fs_maxfilesize = maxfilesize;
876 	}
877 	/* Compatibility for old filesystems */
878 	if (fs->fs_avgfilesize <= 0)
879 		fs->fs_avgfilesize = AVFILESIZ;
880 	if (fs->fs_avgfpdir <= 0)
881 		fs->fs_avgfpdir = AFPDIR;
882 	if (bigcgs) {
883 		fs->fs_save_cgsize = fs->fs_cgsize;
884 		fs->fs_cgsize = fs->fs_bsize;
885 	}
886 }
887 
888 /*
889  * Unwinding superblock updates for old filesystems.
890  * See ffs_oldfscompat_read above for details.
891  *
892  * XXX - Parts get retired eventually.
893  * Unfortunately new bits get added.
894  */
895 static void
896 ffs_oldfscompat_write(fs, ump)
897 	struct fs *fs;
898 	struct ufsmount *ump;
899 {
900 
901 	/*
902 	 * Copy back UFS2 updated fields that UFS1 inspects.
903 	 */
904 	if (fs->fs_magic == FS_UFS1_MAGIC) {
905 		fs->fs_old_time = fs->fs_time;
906 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
907 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
908 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
909 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
910 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
911 	}
912 	if (bigcgs) {
913 		fs->fs_cgsize = fs->fs_save_cgsize;
914 		fs->fs_save_cgsize = 0;
915 	}
916 }
917 
918 /*
919  * unmount system call
920  */
921 int
922 ffs_unmount(mp, mntflags, td)
923 	struct mount *mp;
924 	int mntflags;
925 	struct thread *td;
926 {
927 	struct ufsmount *ump = VFSTOUFS(mp);
928 	struct fs *fs;
929 	int error, flags;
930 
931 	flags = 0;
932 	if (mntflags & MNT_FORCE) {
933 		flags |= FORCECLOSE;
934 	}
935 #ifdef UFS_EXTATTR
936 	if ((error = ufs_extattr_stop(mp, td))) {
937 		if (error != EOPNOTSUPP)
938 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
939 			    error);
940 	} else {
941 		ufs_extattr_uepm_destroy(&ump->um_extattr);
942 	}
943 #endif
944 	if (mp->mnt_flag & MNT_SOFTDEP) {
945 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
946 			return (error);
947 	} else {
948 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
949 			return (error);
950 	}
951 	fs = ump->um_fs;
952 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
953 		printf("%s: unmount pending error: blocks %jd files %d\n",
954 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
955 		    fs->fs_pendinginodes);
956 		fs->fs_pendingblocks = 0;
957 		fs->fs_pendinginodes = 0;
958 	}
959 	if (fs->fs_ronly == 0) {
960 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
961 		error = ffs_sbupdate(ump, MNT_WAIT);
962 		if (error) {
963 			fs->fs_clean = 0;
964 			return (error);
965 		}
966 	}
967 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
968 
969 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
970 	/* XXX: see comment above VOP_OPEN */
971 #ifdef notyet
972 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
973 		NOCRED, td);
974 #else
975 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
976 #endif
977 
978 	vrele(ump->um_devvp);
979 
980 	free(fs->fs_csp, M_UFSMNT);
981 	free(fs, M_UFSMNT);
982 	free(ump, M_UFSMNT);
983 	mp->mnt_data = (qaddr_t)0;
984 	mp->mnt_flag &= ~MNT_LOCAL;
985 	return (error);
986 }
987 
988 /*
989  * Flush out all the files in a filesystem.
990  */
991 int
992 ffs_flushfiles(mp, flags, td)
993 	struct mount *mp;
994 	int flags;
995 	struct thread *td;
996 {
997 	struct ufsmount *ump;
998 	int error;
999 
1000 	ump = VFSTOUFS(mp);
1001 #ifdef QUOTA
1002 	if (mp->mnt_flag & MNT_QUOTA) {
1003 		int i;
1004 		error = vflush(mp, 0, SKIPSYSTEM|flags);
1005 		if (error)
1006 			return (error);
1007 		for (i = 0; i < MAXQUOTAS; i++) {
1008 			if (ump->um_quotas[i] == NULLVP)
1009 				continue;
1010 			quotaoff(td, mp, i);
1011 		}
1012 		/*
1013 		 * Here we fall through to vflush again to ensure
1014 		 * that we have gotten rid of all the system vnodes.
1015 		 */
1016 	}
1017 #endif
1018 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1019 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1020 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1021 			return (error);
1022 		ffs_snapshot_unmount(mp);
1023 		/*
1024 		 * Here we fall through to vflush again to ensure
1025 		 * that we have gotten rid of all the system vnodes.
1026 		 */
1027 	}
1028         /*
1029 	 * Flush all the files.
1030 	 */
1031 	if ((error = vflush(mp, 0, flags)) != 0)
1032 		return (error);
1033 	/*
1034 	 * Flush filesystem metadata.
1035 	 */
1036 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1037 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1038 	VOP_UNLOCK(ump->um_devvp, 0, td);
1039 	return (error);
1040 }
1041 
1042 /*
1043  * Get filesystem statistics.
1044  */
1045 int
1046 ffs_statfs(mp, sbp, td)
1047 	struct mount *mp;
1048 	struct statfs *sbp;
1049 	struct thread *td;
1050 {
1051 	struct ufsmount *ump;
1052 	struct fs *fs;
1053 
1054 	ump = VFSTOUFS(mp);
1055 	fs = ump->um_fs;
1056 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1057 		panic("ffs_statfs");
1058 	sbp->f_bsize = fs->fs_fsize;
1059 	sbp->f_iosize = fs->fs_bsize;
1060 	sbp->f_blocks = fs->fs_dsize;
1061 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1062 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1063 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1064 	    dbtofsb(fs, fs->fs_pendingblocks);
1065 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1066 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1067 	if (sbp != &mp->mnt_stat) {
1068 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1069 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1070 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1071 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1072 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1073 	}
1074 	return (0);
1075 }
1076 
1077 /*
1078  * Go through the disk queues to initiate sandbagged IO;
1079  * go through the inodes to write those that have been modified;
1080  * initiate the writing of the super block if it has been modified.
1081  *
1082  * Note: we are always called with the filesystem marked `MPBUSY'.
1083  */
1084 int
1085 ffs_sync(mp, waitfor, cred, td)
1086 	struct mount *mp;
1087 	int waitfor;
1088 	struct ucred *cred;
1089 	struct thread *td;
1090 {
1091 	struct vnode *nvp, *vp, *devvp;
1092 	struct inode *ip;
1093 	struct ufsmount *ump = VFSTOUFS(mp);
1094 	struct fs *fs;
1095 	int error, count, wait, lockreq, allerror = 0;
1096 
1097 	fs = ump->um_fs;
1098 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1099 		printf("fs = %s\n", fs->fs_fsmnt);
1100 		panic("ffs_sync: rofs mod");
1101 	}
1102 	/*
1103 	 * Write back each (modified) inode.
1104 	 */
1105 	wait = 0;
1106 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1107 	if (waitfor == MNT_WAIT) {
1108 		wait = 1;
1109 		lockreq = LK_EXCLUSIVE;
1110 	}
1111 	mtx_lock(&mntvnode_mtx);
1112 loop:
1113 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1114 		/*
1115 		 * If the vnode that we are about to sync is no longer
1116 		 * associated with this mount point, start over.
1117 		 */
1118 		if (vp->v_mount != mp)
1119 			goto loop;
1120 
1121 		/*
1122 		 * Depend on the mntvnode_slock to keep things stable enough
1123 		 * for a quick test.  Since there might be hundreds of
1124 		 * thousands of vnodes, we cannot afford even a subroutine
1125 		 * call unless there's a good chance that we have work to do.
1126 		 */
1127 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1128 		ip = VTOI(vp);
1129 		if (vp->v_type == VNON || ((ip->i_flag &
1130 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1131 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1132 			continue;
1133 		}
1134 		if (vp->v_type != VCHR) {
1135 			mtx_unlock(&mntvnode_mtx);
1136 			if ((error = vget(vp, lockreq, td)) != 0) {
1137 				mtx_lock(&mntvnode_mtx);
1138 				if (error == ENOENT)
1139 					goto loop;
1140 			} else {
1141 				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1142 					allerror = error;
1143 				VOP_UNLOCK(vp, 0, td);
1144 				vrele(vp);
1145 				mtx_lock(&mntvnode_mtx);
1146 			}
1147 		} else {
1148 			mtx_unlock(&mntvnode_mtx);
1149 			UFS_UPDATE(vp, wait);
1150 			mtx_lock(&mntvnode_mtx);
1151 		}
1152 		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1153 			goto loop;
1154 	}
1155 	mtx_unlock(&mntvnode_mtx);
1156 	/*
1157 	 * Force stale filesystem control information to be flushed.
1158 	 */
1159 	if (waitfor == MNT_WAIT) {
1160 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1161 			allerror = error;
1162 		/* Flushed work items may create new vnodes to clean */
1163 		if (allerror == 0 && count) {
1164 			mtx_lock(&mntvnode_mtx);
1165 			goto loop;
1166 		}
1167 	}
1168 #ifdef QUOTA
1169 	qsync(mp);
1170 #endif
1171 	devvp = ump->um_devvp;
1172 	VI_LOCK(devvp);
1173 	if (waitfor != MNT_LAZY &&
1174 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1175 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1176 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1177 			allerror = error;
1178 		VOP_UNLOCK(devvp, 0, td);
1179 		if (allerror == 0 && waitfor == MNT_WAIT) {
1180 			mtx_lock(&mntvnode_mtx);
1181 			goto loop;
1182 		}
1183 	} else
1184 		VI_UNLOCK(devvp);
1185 	/*
1186 	 * Write back modified superblock.
1187 	 */
1188 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1189 		allerror = error;
1190 	return (allerror);
1191 }
1192 
1193 int
1194 ffs_vget(mp, ino, flags, vpp)
1195 	struct mount *mp;
1196 	ino_t ino;
1197 	int flags;
1198 	struct vnode **vpp;
1199 {
1200 	struct thread *td = curthread; 		/* XXX */
1201 	struct fs *fs;
1202 	struct inode *ip;
1203 	struct ufsmount *ump;
1204 	struct buf *bp;
1205 	struct vnode *vp;
1206 	dev_t dev;
1207 	int error;
1208 
1209 	ump = VFSTOUFS(mp);
1210 	dev = ump->um_dev;
1211 
1212 	/*
1213 	 * We do not lock vnode creation as it is believed to be too
1214 	 * expensive for such rare case as simultaneous creation of vnode
1215 	 * for same ino by different processes. We just allow them to race
1216 	 * and check later to decide who wins. Let the race begin!
1217 	 */
1218 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1219 		return (error);
1220 	if (*vpp != NULL)
1221 		return (0);
1222 
1223 	/*
1224 	 * If this MALLOC() is performed after the getnewvnode()
1225 	 * it might block, leaving a vnode with a NULL v_data to be
1226 	 * found by ffs_sync() if a sync happens to fire right then,
1227 	 * which will cause a panic because ffs_sync() blindly
1228 	 * dereferences vp->v_data (as well it should).
1229 	 */
1230 	MALLOC(ip, struct inode *, sizeof(struct inode),
1231 	    ump->um_malloctype, M_WAITOK);
1232 
1233 	/* Allocate a new vnode/inode. */
1234 	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1235 	if (error) {
1236 		*vpp = NULL;
1237 		FREE(ip, ump->um_malloctype);
1238 		return (error);
1239 	}
1240 	bzero((caddr_t)ip, sizeof(struct inode));
1241 	/*
1242 	 * FFS supports recursive locking.
1243 	 */
1244 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1245 	vp->v_data = ip;
1246 	ip->i_vnode = vp;
1247 	ip->i_ump = ump;
1248 	ip->i_fs = fs = ump->um_fs;
1249 	ip->i_dev = dev;
1250 	ip->i_number = ino;
1251 #ifdef QUOTA
1252 	{
1253 		int i;
1254 		for (i = 0; i < MAXQUOTAS; i++)
1255 			ip->i_dquot[i] = NODQUOT;
1256 	}
1257 #endif
1258 	/*
1259 	 * Exclusively lock the vnode before adding to hash. Note, that we
1260 	 * must not release nor downgrade the lock (despite flags argument
1261 	 * says) till it is fully initialized.
1262 	 */
1263 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1264 
1265 	/*
1266 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1267 	 * duplicate of vnode being created and add it to the hash. If a
1268 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1269 	 */
1270 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1271 		vput(vp);
1272 		*vpp = NULL;
1273 		return (error);
1274 	}
1275 
1276 	/* We lost the race, then throw away our vnode and return existing */
1277 	if (*vpp != NULL) {
1278 		vput(vp);
1279 		return (0);
1280 	}
1281 
1282 	/* Read in the disk contents for the inode, copy into the inode. */
1283 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1284 	    (int)fs->fs_bsize, NOCRED, &bp);
1285 	if (error) {
1286 		/*
1287 		 * The inode does not contain anything useful, so it would
1288 		 * be misleading to leave it on its hash chain. With mode
1289 		 * still zero, it will be unlinked and returned to the free
1290 		 * list by vput().
1291 		 */
1292 		brelse(bp);
1293 		vput(vp);
1294 		*vpp = NULL;
1295 		return (error);
1296 	}
1297 	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
1298 	if (DOINGSOFTDEP(vp))
1299 		softdep_load_inodeblock(ip);
1300 	else
1301 		ip->i_effnlink = ip->i_nlink;
1302 	bqrelse(bp);
1303 
1304 	/*
1305 	 * Initialize the vnode from the inode, check for aliases.
1306 	 * Note that the underlying vnode may have changed.
1307 	 */
1308 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1309 	if (error) {
1310 		vput(vp);
1311 		*vpp = NULL;
1312 		return (error);
1313 	}
1314 	/*
1315 	 * Finish inode initialization now that aliasing has been resolved.
1316 	 */
1317 	ip->i_devvp = ump->um_devvp;
1318 	VREF(ip->i_devvp);
1319 	/*
1320 	 * Set up a generation number for this inode if it does not
1321 	 * already have one. This should only happen on old filesystems.
1322 	 */
1323 	if (ip->i_gen == 0) {
1324 		ip->i_gen = random() / 2 + 1;
1325 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1326 			ip->i_flag |= IN_MODIFIED;
1327 			DIP(ip, i_gen) = ip->i_gen;
1328 		}
1329 	}
1330 	/*
1331 	 * Ensure that uid and gid are correct. This is a temporary
1332 	 * fix until fsck has been changed to do the update.
1333 	 */
1334 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1335 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1336 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1337 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1338 	}						/* XXX */
1339 
1340 	*vpp = vp;
1341 	return (0);
1342 }
1343 
1344 /*
1345  * File handle to vnode
1346  *
1347  * Have to be really careful about stale file handles:
1348  * - check that the inode number is valid
1349  * - call ffs_vget() to get the locked inode
1350  * - check for an unallocated inode (i_mode == 0)
1351  * - check that the given client host has export rights and return
1352  *   those rights via. exflagsp and credanonp
1353  */
1354 int
1355 ffs_fhtovp(mp, fhp, vpp)
1356 	struct mount *mp;
1357 	struct fid *fhp;
1358 	struct vnode **vpp;
1359 {
1360 	struct ufid *ufhp;
1361 	struct fs *fs;
1362 
1363 	ufhp = (struct ufid *)fhp;
1364 	fs = VFSTOUFS(mp)->um_fs;
1365 	if (ufhp->ufid_ino < ROOTINO ||
1366 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1367 		return (ESTALE);
1368 	return (ufs_fhtovp(mp, ufhp, vpp));
1369 }
1370 
1371 /*
1372  * Vnode pointer to File handle
1373  */
1374 /* ARGSUSED */
1375 int
1376 ffs_vptofh(vp, fhp)
1377 	struct vnode *vp;
1378 	struct fid *fhp;
1379 {
1380 	struct inode *ip;
1381 	struct ufid *ufhp;
1382 
1383 	ip = VTOI(vp);
1384 	ufhp = (struct ufid *)fhp;
1385 	ufhp->ufid_len = sizeof(struct ufid);
1386 	ufhp->ufid_ino = ip->i_number;
1387 	ufhp->ufid_gen = ip->i_gen;
1388 	return (0);
1389 }
1390 
/*
 * Filesystem initialization hook: set up soft updates first, then run
 * the generic UFS initialization and return its status.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int status;

	softdep_initialize();
	status = ufs_init(vfsp);
	return (status);
}
1402 
/*
 * Reverse ffs_init(): tear down generic UFS state, then soft updates.
 * The status returned is that of ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int status = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (status);
}
1416 
1417 /*
1418  * Write a superblock and associated information back to disk.
1419  */
1420 static int
1421 ffs_sbupdate(mp, waitfor)
1422 	struct ufsmount *mp;
1423 	int waitfor;
1424 {
1425 	struct fs *fs = mp->um_fs;
1426 	struct buf *bp;
1427 	int blks;
1428 	void *space;
1429 	int i, size, error, allerror = 0;
1430 
1431 	/*
1432 	 * First write back the summary information.
1433 	 */
1434 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1435 	space = fs->fs_csp;
1436 	for (i = 0; i < blks; i += fs->fs_frag) {
1437 		size = fs->fs_bsize;
1438 		if (i + fs->fs_frag > blks)
1439 			size = (blks - i) * fs->fs_fsize;
1440 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1441 		    size, 0, 0);
1442 		bcopy(space, bp->b_data, (u_int)size);
1443 		space = (char *)space + size;
1444 		if (waitfor != MNT_WAIT)
1445 			bawrite(bp);
1446 		else if ((error = bwrite(bp)) != 0)
1447 			allerror = error;
1448 	}
1449 	/*
1450 	 * Now write back the superblock itself. If any errors occurred
1451 	 * up to this point, then fail so that the superblock avoids
1452 	 * being written out as clean.
1453 	 */
1454 	if (allerror)
1455 		return (allerror);
1456 	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1457 	    (int)fs->fs_sbsize, 0, 0);
1458 	fs->fs_fmod = 0;
1459 	fs->fs_time = time_second;
1460 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1461 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1462 	if (waitfor != MNT_WAIT)
1463 		bawrite(bp);
1464 	else if ((error = bwrite(bp)) != 0)
1465 		allerror = error;
1466 	return (allerror);
1467 }
1468 
/*
 * Extended attribute control entry point.  Kernels built without
 * UFS_EXTATTR pass the request to the generic vfs_stdextattrctl();
 * kernels built with it pass the request to ufs_extattrctl().  All
 * arguments are forwarded unchanged and the callee's result is
 * returned.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{

#ifndef UFS_EXTATTR
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#else
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td));
#endif
}
1482