xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 71fe318b852b8dfb3e799cb12ef184750f7f8eac)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_mac.h"
38 #include "opt_quota.h"
39 #include "opt_ufs.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/stdint.h>
44 #include <sys/namei.h>
45 #include <sys/proc.h>
46 #include <sys/kernel.h>
47 #include <sys/mac.h>
48 #include <sys/vnode.h>
49 #include <sys/mount.h>
50 #include <sys/bio.h>
51 #include <sys/buf.h>
52 #include <sys/conf.h>
53 #include <sys/fcntl.h>
54 #include <sys/disk.h>
55 #include <sys/malloc.h>
56 #include <sys/mutex.h>
57 
58 #include <ufs/ufs/extattr.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66 
67 #include <vm/vm.h>
68 #include <vm/vm_page.h>
69 
70 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
71 
72 static int	ffs_sbupdate(struct ufsmount *, int);
73        int	ffs_reload(struct mount *,struct ucred *,struct thread *);
74 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
75 		    ufs2_daddr_t);
76 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
77 static vfs_init_t ffs_init;
78 static vfs_uninit_t ffs_uninit;
79 static vfs_extattrctl_t ffs_extattrctl;
80 
81 static struct vfsops ufs_vfsops = {
82 	ffs_mount,
83 	ufs_start,
84 	ffs_unmount,
85 	ufs_root,
86 	ufs_quotactl,
87 	ffs_statfs,
88 	ffs_sync,
89 	ffs_vget,
90 	ffs_fhtovp,
91 	vfs_stdcheckexp,
92 	ffs_vptofh,
93 	ffs_init,
94 	ffs_uninit,
95 	ffs_extattrctl,
96 };
97 
98 VFS_SET(ufs_vfsops, ufs, 0);
99 
100 /*
101  * ffs_mount
102  *
103  * Called when mounting local physical media
104  *
105  * PARAMETERS:
106  *		mountroot
107  *			mp	mount point structure
108  *			path	NULL (flag for root mount!!!)
109  *			data	<unused>
110  *			ndp	<unused>
111  *			p	process (user credentials check [statfs])
112  *
113  *		mount
114  *			mp	mount point structure
115  *			path	path to mount point
116  *			data	pointer to argument struct in user space
117  *			ndp	mount point namei() return (used for
118  *				credentials on reload), reused to look
119  *				up block device.
120  *			p	process (user credentials check)
121  *
122  * RETURNS:	0	Success
123  *		!0	error number (errno.h)
124  *
125  * LOCK STATE:
126  *
127  *		ENTRY
128  *			mount point is locked
129  *		EXIT
130  *			mount point is locked
131  *
132  * NOTES:
133  *		A NULL path can be used for a flag since the mount
134  *		system call will fail with EFAULT in copyinstr in
135  *		namei() if it is a genuine NULL from the user.
136  */
137 int
138 ffs_mount(mp, path, data, ndp, td)
139         struct mount		*mp;	/* mount struct pointer*/
140         char			*path;	/* path to mount point*/
141         caddr_t			data;	/* arguments to FS specific mount*/
142         struct nameidata	*ndp;	/* mount point credentials*/
143         struct thread		*td;	/* process requesting mount*/
144 {
145 	size_t size;
146 	struct vnode *devvp;
147 	struct ufs_args args;
148 	struct ufsmount *ump = 0;
149 	struct fs *fs;
150 	int error, flags;
151 	mode_t accessmode;
152 
153 	/*
154 	 * Use NULL path to indicate we are mounting the root filesystem.
155 	 */
156 	if (path == NULL) {
157 		if ((error = bdevvp(rootdev, &rootvp))) {
158 			printf("ffs_mountroot: can't find rootvp\n");
159 			return (error);
160 		}
161 
162 		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
163 			return (error);
164 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
165 		return (0);
166 	}
167 
168 	/*
169 	 * Mounting non-root filesystem or updating a filesystem
170 	 */
171 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
172 		return (error);
173 
174 	/*
175 	 * If updating, check whether changing from read-only to
176 	 * read/write; if there is no device name, that's all we do.
177 	 */
178 	if (mp->mnt_flag & MNT_UPDATE) {
179 		ump = VFSTOUFS(mp);
180 		fs = ump->um_fs;
181 		devvp = ump->um_devvp;
182 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
183 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
184 				return (error);
185 			/*
186 			 * Flush any dirty data.
187 			 */
188 			if ((error = VFS_SYNC(mp, MNT_WAIT,
189 			    td->td_proc->p_ucred, td)) != 0) {
190 				vn_finished_write(mp);
191 				return (error);
192 			}
193 			/*
194 			 * Check for and optionally get rid of files open
195 			 * for writing.
196 			 */
197 			flags = WRITECLOSE;
198 			if (mp->mnt_flag & MNT_FORCE)
199 				flags |= FORCECLOSE;
200 			if (mp->mnt_flag & MNT_SOFTDEP) {
201 				error = softdep_flushfiles(mp, flags, td);
202 			} else {
203 				error = ffs_flushfiles(mp, flags, td);
204 			}
205 			if (error) {
206 				vn_finished_write(mp);
207 				return (error);
208 			}
209 			if (fs->fs_pendingblocks != 0 ||
210 			    fs->fs_pendinginodes != 0) {
211 				printf("%s: %s: blocks %jd files %d\n",
212 				    fs->fs_fsmnt, "update error",
213 				    (intmax_t)fs->fs_pendingblocks,
214 				    fs->fs_pendinginodes);
215 				fs->fs_pendingblocks = 0;
216 				fs->fs_pendinginodes = 0;
217 			}
218 			fs->fs_ronly = 1;
219 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
220 				fs->fs_clean = 1;
221 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
222 				fs->fs_ronly = 0;
223 				fs->fs_clean = 0;
224 				vn_finished_write(mp);
225 				return (error);
226 			}
227 			vn_finished_write(mp);
228 		}
229 		if ((mp->mnt_flag & MNT_RELOAD) &&
230 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
231 			return (error);
232 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
233 			/*
234 			 * If upgrade to read-write by non-root, then verify
235 			 * that user has necessary permissions on the device.
236 			 */
237 			if (suser(td)) {
238 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
239 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
240 				    td->td_ucred, td)) != 0) {
241 					VOP_UNLOCK(devvp, 0, td);
242 					return (error);
243 				}
244 				VOP_UNLOCK(devvp, 0, td);
245 			}
246 			fs->fs_flags &= ~FS_UNCLEAN;
247 			if (fs->fs_clean == 0) {
248 				fs->fs_flags |= FS_UNCLEAN;
249 				if ((mp->mnt_flag & MNT_FORCE) ||
250 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
251 				     (fs->fs_flags & FS_DOSOFTDEP))) {
252 					printf("WARNING: %s was not %s\n",
253 					   fs->fs_fsmnt, "properly dismounted");
254 				} else {
255 					printf(
256 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
257 					    fs->fs_fsmnt);
258 					return (EPERM);
259 				}
260 			}
261 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
262 				return (error);
263 			fs->fs_ronly = 0;
264 			fs->fs_clean = 0;
265 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
266 				vn_finished_write(mp);
267 				return (error);
268 			}
269 			/* check to see if we need to start softdep */
270 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
271 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
272 				vn_finished_write(mp);
273 				return (error);
274 			}
275 			if (fs->fs_snapinum[0] != 0)
276 				ffs_snapshot_mount(mp);
277 			vn_finished_write(mp);
278 		}
279 		/*
280 		 * Soft updates is incompatible with "async",
281 		 * so if we are doing softupdates stop the user
282 		 * from setting the async flag in an update.
283 		 * Softdep_mount() clears it in an initial mount
284 		 * or ro->rw remount.
285 		 */
286 		if (mp->mnt_flag & MNT_SOFTDEP)
287 			mp->mnt_flag &= ~MNT_ASYNC;
288 		/*
289 		 * If not updating name, process export requests.
290 		 */
291 		if (args.fspec == 0)
292 			return (vfs_export(mp, &args.export));
293 		/*
294 		 * If this is a snapshot request, take the snapshot.
295 		 */
296 		if (mp->mnt_flag & MNT_SNAPSHOT)
297 			return (ffs_snapshot(mp, args.fspec));
298 	}
299 
300 	/*
301 	 * Not an update, or updating the name: look up the name
302 	 * and verify that it refers to a sensible block device.
303 	 */
304 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
305 	if ((error = namei(ndp)) != 0)
306 		return (error);
307 	NDFREE(ndp, NDF_ONLY_PNBUF);
308 	devvp = ndp->ni_vp;
309 	if (!vn_isdisk(devvp, &error)) {
310 		vrele(devvp);
311 		return (error);
312 	}
313 
314 	/*
315 	 * If mount by non-root, then verify that user has necessary
316 	 * permissions on the device.
317 	 */
318 	if (suser(td)) {
319 		accessmode = VREAD;
320 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
321 			accessmode |= VWRITE;
322 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
323 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
324 			vput(devvp);
325 			return (error);
326 		}
327 		VOP_UNLOCK(devvp, 0, td);
328 	}
329 
330 	if (mp->mnt_flag & MNT_UPDATE) {
331 		/*
332 		 * Update only
333 		 *
334 		 * If it's not the same vnode, or at least the same device
335 		 * then it's not correct.
336 		 */
337 
338 		if (devvp != ump->um_devvp &&
339 		    devvp->v_rdev != ump->um_devvp->v_rdev)
340 			error = EINVAL;	/* needs translation */
341 		vrele(devvp);
342 		if (error)
343 			return (error);
344 	} else {
345 		/*
346 		 * New mount
347 		 *
348 		 * We need the name for the mount point (also used for
349 		 * "last mounted on") copied in. If an error occurs,
350 		 * the mount point is discarded by the upper level code.
351 		 * Note that vfs_mount() populates f_mntonname for us.
352 		 */
353 		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
354 			vrele(devvp);
355 			return (error);
356 		}
357 	}
358 	/*
359 	 * Save "mounted from" device name info for mount point (NULL pad).
360 	 */
361 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
362 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
363 	/*
364 	 * Initialize filesystem stat information in mount struct.
365 	 */
366 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
367 	return (0);
368 }
369 
370 /*
371  * Reload all incore data for a filesystem (used after running fsck on
372  * the root filesystem and finding things to fix). The filesystem must
373  * be mounted read-only.
374  *
375  * Things to do to update the mount:
376  *	1) invalidate all cached meta-data.
377  *	2) re-read superblock from disk.
378  *	3) re-read summary information from disk.
379  *	4) invalidate all inactive vnodes.
380  *	5) invalidate all cached file data.
381  *	6) re-read inode data for all active vnodes.
382  */
383 int
384 ffs_reload(mp, cred, td)
385 	struct mount *mp;
386 	struct ucred *cred;
387 	struct thread *td;
388 {
389 	struct vnode *vp, *nvp, *devvp;
390 	struct inode *ip;
391 	void *space;
392 	struct buf *bp;
393 	struct fs *fs, *newfs;
394 	dev_t dev;
395 	ufs2_daddr_t sblockloc;
396 	int i, blks, size, error;
397 	int32_t *lp;
398 
399 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
400 		return (EINVAL);
401 	/*
402 	 * Step 1: invalidate all cached meta-data.
403 	 */
404 	devvp = VFSTOUFS(mp)->um_devvp;
405 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
406 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
407 	VOP_UNLOCK(devvp, 0, td);
408 	if (error)
409 		panic("ffs_reload: dirty1");
410 
411 	dev = devvp->v_rdev;
412 
413 	/*
414 	 * Only VMIO the backing device if the backing device is a real
415 	 * block device.
416 	 */
417 	if (vn_isdisk(devvp, NULL)) {
418 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
419 		vfs_object_create(devvp, td, td->td_ucred);
420 		/* XXX Why lock only to release immediately?? */
421 		mtx_lock(&devvp->v_interlock);
422 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
423 	}
424 
425 	/*
426 	 * Step 2: re-read superblock from disk.
427 	 */
428 	fs = VFSTOUFS(mp)->um_fs;
429 	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
430 	    NOCRED, &bp)) != 0)
431 		return (error);
432 	newfs = (struct fs *)bp->b_data;
433 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
434 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
435 	    newfs->fs_bsize > MAXBSIZE ||
436 	    newfs->fs_bsize < sizeof(struct fs)) {
437 			brelse(bp);
438 			return (EIO);		/* XXX needs translation */
439 	}
440 	/*
441 	 * Copy pointer fields back into superblock before copying in	XXX
442 	 * new superblock. These should really be in the ufsmount.	XXX
443 	 * Note that important parameters (eg fs_ncg) are unchanged.
444 	 */
445 	newfs->fs_csp = fs->fs_csp;
446 	newfs->fs_maxcluster = fs->fs_maxcluster;
447 	newfs->fs_contigdirs = fs->fs_contigdirs;
448 	newfs->fs_active = fs->fs_active;
449 	sblockloc = fs->fs_sblockloc;
450 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
451 	brelse(bp);
452 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
453 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
454 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
455 		printf("%s: reload pending error: blocks %jd files %d\n",
456 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
457 		    fs->fs_pendinginodes);
458 		fs->fs_pendingblocks = 0;
459 		fs->fs_pendinginodes = 0;
460 	}
461 
462 	/*
463 	 * Step 3: re-read summary information from disk.
464 	 */
465 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
466 	space = fs->fs_csp;
467 	for (i = 0; i < blks; i += fs->fs_frag) {
468 		size = fs->fs_bsize;
469 		if (i + fs->fs_frag > blks)
470 			size = (blks - i) * fs->fs_fsize;
471 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
472 		    NOCRED, &bp);
473 		if (error)
474 			return (error);
475 		bcopy(bp->b_data, space, (u_int)size);
476 		space = (char *)space + size;
477 		brelse(bp);
478 	}
479 	/*
480 	 * We no longer know anything about clusters per cylinder group.
481 	 */
482 	if (fs->fs_contigsumsize > 0) {
483 		lp = fs->fs_maxcluster;
484 		for (i = 0; i < fs->fs_ncg; i++)
485 			*lp++ = fs->fs_contigsumsize;
486 	}
487 
488 loop:
489 	mtx_lock(&mntvnode_mtx);
490 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
491 		if (vp->v_mount != mp) {
492 			mtx_unlock(&mntvnode_mtx);
493 			goto loop;
494 		}
495 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
496 		mtx_unlock(&mntvnode_mtx);
497 		/*
498 		 * Step 4: invalidate all inactive vnodes.
499 		 */
500 		if (vrecycle(vp, NULL, td))
501 			goto loop;
502 		/*
503 		 * Step 5: invalidate all cached file data.
504 		 */
505 		/* XXX Why lock only to release immediately? */
506 		mtx_lock(&vp->v_interlock);
507 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
508 			goto loop;
509 		}
510 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
511 			panic("ffs_reload: dirty2");
512 		/*
513 		 * Step 6: re-read inode data for all active vnodes.
514 		 */
515 		ip = VTOI(vp);
516 		error =
517 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
518 		    (int)fs->fs_bsize, NOCRED, &bp);
519 		if (error) {
520 			vput(vp);
521 			return (error);
522 		}
523 		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
524 		ip->i_effnlink = ip->i_nlink;
525 		brelse(bp);
526 		vput(vp);
527 		mtx_lock(&mntvnode_mtx);
528 	}
529 	mtx_unlock(&mntvnode_mtx);
530 	return (0);
531 }
532 
533 /*
534  * Possible superblock locations ordered from most to least likely.
535  */
536 static int sblock_try[] = SBLOCKSEARCH;
537 
538 /*
539  * Common code for mount and mountroot
540  */
541 int
542 ffs_mountfs(devvp, mp, td, malloctype)
543 	struct vnode *devvp;
544 	struct mount *mp;
545 	struct thread *td;
546 	struct malloc_type *malloctype;
547 {
548 	struct ufsmount *ump;
549 	struct buf *bp;
550 	struct fs *fs;
551 	dev_t dev;
552 	void *space;
553 	ufs2_daddr_t sblockloc;
554 	int error, i, blks, size, ronly;
555 	int32_t *lp;
556 	struct ucred *cred;
557 	size_t strsize;
558 	int ncount;
559 
560 	dev = devvp->v_rdev;
561 	cred = td ? td->td_ucred : NOCRED;
562 	/*
563 	 * Disallow multiple mounts of the same device.
564 	 * Disallow mounting of a device that is currently in use
565 	 * (except for root, which might share swap device for miniroot).
566 	 * Flush out any old buffers remaining from a previous use.
567 	 */
568 	error = vfs_mountedon(devvp);
569 	if (error)
570 		return (error);
571 	ncount = vcount(devvp);
572 
573 	if (ncount > 1 && devvp != rootvp)
574 		return (EBUSY);
575 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
576 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
577 	VOP_UNLOCK(devvp, 0, td);
578 	if (error)
579 		return (error);
580 
581 	/*
582 	 * Only VMIO the backing device if the backing device is a real
583 	 * block device.
584 	 * Note that it is optional that the backing device be VMIOed.  This
585 	 * increases the opportunity for metadata caching.
586 	 */
587 	if (vn_isdisk(devvp, NULL)) {
588 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
589 		vfs_object_create(devvp, td, cred);
590 		/* XXX Why lock only to release immediately?? */
591 		mtx_lock(&devvp->v_interlock);
592 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
593 	}
594 
595 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
596 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
597 	/*
598 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
599 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
600 	 * XXX: start to avoid getting trashed later on.
601 	 */
602 #ifdef notyet
603 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
604 #else
605 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
606 #endif
607 	VOP_UNLOCK(devvp, 0, td);
608 	if (error)
609 		return (error);
610 	if (devvp->v_rdev->si_iosize_max != 0)
611 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
612 	if (mp->mnt_iosize_max > MAXPHYS)
613 		mp->mnt_iosize_max = MAXPHYS;
614 
615 	bp = NULL;
616 	ump = NULL;
617 	fs = NULL;
618 	sblockloc = 0;
619 	/*
620 	 * Try reading the superblock in each of its possible locations.
621 	 */
622 	for (i = 0; sblock_try[i] != -1; i++) {
623 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
624 		    cred, &bp)) != 0)
625 			goto out;
626 		fs = (struct fs *)bp->b_data;
627 		sblockloc = numfrags(fs, sblock_try[i]);
628 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
629 		     (fs->fs_magic == FS_UFS2_MAGIC &&
630 		      fs->fs_sblockloc == sblockloc)) &&
631 		    fs->fs_bsize <= MAXBSIZE &&
632 		    fs->fs_bsize >= sizeof(struct fs))
633 			break;
634 		brelse(bp);
635 		bp = NULL;
636 	}
637 	if (sblock_try[i] == -1) {
638 		error = EINVAL;		/* XXX needs translation */
639 		goto out;
640 	}
641 	fs->fs_fmod = 0;
642 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
643 	fs->fs_flags &= ~FS_UNCLEAN;
644 	if (fs->fs_clean == 0) {
645 		fs->fs_flags |= FS_UNCLEAN;
646 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
647 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
648 		     (fs->fs_flags & FS_DOSOFTDEP))) {
649 			printf(
650 "WARNING: %s was not properly dismounted\n",
651 			    fs->fs_fsmnt);
652 		} else {
653 			printf(
654 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
655 			    fs->fs_fsmnt);
656 			error = EPERM;
657 			goto out;
658 		}
659 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
660 		    (mp->mnt_flag & MNT_FORCE)) {
661 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
662 			    (intmax_t)fs->fs_pendingblocks,
663 			    fs->fs_pendinginodes);
664 			fs->fs_pendingblocks = 0;
665 			fs->fs_pendinginodes = 0;
666 		}
667 	}
668 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
669 		printf("%s: mount pending error: blocks %jd files %d\n",
670 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
671 		    fs->fs_pendinginodes);
672 		fs->fs_pendingblocks = 0;
673 		fs->fs_pendinginodes = 0;
674 	}
675 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
676 	ump->um_malloctype = malloctype;
677 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
678 	    M_WAITOK);
679 	if (fs->fs_magic == FS_UFS1_MAGIC) {
680 		ump->um_fstype = UFS1;
681 		ump->um_balloc = ffs_balloc_ufs1;
682 	} else {
683 		ump->um_fstype = UFS2;
684 		ump->um_balloc = ffs_balloc_ufs2;
685 	}
686 	ump->um_blkatoff = ffs_blkatoff;
687 	ump->um_truncate = ffs_truncate;
688 	ump->um_update = ffs_update;
689 	ump->um_valloc = ffs_valloc;
690 	ump->um_vfree = ffs_vfree;
691 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
692 	if (fs->fs_sbsize < SBLOCKSIZE)
693 		bp->b_flags |= B_INVAL | B_NOCACHE;
694 	brelse(bp);
695 	bp = NULL;
696 	fs = ump->um_fs;
697 	ffs_oldfscompat_read(fs, ump, sblockloc);
698 	fs->fs_ronly = ronly;
699 	size = fs->fs_cssize;
700 	blks = howmany(size, fs->fs_fsize);
701 	if (fs->fs_contigsumsize > 0)
702 		size += fs->fs_ncg * sizeof(int32_t);
703 	size += fs->fs_ncg * sizeof(u_int8_t);
704 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
705 	fs->fs_csp = space;
706 	for (i = 0; i < blks; i += fs->fs_frag) {
707 		size = fs->fs_bsize;
708 		if (i + fs->fs_frag > blks)
709 			size = (blks - i) * fs->fs_fsize;
710 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
711 		    cred, &bp)) != 0) {
712 			free(fs->fs_csp, M_UFSMNT);
713 			goto out;
714 		}
715 		bcopy(bp->b_data, space, (u_int)size);
716 		space = (char *)space + size;
717 		brelse(bp);
718 		bp = NULL;
719 	}
720 	if (fs->fs_contigsumsize > 0) {
721 		fs->fs_maxcluster = lp = space;
722 		for (i = 0; i < fs->fs_ncg; i++)
723 			*lp++ = fs->fs_contigsumsize;
724 		space = lp;
725 	}
726 	size = fs->fs_ncg * sizeof(u_int8_t);
727 	fs->fs_contigdirs = (u_int8_t *)space;
728 	bzero(fs->fs_contigdirs, size);
729 	fs->fs_active = NULL;
730 	mp->mnt_data = (qaddr_t)ump;
731 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
732 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
733 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
734 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
735 		vfs_getnewfsid(mp);
736 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
737 	mp->mnt_flag |= MNT_LOCAL;
738 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
739 #ifdef MAC
740 		mp->mnt_flag |= MNT_MULTILABEL;
741 #else
742 		printf(
743 "WARNING: %s: multilabel flag on fs but no MAC support\n",
744 		    fs->fs_fsmnt);
745 #endif
746 	if ((fs->fs_flags & FS_ACLS) != 0)
747 #ifdef UFS_ACL
748 		mp->mnt_flag |= MNT_ACLS;
749 #else
750 		printf(
751 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
752 		    fs->fs_fsmnt);
753 #endif
754 	ump->um_mountp = mp;
755 	ump->um_dev = dev;
756 	ump->um_devvp = devvp;
757 	ump->um_nindir = fs->fs_nindir;
758 	ump->um_bptrtodb = fs->fs_fsbtodb;
759 	ump->um_seqinc = fs->fs_frag;
760 	for (i = 0; i < MAXQUOTAS; i++)
761 		ump->um_quotas[i] = NULLVP;
762 #ifdef UFS_EXTATTR
763 	ufs_extattr_uepm_init(&ump->um_extattr);
764 #endif
765 	devvp->v_rdev->si_mountpoint = mp;
766 
767 	/*
768 	 * Set FS local "last mounted on" information (NULL pad)
769 	 */
770 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
771 			fs->fs_fsmnt,			/* copy area*/
772 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
773 			&strsize);			/* real size*/
774 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
775 
776 	if( mp->mnt_flag & MNT_ROOTFS) {
777 		/*
778 		 * Root mount; update timestamp in mount structure.
779 		 * this will be used by the common root mount code
780 		 * to update the system clock.
781 		 */
782 		mp->mnt_time = fs->fs_time;
783 	}
784 
785 	if (ronly == 0) {
786 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
787 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
788 			free(fs->fs_csp, M_UFSMNT);
789 			goto out;
790 		}
791 		if (fs->fs_snapinum[0] != 0)
792 			ffs_snapshot_mount(mp);
793 		fs->fs_fmod = 1;
794 		fs->fs_clean = 0;
795 		(void) ffs_sbupdate(ump, MNT_WAIT);
796 	}
797 #ifdef UFS_EXTATTR
798 #ifdef UFS_EXTATTR_AUTOSTART
799 	/*
800 	 *
801 	 * Auto-starting does the following:
802 	 *	- check for /.attribute in the fs, and extattr_start if so
803 	 *	- for each file in .attribute, enable that file with
804 	 * 	  an attribute of the same name.
805 	 * Not clear how to report errors -- probably eat them.
806 	 * This would all happen while the filesystem was busy/not
807 	 * available, so would effectively be "atomic".
808 	 */
809 	(void) ufs_extattr_autostart(mp, td);
810 #endif /* !UFS_EXTATTR_AUTOSTART */
811 #endif /* !UFS_EXTATTR */
812 	return (0);
813 out:
814 	devvp->v_rdev->si_mountpoint = NULL;
815 	if (bp)
816 		brelse(bp);
817 	/* XXX: see comment above VOP_OPEN */
818 #ifdef notyet
819 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
820 #else
821 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
822 #endif
823 	if (ump) {
824 		free(ump->um_fs, M_UFSMNT);
825 		free(ump, M_UFSMNT);
826 		mp->mnt_data = (qaddr_t)0;
827 	}
828 	return (error);
829 }
830 
831 #include <sys/sysctl.h>
/*
 * Debug knob, exported read-write as debug.bigcgs.  While it is nonzero,
 * ffs_oldfscompat_read() saves fs_cgsize in fs_save_cgsize and replaces
 * it with fs_bsize, and ffs_oldfscompat_write() restores the saved value.
 */
int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
834 
835 /*
836  * Sanity checks for loading old filesystem superblocks.
837  * See ffs_oldfscompat_write below for unwound actions.
838  *
839  * XXX - Parts get retired eventually.
840  * Unfortunately new bits get added.
841  */
842 static void
843 ffs_oldfscompat_read(fs, ump, sblockloc)
844 	struct fs *fs;
845 	struct ufsmount *ump;
846 	ufs2_daddr_t sblockloc;
847 {
848 	off_t maxfilesize;
849 
850 	/*
851 	 * If not yet done, update UFS1 superblock with new wider fields.
852 	 */
853 	if (fs->fs_magic == FS_UFS1_MAGIC &&
854 	    fs->fs_sblockloc != sblockloc) {
855 		fs->fs_maxbsize = fs->fs_bsize;
856 		fs->fs_sblockloc = sblockloc;
857 		fs->fs_time = fs->fs_old_time;
858 		fs->fs_size = fs->fs_old_size;
859 		fs->fs_dsize = fs->fs_old_dsize;
860 		fs->fs_csaddr = fs->fs_old_csaddr;
861 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
862 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
863 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
864 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
865 	}
866 	if (fs->fs_magic == FS_UFS1_MAGIC &&
867 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
868 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
869 		fs->fs_qbmask = ~fs->fs_bmask;
870 		fs->fs_qfmask = ~fs->fs_fmask;
871 	}
872 	if (fs->fs_magic == FS_UFS1_MAGIC) {
873 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
874 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
875 		if (fs->fs_maxfilesize > maxfilesize)
876 			fs->fs_maxfilesize = maxfilesize;
877 	}
878 	/* Compatibility for old filesystems */
879 	if (fs->fs_avgfilesize <= 0)
880 		fs->fs_avgfilesize = AVFILESIZ;
881 	if (fs->fs_avgfpdir <= 0)
882 		fs->fs_avgfpdir = AFPDIR;
883 	if (bigcgs) {
884 		fs->fs_save_cgsize = fs->fs_cgsize;
885 		fs->fs_cgsize = fs->fs_bsize;
886 	}
887 }
888 
889 /*
890  * Unwinding superblock updates for old filesystems.
891  * See ffs_oldfscompat_read above for details.
892  *
893  * XXX - Parts get retired eventually.
894  * Unfortunately new bits get added.
895  */
896 static void
897 ffs_oldfscompat_write(fs, ump)
898 	struct fs *fs;
899 	struct ufsmount *ump;
900 {
901 
902 	/*
903 	 * Copy back UFS2 updated fields that UFS1 inspects.
904 	 */
905 	if (fs->fs_magic == FS_UFS1_MAGIC) {
906 		fs->fs_old_time = fs->fs_time;
907 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
908 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
909 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
910 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
911 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
912 	}
913 	if (bigcgs) {
914 		fs->fs_cgsize = fs->fs_save_cgsize;
915 		fs->fs_save_cgsize = 0;
916 	}
917 }
918 
919 /*
920  * unmount system call
921  */
922 int
923 ffs_unmount(mp, mntflags, td)
924 	struct mount *mp;
925 	int mntflags;
926 	struct thread *td;
927 {
928 	struct ufsmount *ump = VFSTOUFS(mp);
929 	struct fs *fs;
930 	int error, flags;
931 
932 	flags = 0;
933 	if (mntflags & MNT_FORCE) {
934 		flags |= FORCECLOSE;
935 	}
936 #ifdef UFS_EXTATTR
937 	if ((error = ufs_extattr_stop(mp, td))) {
938 		if (error != EOPNOTSUPP)
939 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
940 			    error);
941 	} else {
942 		ufs_extattr_uepm_destroy(&ump->um_extattr);
943 	}
944 #endif
945 	if (mp->mnt_flag & MNT_SOFTDEP) {
946 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
947 			return (error);
948 	} else {
949 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
950 			return (error);
951 	}
952 	fs = ump->um_fs;
953 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
954 		printf("%s: unmount pending error: blocks %jd files %d\n",
955 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
956 		    fs->fs_pendinginodes);
957 		fs->fs_pendingblocks = 0;
958 		fs->fs_pendinginodes = 0;
959 	}
960 	if (fs->fs_ronly == 0) {
961 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
962 		error = ffs_sbupdate(ump, MNT_WAIT);
963 		if (error) {
964 			fs->fs_clean = 0;
965 			return (error);
966 		}
967 	}
968 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
969 
970 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
971 	/* XXX: see comment above VOP_OPEN */
972 #ifdef notyet
973 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
974 		NOCRED, td);
975 #else
976 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
977 #endif
978 
979 	vrele(ump->um_devvp);
980 
981 	free(fs->fs_csp, M_UFSMNT);
982 	free(fs, M_UFSMNT);
983 	free(ump, M_UFSMNT);
984 	mp->mnt_data = (qaddr_t)0;
985 	mp->mnt_flag &= ~MNT_LOCAL;
986 	return (error);
987 }
988 
989 /*
990  * Flush out all the files in a filesystem.
991  */
992 int
993 ffs_flushfiles(mp, flags, td)
994 	struct mount *mp;
995 	int flags;
996 	struct thread *td;
997 {
998 	struct ufsmount *ump;
999 	int error;
1000 
1001 	ump = VFSTOUFS(mp);
1002 #ifdef QUOTA
1003 	if (mp->mnt_flag & MNT_QUOTA) {
1004 		int i;
1005 		error = vflush(mp, 0, SKIPSYSTEM|flags);
1006 		if (error)
1007 			return (error);
1008 		for (i = 0; i < MAXQUOTAS; i++) {
1009 			if (ump->um_quotas[i] == NULLVP)
1010 				continue;
1011 			quotaoff(td, mp, i);
1012 		}
1013 		/*
1014 		 * Here we fall through to vflush again to ensure
1015 		 * that we have gotten rid of all the system vnodes.
1016 		 */
1017 	}
1018 #endif
1019 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1020 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1021 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1022 			return (error);
1023 		ffs_snapshot_unmount(mp);
1024 		/*
1025 		 * Here we fall through to vflush again to ensure
1026 		 * that we have gotten rid of all the system vnodes.
1027 		 */
1028 	}
1029         /*
1030 	 * Flush all the files.
1031 	 */
1032 	if ((error = vflush(mp, 0, flags)) != 0)
1033 		return (error);
1034 	/*
1035 	 * Flush filesystem metadata.
1036 	 */
1037 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1038 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1039 	VOP_UNLOCK(ump->um_devvp, 0, td);
1040 	return (error);
1041 }
1042 
1043 /*
1044  * Get filesystem statistics.
1045  */
1046 int
1047 ffs_statfs(mp, sbp, td)
1048 	struct mount *mp;
1049 	struct statfs *sbp;
1050 	struct thread *td;
1051 {
1052 	struct ufsmount *ump;
1053 	struct fs *fs;
1054 
1055 	ump = VFSTOUFS(mp);
1056 	fs = ump->um_fs;
1057 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1058 		panic("ffs_statfs");
1059 	sbp->f_bsize = fs->fs_fsize;
1060 	sbp->f_iosize = fs->fs_bsize;
1061 	sbp->f_blocks = fs->fs_dsize;
1062 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1063 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1064 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1065 	    dbtofsb(fs, fs->fs_pendingblocks);
1066 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1067 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1068 	if (sbp != &mp->mnt_stat) {
1069 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1070 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1071 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1072 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1073 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1074 	}
1075 	return (0);
1076 }
1077 
1078 /*
1079  * Go through the disk queues to initiate sandbagged IO;
1080  * go through the inodes to write those that have been modified;
1081  * initiate the writing of the super block if it has been modified.
1082  *
1083  * Note: we are always called with the filesystem marked `MPBUSY'.
1084  */
1085 int
1086 ffs_sync(mp, waitfor, cred, td)
1087 	struct mount *mp;
1088 	int waitfor;
1089 	struct ucred *cred;
1090 	struct thread *td;
1091 {
1092 	struct vnode *nvp, *vp, *devvp;
1093 	struct inode *ip;
1094 	struct ufsmount *ump = VFSTOUFS(mp);
1095 	struct fs *fs;
1096 	int error, count, wait, lockreq, allerror = 0;
1097 
1098 	fs = ump->um_fs;
1099 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1100 		printf("fs = %s\n", fs->fs_fsmnt);
1101 		panic("ffs_sync: rofs mod");
1102 	}
1103 	/*
1104 	 * Write back each (modified) inode.
1105 	 */
1106 	wait = 0;
1107 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1108 	if (waitfor == MNT_WAIT) {
1109 		wait = 1;
1110 		lockreq = LK_EXCLUSIVE;
1111 	}
1112 	mtx_lock(&mntvnode_mtx);
1113 loop:
1114 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1115 		/*
1116 		 * If the vnode that we are about to sync is no longer
1117 		 * associated with this mount point, start over.
1118 		 */
1119 		if (vp->v_mount != mp)
1120 			goto loop;
1121 
1122 		/*
1123 		 * Depend on the mntvnode_slock to keep things stable enough
1124 		 * for a quick test.  Since there might be hundreds of
1125 		 * thousands of vnodes, we cannot afford even a subroutine
1126 		 * call unless there's a good chance that we have work to do.
1127 		 */
1128 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1129 		ip = VTOI(vp);
1130 		if (vp->v_type == VNON || ((ip->i_flag &
1131 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1132 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1133 			continue;
1134 		}
1135 		if (vp->v_type != VCHR) {
1136 			mtx_unlock(&mntvnode_mtx);
1137 			if ((error = vget(vp, lockreq, td)) != 0) {
1138 				mtx_lock(&mntvnode_mtx);
1139 				if (error == ENOENT)
1140 					goto loop;
1141 			} else {
1142 				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1143 					allerror = error;
1144 				VOP_UNLOCK(vp, 0, td);
1145 				vrele(vp);
1146 				mtx_lock(&mntvnode_mtx);
1147 			}
1148 		} else {
1149 			mtx_unlock(&mntvnode_mtx);
1150 			UFS_UPDATE(vp, wait);
1151 			mtx_lock(&mntvnode_mtx);
1152 		}
1153 		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1154 			goto loop;
1155 	}
1156 	mtx_unlock(&mntvnode_mtx);
1157 	/*
1158 	 * Force stale filesystem control information to be flushed.
1159 	 */
1160 	if (waitfor == MNT_WAIT) {
1161 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1162 			allerror = error;
1163 		/* Flushed work items may create new vnodes to clean */
1164 		if (allerror == 0 && count) {
1165 			mtx_lock(&mntvnode_mtx);
1166 			goto loop;
1167 		}
1168 	}
1169 #ifdef QUOTA
1170 	qsync(mp);
1171 #endif
1172 	devvp = ump->um_devvp;
1173 	VI_LOCK(devvp);
1174 	if (waitfor != MNT_LAZY &&
1175 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1176 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1177 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1178 			allerror = error;
1179 		VOP_UNLOCK(devvp, 0, td);
1180 		if (allerror == 0 && waitfor == MNT_WAIT) {
1181 			mtx_lock(&mntvnode_mtx);
1182 			goto loop;
1183 		}
1184 	} else
1185 		VI_UNLOCK(devvp);
1186 	/*
1187 	 * Write back modified superblock.
1188 	 */
1189 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1190 		allerror = error;
1191 	return (allerror);
1192 }
1193 
1194 int
1195 ffs_vget(mp, ino, flags, vpp)
1196 	struct mount *mp;
1197 	ino_t ino;
1198 	int flags;
1199 	struct vnode **vpp;
1200 {
1201 	struct thread *td = curthread; 		/* XXX */
1202 	struct fs *fs;
1203 	struct inode *ip;
1204 	struct ufsmount *ump;
1205 	struct buf *bp;
1206 	struct vnode *vp;
1207 	dev_t dev;
1208 	int error;
1209 
1210 	ump = VFSTOUFS(mp);
1211 	dev = ump->um_dev;
1212 
1213 	/*
1214 	 * We do not lock vnode creation as it is believed to be too
1215 	 * expensive for such rare case as simultaneous creation of vnode
1216 	 * for same ino by different processes. We just allow them to race
1217 	 * and check later to decide who wins. Let the race begin!
1218 	 */
1219 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1220 		return (error);
1221 	if (*vpp != NULL)
1222 		return (0);
1223 
1224 	/*
1225 	 * If this MALLOC() is performed after the getnewvnode()
1226 	 * it might block, leaving a vnode with a NULL v_data to be
1227 	 * found by ffs_sync() if a sync happens to fire right then,
1228 	 * which will cause a panic because ffs_sync() blindly
1229 	 * dereferences vp->v_data (as well it should).
1230 	 */
1231 	MALLOC(ip, struct inode *, sizeof(struct inode),
1232 	    ump->um_malloctype, M_WAITOK);
1233 
1234 	/* Allocate a new vnode/inode. */
1235 	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1236 	if (error) {
1237 		*vpp = NULL;
1238 		FREE(ip, ump->um_malloctype);
1239 		return (error);
1240 	}
1241 	bzero((caddr_t)ip, sizeof(struct inode));
1242 	/*
1243 	 * FFS supports recursive locking.
1244 	 */
1245 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1246 	vp->v_data = ip;
1247 	ip->i_vnode = vp;
1248 	ip->i_ump = ump;
1249 	ip->i_fs = fs = ump->um_fs;
1250 	ip->i_dev = dev;
1251 	ip->i_number = ino;
1252 #ifdef QUOTA
1253 	{
1254 		int i;
1255 		for (i = 0; i < MAXQUOTAS; i++)
1256 			ip->i_dquot[i] = NODQUOT;
1257 	}
1258 #endif
1259 	/*
1260 	 * Exclusively lock the vnode before adding to hash. Note, that we
1261 	 * must not release nor downgrade the lock (despite flags argument
1262 	 * says) till it is fully initialized.
1263 	 */
1264 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1265 
1266 	/*
1267 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1268 	 * duplicate of vnode being created and add it to the hash. If a
1269 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1270 	 */
1271 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1272 		vput(vp);
1273 		*vpp = NULL;
1274 		return (error);
1275 	}
1276 
1277 	/* We lost the race, then throw away our vnode and return existing */
1278 	if (*vpp != NULL) {
1279 		vput(vp);
1280 		return (0);
1281 	}
1282 
1283 	/* Read in the disk contents for the inode, copy into the inode. */
1284 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1285 	    (int)fs->fs_bsize, NOCRED, &bp);
1286 	if (error) {
1287 		/*
1288 		 * The inode does not contain anything useful, so it would
1289 		 * be misleading to leave it on its hash chain. With mode
1290 		 * still zero, it will be unlinked and returned to the free
1291 		 * list by vput().
1292 		 */
1293 		brelse(bp);
1294 		vput(vp);
1295 		*vpp = NULL;
1296 		return (error);
1297 	}
1298 	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
1299 	if (DOINGSOFTDEP(vp))
1300 		softdep_load_inodeblock(ip);
1301 	else
1302 		ip->i_effnlink = ip->i_nlink;
1303 	bqrelse(bp);
1304 
1305 	/*
1306 	 * Initialize the vnode from the inode, check for aliases.
1307 	 * Note that the underlying vnode may have changed.
1308 	 */
1309 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1310 	if (error) {
1311 		vput(vp);
1312 		*vpp = NULL;
1313 		return (error);
1314 	}
1315 	/*
1316 	 * Finish inode initialization now that aliasing has been resolved.
1317 	 */
1318 	ip->i_devvp = ump->um_devvp;
1319 	VREF(ip->i_devvp);
1320 	/*
1321 	 * Set up a generation number for this inode if it does not
1322 	 * already have one. This should only happen on old filesystems.
1323 	 */
1324 	if (ip->i_gen == 0) {
1325 		ip->i_gen = random() / 2 + 1;
1326 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1327 			ip->i_flag |= IN_MODIFIED;
1328 			DIP(ip, i_gen) = ip->i_gen;
1329 		}
1330 	}
1331 	/*
1332 	 * Ensure that uid and gid are correct. This is a temporary
1333 	 * fix until fsck has been changed to do the update.
1334 	 */
1335 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1336 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1337 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1338 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1339 	}						/* XXX */
1340 
1341 #ifdef MAC
1342 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1343 		/*
1344 		 * If this vnode is already allocated, and we're running
1345 		 * multi-label, attempt to perform a label association
1346 		 * from the extended attributes on the inode.
1347 		 */
1348 		error = mac_associate_vnode_extattr(mp, vp);
1349 		if (error) {
1350 			/* ufs_inactive will release ip->i_devvp ref. */
1351 			vput(vp);
1352 			*vpp = NULL;
1353 			return (error);
1354 		}
1355 	}
1356 #endif
1357 
1358 	*vpp = vp;
1359 	return (0);
1360 }
1361 
1362 /*
1363  * File handle to vnode
1364  *
1365  * Have to be really careful about stale file handles:
1366  * - check that the inode number is valid
1367  * - call ffs_vget() to get the locked inode
1368  * - check for an unallocated inode (i_mode == 0)
1369  * - check that the given client host has export rights and return
1370  *   those rights via. exflagsp and credanonp
1371  */
1372 int
1373 ffs_fhtovp(mp, fhp, vpp)
1374 	struct mount *mp;
1375 	struct fid *fhp;
1376 	struct vnode **vpp;
1377 {
1378 	struct ufid *ufhp;
1379 	struct fs *fs;
1380 
1381 	ufhp = (struct ufid *)fhp;
1382 	fs = VFSTOUFS(mp)->um_fs;
1383 	if (ufhp->ufid_ino < ROOTINO ||
1384 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1385 		return (ESTALE);
1386 	return (ufs_fhtovp(mp, ufhp, vpp));
1387 }
1388 
1389 /*
1390  * Vnode pointer to File handle
1391  */
1392 /* ARGSUSED */
1393 int
1394 ffs_vptofh(vp, fhp)
1395 	struct vnode *vp;
1396 	struct fid *fhp;
1397 {
1398 	struct inode *ip;
1399 	struct ufid *ufhp;
1400 
1401 	ip = VTOI(vp);
1402 	ufhp = (struct ufid *)fhp;
1403 	ufhp->ufid_len = sizeof(struct ufid);
1404 	ufhp->ufid_ino = ip->i_number;
1405 	ufhp->ufid_gen = ip->i_gen;
1406 	return (0);
1407 }
1408 
/*
 * Initialize the filesystem type: call softdep_initialize() and then
 * ufs_init(), whose result is returned.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1420 
/*
 * Undo the work of ffs_init(), in reverse order: ufs_uninit() first,
 * then softdep_uninitialize().  Returns the result of ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1434 
1435 /*
1436  * Write a superblock and associated information back to disk.
1437  */
1438 static int
1439 ffs_sbupdate(mp, waitfor)
1440 	struct ufsmount *mp;
1441 	int waitfor;
1442 {
1443 	struct fs *fs = mp->um_fs;
1444 	struct buf *bp;
1445 	int blks;
1446 	void *space;
1447 	int i, size, error, allerror = 0;
1448 
1449 	/*
1450 	 * First write back the summary information.
1451 	 */
1452 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1453 	space = fs->fs_csp;
1454 	for (i = 0; i < blks; i += fs->fs_frag) {
1455 		size = fs->fs_bsize;
1456 		if (i + fs->fs_frag > blks)
1457 			size = (blks - i) * fs->fs_fsize;
1458 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1459 		    size, 0, 0);
1460 		bcopy(space, bp->b_data, (u_int)size);
1461 		space = (char *)space + size;
1462 		if (waitfor != MNT_WAIT)
1463 			bawrite(bp);
1464 		else if ((error = bwrite(bp)) != 0)
1465 			allerror = error;
1466 	}
1467 	/*
1468 	 * Now write back the superblock itself. If any errors occurred
1469 	 * up to this point, then fail so that the superblock avoids
1470 	 * being written out as clean.
1471 	 */
1472 	if (allerror)
1473 		return (allerror);
1474 	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1475 	    (int)fs->fs_sbsize, 0, 0);
1476 	fs->fs_fmod = 0;
1477 	fs->fs_time = time_second;
1478 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1479 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1480 	if (waitfor != MNT_WAIT)
1481 		bawrite(bp);
1482 	else if ((error = bwrite(bp)) != 0)
1483 		allerror = error;
1484 	return (allerror);
1485 }
1486 
/*
 * Extended attribute control: forward the request to ufs_extattrctl()
 * when the kernel is built with UFS_EXTATTR, and to vfs_stdextattrctl()
 * otherwise.  The callee's result is returned unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1500