xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 390e8cc2974df1888369c06339ef8e0e92b312b6)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_mac.h"
38 #include "opt_quota.h"
39 #include "opt_ufs.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/mac.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/fcntl.h>
53 #include <sys/disk.h>
54 #include <sys/malloc.h>
55 #include <sys/mutex.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 
63 #include <ufs/ffs/fs.h>
64 #include <ufs/ffs/ffs_extern.h>
65 
66 #include <vm/vm.h>
67 #include <vm/uma.h>
68 #include <vm/vm_page.h>
69 
70 uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
71 
72 static int	ffs_sbupdate(struct ufsmount *, int);
73        int	ffs_reload(struct mount *,struct ucred *,struct thread *);
74 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
75 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
76 		    ufs2_daddr_t);
77 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
78 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
79 static vfs_init_t ffs_init;
80 static vfs_uninit_t ffs_uninit;
81 static vfs_extattrctl_t ffs_extattrctl;
82 
83 static struct vfsops ufs_vfsops = {
84 	ffs_mount,
85 	ufs_start,
86 	ffs_unmount,
87 	ufs_root,
88 	ufs_quotactl,
89 	ffs_statfs,
90 	ffs_sync,
91 	ffs_vget,
92 	ffs_fhtovp,
93 	vfs_stdcheckexp,
94 	ffs_vptofh,
95 	ffs_init,
96 	ffs_uninit,
97 	ffs_extattrctl,
98 };
99 
100 VFS_SET(ufs_vfsops, ufs, 0);
101 
102 /*
103  * ffs_mount
104  *
105  * Called when mounting local physical media
106  *
107  * PARAMETERS:
108  *		mountroot
109  *			mp	mount point structure
110  *			path	NULL (flag for root mount!!!)
111  *			data	<unused>
112  *			ndp	<unused>
113  *			p	process (user credentials check [statfs])
114  *
115  *		mount
116  *			mp	mount point structure
117  *			path	path to mount point
118  *			data	pointer to argument struct in user space
119  *			ndp	mount point namei() return (used for
120  *				credentials on reload), reused to look
121  *				up block device.
122  *			p	process (user credentials check)
123  *
124  * RETURNS:	0	Success
125  *		!0	error number (errno.h)
126  *
127  * LOCK STATE:
128  *
129  *		ENTRY
130  *			mount point is locked
131  *		EXIT
132  *			mount point is locked
133  *
134  * NOTES:
135  *		A NULL path can be used for a flag since the mount
136  *		system call will fail with EFAULT in copyinstr in
137  *		namei() if it is a genuine NULL from the user.
138  */
139 int
140 ffs_mount(mp, path, data, ndp, td)
141         struct mount		*mp;	/* mount struct pointer*/
142         char			*path;	/* path to mount point*/
143         caddr_t			data;	/* arguments to FS specific mount*/
144         struct nameidata	*ndp;	/* mount point credentials*/
145         struct thread		*td;	/* process requesting mount*/
146 {
147 	size_t size;
148 	struct vnode *devvp;
149 	struct ufs_args args;
150 	struct ufsmount *ump = 0;
151 	struct fs *fs;
152 	int error, flags;
153 	mode_t accessmode;
154 
155 	if (uma_inode == NULL) {
156 		uma_inode = uma_zcreate("FFS inode",
157 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
158 		    UMA_ALIGN_PTR, 0);
159 		uma_ufs1 = uma_zcreate("FFS1 dinode",
160 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
161 		    UMA_ALIGN_PTR, 0);
162 		uma_ufs2 = uma_zcreate("FFS2 dinode",
163 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
164 		    UMA_ALIGN_PTR, 0);
165 	}
166 	/*
167 	 * Use NULL path to indicate we are mounting the root filesystem.
168 	 */
169 	if (path == NULL) {
170 		if ((error = bdevvp(rootdev, &rootvp))) {
171 			printf("ffs_mountroot: can't find rootvp\n");
172 			return (error);
173 		}
174 
175 		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
176 			return (error);
177 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
178 		return (0);
179 	}
180 
181 	/*
182 	 * Mounting non-root filesystem or updating a filesystem
183 	 */
184 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
185 		return (error);
186 
187 	/*
188 	 * If updating, check whether changing from read-only to
189 	 * read/write; if there is no device name, that's all we do.
190 	 */
191 	if (mp->mnt_flag & MNT_UPDATE) {
192 		ump = VFSTOUFS(mp);
193 		fs = ump->um_fs;
194 		devvp = ump->um_devvp;
195 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
196 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
197 				return (error);
198 			/*
199 			 * Flush any dirty data.
200 			 */
201 			if ((error = VFS_SYNC(mp, MNT_WAIT,
202 			    td->td_ucred, td)) != 0) {
203 				vn_finished_write(mp);
204 				return (error);
205 			}
206 			/*
207 			 * Check for and optionally get rid of files open
208 			 * for writing.
209 			 */
210 			flags = WRITECLOSE;
211 			if (mp->mnt_flag & MNT_FORCE)
212 				flags |= FORCECLOSE;
213 			if (mp->mnt_flag & MNT_SOFTDEP) {
214 				error = softdep_flushfiles(mp, flags, td);
215 			} else {
216 				error = ffs_flushfiles(mp, flags, td);
217 			}
218 			if (error) {
219 				vn_finished_write(mp);
220 				return (error);
221 			}
222 			if (fs->fs_pendingblocks != 0 ||
223 			    fs->fs_pendinginodes != 0) {
224 				printf("%s: %s: blocks %jd files %d\n",
225 				    fs->fs_fsmnt, "update error",
226 				    (intmax_t)fs->fs_pendingblocks,
227 				    fs->fs_pendinginodes);
228 				fs->fs_pendingblocks = 0;
229 				fs->fs_pendinginodes = 0;
230 			}
231 			fs->fs_ronly = 1;
232 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
233 				fs->fs_clean = 1;
234 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
235 				fs->fs_ronly = 0;
236 				fs->fs_clean = 0;
237 				vn_finished_write(mp);
238 				return (error);
239 			}
240 			vn_finished_write(mp);
241 		}
242 		if ((mp->mnt_flag & MNT_RELOAD) &&
243 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
244 			return (error);
245 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
246 			/*
247 			 * If upgrade to read-write by non-root, then verify
248 			 * that user has necessary permissions on the device.
249 			 */
250 			if (suser(td)) {
251 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
252 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
253 				    td->td_ucred, td)) != 0) {
254 					VOP_UNLOCK(devvp, 0, td);
255 					return (error);
256 				}
257 				VOP_UNLOCK(devvp, 0, td);
258 			}
259 			fs->fs_flags &= ~FS_UNCLEAN;
260 			if (fs->fs_clean == 0) {
261 				fs->fs_flags |= FS_UNCLEAN;
262 				if ((mp->mnt_flag & MNT_FORCE) ||
263 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
264 				     (fs->fs_flags & FS_DOSOFTDEP))) {
265 					printf("WARNING: %s was not %s\n",
266 					   fs->fs_fsmnt, "properly dismounted");
267 				} else {
268 					printf(
269 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
270 					    fs->fs_fsmnt);
271 					return (EPERM);
272 				}
273 			}
274 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
275 				return (error);
276 			fs->fs_ronly = 0;
277 			fs->fs_clean = 0;
278 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
279 				vn_finished_write(mp);
280 				return (error);
281 			}
282 			/* check to see if we need to start softdep */
283 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
284 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
285 				vn_finished_write(mp);
286 				return (error);
287 			}
288 			if (fs->fs_snapinum[0] != 0)
289 				ffs_snapshot_mount(mp);
290 			vn_finished_write(mp);
291 		}
292 		/*
293 		 * Soft updates is incompatible with "async",
294 		 * so if we are doing softupdates stop the user
295 		 * from setting the async flag in an update.
296 		 * Softdep_mount() clears it in an initial mount
297 		 * or ro->rw remount.
298 		 */
299 		if (mp->mnt_flag & MNT_SOFTDEP)
300 			mp->mnt_flag &= ~MNT_ASYNC;
301 		/*
302 		 * If not updating name, process export requests.
303 		 */
304 		if (args.fspec == 0)
305 			return (vfs_export(mp, &args.export));
306 		/*
307 		 * If this is a snapshot request, take the snapshot.
308 		 */
309 		if (mp->mnt_flag & MNT_SNAPSHOT)
310 			return (ffs_snapshot(mp, args.fspec));
311 	}
312 
313 	/*
314 	 * Not an update, or updating the name: look up the name
315 	 * and verify that it refers to a sensible block device.
316 	 */
317 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
318 	if ((error = namei(ndp)) != 0)
319 		return (error);
320 	NDFREE(ndp, NDF_ONLY_PNBUF);
321 	devvp = ndp->ni_vp;
322 	if (!vn_isdisk(devvp, &error)) {
323 		vrele(devvp);
324 		return (error);
325 	}
326 
327 	/*
328 	 * If mount by non-root, then verify that user has necessary
329 	 * permissions on the device.
330 	 */
331 	if (suser(td)) {
332 		accessmode = VREAD;
333 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
334 			accessmode |= VWRITE;
335 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
336 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
337 			vput(devvp);
338 			return (error);
339 		}
340 		VOP_UNLOCK(devvp, 0, td);
341 	}
342 
343 	if (mp->mnt_flag & MNT_UPDATE) {
344 		/*
345 		 * Update only
346 		 *
347 		 * If it's not the same vnode, or at least the same device
348 		 * then it's not correct.
349 		 */
350 
351 		if (devvp != ump->um_devvp &&
352 		    devvp->v_rdev != ump->um_devvp->v_rdev)
353 			error = EINVAL;	/* needs translation */
354 		vrele(devvp);
355 		if (error)
356 			return (error);
357 	} else {
358 		/*
359 		 * New mount
360 		 *
361 		 * We need the name for the mount point (also used for
362 		 * "last mounted on") copied in. If an error occurs,
363 		 * the mount point is discarded by the upper level code.
364 		 * Note that vfs_mount() populates f_mntonname for us.
365 		 */
366 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
367 			vrele(devvp);
368 			return (error);
369 		}
370 	}
371 	/*
372 	 * Save "mounted from" device name info for mount point (NULL pad).
373 	 */
374 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
375 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
376 	/*
377 	 * Initialize filesystem stat information in mount struct.
378 	 */
379 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
380 	return (0);
381 }
382 
383 /*
384  * Reload all incore data for a filesystem (used after running fsck on
385  * the root filesystem and finding things to fix). The filesystem must
386  * be mounted read-only.
387  *
388  * Things to do to update the mount:
389  *	1) invalidate all cached meta-data.
390  *	2) re-read superblock from disk.
391  *	3) re-read summary information from disk.
392  *	4) invalidate all inactive vnodes.
393  *	5) invalidate all cached file data.
394  *	6) re-read inode data for all active vnodes.
395  */
396 int
397 ffs_reload(mp, cred, td)
398 	struct mount *mp;
399 	struct ucred *cred;
400 	struct thread *td;
401 {
402 	struct vnode *vp, *nvp, *devvp;
403 	struct inode *ip;
404 	void *space;
405 	struct buf *bp;
406 	struct fs *fs, *newfs;
407 	ufs2_daddr_t sblockloc;
408 	int i, blks, size, error;
409 	int32_t *lp;
410 
411 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
412 		return (EINVAL);
413 	/*
414 	 * Step 1: invalidate all cached meta-data.
415 	 */
416 	devvp = VFSTOUFS(mp)->um_devvp;
417 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
418 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
419 	VOP_UNLOCK(devvp, 0, td);
420 	if (error)
421 		panic("ffs_reload: dirty1");
422 
423 	/*
424 	 * Only VMIO the backing device if the backing device is a real
425 	 * block device.
426 	 */
427 	if (vn_isdisk(devvp, NULL)) {
428 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
429 		vfs_object_create(devvp, td, td->td_ucred);
430 		/* XXX Why lock only to release immediately?? */
431 		mtx_lock(&devvp->v_interlock);
432 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
433 	}
434 
435 	/*
436 	 * Step 2: re-read superblock from disk.
437 	 */
438 	fs = VFSTOUFS(mp)->um_fs;
439 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
440 	    NOCRED, &bp)) != 0)
441 		return (error);
442 	newfs = (struct fs *)bp->b_data;
443 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
444 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
445 	    newfs->fs_bsize > MAXBSIZE ||
446 	    newfs->fs_bsize < sizeof(struct fs)) {
447 			brelse(bp);
448 			return (EIO);		/* XXX needs translation */
449 	}
450 	/*
451 	 * Copy pointer fields back into superblock before copying in	XXX
452 	 * new superblock. These should really be in the ufsmount.	XXX
453 	 * Note that important parameters (eg fs_ncg) are unchanged.
454 	 */
455 	newfs->fs_csp = fs->fs_csp;
456 	newfs->fs_maxcluster = fs->fs_maxcluster;
457 	newfs->fs_contigdirs = fs->fs_contigdirs;
458 	newfs->fs_active = fs->fs_active;
459 	sblockloc = fs->fs_sblockloc;
460 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
461 	brelse(bp);
462 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
463 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
464 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
465 		printf("%s: reload pending error: blocks %jd files %d\n",
466 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
467 		    fs->fs_pendinginodes);
468 		fs->fs_pendingblocks = 0;
469 		fs->fs_pendinginodes = 0;
470 	}
471 
472 	/*
473 	 * Step 3: re-read summary information from disk.
474 	 */
475 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
476 	space = fs->fs_csp;
477 	for (i = 0; i < blks; i += fs->fs_frag) {
478 		size = fs->fs_bsize;
479 		if (i + fs->fs_frag > blks)
480 			size = (blks - i) * fs->fs_fsize;
481 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
482 		    NOCRED, &bp);
483 		if (error)
484 			return (error);
485 		bcopy(bp->b_data, space, (u_int)size);
486 		space = (char *)space + size;
487 		brelse(bp);
488 	}
489 	/*
490 	 * We no longer know anything about clusters per cylinder group.
491 	 */
492 	if (fs->fs_contigsumsize > 0) {
493 		lp = fs->fs_maxcluster;
494 		for (i = 0; i < fs->fs_ncg; i++)
495 			*lp++ = fs->fs_contigsumsize;
496 	}
497 
498 loop:
499 	mtx_lock(&mntvnode_mtx);
500 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
501 		if (vp->v_mount != mp) {
502 			mtx_unlock(&mntvnode_mtx);
503 			goto loop;
504 		}
505 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
506 		mtx_unlock(&mntvnode_mtx);
507 		/*
508 		 * Step 4: invalidate all inactive vnodes.
509 		 */
510 		if (vrecycle(vp, NULL, td))
511 			goto loop;
512 		/*
513 		 * Step 5: invalidate all cached file data.
514 		 */
515 		/* XXX Why lock only to release immediately? */
516 		mtx_lock(&vp->v_interlock);
517 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
518 			goto loop;
519 		}
520 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
521 			panic("ffs_reload: dirty2");
522 		/*
523 		 * Step 6: re-read inode data for all active vnodes.
524 		 */
525 		ip = VTOI(vp);
526 		error =
527 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
528 		    (int)fs->fs_bsize, NOCRED, &bp);
529 		if (error) {
530 			vput(vp);
531 			return (error);
532 		}
533 		ffs_load_inode(bp, ip, fs, ip->i_number);
534 		ip->i_effnlink = ip->i_nlink;
535 		brelse(bp);
536 		vput(vp);
537 		mtx_lock(&mntvnode_mtx);
538 	}
539 	mtx_unlock(&mntvnode_mtx);
540 	return (0);
541 }
542 
543 /*
544  * Possible superblock locations ordered from most to least likely.
545  */
546 static int sblock_try[] = SBLOCKSEARCH;
547 
548 /*
549  * Common code for mount and mountroot
550  */
551 static int
552 ffs_mountfs(devvp, mp, td)
553 	struct vnode *devvp;
554 	struct mount *mp;
555 	struct thread *td;
556 {
557 	struct ufsmount *ump;
558 	struct buf *bp;
559 	struct fs *fs;
560 	dev_t dev;
561 	void *space;
562 	ufs2_daddr_t sblockloc;
563 	int error, i, blks, size, ronly;
564 	int32_t *lp;
565 	struct ucred *cred;
566 	size_t strsize;
567 	int ncount;
568 
569 	dev = devvp->v_rdev;
570 	cred = td ? td->td_ucred : NOCRED;
571 	/*
572 	 * Disallow multiple mounts of the same device.
573 	 * Disallow mounting of a device that is currently in use
574 	 * (except for root, which might share swap device for miniroot).
575 	 * Flush out any old buffers remaining from a previous use.
576 	 */
577 	error = vfs_mountedon(devvp);
578 	if (error)
579 		return (error);
580 	ncount = vcount(devvp);
581 
582 	if (ncount > 1 && devvp != rootvp)
583 		return (EBUSY);
584 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
585 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
586 	VOP_UNLOCK(devvp, 0, td);
587 	if (error)
588 		return (error);
589 
590 	/*
591 	 * Only VMIO the backing device if the backing device is a real
592 	 * block device.
593 	 * Note that it is optional that the backing device be VMIOed.  This
594 	 * increases the opportunity for metadata caching.
595 	 */
596 	if (vn_isdisk(devvp, NULL)) {
597 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
598 		vfs_object_create(devvp, td, cred);
599 		/* XXX Why lock only to release immediately?? */
600 		mtx_lock(&devvp->v_interlock);
601 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
602 	}
603 
604 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
605 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
606 	/*
607 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
608 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
609 	 * XXX: start to avoid getting trashed later on.
610 	 */
611 #ifdef notyet
612 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
613 #else
614 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
615 #endif
616 	VOP_UNLOCK(devvp, 0, td);
617 	if (error)
618 		return (error);
619 	if (devvp->v_rdev->si_iosize_max != 0)
620 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
621 	if (mp->mnt_iosize_max > MAXPHYS)
622 		mp->mnt_iosize_max = MAXPHYS;
623 
624 	bp = NULL;
625 	ump = NULL;
626 	fs = NULL;
627 	sblockloc = 0;
628 	/*
629 	 * Try reading the superblock in each of its possible locations.
630 	 */
631 	for (i = 0; sblock_try[i] != -1; i++) {
632 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
633 		    cred, &bp)) != 0)
634 			goto out;
635 		fs = (struct fs *)bp->b_data;
636 		sblockloc = sblock_try[i];
637 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
638 		     (fs->fs_magic == FS_UFS2_MAGIC &&
639 		      (fs->fs_sblockloc == sblockloc ||
640 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
641 		    fs->fs_bsize <= MAXBSIZE &&
642 		    fs->fs_bsize >= sizeof(struct fs))
643 			break;
644 		brelse(bp);
645 		bp = NULL;
646 	}
647 	if (sblock_try[i] == -1) {
648 		error = EINVAL;		/* XXX needs translation */
649 		goto out;
650 	}
651 	fs->fs_fmod = 0;
652 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
653 	fs->fs_flags &= ~FS_UNCLEAN;
654 	if (fs->fs_clean == 0) {
655 		fs->fs_flags |= FS_UNCLEAN;
656 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
657 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
658 		     (fs->fs_flags & FS_DOSOFTDEP))) {
659 			printf(
660 "WARNING: %s was not properly dismounted\n",
661 			    fs->fs_fsmnt);
662 		} else {
663 			printf(
664 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
665 			    fs->fs_fsmnt);
666 			error = EPERM;
667 			goto out;
668 		}
669 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
670 		    (mp->mnt_flag & MNT_FORCE)) {
671 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
672 			    (intmax_t)fs->fs_pendingblocks,
673 			    fs->fs_pendinginodes);
674 			fs->fs_pendingblocks = 0;
675 			fs->fs_pendinginodes = 0;
676 		}
677 	}
678 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
679 		printf("%s: mount pending error: blocks %jd files %d\n",
680 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
681 		    fs->fs_pendinginodes);
682 		fs->fs_pendingblocks = 0;
683 		fs->fs_pendinginodes = 0;
684 	}
685 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
686 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
687 	    M_WAITOK);
688 	if (fs->fs_magic == FS_UFS1_MAGIC) {
689 		ump->um_fstype = UFS1;
690 		ump->um_balloc = ffs_balloc_ufs1;
691 	} else {
692 		ump->um_fstype = UFS2;
693 		ump->um_balloc = ffs_balloc_ufs2;
694 	}
695 	ump->um_blkatoff = ffs_blkatoff;
696 	ump->um_truncate = ffs_truncate;
697 	ump->um_update = ffs_update;
698 	ump->um_valloc = ffs_valloc;
699 	ump->um_vfree = ffs_vfree;
700 	ump->um_ifree = ffs_ifree;
701 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
702 	if (fs->fs_sbsize < SBLOCKSIZE)
703 		bp->b_flags |= B_INVAL | B_NOCACHE;
704 	brelse(bp);
705 	bp = NULL;
706 	fs = ump->um_fs;
707 	ffs_oldfscompat_read(fs, ump, sblockloc);
708 	fs->fs_ronly = ronly;
709 	size = fs->fs_cssize;
710 	blks = howmany(size, fs->fs_fsize);
711 	if (fs->fs_contigsumsize > 0)
712 		size += fs->fs_ncg * sizeof(int32_t);
713 	size += fs->fs_ncg * sizeof(u_int8_t);
714 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
715 	fs->fs_csp = space;
716 	for (i = 0; i < blks; i += fs->fs_frag) {
717 		size = fs->fs_bsize;
718 		if (i + fs->fs_frag > blks)
719 			size = (blks - i) * fs->fs_fsize;
720 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
721 		    cred, &bp)) != 0) {
722 			free(fs->fs_csp, M_UFSMNT);
723 			goto out;
724 		}
725 		bcopy(bp->b_data, space, (u_int)size);
726 		space = (char *)space + size;
727 		brelse(bp);
728 		bp = NULL;
729 	}
730 	if (fs->fs_contigsumsize > 0) {
731 		fs->fs_maxcluster = lp = space;
732 		for (i = 0; i < fs->fs_ncg; i++)
733 			*lp++ = fs->fs_contigsumsize;
734 		space = lp;
735 	}
736 	size = fs->fs_ncg * sizeof(u_int8_t);
737 	fs->fs_contigdirs = (u_int8_t *)space;
738 	bzero(fs->fs_contigdirs, size);
739 	fs->fs_active = NULL;
740 	mp->mnt_data = (qaddr_t)ump;
741 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
742 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
743 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
744 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
745 		vfs_getnewfsid(mp);
746 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
747 	mp->mnt_flag |= MNT_LOCAL;
748 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
749 #ifdef MAC
750 		mp->mnt_flag |= MNT_MULTILABEL;
751 #else
752 		printf(
753 "WARNING: %s: multilabel flag on fs but no MAC support\n",
754 		    fs->fs_fsmnt);
755 #endif
756 	if ((fs->fs_flags & FS_ACLS) != 0)
757 #ifdef UFS_ACL
758 		mp->mnt_flag |= MNT_ACLS;
759 #else
760 		printf(
761 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
762 		    fs->fs_fsmnt);
763 #endif
764 	ump->um_mountp = mp;
765 	ump->um_dev = dev;
766 	ump->um_devvp = devvp;
767 	ump->um_nindir = fs->fs_nindir;
768 	ump->um_bptrtodb = fs->fs_fsbtodb;
769 	ump->um_seqinc = fs->fs_frag;
770 	for (i = 0; i < MAXQUOTAS; i++)
771 		ump->um_quotas[i] = NULLVP;
772 #ifdef UFS_EXTATTR
773 	ufs_extattr_uepm_init(&ump->um_extattr);
774 #endif
775 	devvp->v_rdev->si_mountpoint = mp;
776 
777 	/*
778 	 * Set FS local "last mounted on" information (NULL pad)
779 	 */
780 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
781 			fs->fs_fsmnt,			/* copy area*/
782 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
783 			&strsize);			/* real size*/
784 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
785 
786 	if( mp->mnt_flag & MNT_ROOTFS) {
787 		/*
788 		 * Root mount; update timestamp in mount structure.
789 		 * this will be used by the common root mount code
790 		 * to update the system clock.
791 		 */
792 		mp->mnt_time = fs->fs_time;
793 	}
794 
795 	if (ronly == 0) {
796 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
797 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
798 			free(fs->fs_csp, M_UFSMNT);
799 			goto out;
800 		}
801 		if (fs->fs_snapinum[0] != 0)
802 			ffs_snapshot_mount(mp);
803 		fs->fs_fmod = 1;
804 		fs->fs_clean = 0;
805 		(void) ffs_sbupdate(ump, MNT_WAIT);
806 	}
807 #ifdef UFS_EXTATTR
808 #ifdef UFS_EXTATTR_AUTOSTART
809 	/*
810 	 *
811 	 * Auto-starting does the following:
812 	 *	- check for /.attribute in the fs, and extattr_start if so
813 	 *	- for each file in .attribute, enable that file with
814 	 * 	  an attribute of the same name.
815 	 * Not clear how to report errors -- probably eat them.
816 	 * This would all happen while the filesystem was busy/not
817 	 * available, so would effectively be "atomic".
818 	 */
819 	(void) ufs_extattr_autostart(mp, td);
820 #endif /* !UFS_EXTATTR_AUTOSTART */
821 #endif /* !UFS_EXTATTR */
822 	return (0);
823 out:
824 	devvp->v_rdev->si_mountpoint = NULL;
825 	if (bp)
826 		brelse(bp);
827 	/* XXX: see comment above VOP_OPEN */
828 #ifdef notyet
829 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
830 #else
831 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
832 #endif
833 	if (ump) {
834 		free(ump->um_fs, M_UFSMNT);
835 		free(ump, M_UFSMNT);
836 		mp->mnt_data = (qaddr_t)0;
837 	}
838 	return (error);
839 }
840 
841 #include <sys/sysctl.h>
842 int bigcgs = 0;
843 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
844 
845 /*
846  * Sanity checks for loading old filesystem superblocks.
847  * See ffs_oldfscompat_write below for unwound actions.
848  *
849  * XXX - Parts get retired eventually.
850  * Unfortunately new bits get added.
851  */
852 static void
853 ffs_oldfscompat_read(fs, ump, sblockloc)
854 	struct fs *fs;
855 	struct ufsmount *ump;
856 	ufs2_daddr_t sblockloc;
857 {
858 	off_t maxfilesize;
859 
860 	/*
861 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
862 	 */
863 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
864 		fs->fs_flags = fs->fs_old_flags;
865 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
866 		fs->fs_sblockloc = sblockloc;
867 	}
868 	/*
869 	 * If not yet done, update UFS1 superblock with new wider fields.
870 	 */
871 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
872 		fs->fs_maxbsize = fs->fs_bsize;
873 		fs->fs_time = fs->fs_old_time;
874 		fs->fs_size = fs->fs_old_size;
875 		fs->fs_dsize = fs->fs_old_dsize;
876 		fs->fs_csaddr = fs->fs_old_csaddr;
877 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
878 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
879 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
880 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
881 	}
882 	if (fs->fs_magic == FS_UFS1_MAGIC &&
883 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
884 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
885 		fs->fs_qbmask = ~fs->fs_bmask;
886 		fs->fs_qfmask = ~fs->fs_fmask;
887 	}
888 	if (fs->fs_magic == FS_UFS1_MAGIC) {
889 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
890 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
891 		if (fs->fs_maxfilesize > maxfilesize)
892 			fs->fs_maxfilesize = maxfilesize;
893 	}
894 	/* Compatibility for old filesystems */
895 	if (fs->fs_avgfilesize <= 0)
896 		fs->fs_avgfilesize = AVFILESIZ;
897 	if (fs->fs_avgfpdir <= 0)
898 		fs->fs_avgfpdir = AFPDIR;
899 	if (bigcgs) {
900 		fs->fs_save_cgsize = fs->fs_cgsize;
901 		fs->fs_cgsize = fs->fs_bsize;
902 	}
903 }
904 
905 /*
906  * Unwinding superblock updates for old filesystems.
907  * See ffs_oldfscompat_read above for details.
908  *
909  * XXX - Parts get retired eventually.
910  * Unfortunately new bits get added.
911  */
912 static void
913 ffs_oldfscompat_write(fs, ump)
914 	struct fs *fs;
915 	struct ufsmount *ump;
916 {
917 
918 	/*
919 	 * Copy back UFS2 updated fields that UFS1 inspects.
920 	 */
921 	if (fs->fs_magic == FS_UFS1_MAGIC) {
922 		fs->fs_old_time = fs->fs_time;
923 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
924 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
925 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
926 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
927 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
928 	}
929 	if (bigcgs) {
930 		fs->fs_cgsize = fs->fs_save_cgsize;
931 		fs->fs_save_cgsize = 0;
932 	}
933 }
934 
935 /*
936  * unmount system call
937  */
938 int
939 ffs_unmount(mp, mntflags, td)
940 	struct mount *mp;
941 	int mntflags;
942 	struct thread *td;
943 {
944 	struct ufsmount *ump = VFSTOUFS(mp);
945 	struct fs *fs;
946 	int error, flags;
947 
948 	flags = 0;
949 	if (mntflags & MNT_FORCE) {
950 		flags |= FORCECLOSE;
951 	}
952 #ifdef UFS_EXTATTR
953 	if ((error = ufs_extattr_stop(mp, td))) {
954 		if (error != EOPNOTSUPP)
955 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
956 			    error);
957 	} else {
958 		ufs_extattr_uepm_destroy(&ump->um_extattr);
959 	}
960 #endif
961 	if (mp->mnt_flag & MNT_SOFTDEP) {
962 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
963 			return (error);
964 	} else {
965 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
966 			return (error);
967 	}
968 	fs = ump->um_fs;
969 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
970 		printf("%s: unmount pending error: blocks %jd files %d\n",
971 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
972 		    fs->fs_pendinginodes);
973 		fs->fs_pendingblocks = 0;
974 		fs->fs_pendinginodes = 0;
975 	}
976 	if (fs->fs_ronly == 0) {
977 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
978 		error = ffs_sbupdate(ump, MNT_WAIT);
979 		if (error) {
980 			fs->fs_clean = 0;
981 			return (error);
982 		}
983 	}
984 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
985 
986 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
987 	/* XXX: see comment above VOP_OPEN */
988 #ifdef notyet
989 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
990 		NOCRED, td);
991 #else
992 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
993 #endif
994 
995 	vrele(ump->um_devvp);
996 
997 	free(fs->fs_csp, M_UFSMNT);
998 	free(fs, M_UFSMNT);
999 	free(ump, M_UFSMNT);
1000 	mp->mnt_data = (qaddr_t)0;
1001 	mp->mnt_flag &= ~MNT_LOCAL;
1002 	return (error);
1003 }
1004 
1005 /*
1006  * Flush out all the files in a filesystem.
1007  */
1008 int
1009 ffs_flushfiles(mp, flags, td)
1010 	struct mount *mp;
1011 	int flags;
1012 	struct thread *td;
1013 {
1014 	struct ufsmount *ump;
1015 	int error;
1016 
1017 	ump = VFSTOUFS(mp);
1018 #ifdef QUOTA
1019 	if (mp->mnt_flag & MNT_QUOTA) {
1020 		int i;
1021 		error = vflush(mp, 0, SKIPSYSTEM|flags);
1022 		if (error)
1023 			return (error);
1024 		for (i = 0; i < MAXQUOTAS; i++) {
1025 			if (ump->um_quotas[i] == NULLVP)
1026 				continue;
1027 			quotaoff(td, mp, i);
1028 		}
1029 		/*
1030 		 * Here we fall through to vflush again to ensure
1031 		 * that we have gotten rid of all the system vnodes.
1032 		 */
1033 	}
1034 #endif
1035 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1036 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1037 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1038 			return (error);
1039 		ffs_snapshot_unmount(mp);
1040 		/*
1041 		 * Here we fall through to vflush again to ensure
1042 		 * that we have gotten rid of all the system vnodes.
1043 		 */
1044 	}
1045         /*
1046 	 * Flush all the files.
1047 	 */
1048 	if ((error = vflush(mp, 0, flags)) != 0)
1049 		return (error);
1050 	/*
1051 	 * Flush filesystem metadata.
1052 	 */
1053 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1054 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1055 	VOP_UNLOCK(ump->um_devvp, 0, td);
1056 	return (error);
1057 }
1058 
1059 /*
1060  * Get filesystem statistics.
1061  */
1062 int
1063 ffs_statfs(mp, sbp, td)
1064 	struct mount *mp;
1065 	struct statfs *sbp;
1066 	struct thread *td;
1067 {
1068 	struct ufsmount *ump;
1069 	struct fs *fs;
1070 
1071 	ump = VFSTOUFS(mp);
1072 	fs = ump->um_fs;
1073 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1074 		panic("ffs_statfs");
1075 	sbp->f_bsize = fs->fs_fsize;
1076 	sbp->f_iosize = fs->fs_bsize;
1077 	sbp->f_blocks = fs->fs_dsize;
1078 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1079 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1080 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1081 	    dbtofsb(fs, fs->fs_pendingblocks);
1082 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1083 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1084 	if (sbp != &mp->mnt_stat) {
1085 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1086 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1087 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1088 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1089 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1090 	}
1091 	return (0);
1092 }
1093 
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * mp is the mount to sync and td the calling thread; cred is handed
 * through to VOP_FSYNC().  waitfor selects the mode: MNT_WAIT makes the
 * vnode locks blocking and the updates synchronous, MNT_LAZY skips the
 * device vnode fsync, and any other value locks vnodes with LK_NOWAIT.
 *
 * Returns 0, or the most recently recorded error from VOP_FSYNC(),
 * softdep_flushworklist() or ffs_sbupdate().  Panics if the superblock
 * is marked modified on a read-only filesystem.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ffs_sync(mp, waitfor, cred, td)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct thread *td;
{
	struct vnode *nvp, *vp, *devvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;

	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 *
	 * Unless the caller asked for MNT_WAIT, vnode locks are only
	 * tried (LK_NOWAIT) and inode updates are not waited for.
	 */
	wait = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	mtx_lock(&mntvnode_mtx);
	/*
	 * Every path that reaches this label, including the later
	 * `goto loop' statements, holds mntvnode_mtx.
	 */
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Depend on mntvnode_mtx to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
		ip = VTOI(vp);
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
			continue;
		}
		if (vp->v_type != VCHR) {
			/*
			 * The list mutex is dropped around vget() and
			 * VOP_FSYNC() and retaken on every path before
			 * the list is touched again.  ENOENT from vget()
			 * restarts the scan; any other vget() failure
			 * just skips this vnode without recording an
			 * error.
			 */
			mtx_unlock(&mntvnode_mtx);
			if ((error = vget(vp, lockreq, td)) != 0) {
				mtx_lock(&mntvnode_mtx);
				if (error == ENOENT)
					goto loop;
			} else {
				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
					allerror = error;
				VOP_UNLOCK(vp, 0, td);
				vrele(vp);
				mtx_lock(&mntvnode_mtx);
			}
		} else {
			/*
			 * Character device vnodes only get their inode
			 * updated; no vget()/VOP_FSYNC() is done on them.
			 */
			mtx_unlock(&mntvnode_mtx);
			UFS_UPDATE(vp, wait);
			mtx_lock(&mntvnode_mtx);
		}
		/*
		 * The mutex was dropped above; if the successor saved in
		 * nvp is no longer the next list entry, start over.
		 */
		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
			goto loop;
	}
	mtx_unlock(&mntvnode_mtx);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			mtx_lock(&mntvnode_mtx);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	/*
	 * Unless this is a lazy sync, fsync the device vnode when it has
	 * writes in progress or dirty buffers.  The vnode interlock taken
	 * for the test is handed to vn_lock() via LK_INTERLOCK, or dropped
	 * explicitly when no fsync is needed.  After a successful
	 * MNT_WAIT fsync the vnode list is scanned again.
	 */
	devvp = ump->um_devvp;
	VI_LOCK(devvp);
	if (waitfor != MNT_LAZY &&
	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0, td);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			mtx_lock(&mntvnode_mtx);
			goto loop;
		}
	} else
		VI_UNLOCK(devvp);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}
1209 
1210 int
1211 ffs_vget(mp, ino, flags, vpp)
1212 	struct mount *mp;
1213 	ino_t ino;
1214 	int flags;
1215 	struct vnode **vpp;
1216 {
1217 	struct thread *td = curthread; 		/* XXX */
1218 	struct fs *fs;
1219 	struct inode *ip;
1220 	struct ufsmount *ump;
1221 	struct buf *bp;
1222 	struct vnode *vp;
1223 	dev_t dev;
1224 	int error;
1225 
1226 	ump = VFSTOUFS(mp);
1227 	dev = ump->um_dev;
1228 
1229 	/*
1230 	 * We do not lock vnode creation as it is believed to be too
1231 	 * expensive for such rare case as simultaneous creation of vnode
1232 	 * for same ino by different processes. We just allow them to race
1233 	 * and check later to decide who wins. Let the race begin!
1234 	 */
1235 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1236 		return (error);
1237 	if (*vpp != NULL)
1238 		return (0);
1239 
1240 	/*
1241 	 * If this MALLOC() is performed after the getnewvnode()
1242 	 * it might block, leaving a vnode with a NULL v_data to be
1243 	 * found by ffs_sync() if a sync happens to fire right then,
1244 	 * which will cause a panic because ffs_sync() blindly
1245 	 * dereferences vp->v_data (as well it should).
1246 	 */
1247 	ip = uma_zalloc(uma_inode, M_WAITOK);
1248 
1249 	/* Allocate a new vnode/inode. */
1250 	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1251 	if (error) {
1252 		*vpp = NULL;
1253 		uma_zfree(uma_inode, ip);
1254 		return (error);
1255 	}
1256 	bzero((caddr_t)ip, sizeof(struct inode));
1257 	/*
1258 	 * FFS supports recursive locking.
1259 	 */
1260 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1261 	vp->v_data = ip;
1262 	ip->i_vnode = vp;
1263 	ip->i_ump = ump;
1264 	ip->i_fs = fs = ump->um_fs;
1265 	ip->i_dev = dev;
1266 	ip->i_number = ino;
1267 #ifdef QUOTA
1268 	{
1269 		int i;
1270 		for (i = 0; i < MAXQUOTAS; i++)
1271 			ip->i_dquot[i] = NODQUOT;
1272 	}
1273 #endif
1274 	/*
1275 	 * Exclusively lock the vnode before adding to hash. Note, that we
1276 	 * must not release nor downgrade the lock (despite flags argument
1277 	 * says) till it is fully initialized.
1278 	 */
1279 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1280 
1281 	/*
1282 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1283 	 * duplicate of vnode being created and add it to the hash. If a
1284 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1285 	 */
1286 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1287 		vput(vp);
1288 		*vpp = NULL;
1289 		return (error);
1290 	}
1291 
1292 	/* We lost the race, then throw away our vnode and return existing */
1293 	if (*vpp != NULL) {
1294 		vput(vp);
1295 		return (0);
1296 	}
1297 
1298 	/* Read in the disk contents for the inode, copy into the inode. */
1299 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1300 	    (int)fs->fs_bsize, NOCRED, &bp);
1301 	if (error) {
1302 		/*
1303 		 * The inode does not contain anything useful, so it would
1304 		 * be misleading to leave it on its hash chain. With mode
1305 		 * still zero, it will be unlinked and returned to the free
1306 		 * list by vput().
1307 		 */
1308 		brelse(bp);
1309 		vput(vp);
1310 		*vpp = NULL;
1311 		return (error);
1312 	}
1313 	if (ip->i_ump->um_fstype == UFS1)
1314 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1315 	else
1316 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1317 	ffs_load_inode(bp, ip, fs, ino);
1318 	if (DOINGSOFTDEP(vp))
1319 		softdep_load_inodeblock(ip);
1320 	else
1321 		ip->i_effnlink = ip->i_nlink;
1322 	bqrelse(bp);
1323 
1324 	/*
1325 	 * Initialize the vnode from the inode, check for aliases.
1326 	 * Note that the underlying vnode may have changed.
1327 	 */
1328 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1329 	if (error) {
1330 		vput(vp);
1331 		*vpp = NULL;
1332 		return (error);
1333 	}
1334 	/*
1335 	 * Finish inode initialization now that aliasing has been resolved.
1336 	 */
1337 	ip->i_devvp = ump->um_devvp;
1338 	VREF(ip->i_devvp);
1339 	/*
1340 	 * Set up a generation number for this inode if it does not
1341 	 * already have one. This should only happen on old filesystems.
1342 	 */
1343 	if (ip->i_gen == 0) {
1344 		ip->i_gen = arc4random() / 2 + 1;
1345 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1346 			ip->i_flag |= IN_MODIFIED;
1347 			DIP(ip, i_gen) = ip->i_gen;
1348 		}
1349 	}
1350 	/*
1351 	 * Ensure that uid and gid are correct. This is a temporary
1352 	 * fix until fsck has been changed to do the update.
1353 	 */
1354 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1355 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1356 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1357 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1358 	}						/* XXX */
1359 
1360 #ifdef MAC
1361 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1362 		/*
1363 		 * If this vnode is already allocated, and we're running
1364 		 * multi-label, attempt to perform a label association
1365 		 * from the extended attributes on the inode.
1366 		 */
1367 		error = mac_associate_vnode_extattr(mp, vp);
1368 		if (error) {
1369 			/* ufs_inactive will release ip->i_devvp ref. */
1370 			vput(vp);
1371 			*vpp = NULL;
1372 			return (error);
1373 		}
1374 	}
1375 #endif
1376 
1377 	*vpp = vp;
1378 	return (0);
1379 }
1380 
1381 /*
1382  * File handle to vnode
1383  *
1384  * Have to be really careful about stale file handles:
1385  * - check that the inode number is valid
1386  * - call ffs_vget() to get the locked inode
1387  * - check for an unallocated inode (i_mode == 0)
1388  * - check that the given client host has export rights and return
1389  *   those rights via. exflagsp and credanonp
1390  */
1391 int
1392 ffs_fhtovp(mp, fhp, vpp)
1393 	struct mount *mp;
1394 	struct fid *fhp;
1395 	struct vnode **vpp;
1396 {
1397 	struct ufid *ufhp;
1398 	struct fs *fs;
1399 
1400 	ufhp = (struct ufid *)fhp;
1401 	fs = VFSTOUFS(mp)->um_fs;
1402 	if (ufhp->ufid_ino < ROOTINO ||
1403 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1404 		return (ESTALE);
1405 	return (ufs_fhtovp(mp, ufhp, vpp));
1406 }
1407 
1408 /*
1409  * Vnode pointer to File handle
1410  */
1411 /* ARGSUSED */
1412 int
1413 ffs_vptofh(vp, fhp)
1414 	struct vnode *vp;
1415 	struct fid *fhp;
1416 {
1417 	struct inode *ip;
1418 	struct ufid *ufhp;
1419 
1420 	ip = VTOI(vp);
1421 	ufhp = (struct ufid *)fhp;
1422 	ufhp->ufid_len = sizeof(struct ufid);
1423 	ufhp->ufid_ino = ip->i_number;
1424 	ufhp->ufid_gen = ip->i_gen;
1425 	return (0);
1426 }
1427 
/*
 * Initialize the filesystem.
 *
 * Runs softdep_initialize() first and then ufs_init(), returning the
 * latter's result.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1439 
/*
 * Undo the work of ffs_init().
 *
 * The two steps run in the reverse order of ffs_init(): ufs_uninit()
 * first, then softdep_uninitialize().  The value from ufs_uninit() is
 * returned.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1453 
1454 /*
1455  * Write a superblock and associated information back to disk.
1456  */
1457 static int
1458 ffs_sbupdate(mp, waitfor)
1459 	struct ufsmount *mp;
1460 	int waitfor;
1461 {
1462 	struct fs *fs = mp->um_fs;
1463 	struct buf *bp;
1464 	int blks;
1465 	void *space;
1466 	int i, size, error, allerror = 0;
1467 
1468 	if (fs->fs_ronly == 1 &&
1469 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1470 	    (MNT_RDONLY | MNT_UPDATE))
1471 		panic("ffs_sbupdate: write read-only filesystem");
1472 	/*
1473 	 * First write back the summary information.
1474 	 */
1475 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1476 	space = fs->fs_csp;
1477 	for (i = 0; i < blks; i += fs->fs_frag) {
1478 		size = fs->fs_bsize;
1479 		if (i + fs->fs_frag > blks)
1480 			size = (blks - i) * fs->fs_fsize;
1481 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1482 		    size, 0, 0, 0);
1483 		bcopy(space, bp->b_data, (u_int)size);
1484 		space = (char *)space + size;
1485 		if (waitfor != MNT_WAIT)
1486 			bawrite(bp);
1487 		else if ((error = bwrite(bp)) != 0)
1488 			allerror = error;
1489 	}
1490 	/*
1491 	 * Now write back the superblock itself. If any errors occurred
1492 	 * up to this point, then fail so that the superblock avoids
1493 	 * being written out as clean.
1494 	 */
1495 	if (allerror)
1496 		return (allerror);
1497 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1498 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1499 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1500 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1501 		fs->fs_sblockloc = SBLOCK_UFS1;
1502 	}
1503 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1504 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1505 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1506 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1507 		fs->fs_sblockloc = SBLOCK_UFS2;
1508 	}
1509 	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1510 	    0, 0, 0);
1511 	fs->fs_fmod = 0;
1512 	fs->fs_time = time_second;
1513 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1514 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1515 	if (waitfor != MNT_WAIT)
1516 		bawrite(bp);
1517 	else if ((error = bwrite(bp)) != 0)
1518 		allerror = error;
1519 	return (allerror);
1520 }
1521 
/*
 * Extended attribute control entry point.
 *
 * Passes all of its arguments straight to ufs_extattrctl() when the
 * kernel is built with UFS_EXTATTR, and to vfs_stdextattrctl()
 * otherwise, returning whatever the chosen routine returns.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1535 
1536 static void
1537 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1538 {
1539 
1540 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1541 		uma_zfree(uma_ufs1, ip->i_din1);
1542 	else if (ip->i_din2 != NULL)
1543 		uma_zfree(uma_ufs2, ip->i_din2);
1544 	uma_zfree(uma_inode, ip);
1545 }
1546