xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 729362425c09cf6b362366aabc6fb547eee8035a)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_mac.h"
38 #include "opt_quota.h"
39 #include "opt_ufs.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/mac.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/bio.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/fcntl.h>
53 #include <sys/disk.h>
54 #include <sys/malloc.h>
55 #include <sys/mutex.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 
63 #include <ufs/ffs/fs.h>
64 #include <ufs/ffs/ffs_extern.h>
65 
66 #include <vm/vm.h>
67 #include <vm/uma.h>
68 #include <vm/vm_page.h>
69 
/*
 * UMA zones for in-core inodes and the UFS1/UFS2 on-disk inode images.
 * They are created on first use by ffs_mount().
 */
uma_zone_t uma_inode, uma_ufs1, uma_ufs2;

/*
 * Forward declarations for routines defined in this file.
 * ffs_reload() is the only one of these without internal linkage.
 */
static int	ffs_sbupdate(struct ufsmount *, int);
       int	ffs_reload(struct mount *,struct ucred *,struct thread *);
static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
		    ufs2_daddr_t);
static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
static vfs_init_t ffs_init;
static vfs_uninit_t ffs_uninit;
static vfs_extattrctl_t ffs_extattrctl;
82 
/*
 * Filesystem operations vector for UFS/FFS, registered under the name
 * "ufs" by VFS_SET() below.
 *
 * The initializer is positional: entries must stay in the order of the
 * members of struct vfsops (declared outside this file).
 */
static struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	vfs_stdcheckexp,
	ffs_vptofh,
	ffs_init,
	ffs_uninit,
	ffs_extattrctl,
};

VFS_SET(ufs_vfsops, ufs, 0);
101 
102 /*
103  * ffs_mount
104  *
105  * Called when mounting local physical media
106  *
107  * PARAMETERS:
108  *		mountroot
109  *			mp	mount point structure
110  *			path	NULL (flag for root mount!!!)
111  *			data	<unused>
112  *			ndp	<unused>
113  *			p	process (user credentials check [statfs])
114  *
115  *		mount
116  *			mp	mount point structure
117  *			path	path to mount point
118  *			data	pointer to argument struct in user space
119  *			ndp	mount point namei() return (used for
120  *				credentials on reload), reused to look
121  *				up block device.
122  *			p	process (user credentials check)
123  *
124  * RETURNS:	0	Success
125  *		!0	error number (errno.h)
126  *
127  * LOCK STATE:
128  *
129  *		ENTRY
130  *			mount point is locked
131  *		EXIT
132  *			mount point is locked
133  *
134  * NOTES:
135  *		A NULL path can be used for a flag since the mount
136  *		system call will fail with EFAULT in copyinstr in
137  *		namei() if it is a genuine NULL from the user.
138  */
139 int
140 ffs_mount(mp, path, data, ndp, td)
141         struct mount		*mp;	/* mount struct pointer*/
142         char			*path;	/* path to mount point*/
143         caddr_t			data;	/* arguments to FS specific mount*/
144         struct nameidata	*ndp;	/* mount point credentials*/
145         struct thread		*td;	/* process requesting mount*/
146 {
147 	size_t size;
148 	struct vnode *devvp;
149 	struct ufs_args args;
150 	struct ufsmount *ump = 0;
151 	struct fs *fs;
152 	int error, flags;
153 	mode_t accessmode;
154 
155 	if (uma_inode == NULL) {
156 		uma_inode = uma_zcreate("FFS inode",
157 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
158 		    UMA_ALIGN_PTR, 0);
159 		uma_ufs1 = uma_zcreate("FFS1 dinode",
160 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
161 		    UMA_ALIGN_PTR, 0);
162 		uma_ufs2 = uma_zcreate("FFS2 dinode",
163 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
164 		    UMA_ALIGN_PTR, 0);
165 	}
166 	/*
167 	 * Use NULL path to indicate we are mounting the root filesystem.
168 	 */
169 	if (path == NULL) {
170 		if ((error = bdevvp(rootdev, &rootvp))) {
171 			printf("ffs_mountroot: can't find rootvp\n");
172 			return (error);
173 		}
174 
175 		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
176 			return (error);
177 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
178 		return (0);
179 	}
180 
181 	/*
182 	 * Mounting non-root filesystem or updating a filesystem
183 	 */
184 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
185 		return (error);
186 
187 	/*
188 	 * If updating, check whether changing from read-only to
189 	 * read/write; if there is no device name, that's all we do.
190 	 */
191 	if (mp->mnt_flag & MNT_UPDATE) {
192 		ump = VFSTOUFS(mp);
193 		fs = ump->um_fs;
194 		devvp = ump->um_devvp;
195 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
196 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
197 				return (error);
198 			/*
199 			 * Flush any dirty data.
200 			 */
201 			if ((error = VFS_SYNC(mp, MNT_WAIT,
202 			    td->td_ucred, td)) != 0) {
203 				vn_finished_write(mp);
204 				return (error);
205 			}
206 			/*
207 			 * Check for and optionally get rid of files open
208 			 * for writing.
209 			 */
210 			flags = WRITECLOSE;
211 			if (mp->mnt_flag & MNT_FORCE)
212 				flags |= FORCECLOSE;
213 			if (mp->mnt_flag & MNT_SOFTDEP) {
214 				error = softdep_flushfiles(mp, flags, td);
215 			} else {
216 				error = ffs_flushfiles(mp, flags, td);
217 			}
218 			if (error) {
219 				vn_finished_write(mp);
220 				return (error);
221 			}
222 			if (fs->fs_pendingblocks != 0 ||
223 			    fs->fs_pendinginodes != 0) {
224 				printf("%s: %s: blocks %jd files %d\n",
225 				    fs->fs_fsmnt, "update error",
226 				    (intmax_t)fs->fs_pendingblocks,
227 				    fs->fs_pendinginodes);
228 				fs->fs_pendingblocks = 0;
229 				fs->fs_pendinginodes = 0;
230 			}
231 			fs->fs_ronly = 1;
232 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
233 				fs->fs_clean = 1;
234 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
235 				fs->fs_ronly = 0;
236 				fs->fs_clean = 0;
237 				vn_finished_write(mp);
238 				return (error);
239 			}
240 			vn_finished_write(mp);
241 		}
242 		if ((mp->mnt_flag & MNT_RELOAD) &&
243 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
244 			return (error);
245 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
246 			/*
247 			 * If upgrade to read-write by non-root, then verify
248 			 * that user has necessary permissions on the device.
249 			 */
250 			if (suser(td)) {
251 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
252 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
253 				    td->td_ucred, td)) != 0) {
254 					VOP_UNLOCK(devvp, 0, td);
255 					return (error);
256 				}
257 				VOP_UNLOCK(devvp, 0, td);
258 			}
259 			fs->fs_flags &= ~FS_UNCLEAN;
260 			if (fs->fs_clean == 0) {
261 				fs->fs_flags |= FS_UNCLEAN;
262 				if ((mp->mnt_flag & MNT_FORCE) ||
263 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
264 				     (fs->fs_flags & FS_DOSOFTDEP))) {
265 					printf("WARNING: %s was not %s\n",
266 					   fs->fs_fsmnt, "properly dismounted");
267 				} else {
268 					printf(
269 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
270 					    fs->fs_fsmnt);
271 					return (EPERM);
272 				}
273 			}
274 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
275 				return (error);
276 			fs->fs_ronly = 0;
277 			fs->fs_clean = 0;
278 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
279 				vn_finished_write(mp);
280 				return (error);
281 			}
282 			/* check to see if we need to start softdep */
283 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
284 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
285 				vn_finished_write(mp);
286 				return (error);
287 			}
288 			if (fs->fs_snapinum[0] != 0)
289 				ffs_snapshot_mount(mp);
290 			vn_finished_write(mp);
291 		}
292 		/*
293 		 * Soft updates is incompatible with "async",
294 		 * so if we are doing softupdates stop the user
295 		 * from setting the async flag in an update.
296 		 * Softdep_mount() clears it in an initial mount
297 		 * or ro->rw remount.
298 		 */
299 		if (mp->mnt_flag & MNT_SOFTDEP)
300 			mp->mnt_flag &= ~MNT_ASYNC;
301 		/*
302 		 * If not updating name, process export requests.
303 		 */
304 		if (args.fspec == 0)
305 			return (vfs_export(mp, &args.export));
306 		/*
307 		 * If this is a snapshot request, take the snapshot.
308 		 */
309 		if (mp->mnt_flag & MNT_SNAPSHOT)
310 			return (ffs_snapshot(mp, args.fspec));
311 	}
312 
313 	/*
314 	 * Not an update, or updating the name: look up the name
315 	 * and verify that it refers to a sensible block device.
316 	 */
317 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
318 	if ((error = namei(ndp)) != 0)
319 		return (error);
320 	NDFREE(ndp, NDF_ONLY_PNBUF);
321 	devvp = ndp->ni_vp;
322 	if (!vn_isdisk(devvp, &error)) {
323 		vrele(devvp);
324 		return (error);
325 	}
326 
327 	/*
328 	 * If mount by non-root, then verify that user has necessary
329 	 * permissions on the device.
330 	 */
331 	if (suser(td)) {
332 		accessmode = VREAD;
333 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
334 			accessmode |= VWRITE;
335 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
336 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
337 			vput(devvp);
338 			return (error);
339 		}
340 		VOP_UNLOCK(devvp, 0, td);
341 	}
342 
343 	if (mp->mnt_flag & MNT_UPDATE) {
344 		/*
345 		 * Update only
346 		 *
347 		 * If it's not the same vnode, or at least the same device
348 		 * then it's not correct.
349 		 */
350 
351 		if (devvp != ump->um_devvp &&
352 		    devvp->v_rdev != ump->um_devvp->v_rdev)
353 			error = EINVAL;	/* needs translation */
354 		vrele(devvp);
355 		if (error)
356 			return (error);
357 	} else {
358 		/*
359 		 * New mount
360 		 *
361 		 * We need the name for the mount point (also used for
362 		 * "last mounted on") copied in. If an error occurs,
363 		 * the mount point is discarded by the upper level code.
364 		 * Note that vfs_mount() populates f_mntonname for us.
365 		 */
366 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
367 			vrele(devvp);
368 			return (error);
369 		}
370 	}
371 	/*
372 	 * Save "mounted from" device name info for mount point (NULL pad).
373 	 */
374 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
375 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
376 	/*
377 	 * Initialize filesystem stat information in mount struct.
378 	 */
379 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
380 	return (0);
381 }
382 
383 /*
384  * Reload all incore data for a filesystem (used after running fsck on
385  * the root filesystem and finding things to fix). The filesystem must
386  * be mounted read-only.
387  *
388  * Things to do to update the mount:
389  *	1) invalidate all cached meta-data.
390  *	2) re-read superblock from disk.
391  *	3) re-read summary information from disk.
392  *	4) invalidate all inactive vnodes.
393  *	5) invalidate all cached file data.
394  *	6) re-read inode data for all active vnodes.
395  */
396 int
397 ffs_reload(mp, cred, td)
398 	struct mount *mp;
399 	struct ucred *cred;
400 	struct thread *td;
401 {
402 	struct vnode *vp, *nvp, *devvp;
403 	struct inode *ip;
404 	void *space;
405 	struct buf *bp;
406 	struct fs *fs, *newfs;
407 	dev_t dev;
408 	ufs2_daddr_t sblockloc;
409 	int i, blks, size, error;
410 	int32_t *lp;
411 
412 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
413 		return (EINVAL);
414 	/*
415 	 * Step 1: invalidate all cached meta-data.
416 	 */
417 	devvp = VFSTOUFS(mp)->um_devvp;
418 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
419 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
420 	VOP_UNLOCK(devvp, 0, td);
421 	if (error)
422 		panic("ffs_reload: dirty1");
423 
424 	dev = devvp->v_rdev;
425 
426 	/*
427 	 * Only VMIO the backing device if the backing device is a real
428 	 * block device.
429 	 */
430 	if (vn_isdisk(devvp, NULL)) {
431 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
432 		vfs_object_create(devvp, td, td->td_ucred);
433 		/* XXX Why lock only to release immediately?? */
434 		mtx_lock(&devvp->v_interlock);
435 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
436 	}
437 
438 	/*
439 	 * Step 2: re-read superblock from disk.
440 	 */
441 	fs = VFSTOUFS(mp)->um_fs;
442 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
443 	    NOCRED, &bp)) != 0)
444 		return (error);
445 	newfs = (struct fs *)bp->b_data;
446 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
447 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
448 	    newfs->fs_bsize > MAXBSIZE ||
449 	    newfs->fs_bsize < sizeof(struct fs)) {
450 			brelse(bp);
451 			return (EIO);		/* XXX needs translation */
452 	}
453 	/*
454 	 * Copy pointer fields back into superblock before copying in	XXX
455 	 * new superblock. These should really be in the ufsmount.	XXX
456 	 * Note that important parameters (eg fs_ncg) are unchanged.
457 	 */
458 	newfs->fs_csp = fs->fs_csp;
459 	newfs->fs_maxcluster = fs->fs_maxcluster;
460 	newfs->fs_contigdirs = fs->fs_contigdirs;
461 	newfs->fs_active = fs->fs_active;
462 	sblockloc = fs->fs_sblockloc;
463 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
464 	brelse(bp);
465 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
466 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
467 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
468 		printf("%s: reload pending error: blocks %jd files %d\n",
469 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
470 		    fs->fs_pendinginodes);
471 		fs->fs_pendingblocks = 0;
472 		fs->fs_pendinginodes = 0;
473 	}
474 
475 	/*
476 	 * Step 3: re-read summary information from disk.
477 	 */
478 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
479 	space = fs->fs_csp;
480 	for (i = 0; i < blks; i += fs->fs_frag) {
481 		size = fs->fs_bsize;
482 		if (i + fs->fs_frag > blks)
483 			size = (blks - i) * fs->fs_fsize;
484 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
485 		    NOCRED, &bp);
486 		if (error)
487 			return (error);
488 		bcopy(bp->b_data, space, (u_int)size);
489 		space = (char *)space + size;
490 		brelse(bp);
491 	}
492 	/*
493 	 * We no longer know anything about clusters per cylinder group.
494 	 */
495 	if (fs->fs_contigsumsize > 0) {
496 		lp = fs->fs_maxcluster;
497 		for (i = 0; i < fs->fs_ncg; i++)
498 			*lp++ = fs->fs_contigsumsize;
499 	}
500 
501 loop:
502 	mtx_lock(&mntvnode_mtx);
503 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
504 		if (vp->v_mount != mp) {
505 			mtx_unlock(&mntvnode_mtx);
506 			goto loop;
507 		}
508 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
509 		mtx_unlock(&mntvnode_mtx);
510 		/*
511 		 * Step 4: invalidate all inactive vnodes.
512 		 */
513 		if (vrecycle(vp, NULL, td))
514 			goto loop;
515 		/*
516 		 * Step 5: invalidate all cached file data.
517 		 */
518 		/* XXX Why lock only to release immediately? */
519 		mtx_lock(&vp->v_interlock);
520 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
521 			goto loop;
522 		}
523 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
524 			panic("ffs_reload: dirty2");
525 		/*
526 		 * Step 6: re-read inode data for all active vnodes.
527 		 */
528 		ip = VTOI(vp);
529 		error =
530 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
531 		    (int)fs->fs_bsize, NOCRED, &bp);
532 		if (error) {
533 			vput(vp);
534 			return (error);
535 		}
536 		ffs_load_inode(bp, ip, fs, ip->i_number);
537 		ip->i_effnlink = ip->i_nlink;
538 		brelse(bp);
539 		vput(vp);
540 		mtx_lock(&mntvnode_mtx);
541 	}
542 	mtx_unlock(&mntvnode_mtx);
543 	return (0);
544 }
545 
/*
 * Possible superblock locations ordered from most to least likely.
 * ffs_mountfs() walks this list until it reaches a -1 entry.
 */
static int sblock_try[] = SBLOCKSEARCH;
550 
551 /*
552  * Common code for mount and mountroot
553  */
554 static int
555 ffs_mountfs(devvp, mp, td)
556 	struct vnode *devvp;
557 	struct mount *mp;
558 	struct thread *td;
559 {
560 	struct ufsmount *ump;
561 	struct buf *bp;
562 	struct fs *fs;
563 	dev_t dev;
564 	void *space;
565 	ufs2_daddr_t sblockloc;
566 	int error, i, blks, size, ronly;
567 	int32_t *lp;
568 	struct ucred *cred;
569 	size_t strsize;
570 	int ncount;
571 
572 	dev = devvp->v_rdev;
573 	cred = td ? td->td_ucred : NOCRED;
574 	/*
575 	 * Disallow multiple mounts of the same device.
576 	 * Disallow mounting of a device that is currently in use
577 	 * (except for root, which might share swap device for miniroot).
578 	 * Flush out any old buffers remaining from a previous use.
579 	 */
580 	error = vfs_mountedon(devvp);
581 	if (error)
582 		return (error);
583 	ncount = vcount(devvp);
584 
585 	if (ncount > 1 && devvp != rootvp)
586 		return (EBUSY);
587 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
588 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
589 	VOP_UNLOCK(devvp, 0, td);
590 	if (error)
591 		return (error);
592 
593 	/*
594 	 * Only VMIO the backing device if the backing device is a real
595 	 * block device.
596 	 * Note that it is optional that the backing device be VMIOed.  This
597 	 * increases the opportunity for metadata caching.
598 	 */
599 	if (vn_isdisk(devvp, NULL)) {
600 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
601 		vfs_object_create(devvp, td, cred);
602 		/* XXX Why lock only to release immediately?? */
603 		mtx_lock(&devvp->v_interlock);
604 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
605 	}
606 
607 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
608 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
609 	/*
610 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
611 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
612 	 * XXX: start to avoid getting trashed later on.
613 	 */
614 #ifdef notyet
615 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
616 #else
617 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
618 #endif
619 	VOP_UNLOCK(devvp, 0, td);
620 	if (error)
621 		return (error);
622 	if (devvp->v_rdev->si_iosize_max != 0)
623 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
624 	if (mp->mnt_iosize_max > MAXPHYS)
625 		mp->mnt_iosize_max = MAXPHYS;
626 
627 	bp = NULL;
628 	ump = NULL;
629 	fs = NULL;
630 	sblockloc = 0;
631 	/*
632 	 * Try reading the superblock in each of its possible locations.
633 	 */
634 	for (i = 0; sblock_try[i] != -1; i++) {
635 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
636 		    cred, &bp)) != 0)
637 			goto out;
638 		fs = (struct fs *)bp->b_data;
639 		sblockloc = sblock_try[i];
640 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
641 		     (fs->fs_magic == FS_UFS2_MAGIC &&
642 		      (fs->fs_sblockloc == sblockloc ||
643 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
644 		    fs->fs_bsize <= MAXBSIZE &&
645 		    fs->fs_bsize >= sizeof(struct fs))
646 			break;
647 		brelse(bp);
648 		bp = NULL;
649 	}
650 	if (sblock_try[i] == -1) {
651 		error = EINVAL;		/* XXX needs translation */
652 		goto out;
653 	}
654 	fs->fs_fmod = 0;
655 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
656 	fs->fs_flags &= ~FS_UNCLEAN;
657 	if (fs->fs_clean == 0) {
658 		fs->fs_flags |= FS_UNCLEAN;
659 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
660 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
661 		     (fs->fs_flags & FS_DOSOFTDEP))) {
662 			printf(
663 "WARNING: %s was not properly dismounted\n",
664 			    fs->fs_fsmnt);
665 		} else {
666 			printf(
667 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
668 			    fs->fs_fsmnt);
669 			error = EPERM;
670 			goto out;
671 		}
672 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
673 		    (mp->mnt_flag & MNT_FORCE)) {
674 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
675 			    (intmax_t)fs->fs_pendingblocks,
676 			    fs->fs_pendinginodes);
677 			fs->fs_pendingblocks = 0;
678 			fs->fs_pendinginodes = 0;
679 		}
680 	}
681 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
682 		printf("%s: mount pending error: blocks %jd files %d\n",
683 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
684 		    fs->fs_pendinginodes);
685 		fs->fs_pendingblocks = 0;
686 		fs->fs_pendinginodes = 0;
687 	}
688 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
689 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
690 	    M_WAITOK);
691 	if (fs->fs_magic == FS_UFS1_MAGIC) {
692 		ump->um_fstype = UFS1;
693 		ump->um_balloc = ffs_balloc_ufs1;
694 	} else {
695 		ump->um_fstype = UFS2;
696 		ump->um_balloc = ffs_balloc_ufs2;
697 	}
698 	ump->um_blkatoff = ffs_blkatoff;
699 	ump->um_truncate = ffs_truncate;
700 	ump->um_update = ffs_update;
701 	ump->um_valloc = ffs_valloc;
702 	ump->um_vfree = ffs_vfree;
703 	ump->um_ifree = ffs_ifree;
704 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
705 	if (fs->fs_sbsize < SBLOCKSIZE)
706 		bp->b_flags |= B_INVAL | B_NOCACHE;
707 	brelse(bp);
708 	bp = NULL;
709 	fs = ump->um_fs;
710 	ffs_oldfscompat_read(fs, ump, sblockloc);
711 	fs->fs_ronly = ronly;
712 	size = fs->fs_cssize;
713 	blks = howmany(size, fs->fs_fsize);
714 	if (fs->fs_contigsumsize > 0)
715 		size += fs->fs_ncg * sizeof(int32_t);
716 	size += fs->fs_ncg * sizeof(u_int8_t);
717 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
718 	fs->fs_csp = space;
719 	for (i = 0; i < blks; i += fs->fs_frag) {
720 		size = fs->fs_bsize;
721 		if (i + fs->fs_frag > blks)
722 			size = (blks - i) * fs->fs_fsize;
723 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
724 		    cred, &bp)) != 0) {
725 			free(fs->fs_csp, M_UFSMNT);
726 			goto out;
727 		}
728 		bcopy(bp->b_data, space, (u_int)size);
729 		space = (char *)space + size;
730 		brelse(bp);
731 		bp = NULL;
732 	}
733 	if (fs->fs_contigsumsize > 0) {
734 		fs->fs_maxcluster = lp = space;
735 		for (i = 0; i < fs->fs_ncg; i++)
736 			*lp++ = fs->fs_contigsumsize;
737 		space = lp;
738 	}
739 	size = fs->fs_ncg * sizeof(u_int8_t);
740 	fs->fs_contigdirs = (u_int8_t *)space;
741 	bzero(fs->fs_contigdirs, size);
742 	fs->fs_active = NULL;
743 	mp->mnt_data = (qaddr_t)ump;
744 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
745 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
746 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
747 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
748 		vfs_getnewfsid(mp);
749 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
750 	mp->mnt_flag |= MNT_LOCAL;
751 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
752 #ifdef MAC
753 		mp->mnt_flag |= MNT_MULTILABEL;
754 #else
755 		printf(
756 "WARNING: %s: multilabel flag on fs but no MAC support\n",
757 		    fs->fs_fsmnt);
758 #endif
759 	if ((fs->fs_flags & FS_ACLS) != 0)
760 #ifdef UFS_ACL
761 		mp->mnt_flag |= MNT_ACLS;
762 #else
763 		printf(
764 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
765 		    fs->fs_fsmnt);
766 #endif
767 	ump->um_mountp = mp;
768 	ump->um_dev = dev;
769 	ump->um_devvp = devvp;
770 	ump->um_nindir = fs->fs_nindir;
771 	ump->um_bptrtodb = fs->fs_fsbtodb;
772 	ump->um_seqinc = fs->fs_frag;
773 	for (i = 0; i < MAXQUOTAS; i++)
774 		ump->um_quotas[i] = NULLVP;
775 #ifdef UFS_EXTATTR
776 	ufs_extattr_uepm_init(&ump->um_extattr);
777 #endif
778 	devvp->v_rdev->si_mountpoint = mp;
779 
780 	/*
781 	 * Set FS local "last mounted on" information (NULL pad)
782 	 */
783 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
784 			fs->fs_fsmnt,			/* copy area*/
785 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
786 			&strsize);			/* real size*/
787 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
788 
789 	if( mp->mnt_flag & MNT_ROOTFS) {
790 		/*
791 		 * Root mount; update timestamp in mount structure.
792 		 * this will be used by the common root mount code
793 		 * to update the system clock.
794 		 */
795 		mp->mnt_time = fs->fs_time;
796 	}
797 
798 	if (ronly == 0) {
799 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
800 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
801 			free(fs->fs_csp, M_UFSMNT);
802 			goto out;
803 		}
804 		if (fs->fs_snapinum[0] != 0)
805 			ffs_snapshot_mount(mp);
806 		fs->fs_fmod = 1;
807 		fs->fs_clean = 0;
808 		(void) ffs_sbupdate(ump, MNT_WAIT);
809 	}
810 #ifdef UFS_EXTATTR
811 #ifdef UFS_EXTATTR_AUTOSTART
812 	/*
813 	 *
814 	 * Auto-starting does the following:
815 	 *	- check for /.attribute in the fs, and extattr_start if so
816 	 *	- for each file in .attribute, enable that file with
817 	 * 	  an attribute of the same name.
818 	 * Not clear how to report errors -- probably eat them.
819 	 * This would all happen while the filesystem was busy/not
820 	 * available, so would effectively be "atomic".
821 	 */
822 	(void) ufs_extattr_autostart(mp, td);
823 #endif /* !UFS_EXTATTR_AUTOSTART */
824 #endif /* !UFS_EXTATTR */
825 	return (0);
826 out:
827 	devvp->v_rdev->si_mountpoint = NULL;
828 	if (bp)
829 		brelse(bp);
830 	/* XXX: see comment above VOP_OPEN */
831 #ifdef notyet
832 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
833 #else
834 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
835 #endif
836 	if (ump) {
837 		free(ump->um_fs, M_UFSMNT);
838 		free(ump, M_UFSMNT);
839 		mp->mnt_data = (qaddr_t)0;
840 	}
841 	return (error);
842 }
843 
/*
 * debug.bigcgs: read/write debugging knob.  While it is non-zero,
 * ffs_oldfscompat_read() saves fs_cgsize in fs_save_cgsize and replaces
 * it with fs_bsize, and ffs_oldfscompat_write() puts the saved value back.
 */
#include <sys/sysctl.h>
int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
847 
848 /*
849  * Sanity checks for loading old filesystem superblocks.
850  * See ffs_oldfscompat_write below for unwound actions.
851  *
852  * XXX - Parts get retired eventually.
853  * Unfortunately new bits get added.
854  */
855 static void
856 ffs_oldfscompat_read(fs, ump, sblockloc)
857 	struct fs *fs;
858 	struct ufsmount *ump;
859 	ufs2_daddr_t sblockloc;
860 {
861 	off_t maxfilesize;
862 
863 	/*
864 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
865 	 */
866 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
867 		fs->fs_flags = fs->fs_old_flags;
868 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
869 		fs->fs_sblockloc = sblockloc;
870 	}
871 	/*
872 	 * If not yet done, update UFS1 superblock with new wider fields.
873 	 */
874 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
875 		fs->fs_maxbsize = fs->fs_bsize;
876 		fs->fs_time = fs->fs_old_time;
877 		fs->fs_size = fs->fs_old_size;
878 		fs->fs_dsize = fs->fs_old_dsize;
879 		fs->fs_csaddr = fs->fs_old_csaddr;
880 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
881 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
882 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
883 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
884 	}
885 	if (fs->fs_magic == FS_UFS1_MAGIC &&
886 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
887 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
888 		fs->fs_qbmask = ~fs->fs_bmask;
889 		fs->fs_qfmask = ~fs->fs_fmask;
890 	}
891 	if (fs->fs_magic == FS_UFS1_MAGIC) {
892 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
893 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
894 		if (fs->fs_maxfilesize > maxfilesize)
895 			fs->fs_maxfilesize = maxfilesize;
896 	}
897 	/* Compatibility for old filesystems */
898 	if (fs->fs_avgfilesize <= 0)
899 		fs->fs_avgfilesize = AVFILESIZ;
900 	if (fs->fs_avgfpdir <= 0)
901 		fs->fs_avgfpdir = AFPDIR;
902 	if (bigcgs) {
903 		fs->fs_save_cgsize = fs->fs_cgsize;
904 		fs->fs_cgsize = fs->fs_bsize;
905 	}
906 }
907 
908 /*
909  * Unwinding superblock updates for old filesystems.
910  * See ffs_oldfscompat_read above for details.
911  *
912  * XXX - Parts get retired eventually.
913  * Unfortunately new bits get added.
914  */
915 static void
916 ffs_oldfscompat_write(fs, ump)
917 	struct fs *fs;
918 	struct ufsmount *ump;
919 {
920 
921 	/*
922 	 * Copy back UFS2 updated fields that UFS1 inspects.
923 	 */
924 	if (fs->fs_magic == FS_UFS1_MAGIC) {
925 		fs->fs_old_time = fs->fs_time;
926 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
927 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
928 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
929 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
930 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
931 	}
932 	if (bigcgs) {
933 		fs->fs_cgsize = fs->fs_save_cgsize;
934 		fs->fs_save_cgsize = 0;
935 	}
936 }
937 
938 /*
939  * unmount system call
940  */
941 int
942 ffs_unmount(mp, mntflags, td)
943 	struct mount *mp;
944 	int mntflags;
945 	struct thread *td;
946 {
947 	struct ufsmount *ump = VFSTOUFS(mp);
948 	struct fs *fs;
949 	int error, flags;
950 
951 	flags = 0;
952 	if (mntflags & MNT_FORCE) {
953 		flags |= FORCECLOSE;
954 	}
955 #ifdef UFS_EXTATTR
956 	if ((error = ufs_extattr_stop(mp, td))) {
957 		if (error != EOPNOTSUPP)
958 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
959 			    error);
960 	} else {
961 		ufs_extattr_uepm_destroy(&ump->um_extattr);
962 	}
963 #endif
964 	if (mp->mnt_flag & MNT_SOFTDEP) {
965 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
966 			return (error);
967 	} else {
968 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
969 			return (error);
970 	}
971 	fs = ump->um_fs;
972 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
973 		printf("%s: unmount pending error: blocks %jd files %d\n",
974 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
975 		    fs->fs_pendinginodes);
976 		fs->fs_pendingblocks = 0;
977 		fs->fs_pendinginodes = 0;
978 	}
979 	if (fs->fs_ronly == 0) {
980 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
981 		error = ffs_sbupdate(ump, MNT_WAIT);
982 		if (error) {
983 			fs->fs_clean = 0;
984 			return (error);
985 		}
986 	}
987 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
988 
989 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
990 	/* XXX: see comment above VOP_OPEN */
991 #ifdef notyet
992 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
993 		NOCRED, td);
994 #else
995 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
996 #endif
997 
998 	vrele(ump->um_devvp);
999 
1000 	free(fs->fs_csp, M_UFSMNT);
1001 	free(fs, M_UFSMNT);
1002 	free(ump, M_UFSMNT);
1003 	mp->mnt_data = (qaddr_t)0;
1004 	mp->mnt_flag &= ~MNT_LOCAL;
1005 	return (error);
1006 }
1007 
1008 /*
1009  * Flush out all the files in a filesystem.
1010  */
1011 int
1012 ffs_flushfiles(mp, flags, td)
1013 	struct mount *mp;
1014 	int flags;
1015 	struct thread *td;
1016 {
1017 	struct ufsmount *ump;
1018 	int error;
1019 
1020 	ump = VFSTOUFS(mp);
1021 #ifdef QUOTA
1022 	if (mp->mnt_flag & MNT_QUOTA) {
1023 		int i;
1024 		error = vflush(mp, 0, SKIPSYSTEM|flags);
1025 		if (error)
1026 			return (error);
1027 		for (i = 0; i < MAXQUOTAS; i++) {
1028 			if (ump->um_quotas[i] == NULLVP)
1029 				continue;
1030 			quotaoff(td, mp, i);
1031 		}
1032 		/*
1033 		 * Here we fall through to vflush again to ensure
1034 		 * that we have gotten rid of all the system vnodes.
1035 		 */
1036 	}
1037 #endif
1038 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1039 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1040 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1041 			return (error);
1042 		ffs_snapshot_unmount(mp);
1043 		/*
1044 		 * Here we fall through to vflush again to ensure
1045 		 * that we have gotten rid of all the system vnodes.
1046 		 */
1047 	}
1048         /*
1049 	 * Flush all the files.
1050 	 */
1051 	if ((error = vflush(mp, 0, flags)) != 0)
1052 		return (error);
1053 	/*
1054 	 * Flush filesystem metadata.
1055 	 */
1056 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1057 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1058 	VOP_UNLOCK(ump->um_devvp, 0, td);
1059 	return (error);
1060 }
1061 
1062 /*
1063  * Get filesystem statistics.
1064  */
1065 int
1066 ffs_statfs(mp, sbp, td)
1067 	struct mount *mp;
1068 	struct statfs *sbp;
1069 	struct thread *td;
1070 {
1071 	struct ufsmount *ump;
1072 	struct fs *fs;
1073 
1074 	ump = VFSTOUFS(mp);
1075 	fs = ump->um_fs;
1076 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1077 		panic("ffs_statfs");
1078 	sbp->f_bsize = fs->fs_fsize;
1079 	sbp->f_iosize = fs->fs_bsize;
1080 	sbp->f_blocks = fs->fs_dsize;
1081 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1082 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1083 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1084 	    dbtofsb(fs, fs->fs_pendingblocks);
1085 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1086 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1087 	if (sbp != &mp->mnt_stat) {
1088 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1089 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1090 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1091 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1092 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1093 	}
1094 	return (0);
1095 }
1096 
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * waitfor selects how hard to try: with MNT_WAIT vnodes are locked
 * without LK_NOWAIT, the soft updates worklist is flushed, and the vnode
 * scan is repeated after the worklist or the device vnode produced more
 * work.  With MNT_LAZY the device vnode is not fsync'ed at all.
 *
 * Returns the most recent error recorded from VOP_FSYNC(),
 * softdep_flushworklist() or ffs_sbupdate(), or 0.  A vget() failure is
 * not recorded; the vnode is simply skipped (or the scan restarted on
 * ENOENT).
 */
int
ffs_sync(mp, waitfor, cred, td)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct thread *td;
{
	struct vnode *nvp, *vp, *devvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;

	fs = ump->um_fs;
	/* A read-only filesystem must never have a modified superblock. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_sync: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	wait = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	mtx_lock(&mntvnode_mtx);
	/* Every "goto loop" arrives here with mntvnode_mtx held. */
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Depend on mntvnode_mtx to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
		ip = VTOI(vp);
		/* Skip VNON vnodes and vnodes with nothing to write. */
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
			continue;
		}
		if (vp->v_type != VCHR) {
			/* The list mutex is dropped around vget()/fsync. */
			mtx_unlock(&mntvnode_mtx);
			if ((error = vget(vp, lockreq, td)) != 0) {
				mtx_lock(&mntvnode_mtx);
				/*
				 * ENOENT restarts the scan; any other
				 * vget() error just skips this vnode.
				 */
				if (error == ENOENT)
					goto loop;
			} else {
				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
					allerror = error;
				VOP_UNLOCK(vp, 0, td);
				vrele(vp);
				mtx_lock(&mntvnode_mtx);
			}
		} else {
			/*
			 * Character devices only get their inode updated,
			 * without going through vget().
			 */
			mtx_unlock(&mntvnode_mtx);
			UFS_UPDATE(vp, wait);
			mtx_lock(&mntvnode_mtx);
		}
		/*
		 * The list may have changed while the mutex was dropped;
		 * if our saved successor is stale, start over.
		 */
		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
			goto loop;
	}
	mtx_unlock(&mntvnode_mtx);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			mtx_lock(&mntvnode_mtx);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	VI_LOCK(devvp);
	/*
	 * Sync the device vnode if it has writes in progress or dirty
	 * buffers, unless this is a lazy sync.  The interlock taken above
	 * is handed to vn_lock() through LK_INTERLOCK, or dropped in the
	 * else branch.
	 */
	if (waitfor != MNT_LAZY &&
	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0, td);
		/* A synchronous sync rescans the vnodes after this. */
		if (allerror == 0 && waitfor == MNT_WAIT) {
			mtx_lock(&mntvnode_mtx);
			goto loop;
		}
	} else
		VI_UNLOCK(devvp);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
		allerror = error;
	return (allerror);
}
1212 
/*
 * Look up the vnode for inode number ino on mount mp, creating and
 * initializing it from the on-disk inode if it is not already in the
 * inode hash.
 *
 * flags is handed to ufs_ihashget()/ufs_ihashins().  On success 0 is
 * returned and *vpp holds the vnode (either one found in the hash or the
 * newly created one).  On every failure path after the initial hash
 * lookup *vpp is set to NULL and the error is returned.
 */
int
ffs_vget(mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{
	struct thread *td = curthread; 		/* XXX */
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */
	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
		return (error);
	/* Already in the hash: nothing more to do. */
	if (*vpp != NULL)
		return (0);

	/*
	 * If this MALLOC() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK);

	/* Allocate a new vnode/inode. */
	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/* Start from an all-zero inode; the dinode pointer is NULL for now. */
	bzero((caddr_t)ip, sizeof(struct inode));
	/*
	 * FFS supports recursive locking.
	 */
	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif
	/*
	 * Exclusively lock the vnode before adding to hash. Note, that we
	 * must not release nor downgrade the lock (despite flags argument
	 * says) till it is fully initialized.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);

	/*
	 * Atomically (in terms of ufs_hash operations) check the hash for
	 * duplicate of vnode being created and add it to the hash. If a
	 * duplicate vnode was found, it will be vget()ed from hash for us.
	 */
	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/* We lost the race, then throw away our vnode and return existing */
	if (*vpp != NULL) {
		vput(vp);
		return (0);
	}

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* Allocate the in-core dinode that matches the filesystem format. */
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 * The new number is copied to the dinode, and the inode marked
	 * modified, only when the mount is not read-only.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP(ip, i_gen) = ip->i_gen;
		}
	}
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
	}						/* XXX */

#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_associate_vnode_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1383 
1384 /*
1385  * File handle to vnode
1386  *
1387  * Have to be really careful about stale file handles:
1388  * - check that the inode number is valid
1389  * - call ffs_vget() to get the locked inode
1390  * - check for an unallocated inode (i_mode == 0)
1391  * - check that the given client host has export rights and return
1392  *   those rights via. exflagsp and credanonp
1393  */
1394 int
1395 ffs_fhtovp(mp, fhp, vpp)
1396 	struct mount *mp;
1397 	struct fid *fhp;
1398 	struct vnode **vpp;
1399 {
1400 	struct ufid *ufhp;
1401 	struct fs *fs;
1402 
1403 	ufhp = (struct ufid *)fhp;
1404 	fs = VFSTOUFS(mp)->um_fs;
1405 	if (ufhp->ufid_ino < ROOTINO ||
1406 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1407 		return (ESTALE);
1408 	return (ufs_fhtovp(mp, ufhp, vpp));
1409 }
1410 
1411 /*
1412  * Vnode pointer to File handle
1413  */
1414 /* ARGSUSED */
1415 int
1416 ffs_vptofh(vp, fhp)
1417 	struct vnode *vp;
1418 	struct fid *fhp;
1419 {
1420 	struct inode *ip;
1421 	struct ufid *ufhp;
1422 
1423 	ip = VTOI(vp);
1424 	ufhp = (struct ufid *)fhp;
1425 	ufhp->ufid_len = sizeof(struct ufid);
1426 	ufhp->ufid_ino = ip->i_number;
1427 	ufhp->ufid_gen = ip->i_gen;
1428 	return (0);
1429 }
1430 
/*
 * Initialize the filesystem.
 *
 * Sets up soft updates first, then hands off to ufs_init() and returns
 * its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1442 
/*
 * Undo the work of ffs_init().
 *
 * Teardown runs in the reverse order of ffs_init(): ufs_uninit() first,
 * then the soft updates code.  The value returned is that of
 * ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1456 
1457 /*
1458  * Write a superblock and associated information back to disk.
1459  */
1460 static int
1461 ffs_sbupdate(mp, waitfor)
1462 	struct ufsmount *mp;
1463 	int waitfor;
1464 {
1465 	struct fs *fs = mp->um_fs;
1466 	struct buf *bp;
1467 	int blks;
1468 	void *space;
1469 	int i, size, error, allerror = 0;
1470 
1471 	if (fs->fs_ronly == 1 &&
1472 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1473 	    (MNT_RDONLY | MNT_UPDATE))
1474 		panic("ffs_sbupdate: write read-only filesystem");
1475 	/*
1476 	 * First write back the summary information.
1477 	 */
1478 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1479 	space = fs->fs_csp;
1480 	for (i = 0; i < blks; i += fs->fs_frag) {
1481 		size = fs->fs_bsize;
1482 		if (i + fs->fs_frag > blks)
1483 			size = (blks - i) * fs->fs_fsize;
1484 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1485 		    size, 0, 0, 0);
1486 		bcopy(space, bp->b_data, (u_int)size);
1487 		space = (char *)space + size;
1488 		if (waitfor != MNT_WAIT)
1489 			bawrite(bp);
1490 		else if ((error = bwrite(bp)) != 0)
1491 			allerror = error;
1492 	}
1493 	/*
1494 	 * Now write back the superblock itself. If any errors occurred
1495 	 * up to this point, then fail so that the superblock avoids
1496 	 * being written out as clean.
1497 	 */
1498 	if (allerror)
1499 		return (allerror);
1500 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1501 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1502 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1503 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1504 		fs->fs_sblockloc = SBLOCK_UFS1;
1505 	}
1506 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1507 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1508 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1509 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1510 		fs->fs_sblockloc = SBLOCK_UFS2;
1511 	}
1512 	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1513 	    0, 0, 0);
1514 	fs->fs_fmod = 0;
1515 	fs->fs_time = time_second;
1516 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1517 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1518 	if (waitfor != MNT_WAIT)
1519 		bawrite(bp);
1520 	else if ((error = bwrite(bp)) != 0)
1521 		allerror = error;
1522 	return (allerror);
1523 }
1524 
/*
 * Extended attribute control.
 *
 * Forwards the request to ufs_extattrctl() when the kernel is built with
 * UFS_EXTATTR, and to the generic vfs_stdextattrctl() otherwise.  The
 * callee's return value is passed back unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1538 
1539 static void
1540 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1541 {
1542 
1543 	if (ump->um_fstype == UFS1)
1544 		uma_zfree(uma_ufs1, ip->i_din1);
1545 	else
1546 		uma_zfree(uma_ufs2, ip->i_din1);
1547 	uma_zfree(uma_inode, ip);
1548 }
1549