xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision c4f6a2a9e1b1879b618c436ab4f56ff75c73a0f5)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_quota.h"
38 #include "opt_ufs.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/stdint.h>
43 #include <sys/namei.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/disk.h>
53 #include <sys/malloc.h>
54 #include <sys/mutex.h>
55 
56 #include <ufs/ufs/extattr.h>
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/ufsmount.h>
59 #include <ufs/ufs/inode.h>
60 #include <ufs/ufs/ufs_extern.h>
61 
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_page.h>
67 
/*
 * Malloc type for the FFS-private part of a vnode.  ffs_mount() passes it
 * to ffs_mountfs(), which records it in the ufsmount (um_malloctype).
 */
68 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
69 
/*
 * Prototypes for routines used before their definition.  ffs_reload() is
 * the only one declared here without static linkage.  ffs_init,
 * ffs_uninit and ffs_extattrctl are only referenced from the ufs_vfsops
 * initializer below; presumably they are defined later in this file.
 */
70 static int	ffs_sbupdate(struct ufsmount *, int);
71        int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
73 		    ufs2_daddr_t);
74 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
75 static vfs_init_t ffs_init;
76 static vfs_uninit_t ffs_uninit;
77 static vfs_extattrctl_t ffs_extattrctl;
78 
/*
 * VFS operations vector for this filesystem.  The initializer is
 * positional, so the entry order has to match the member order of
 * struct vfsops (declared elsewhere -- confirm against that declaration
 * before reordering).  It mixes ffs_* entries with ufs_* and vfs_std*
 * entries.
 */
79 static struct vfsops ufs_vfsops = {
80 	ffs_mount,
81 	ufs_start,
82 	ffs_unmount,
83 	ufs_root,
84 	ufs_quotactl,
85 	ffs_statfs,
86 	ffs_sync,
87 	ffs_vget,
88 	ffs_fhtovp,
89 	vfs_stdcheckexp,
90 	ffs_vptofh,
91 	ffs_init,
92 	ffs_uninit,
93 	ffs_extattrctl,
94 };
95 
/* Hand the vector to VFS_SET with the name "ufs" and flags 0. */
96 VFS_SET(ufs_vfsops, ufs, 0);
97 
98 /*
99  * ffs_mount
100  *
101  * Called when mounting local physical media
 *
 * Handles three cases:
 *	- root mount (path == NULL): mount rootdev through ffs_mountfs();
 *	- update (MNT_UPDATE set in mp->mnt_flag): read/write <-> read-only
 *	  transitions, reload, export and snapshot requests;
 *	- new mount: look up the device named by args.fspec and mount it
 *	  through ffs_mountfs().
102  *
103  * PARAMETERS:
104  *		mountroot
105  *			mp	mount point structure
106  *			path	NULL (flag for root mount!!!)
107  *			data	<unused>
108  *			ndp	<unused>
109  *			td	thread (passed to ffs_mountfs and VFS_STATFS)
110  *
111  *		mount
112  *			mp	mount point structure
113  *			path	path to mount point
114  *			data	pointer to argument struct in user space
115  *			ndp	mount point namei() return (used for
116  *				credentials on reload), reused to look
117  *				up block device.
118  *			td	thread (user credentials check)
119  *
120  * RETURNS:	0	Success
121  *		!0	error number (errno.h)
122  *
123  * LOCK STATE:
124  *
125  *		ENTRY
126  *			mount point is locked
127  *		EXIT
128  *			mount point is locked
129  *
130  * NOTES:
131  *		A NULL path can be used for a flag since the mount
132  *		system call will fail with EFAULT in copyinstr in
133  *		namei() if it is a genuine NULL from the user.
134  */
135 int
136 ffs_mount(mp, path, data, ndp, td)
137         struct mount		*mp;	/* mount struct pointer*/
138         char			*path;	/* path to mount point*/
139         caddr_t			data;	/* arguments to FS specific mount*/
140         struct nameidata	*ndp;	/* mount point credentials*/
141         struct thread		*td;	/* process requesting mount*/
142 {
143 	size_t size;
144 	struct vnode *devvp;
145 	struct ufs_args args;
146 	struct ufsmount *ump = 0;
147 	struct fs *fs;
148 	int error, flags;
149 	mode_t accessmode;
150 
151 	/*
152 	 * Use NULL path to indicate we are mounting the root filesystem.
153 	 */
154 	if (path == NULL) {
155 		if ((error = bdevvp(rootdev, &rootvp))) {
156 			printf("ffs_mountroot: can't find rootvp\n");
157 			return (error);
158 		}
159 
160 		if ((error = ffs_mountfs(rootvp, mp, td, M_FFSNODE)) != 0)
161 			return (error);
		/* Fill in mp->mnt_stat; the statfs result is ignored. */
162 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
163 		return (0);
164 	}
165 
166 	/*
167 	 * Mounting non-root filesystem or updating a filesystem
168 	 */
169 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
170 		return (error);
171 
172 	/*
173 	 * If updating, check whether changing from read-only to
174 	 * read/write; if there is no device name, that's all we do.
175 	 */
176 	if (mp->mnt_flag & MNT_UPDATE) {
177 		ump = VFSTOUFS(mp);
178 		fs = ump->um_fs;
179 		devvp = ump->um_devvp;
		/* Downgrade: currently read/write, read-only requested. */
180 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
181 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
182 				return (error);
183 			/*
184 			 * Flush any dirty data.
185 			 */
186 			VFS_SYNC(mp, MNT_WAIT, td->td_proc->p_ucred, td);
187 			/*
188 			 * Check for and optionally get rid of files open
189 			 * for writing.
190 			 */
191 			flags = WRITECLOSE;
192 			if (mp->mnt_flag & MNT_FORCE)
193 				flags |= FORCECLOSE;
194 			if (mp->mnt_flag & MNT_SOFTDEP) {
195 				error = softdep_flushfiles(mp, flags, td);
196 			} else {
197 				error = ffs_flushfiles(mp, flags, td);
198 			}
199 			if (error) {
200 				vn_finished_write(mp);
201 				return (error);
202 			}
			/*
			 * Pending counts left over after the flush are
			 * reported and then zeroed.
			 */
203 			if (fs->fs_pendingblocks != 0 ||
204 			    fs->fs_pendinginodes != 0) {
205 				printf("%s: %s: blocks %jd files %d\n",
206 				    fs->fs_fsmnt, "update error",
207 				    (intmax_t)fs->fs_pendingblocks,
208 				    fs->fs_pendinginodes);
209 				fs->fs_pendingblocks = 0;
210 				fs->fs_pendinginodes = 0;
211 			}
			/*
			 * Mark read-only (and clean, unless FS_UNCLEAN or
			 * FS_NEEDSFSCK is set) and write the superblock;
			 * both marks are undone if the write fails.
			 */
212 			fs->fs_ronly = 1;
213 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
214 				fs->fs_clean = 1;
215 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
216 				fs->fs_ronly = 0;
217 				fs->fs_clean = 0;
218 				vn_finished_write(mp);
219 				return (error);
220 			}
221 			vn_finished_write(mp);
222 		}
		/* MNT_RELOAD: re-read the on-disk state via ffs_reload(). */
223 		if ((mp->mnt_flag & MNT_RELOAD) &&
224 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
225 			return (error);
		/* Upgrade: currently read-only, read/write wanted. */
226 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
227 			/*
228 			 * If upgrade to read-write by non-root, then verify
229 			 * that user has necessary permissions on the device.
230 			 */
231 			if (suser(td)) {
232 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
233 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
234 				    td->td_ucred, td)) != 0) {
235 					VOP_UNLOCK(devvp, 0, td);
236 					return (error);
237 				}
238 				VOP_UNLOCK(devvp, 0, td);
239 			}
			/*
			 * FS_UNCLEAN is recomputed from fs_clean.  An unclean
			 * filesystem is upgraded only when MNT_FORCE is set,
			 * or when FS_DOSOFTDEP is set and FS_NEEDSFSCK is
			 * not; otherwise the request fails with EPERM.
			 */
240 			fs->fs_flags &= ~FS_UNCLEAN;
241 			if (fs->fs_clean == 0) {
242 				fs->fs_flags |= FS_UNCLEAN;
243 				if ((mp->mnt_flag & MNT_FORCE) ||
244 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
245 				     (fs->fs_flags & FS_DOSOFTDEP))) {
246 					printf("WARNING: %s was not %s\n",
247 					   fs->fs_fsmnt, "properly dismounted");
248 				} else {
249 					printf(
250 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
251 					    fs->fs_fsmnt);
252 					return (EPERM);
253 				}
254 			}
255 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
256 				return (error);
			/*
			 * NOTE(review): fs_ronly and fs_clean are not restored
			 * if the superblock write or softdep_mount() below
			 * fails, unlike the downgrade path above.
			 */
257 			fs->fs_ronly = 0;
258 			fs->fs_clean = 0;
259 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
260 				vn_finished_write(mp);
261 				return (error);
262 			}
263 			/* check to see if we need to start softdep */
264 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
265 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
266 				vn_finished_write(mp);
267 				return (error);
268 			}
269 			if (fs->fs_snapinum[0] != 0)
270 				ffs_snapshot_mount(mp);
271 			vn_finished_write(mp);
272 		}
273 		/*
274 		 * Soft updates is incompatible with "async",
275 		 * so if we are doing softupdates stop the user
276 		 * from setting the async flag in an update.
277 		 * Softdep_mount() clears it in an initial mount
278 		 * or ro->rw remount.
279 		 */
280 		if (mp->mnt_flag & MNT_SOFTDEP)
281 			mp->mnt_flag &= ~MNT_ASYNC;
282 		/*
283 		 * If not updating name, process export requests.
284 		 */
285 		if (args.fspec == 0)
286 			return (vfs_export(mp, &args.export));
287 		/*
288 		 * If this is a snapshot request, take the snapshot.
289 		 */
290 		if (mp->mnt_flag & MNT_SNAPSHOT)
291 			return (ffs_snapshot(mp, args.fspec));
292 	}
293 
294 	/*
295 	 * Not an update, or updating the name: look up the name
296 	 * and verify that it refers to a sensible block device.
297 	 */
298 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
299 	if ((error = namei(ndp)) != 0)
300 		return (error);
301 	NDFREE(ndp, NDF_ONLY_PNBUF);
302 	devvp = ndp->ni_vp;
303 	if (!vn_isdisk(devvp, &error)) {
304 		vrele(devvp);
305 		return (error);
306 	}
307 
308 	/*
309 	 * If mount by non-root, then verify that user has necessary
310 	 * permissions on the device.
311 	 */
312 	if (suser(td)) {
313 		accessmode = VREAD;
314 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
315 			accessmode |= VWRITE;
316 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
317 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
			/* devvp is locked here, hence vput rather than vrele. */
318 			vput(devvp);
319 			return (error);
320 		}
321 		VOP_UNLOCK(devvp, 0, td);
322 	}
323 
324 	if (mp->mnt_flag & MNT_UPDATE) {
325 		/*
326 		 * Update only
327 		 *
328 		 * If it's not the same vnode, or at least the same device
329 		 * then it's not correct.
		 *
		 * The looked-up vnode is only needed for this comparison,
		 * so it is released before error is examined.
		 * NOTE(review): this relies on error being 0 on entry to
		 * this branch -- confirm vn_isdisk() clears it on success.
330 		 */
331 
332 		if (devvp != ump->um_devvp &&
333 		    devvp->v_rdev != ump->um_devvp->v_rdev)
334 			error = EINVAL;	/* needs translation */
335 		vrele(devvp);
336 		if (error)
337 			return (error);
338 	} else {
339 		/*
340 		 * New mount
341 		 *
342 		 * We need the name for the mount point (also used for
343 		 * "last mounted on") copied in. If an error occurs,
344 		 * the mount point is discarded by the upper level code.
345 		 * Note that vfs_mount() populates f_mntonname for us.
346 		 */
347 		if ((error = ffs_mountfs(devvp, mp, td, M_FFSNODE)) != 0) {
348 			vrele(devvp);
349 			return (error);
350 		}
351 	}
352 	/*
353 	 * Save "mounted from" device name info for mount point (NULL pad).
	 * NOTE(review): the copyinstr() return value is not checked; size
	 * is used unconditionally by the bzero() that follows.
354 	 */
355 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
356 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
357 	/*
358 	 * Initialize filesystem stat information in mount struct.
359 	 */
360 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
361 	return (0);
362 }
363 
364 /*
365  * Reload all incore data for a filesystem (used after running fsck on
366  * the root filesystem and finding things to fix). The filesystem must
367  * be mounted read-only.
368  *
369  * Things to do to update the mount:
370  *	1) invalidate all cached meta-data.
371  *	2) re-read superblock from disk.
372  *	3) re-read summary information from disk.
373  *	4) invalidate all inactive vnodes.
374  *	5) invalidate all cached file data.
375  *	6) re-read inode data for all active vnodes.
376  */
377 int
378 ffs_reload(mp, cred, td)
379 	struct mount *mp;
380 	struct ucred *cred;
381 	struct thread *td;
382 {
383 	struct vnode *vp, *nvp, *devvp;
384 	struct inode *ip;
385 	void *space;
386 	struct buf *bp;
387 	struct fs *fs, *newfs;
388 	dev_t dev;
389 	ufs2_daddr_t sblockloc;
390 	int i, blks, size, error;
391 	int32_t *lp;
392 
393 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
394 		return (EINVAL);
395 	/*
396 	 * Step 1: invalidate all cached meta-data.
397 	 */
398 	devvp = VFSTOUFS(mp)->um_devvp;
399 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
400 	error = vinvalbuf(devvp, 0, cred, td, 0, 0);
401 	VOP_UNLOCK(devvp, 0, td);
402 	if (error)
403 		panic("ffs_reload: dirty1");
404 
405 	dev = devvp->v_rdev;
406 
407 	/*
408 	 * Only VMIO the backing device if the backing device is a real
409 	 * block device.
410 	 */
411 	if (vn_isdisk(devvp, NULL)) {
412 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
413 		vfs_object_create(devvp, td, td->td_ucred);
414 		mtx_lock(&devvp->v_interlock);
415 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
416 	}
417 
418 	/*
419 	 * Step 2: re-read superblock from disk.
420 	 */
421 	fs = VFSTOUFS(mp)->um_fs;
422 	if ((error = bread(devvp, fsbtodb(fs, fs->fs_sblockloc), fs->fs_sbsize,
423 	    NOCRED, &bp)) != 0)
424 		return (error);
425 	newfs = (struct fs *)bp->b_data;
426 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
427 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
428 	    newfs->fs_bsize > MAXBSIZE ||
429 	    newfs->fs_bsize < sizeof(struct fs)) {
430 			brelse(bp);
431 			return (EIO);		/* XXX needs translation */
432 	}
433 	/*
434 	 * Copy pointer fields back into superblock before copying in	XXX
435 	 * new superblock. These should really be in the ufsmount.	XXX
436 	 * Note that important parameters (eg fs_ncg) are unchanged.
437 	 */
438 	newfs->fs_csp = fs->fs_csp;
439 	newfs->fs_maxcluster = fs->fs_maxcluster;
440 	newfs->fs_contigdirs = fs->fs_contigdirs;
441 	newfs->fs_active = fs->fs_active;
442 	sblockloc = fs->fs_sblockloc;
443 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
444 	brelse(bp);
445 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
446 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
447 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
448 		printf("%s: reload pending error: blocks %jd files %d\n",
449 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
450 		    fs->fs_pendinginodes);
451 		fs->fs_pendingblocks = 0;
452 		fs->fs_pendinginodes = 0;
453 	}
454 
455 	/*
456 	 * Step 3: re-read summary information from disk.
457 	 */
458 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
459 	space = fs->fs_csp;
460 	for (i = 0; i < blks; i += fs->fs_frag) {
461 		size = fs->fs_bsize;
462 		if (i + fs->fs_frag > blks)
463 			size = (blks - i) * fs->fs_fsize;
464 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
465 		    NOCRED, &bp);
466 		if (error)
467 			return (error);
468 		bcopy(bp->b_data, space, (u_int)size);
469 		space = (char *)space + size;
470 		brelse(bp);
471 	}
472 	/*
473 	 * We no longer know anything about clusters per cylinder group.
474 	 */
475 	if (fs->fs_contigsumsize > 0) {
476 		lp = fs->fs_maxcluster;
477 		for (i = 0; i < fs->fs_ncg; i++)
478 			*lp++ = fs->fs_contigsumsize;
479 	}
480 
481 loop:
482 	mtx_lock(&mntvnode_mtx);
483 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
484 		if (vp->v_mount != mp) {
485 			mtx_unlock(&mntvnode_mtx);
486 			goto loop;
487 		}
488 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
489 		mtx_unlock(&mntvnode_mtx);
490 		/*
491 		 * Step 4: invalidate all inactive vnodes.
492 		 */
493 		if (vrecycle(vp, NULL, td))
494 			goto loop;
495 		/*
496 		 * Step 5: invalidate all cached file data.
497 		 */
498 		mtx_lock(&vp->v_interlock);
499 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
500 			goto loop;
501 		}
502 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
503 			panic("ffs_reload: dirty2");
504 		/*
505 		 * Step 6: re-read inode data for all active vnodes.
506 		 */
507 		ip = VTOI(vp);
508 		error =
509 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
510 		    (int)fs->fs_bsize, NOCRED, &bp);
511 		if (error) {
512 			vput(vp);
513 			return (error);
514 		}
515 		ffs_load_inode(bp, ip, NULL, fs, ip->i_number);
516 		ip->i_effnlink = ip->i_nlink;
517 		brelse(bp);
518 		vput(vp);
519 		mtx_lock(&mntvnode_mtx);
520 	}
521 	mtx_unlock(&mntvnode_mtx);
522 	return (0);
523 }
524 
525 /*
526  * Possible superblock locations ordered from most to least likely.
 * The values come from SBLOCKSEARCH; ffs_mountfs() divides each one by
 * the device sector size to obtain a block number and stops scanning at
 * the first entry equal to -1.
527  */
528 static int sblock_try[] = SBLOCKSEARCH;
529 
530 /*
531  * Common code for mount and mountroot
 *
 * Opens the device, locates and validates the superblock, builds the
 * in-core ufsmount and superblock copy, reads the cylinder group summary
 * information and attaches everything to the mount point.
 *
 * PARAMETERS:
 *		devvp		vnode of the device to mount
 *		mp		mount point structure to fill in
 *		td		calling thread; td->td_ucred is used as the
 *				credential, or NOCRED when td is NULL
 *		malloctype	stored in the ufsmount as um_malloctype
 *
 * RETURNS:	0	Success
 *		EBUSY	device has other users and is not rootvp
 *		EINVAL	no acceptable superblock found
 *		EPERM	read/write mount of an unclean filesystem refused
 *		!0	error from vfs_mountedon, vinvalbuf, VOP_OPEN,
 *			bread or softdep_mount
532  */
533 int
534 ffs_mountfs(devvp, mp, td, malloctype)
535 	struct vnode *devvp;
536 	struct mount *mp;
537 	struct thread *td;
538 	struct malloc_type *malloctype;
539 {
540 	struct ufsmount *ump;
541 	struct buf *bp;
542 	struct fs *fs;
543 	dev_t dev;
544 	void *space;
545 	ufs2_daddr_t sblockloc;
546 	int error, i, blks, size, ronly;
547 	int32_t *lp;
548 	struct ucred *cred;
549 	size_t strsize;
550 	int ncount;
551 	u_int sectorsize;
552 
553 	dev = devvp->v_rdev;
554 	cred = td ? td->td_ucred : NOCRED;
555 	/*
556 	 * Disallow multiple mounts of the same device.
557 	 * Disallow mounting of a device that is currently in use
558 	 * (except for root, which might share swap device for miniroot).
559 	 * Flush out any old buffers remaining from a previous use.
560 	 */
561 	error = vfs_mountedon(devvp);
562 	if (error)
563 		return (error);
564 	ncount = vcount(devvp);
565 
566 	if (ncount > 1 && devvp != rootvp)
567 		return (EBUSY);
568 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
569 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
570 	VOP_UNLOCK(devvp, 0, td);
571 	if (error)
572 		return (error);
573 
574 	/*
575 	 * Only VMIO the backing device if the backing device is a real
576 	 * block device.
577 	 * Note that it is optional that the backing device be VMIOed.  This
578 	 * increases the opportunity for metadata caching.
579 	 */
580 	if (vn_isdisk(devvp, NULL)) {
581 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
582 		vfs_object_create(devvp, td, cred);
583 		mtx_lock(&devvp->v_interlock);
584 		VOP_UNLOCK(devvp, LK_INTERLOCK, td);
585 	}
586 
587 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
588 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
589 	/*
590 	 * XXX: We don't re-VOP_OPEN in FREAD|FWRITE mode if the filesystem
591 	 * XXX: is subsequently remounted, so open it FREAD|FWRITE from the
592 	 * XXX: start to avoid getting trashed later on.
593 	 */
594 #ifdef notyet
595 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, td);
596 #else
597 	error = VOP_OPEN(devvp, FREAD|FWRITE, FSCRED, td);
598 #endif
599 	VOP_UNLOCK(devvp, 0, td);
600 	if (error)
601 		return (error);
	/*
	 * Take the device's maximum I/O size when it advertises one, and
	 * never let the per-mount limit exceed MAXPHYS.
	 */
602 	if (devvp->v_rdev->si_iosize_max != 0)
603 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
604 	if (mp->mnt_iosize_max > MAXPHYS)
605 		mp->mnt_iosize_max = MAXPHYS;
606 
	/*
	 * Ask the device for its sector size, falling back to DEV_BSIZE
	 * when the ioctl fails.  size is used below to turn superblock
	 * byte offsets into block numbers for bread().
	 */
607 	if (VOP_IOCTL(devvp, DIOCGSECTORSIZE, (caddr_t)&sectorsize,
608 	    FREAD, cred, td) != 0)
609 		size = DEV_BSIZE;
610 	else
611 		size = sectorsize;
612 
	/* Cleared so the out: path can tell what has been set up. */
613 	bp = NULL;
614 	ump = NULL;
615 	fs = NULL;
616 	sblockloc = 0;
617 	/*
618 	 * Try reading the superblock in each of its possible locations.
	 * A candidate is accepted when its block size is between
	 * sizeof(struct fs) and MAXBSIZE and it either has the UFS1 magic
	 * number, or has the UFS2 magic number and records the location
	 * it was just read from.
619 	 */
620 	for (i = 0; sblock_try[i] != -1; i++) {
621 		if ((error = bread(devvp, sblock_try[i] / size, SBLOCKSIZE,
622 		    cred, &bp)) != 0)
623 			goto out;
624 		fs = (struct fs *)bp->b_data;
625 		sblockloc = numfrags(fs, sblock_try[i]);
626 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
627 		     (fs->fs_magic == FS_UFS2_MAGIC &&
628 		      fs->fs_sblockloc == sblockloc)) &&
629 		    fs->fs_bsize <= MAXBSIZE &&
630 		    fs->fs_bsize >= sizeof(struct fs))
631 			break;
632 		brelse(bp);
633 		bp = NULL;
634 	}
635 	if (sblock_try[i] == -1) {
636 		error = EINVAL;		/* XXX needs translation */
637 		goto out;
638 	}
639 	fs->fs_fmod = 0;
640 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
	/*
	 * FS_UNCLEAN is recomputed from fs_clean.  An unclean filesystem
	 * is accepted for a read-only or forced mount, or when it has
	 * FS_DOSOFTDEP set and FS_NEEDSFSCK clear; otherwise the mount
	 * fails with EPERM.
	 */
641 	fs->fs_flags &= ~FS_UNCLEAN;
642 	if (fs->fs_clean == 0) {
643 		fs->fs_flags |= FS_UNCLEAN;
644 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
645 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
646 		     (fs->fs_flags & FS_DOSOFTDEP))) {
647 			printf(
648 "WARNING: %s was not properly dismounted\n",
649 			    fs->fs_fsmnt);
650 		} else {
651 			printf(
652 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
653 			    fs->fs_fsmnt);
654 			error = EPERM;
655 			goto out;
656 		}
657 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
658 		    (mp->mnt_flag & MNT_FORCE)) {
659 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
660 			    (intmax_t)fs->fs_pendingblocks,
661 			    fs->fs_pendinginodes);
662 			fs->fs_pendingblocks = 0;
663 			fs->fs_pendinginodes = 0;
664 		}
665 	}
666 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
667 		printf("%s: mount pending error: blocks %jd files %d\n",
668 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
669 		    fs->fs_pendinginodes);
670 		fs->fs_pendingblocks = 0;
671 		fs->fs_pendinginodes = 0;
672 	}
	/*
	 * Allocate the in-core mount structure (zeroed) and a private
	 * copy of the superblock, and install the operations that depend
	 * on the on-disk format.
	 */
673 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
674 	ump->um_malloctype = malloctype;
675 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
676 	    M_WAITOK);
677 	if (fs->fs_magic == FS_UFS1_MAGIC) {
678 		ump->um_fstype = UFS1;
679 		ump->um_balloc = ffs_balloc_ufs1;
680 	} else {
681 		ump->um_fstype = UFS2;
682 		ump->um_balloc = ffs_balloc_ufs2;
683 	}
684 	ump->um_blkatoff = ffs_blkatoff;
685 	ump->um_truncate = ffs_truncate;
686 	ump->um_update = ffs_update;
687 	ump->um_valloc = ffs_valloc;
688 	ump->um_vfree = ffs_vfree;
689 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
	/*
	 * When the superblock is smaller than the SBLOCKSIZE buffer that
	 * was read, flag the buffer B_INVAL | B_NOCACHE before releasing
	 * it.
	 */
690 	if (fs->fs_sbsize < SBLOCKSIZE)
691 		bp->b_flags |= B_INVAL | B_NOCACHE;
692 	brelse(bp);
693 	bp = NULL;
	/* From here on fs refers to the private in-core copy. */
694 	fs = ump->um_fs;
695 	ffs_oldfscompat_read(fs, ump, sblockloc);
696 	fs->fs_ronly = ronly;
	/*
	 * A single allocation, anchored at fs_csp, holds in order: the
	 * summary information (fs_cssize bytes), the per-cylinder-group
	 * fs_maxcluster array when fs_contigsumsize > 0, and one
	 * fs_contigdirs byte per cylinder group.
	 */
697 	size = fs->fs_cssize;
698 	blks = howmany(size, fs->fs_fsize);
699 	if (fs->fs_contigsumsize > 0)
700 		size += fs->fs_ncg * sizeof(int32_t);
701 	size += fs->fs_ncg * sizeof(u_int8_t);
702 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
703 	fs->fs_csp = space;
	/* Read the summary information, one filesystem block at a time. */
704 	for (i = 0; i < blks; i += fs->fs_frag) {
705 		size = fs->fs_bsize;
706 		if (i + fs->fs_frag > blks)
707 			size = (blks - i) * fs->fs_fsize;
708 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
709 		    cred, &bp)) != 0) {
			/* out: frees ump and um_fs but not fs_csp. */
710 			free(fs->fs_csp, M_UFSMNT);
711 			goto out;
712 		}
713 		bcopy(bp->b_data, space, (u_int)size);
714 		space = (char *)space + size;
715 		brelse(bp);
716 		bp = NULL;
717 	}
718 	if (fs->fs_contigsumsize > 0) {
719 		fs->fs_maxcluster = lp = space;
720 		for (i = 0; i < fs->fs_ncg; i++)
721 			*lp++ = fs->fs_contigsumsize;
722 		space = lp;
723 	}
724 	size = fs->fs_ncg * sizeof(u_int8_t);
725 	fs->fs_contigdirs = (u_int8_t *)space;
726 	bzero(fs->fs_contigdirs, size);
727 	fs->fs_active = NULL;
728 	mp->mnt_data = (qaddr_t)ump;
	/*
	 * Use the filesystem id stored in the superblock unless either
	 * half is zero or vfs_getvfs() already finds a mount under it;
	 * in those cases ask for a new one.
	 */
729 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
730 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
731 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
732 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
733 		vfs_getnewfsid(mp);
734 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
735 	mp->mnt_flag |= MNT_LOCAL;
736 	ump->um_mountp = mp;
737 	ump->um_dev = dev;
738 	ump->um_devvp = devvp;
739 	ump->um_nindir = fs->fs_nindir;
740 	ump->um_bptrtodb = fs->fs_fsbtodb;
741 	ump->um_seqinc = fs->fs_frag;
742 	for (i = 0; i < MAXQUOTAS; i++)
743 		ump->um_quotas[i] = NULLVP;
744 #ifdef UFS_EXTATTR
745 	ufs_extattr_uepm_init(&ump->um_extattr);
746 #endif
747 	devvp->v_rdev->si_mountpoint = mp;
748 
749 	/*
750 	 * Set FS local "last mounted on" information (NULL pad)
751 	 */
752 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
753 			fs->fs_fsmnt,			/* copy area*/
754 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
755 			&strsize);			/* real size*/
756 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
757 
758 	if( mp->mnt_flag & MNT_ROOTFS) {
759 		/*
760 		 * Root mount; update timestamp in mount structure.
761 		 * this will be used by the common root mount code
762 		 * to update the system clock.
763 		 */
764 		mp->mnt_time = fs->fs_time;
765 	}
766 
	/*
	 * Read/write mount: start soft updates if the filesystem asks for
	 * them, activate snapshots, then mark the superblock modified and
	 * not clean and write it out (the write status is ignored).
	 */
767 	if (ronly == 0) {
768 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
769 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
770 			free(fs->fs_csp, M_UFSMNT);
771 			goto out;
772 		}
773 		if (fs->fs_snapinum[0] != 0)
774 			ffs_snapshot_mount(mp);
775 		fs->fs_fmod = 1;
776 		fs->fs_clean = 0;
777 		(void) ffs_sbupdate(ump, MNT_WAIT);
778 	}
779 #ifdef UFS_EXTATTR
780 #ifdef UFS_EXTATTR_AUTOSTART
781 	/*
782 	 *
783 	 * Auto-starting does the following:
784 	 *	- check for /.attribute in the fs, and extattr_start if so
785 	 *	- for each file in .attribute, enable that file with
786 	 * 	  an attribute of the same name.
787 	 * Not clear how to report errors -- probably eat them.
788 	 * This would all happen while the filesystem was busy/not
789 	 * available, so would effectively be "atomic".
790 	 */
791 	(void) ufs_extattr_autostart(mp, td);
792 #endif /* !UFS_EXTATTR_AUTOSTART */
793 #endif /* !UFS_EXTATTR */
794 	return (0);
	/*
	 * Failure path: detach the mount from the device, release any
	 * buffer still held, close the device and free whatever part of
	 * the in-core state was allocated.  Callers that allocated fs_csp
	 * free it themselves before jumping here.
	 */
795 out:
796 	devvp->v_rdev->si_mountpoint = NULL;
797 	if (bp)
798 		brelse(bp);
799 	/* XXX: see comment above VOP_OPEN */
800 #ifdef notyet
801 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, td);
802 #else
803 	(void)VOP_CLOSE(devvp, FREAD|FWRITE, cred, td);
804 #endif
805 	if (ump) {
806 		free(ump->um_fs, M_UFSMNT);
807 		free(ump, M_UFSMNT);
808 		mp->mnt_data = (qaddr_t)0;
809 	}
810 	return (error);
811 }
812 
813 #include <sys/sysctl.h>
/*
 * Debugging knob exported as a read/write integer sysctl under _debug.
 * While it is non-zero, ffs_oldfscompat_read() saves fs_cgsize in
 * fs_save_cgsize and replaces it with fs_bsize, and
 * ffs_oldfscompat_write() puts the saved value back.
 */
814 int bigcgs = 0;
815 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
816 
817 /*
818  * Sanity checks for loading old filesystem superblocks.
819  * See ffs_oldfscompat_write below for unwound actions.
820  *
821  * XXX - Parts get retired eventually.
822  * Unfortunately new bits get added.
823  */
824 static void
825 ffs_oldfscompat_read(fs, ump, sblockloc)
826 	struct fs *fs;
827 	struct ufsmount *ump;
828 	ufs2_daddr_t sblockloc;
829 {
830 	off_t maxfilesize;
831 
832 	/*
833 	 * If not yet done, update UFS1 superblock with new wider fields.
834 	 */
835 	if (fs->fs_magic == FS_UFS1_MAGIC &&
836 	    fs->fs_sblockloc != sblockloc) {
837 		fs->fs_maxbsize = fs->fs_bsize;
838 		fs->fs_sblockloc = sblockloc;
839 		fs->fs_time = fs->fs_old_time;
840 		fs->fs_size = fs->fs_old_size;
841 		fs->fs_dsize = fs->fs_old_dsize;
842 		fs->fs_csaddr = fs->fs_old_csaddr;
843 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
844 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
845 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
846 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
847 	}
848 	if (fs->fs_magic == FS_UFS1_MAGIC &&
849 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
850 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
851 		fs->fs_qbmask = ~fs->fs_bmask;
852 		fs->fs_qfmask = ~fs->fs_fmask;
853 	}
854 	if (fs->fs_magic == FS_UFS1_MAGIC) {
855 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
856 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
857 		if (fs->fs_maxfilesize > maxfilesize)
858 			fs->fs_maxfilesize = maxfilesize;
859 	}
860 	/* Compatibility for old filesystems */
861 	if (fs->fs_avgfilesize <= 0)
862 		fs->fs_avgfilesize = AVFILESIZ;
863 	if (fs->fs_avgfpdir <= 0)
864 		fs->fs_avgfpdir = AFPDIR;
865 	if (bigcgs) {
866 		fs->fs_save_cgsize = fs->fs_cgsize;
867 		fs->fs_cgsize = fs->fs_bsize;
868 	}
869 }
870 
871 /*
872  * Unwinding superblock updates for old filesystems.
873  * See ffs_oldfscompat_read above for details.
874  *
875  * XXX - Parts get retired eventually.
876  * Unfortunately new bits get added.
877  */
878 static void
879 ffs_oldfscompat_write(fs, ump)
880 	struct fs *fs;
881 	struct ufsmount *ump;
882 {
883 
884 	/*
885 	 * Copy back UFS2 updated fields that UFS1 inspects.
886 	 */
887 	if (fs->fs_magic == FS_UFS1_MAGIC) {
888 		fs->fs_old_time = fs->fs_time;
889 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
890 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
891 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
892 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
893 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
894 	}
895 	if (bigcgs) {
896 		fs->fs_cgsize = fs->fs_save_cgsize;
897 		fs->fs_save_cgsize = 0;
898 	}
899 }
900 
901 /*
902  * unmount system call
 *
 * Flushes the files of the mount, writes the superblock of a read/write
 * mount, closes and releases the device vnode and frees the in-core
 * mount state.
 *
 *	mp		mount point being unmounted
 *	mntflags	MNT_FORCE requests a forced flush (FORCECLOSE)
 *	td		calling thread
 *
 * Returns the error from the flush or from the superblock write (in
 * which case nothing is torn down), otherwise the status of VOP_CLOSE.
903  */
904 int
905 ffs_unmount(mp, mntflags, td)
906 	struct mount *mp;
907 	int mntflags;
908 	struct thread *td;
909 {
910 	struct ufsmount *ump = VFSTOUFS(mp);
911 	struct fs *fs;
912 	int error, flags;
913 
914 	flags = 0;
915 	if (mntflags & MNT_FORCE) {
916 		flags |= FORCECLOSE;
917 	}
918 #ifdef UFS_EXTATTR
	/*
	 * Stop extended attributes.  A failure other than EOPNOTSUPP is
	 * logged; the per-mount extattr state is destroyed only when the
	 * stop succeeded.
	 */
919 	if ((error = ufs_extattr_stop(mp, td))) {
920 		if (error != EOPNOTSUPP)
921 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
922 			    error);
923 	} else {
924 		ufs_extattr_uepm_destroy(&ump->um_extattr);
925 	}
926 #endif
927 	if (mp->mnt_flag & MNT_SOFTDEP) {
928 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
929 			return (error);
930 	} else {
931 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
932 			return (error);
933 	}
934 	fs = ump->um_fs;
935 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
936 		printf("%s: unmount pending error: blocks %jd files %d\n",
937 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
938 		    fs->fs_pendinginodes);
939 		fs->fs_pendingblocks = 0;
940 		fs->fs_pendinginodes = 0;
941 	}
	/*
	 * Read/write mount: mark the filesystem clean unless FS_UNCLEAN
	 * or FS_NEEDSFSCK is set, and write the superblock.  If the write
	 * fails, the clean mark is withdrawn and the unmount is abandoned.
	 */
942 	if (fs->fs_ronly == 0) {
943 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
944 		error = ffs_sbupdate(ump, MNT_WAIT);
945 		if (error) {
946 			fs->fs_clean = 0;
947 			return (error);
948 		}
949 	}
950 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
951 
	/* The vinvalbuf() status is not checked. */
952 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
953 	/* XXX: see comment above VOP_OPEN */
954 #ifdef notyet
955 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
956 		NOCRED, td);
957 #else
958 	error = VOP_CLOSE(ump->um_devvp, FREAD|FWRITE, NOCRED, td);
959 #endif
960 
961 	vrele(ump->um_devvp);
962 
	/* Free in dependency order: summary area, superblock, mount. */
963 	free(fs->fs_csp, M_UFSMNT);
964 	free(fs, M_UFSMNT);
965 	free(ump, M_UFSMNT);
966 	mp->mnt_data = (qaddr_t)0;
967 	mp->mnt_flag &= ~MNT_LOCAL;
968 	return (error);
969 }
970 
971 /*
972  * Flush out all the files in a filesystem.
 *
 *	mp	mount point whose vnodes are flushed
 *	flags	vflush() flags (callers in this file pass WRITECLOSE
 *		and/or FORCECLOSE, or 0)
 *	td	calling thread
 *
 * Returns the first error from vflush(), otherwise the result of the
 * final VOP_FSYNC on the device vnode.
973  */
974 int
975 ffs_flushfiles(mp, flags, td)
976 	struct mount *mp;
977 	int flags;
978 	struct thread *td;
979 {
980 	struct ufsmount *ump;
981 	int error;
982 
983 	ump = VFSTOUFS(mp);
984 #ifdef QUOTA
	/*
	 * With quotas enabled, flush everything except system vnodes
	 * first, then turn off each quota type that has a quota vnode.
	 */
985 	if (mp->mnt_flag & MNT_QUOTA) {
986 		int i;
987 		error = vflush(mp, 0, SKIPSYSTEM|flags);
988 		if (error)
989 			return (error);
990 		for (i = 0; i < MAXQUOTAS; i++) {
991 			if (ump->um_quotas[i] == NULLVP)
992 				continue;
993 			quotaoff(td, mp, i);
994 		}
995 		/*
996 		 * Here we fall through to vflush again to ensure
997 		 * that we have gotten rid of all the system vnodes.
998 		 */
999 	}
1000 #endif
1001 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	/*
	 * VV_COPYONWRITE set on the device vnode: flush non-system vnodes,
	 * then call ffs_snapshot_unmount() before the final flush.
	 */
1002 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1003 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1004 			return (error);
1005 		ffs_snapshot_unmount(mp);
1006 		/*
1007 		 * Here we fall through to vflush again to ensure
1008 		 * that we have gotten rid of all the system vnodes.
1009 		 */
1010 	}
1011         /*
1012 	 * Flush all the files.
1013 	 */
1014 	if ((error = vflush(mp, 0, flags)) != 0)
1015 		return (error);
1016 	/*
1017 	 * Flush filesystem metadata.
1018 	 */
1019 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1020 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1021 	VOP_UNLOCK(ump->um_devvp, 0, td);
1022 	return (error);
1023 }
1024 
1025 /*
1026  * Get filesystem statistics.
1027  */
1028 int
1029 ffs_statfs(mp, sbp, td)
1030 	struct mount *mp;
1031 	struct statfs *sbp;
1032 	struct thread *td;
1033 {
1034 	struct ufsmount *ump;
1035 	struct fs *fs;
1036 
1037 	ump = VFSTOUFS(mp);
1038 	fs = ump->um_fs;
1039 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1040 		panic("ffs_statfs");
1041 	sbp->f_bsize = fs->fs_fsize;
1042 	sbp->f_iosize = fs->fs_bsize;
1043 	sbp->f_blocks = fs->fs_dsize;
1044 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1045 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1046 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1047 	    dbtofsb(fs, fs->fs_pendingblocks);
1048 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1049 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1050 	if (sbp != &mp->mnt_stat) {
1051 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1052 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1053 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1054 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1055 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1056 	}
1057 	return (0);
1058 }
1059 
1060 /*
1061  * Go through the disk queues to initiate sandbagged IO;
1062  * go through the inodes to write those that have been modified;
1063  * initiate the writing of the super block if it has been modified.
1064  *
1065  * Note: we are always called with the filesystem marked `MPBUSY'.
1066  */
1067 int
1068 ffs_sync(mp, waitfor, cred, td)
1069 	struct mount *mp;
1070 	int waitfor;
1071 	struct ucred *cred;
1072 	struct thread *td;
1073 {
1074 	struct vnode *nvp, *vp, *devvp;
1075 	struct inode *ip;
1076 	struct ufsmount *ump = VFSTOUFS(mp);
1077 	struct fs *fs;
1078 	int error, count, wait, lockreq, allerror = 0;
1079 
1080 	fs = ump->um_fs;
1081 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1082 		printf("fs = %s\n", fs->fs_fsmnt);
1083 		panic("ffs_sync: rofs mod");
1084 	}
1085 	/*
1086 	 * Write back each (modified) inode.
1087 	 */
1088 	wait = 0;
1089 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1090 	if (waitfor == MNT_WAIT) {
1091 		wait = 1;
1092 		lockreq = LK_EXCLUSIVE;
1093 	}
1094 	mtx_lock(&mntvnode_mtx);
1095 loop:
1096 	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
1097 		/*
1098 		 * If the vnode that we are about to sync is no longer
1099 		 * associated with this mount point, start over.
1100 		 */
1101 		if (vp->v_mount != mp)
1102 			goto loop;
1103 
1104 		/*
1105 		 * Depend on the mntvnode_slock to keep things stable enough
1106 		 * for a quick test.  Since there might be hundreds of
1107 		 * thousands of vnodes, we cannot afford even a subroutine
1108 		 * call unless there's a good chance that we have work to do.
1109 		 */
1110 		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
1111 		ip = VTOI(vp);
1112 		if (vp->v_type == VNON || ((ip->i_flag &
1113 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1114 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1115 			continue;
1116 		}
1117 		if (vp->v_type != VCHR) {
1118 			mtx_unlock(&mntvnode_mtx);
1119 			if ((error = vget(vp, lockreq, td)) != 0) {
1120 				mtx_lock(&mntvnode_mtx);
1121 				if (error == ENOENT)
1122 					goto loop;
1123 			} else {
1124 				if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1125 					allerror = error;
1126 				VOP_UNLOCK(vp, 0, td);
1127 				vrele(vp);
1128 				mtx_lock(&mntvnode_mtx);
1129 			}
1130 		} else {
1131 			mtx_unlock(&mntvnode_mtx);
1132 			UFS_UPDATE(vp, wait);
1133 			mtx_lock(&mntvnode_mtx);
1134 		}
1135 		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1136 			goto loop;
1137 	}
1138 	mtx_unlock(&mntvnode_mtx);
1139 	/*
1140 	 * Force stale filesystem control information to be flushed.
1141 	 */
1142 	if (waitfor == MNT_WAIT) {
1143 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1144 			allerror = error;
1145 		/* Flushed work items may create new vnodes to clean */
1146 		if (count) {
1147 			mtx_lock(&mntvnode_mtx);
1148 			goto loop;
1149 		}
1150 	}
1151 #ifdef QUOTA
1152 	qsync(mp);
1153 #endif
1154 	devvp = ump->um_devvp;
1155 	mtx_lock(&devvp->v_interlock);
1156 	if (waitfor != MNT_LAZY &&
1157 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1158 		mtx_unlock(&devvp->v_interlock);
1159 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
1160 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1161 			allerror = error;
1162 		VOP_UNLOCK(devvp, 0, td);
1163 		if (waitfor == MNT_WAIT) {
1164 			mtx_lock(&mntvnode_mtx);
1165 			goto loop;
1166 		}
1167 	} else
1168 		mtx_unlock(&devvp->v_interlock);
1169 	/*
1170 	 * Write back modified superblock.
1171 	 */
1172 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1173 		allerror = error;
1174 	return (allerror);
1175 }
1176 
1177 int
1178 ffs_vget(mp, ino, flags, vpp)
1179 	struct mount *mp;
1180 	ino_t ino;
1181 	int flags;
1182 	struct vnode **vpp;
1183 {
1184 	struct thread *td = curthread; 		/* XXX */
1185 	struct fs *fs;
1186 	struct inode *ip;
1187 	struct ufsmount *ump;
1188 	struct buf *bp;
1189 	struct vnode *vp;
1190 	dev_t dev;
1191 	int error;
1192 
1193 	ump = VFSTOUFS(mp);
1194 	dev = ump->um_dev;
1195 
1196 	/*
1197 	 * We do not lock vnode creation as it is believed to be too
1198 	 * expensive for such rare case as simultaneous creation of vnode
1199 	 * for same ino by different processes. We just allow them to race
1200 	 * and check later to decide who wins. Let the race begin!
1201 	 */
1202 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1203 		return (error);
1204 	if (*vpp != NULL)
1205 		return (0);
1206 
1207 	/*
1208 	 * If this MALLOC() is performed after the getnewvnode()
1209 	 * it might block, leaving a vnode with a NULL v_data to be
1210 	 * found by ffs_sync() if a sync happens to fire right then,
1211 	 * which will cause a panic because ffs_sync() blindly
1212 	 * dereferences vp->v_data (as well it should).
1213 	 */
1214 	MALLOC(ip, struct inode *, sizeof(struct inode),
1215 	    ump->um_malloctype, M_WAITOK);
1216 
1217 	/* Allocate a new vnode/inode. */
1218 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
1219 	if (error) {
1220 		*vpp = NULL;
1221 		FREE(ip, ump->um_malloctype);
1222 		return (error);
1223 	}
1224 	bzero((caddr_t)ip, sizeof(struct inode));
1225 	/*
1226 	 * FFS supports lock sharing in the stack of vnodes
1227 	 */
1228 	vp->v_vnlock = &vp->v_lock;
1229 	lockinit(vp->v_vnlock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
1230 	vp->v_data = ip;
1231 	ip->i_vnode = vp;
1232 	ip->i_ump = ump;
1233 	ip->i_fs = fs = ump->um_fs;
1234 	ip->i_dev = dev;
1235 	ip->i_number = ino;
1236 #ifdef QUOTA
1237 	{
1238 		int i;
1239 		for (i = 0; i < MAXQUOTAS; i++)
1240 			ip->i_dquot[i] = NODQUOT;
1241 	}
1242 #endif
1243 	/*
1244 	 * Exclusively lock the vnode before adding to hash. Note, that we
1245 	 * must not release nor downgrade the lock (despite flags argument
1246 	 * says) till it is fully initialized.
1247 	 */
1248 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1249 
1250 	/*
1251 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1252 	 * duplicate of vnode being created and add it to the hash. If a
1253 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1254 	 */
1255 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1256 		vput(vp);
1257 		*vpp = NULL;
1258 		return (error);
1259 	}
1260 
1261 	/* We lost the race, then throw away our vnode and return existing */
1262 	if (*vpp != NULL) {
1263 		vput(vp);
1264 		return (0);
1265 	}
1266 
1267 	/* Read in the disk contents for the inode, copy into the inode. */
1268 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1269 	    (int)fs->fs_bsize, NOCRED, &bp);
1270 	if (error) {
1271 		/*
1272 		 * The inode does not contain anything useful, so it would
1273 		 * be misleading to leave it on its hash chain. With mode
1274 		 * still zero, it will be unlinked and returned to the free
1275 		 * list by vput().
1276 		 */
1277 		brelse(bp);
1278 		vput(vp);
1279 		*vpp = NULL;
1280 		return (error);
1281 	}
1282 	ffs_load_inode(bp, ip, ump->um_malloctype, fs, ino);
1283 	if (DOINGSOFTDEP(vp))
1284 		softdep_load_inodeblock(ip);
1285 	else
1286 		ip->i_effnlink = ip->i_nlink;
1287 	bqrelse(bp);
1288 
1289 	/*
1290 	 * Initialize the vnode from the inode, check for aliases.
1291 	 * Note that the underlying vnode may have changed.
1292 	 */
1293 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1294 	if (error) {
1295 		vput(vp);
1296 		*vpp = NULL;
1297 		return (error);
1298 	}
1299 	/*
1300 	 * Finish inode initialization now that aliasing has been resolved.
1301 	 */
1302 	ip->i_devvp = ump->um_devvp;
1303 	VREF(ip->i_devvp);
1304 	/*
1305 	 * Set up a generation number for this inode if it does not
1306 	 * already have one. This should only happen on old filesystems.
1307 	 */
1308 	if (ip->i_gen == 0) {
1309 		ip->i_gen = random() / 2 + 1;
1310 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1311 			ip->i_flag |= IN_MODIFIED;
1312 			DIP(ip, i_gen) = ip->i_gen;
1313 		}
1314 	}
1315 	/*
1316 	 * Ensure that uid and gid are correct. This is a temporary
1317 	 * fix until fsck has been changed to do the update.
1318 	 */
1319 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1320 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1321 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1322 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1323 	}						/* XXX */
1324 
1325 	*vpp = vp;
1326 	return (0);
1327 }
1328 
1329 /*
1330  * File handle to vnode
1331  *
1332  * Have to be really careful about stale file handles:
1333  * - check that the inode number is valid
1334  * - call ffs_vget() to get the locked inode
1335  * - check for an unallocated inode (i_mode == 0)
1336  * - check that the given client host has export rights and return
1337  *   those rights via. exflagsp and credanonp
1338  */
1339 int
1340 ffs_fhtovp(mp, fhp, vpp)
1341 	struct mount *mp;
1342 	struct fid *fhp;
1343 	struct vnode **vpp;
1344 {
1345 	struct ufid *ufhp;
1346 	struct fs *fs;
1347 
1348 	ufhp = (struct ufid *)fhp;
1349 	fs = VFSTOUFS(mp)->um_fs;
1350 	if (ufhp->ufid_ino < ROOTINO ||
1351 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1352 		return (ESTALE);
1353 	return (ufs_fhtovp(mp, ufhp, vpp));
1354 }
1355 
1356 /*
1357  * Vnode pointer to File handle
1358  */
1359 /* ARGSUSED */
1360 int
1361 ffs_vptofh(vp, fhp)
1362 	struct vnode *vp;
1363 	struct fid *fhp;
1364 {
1365 	struct inode *ip;
1366 	struct ufid *ufhp;
1367 
1368 	ip = VTOI(vp);
1369 	ufhp = (struct ufid *)fhp;
1370 	ufhp->ufid_len = sizeof(struct ufid);
1371 	ufhp->ufid_ino = ip->i_number;
1372 	ufhp->ufid_gen = ip->i_gen;
1373 	return (0);
1374 }
1375 
/*
 * Filesystem initialization: set up soft updates first, then hand
 * over to ufs_init() and return its result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1387 
/*
 * Reverse of ffs_init(): run ufs_uninit() first, then tear down soft
 * updates.  The value returned is the one from ufs_uninit().
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1401 
1402 /*
1403  * Write a superblock and associated information back to disk.
1404  */
1405 static int
1406 ffs_sbupdate(mp, waitfor)
1407 	struct ufsmount *mp;
1408 	int waitfor;
1409 {
1410 	struct fs *fs = mp->um_fs;
1411 	struct buf *bp;
1412 	int blks;
1413 	void *space;
1414 	int i, size, error, allerror = 0;
1415 
1416 	/*
1417 	 * First write back the summary information.
1418 	 */
1419 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1420 	space = fs->fs_csp;
1421 	for (i = 0; i < blks; i += fs->fs_frag) {
1422 		size = fs->fs_bsize;
1423 		if (i + fs->fs_frag > blks)
1424 			size = (blks - i) * fs->fs_fsize;
1425 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1426 		    size, 0, 0);
1427 		bcopy(space, bp->b_data, (u_int)size);
1428 		space = (char *)space + size;
1429 		if (waitfor != MNT_WAIT)
1430 			bawrite(bp);
1431 		else if ((error = bwrite(bp)) != 0)
1432 			allerror = error;
1433 	}
1434 	/*
1435 	 * Now write back the superblock itself. If any errors occurred
1436 	 * up to this point, then fail so that the superblock avoids
1437 	 * being written out as clean.
1438 	 */
1439 	if (allerror)
1440 		return (allerror);
1441 	bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_sblockloc),
1442 	    (int)fs->fs_sbsize, 0, 0);
1443 	fs->fs_fmod = 0;
1444 	fs->fs_time = time_second;
1445 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1446 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1447 	if (waitfor != MNT_WAIT)
1448 		bawrite(bp);
1449 	else if ((error = bwrite(bp)) != 0)
1450 		allerror = error;
1451 	return (allerror);
1452 }
1453 
/*
 * Extended attribute control: forwards all arguments to
 * ufs_extattrctl() when the kernel is built with UFS_EXTATTR, and to
 * vfs_stdextattrctl() otherwise, returning that routine's result.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1467