xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision f257b7a54b4fe77840cf694314bdc401e00c31a1)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_mac.h"
36 #include "opt_quota.h"
37 #include "opt_ufs.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/mac.h>
45 #include <sys/vnode.h>
46 #include <sys/mount.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/conf.h>
50 #include <sys/fcntl.h>
51 #include <sys/disk.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 
55 #include <ufs/ufs/extattr.h>
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 #include <vm/vm.h>
65 #include <vm/uma.h>
66 #include <vm/vm_page.h>
67 
68 uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
69 
70 static int	ffs_sbupdate(struct ufsmount *, int);
71        int	ffs_reload(struct mount *,struct ucred *,struct thread *);
72 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
73 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
74 		    ufs2_daddr_t);
75 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
76 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
77 static vfs_init_t ffs_init;
78 static vfs_uninit_t ffs_uninit;
79 static vfs_extattrctl_t ffs_extattrctl;
80 
81 static struct vfsops ufs_vfsops = {
82 	.vfs_extattrctl =	ffs_extattrctl,
83 	.vfs_fhtovp =		ffs_fhtovp,
84 	.vfs_init =		ffs_init,
85 	.vfs_mount =		ffs_mount,
86 	.vfs_quotactl =		ufs_quotactl,
87 	.vfs_root =		ufs_root,
88 	.vfs_start =		ufs_start,
89 	.vfs_statfs =		ffs_statfs,
90 	.vfs_sync =		ffs_sync,
91 	.vfs_uninit =		ffs_uninit,
92 	.vfs_unmount =		ffs_unmount,
93 	.vfs_vget =		ffs_vget,
94 	.vfs_vptofh =		ffs_vptofh,
95 };
96 
97 VFS_SET(ufs_vfsops, ufs, 0);
98 
99 /*
100  * ffs_mount
101  *
102  * Called when mounting local physical media
103  *
104  * PARAMETERS:
105  *		mountroot
106  *			mp	mount point structure
107  *			path	NULL (flag for root mount!!!)
108  *			data	<unused>
109  *			ndp	<unused>
110  *			p	process (user credentials check [statfs])
111  *
112  *		mount
113  *			mp	mount point structure
114  *			path	path to mount point
115  *			data	pointer to argument struct in user space
116  *			ndp	mount point namei() return (used for
117  *				credentials on reload), reused to look
118  *				up block device.
119  *			p	process (user credentials check)
120  *
121  * RETURNS:	0	Success
122  *		!0	error number (errno.h)
123  *
124  * LOCK STATE:
125  *
126  *		ENTRY
127  *			mount point is locked
128  *		EXIT
129  *			mount point is locked
130  *
131  * NOTES:
132  *		A NULL path can be used for a flag since the mount
133  *		system call will fail with EFAULT in copyinstr in
134  *		namei() if it is a genuine NULL from the user.
135  */
136 int
137 ffs_mount(mp, path, data, ndp, td)
138         struct mount		*mp;	/* mount struct pointer*/
139         char			*path;	/* path to mount point*/
140         caddr_t			data;	/* arguments to FS specific mount*/
141         struct nameidata	*ndp;	/* mount point credentials*/
142         struct thread		*td;	/* process requesting mount*/
143 {
144 	size_t size;
145 	struct vnode *devvp;
146 	struct ufs_args args;
147 	struct ufsmount *ump = 0;
148 	struct fs *fs;
149 	int error, flags;
150 	mode_t accessmode;
151 
152 	if (uma_inode == NULL) {
153 		uma_inode = uma_zcreate("FFS inode",
154 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
155 		    UMA_ALIGN_PTR, 0);
156 		uma_ufs1 = uma_zcreate("FFS1 dinode",
157 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
158 		    UMA_ALIGN_PTR, 0);
159 		uma_ufs2 = uma_zcreate("FFS2 dinode",
160 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
161 		    UMA_ALIGN_PTR, 0);
162 	}
163 	/*
164 	 * Use NULL path to indicate we are mounting the root filesystem.
165 	 */
166 	if (path == NULL) {
167 		if ((error = bdevvp(rootdev, &rootvp))) {
168 			printf("ffs_mountroot: can't find rootvp\n");
169 			return (error);
170 		}
171 
172 		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
173 			return (error);
174 		(void)VFS_STATFS(mp, &mp->mnt_stat, td);
175 		return (0);
176 	}
177 
178 	/*
179 	 * Mounting non-root filesystem or updating a filesystem
180 	 */
181 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
182 		return (error);
183 
184 	/*
185 	 * If updating, check whether changing from read-only to
186 	 * read/write; if there is no device name, that's all we do.
187 	 */
188 	if (mp->mnt_flag & MNT_UPDATE) {
189 		ump = VFSTOUFS(mp);
190 		fs = ump->um_fs;
191 		devvp = ump->um_devvp;
192 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
193 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
194 				return (error);
195 			/*
196 			 * Flush any dirty data.
197 			 */
198 			if ((error = VFS_SYNC(mp, MNT_WAIT,
199 			    td->td_ucred, td)) != 0) {
200 				vn_finished_write(mp);
201 				return (error);
202 			}
203 			/*
204 			 * Check for and optionally get rid of files open
205 			 * for writing.
206 			 */
207 			flags = WRITECLOSE;
208 			if (mp->mnt_flag & MNT_FORCE)
209 				flags |= FORCECLOSE;
210 			if (mp->mnt_flag & MNT_SOFTDEP) {
211 				error = softdep_flushfiles(mp, flags, td);
212 			} else {
213 				error = ffs_flushfiles(mp, flags, td);
214 			}
215 			if (error) {
216 				vn_finished_write(mp);
217 				return (error);
218 			}
219 			if (fs->fs_pendingblocks != 0 ||
220 			    fs->fs_pendinginodes != 0) {
221 				printf("%s: %s: blocks %jd files %d\n",
222 				    fs->fs_fsmnt, "update error",
223 				    (intmax_t)fs->fs_pendingblocks,
224 				    fs->fs_pendinginodes);
225 				fs->fs_pendingblocks = 0;
226 				fs->fs_pendinginodes = 0;
227 			}
228 			fs->fs_ronly = 1;
229 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
230 				fs->fs_clean = 1;
231 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
232 				fs->fs_ronly = 0;
233 				fs->fs_clean = 0;
234 				vn_finished_write(mp);
235 				return (error);
236 			}
237 			vn_finished_write(mp);
238 		}
239 		if ((mp->mnt_flag & MNT_RELOAD) &&
240 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, td)) != 0)
241 			return (error);
242 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
243 			/*
244 			 * If upgrade to read-write by non-root, then verify
245 			 * that user has necessary permissions on the device.
246 			 */
247 			if (suser(td)) {
248 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
249 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
250 				    td->td_ucred, td)) != 0) {
251 					VOP_UNLOCK(devvp, 0, td);
252 					return (error);
253 				}
254 				VOP_UNLOCK(devvp, 0, td);
255 			}
256 			fs->fs_flags &= ~FS_UNCLEAN;
257 			if (fs->fs_clean == 0) {
258 				fs->fs_flags |= FS_UNCLEAN;
259 				if ((mp->mnt_flag & MNT_FORCE) ||
260 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
261 				     (fs->fs_flags & FS_DOSOFTDEP))) {
262 					printf("WARNING: %s was not %s\n",
263 					   fs->fs_fsmnt, "properly dismounted");
264 				} else {
265 					printf(
266 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
267 					    fs->fs_fsmnt);
268 					return (EPERM);
269 				}
270 			}
271 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
272 				return (error);
273 			fs->fs_ronly = 0;
274 			fs->fs_clean = 0;
275 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
276 				vn_finished_write(mp);
277 				return (error);
278 			}
279 			/* check to see if we need to start softdep */
280 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
281 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
282 				vn_finished_write(mp);
283 				return (error);
284 			}
285 			if (fs->fs_snapinum[0] != 0)
286 				ffs_snapshot_mount(mp);
287 			vn_finished_write(mp);
288 		}
289 		/*
290 		 * Soft updates is incompatible with "async",
291 		 * so if we are doing softupdates stop the user
292 		 * from setting the async flag in an update.
293 		 * Softdep_mount() clears it in an initial mount
294 		 * or ro->rw remount.
295 		 */
296 		if (mp->mnt_flag & MNT_SOFTDEP)
297 			mp->mnt_flag &= ~MNT_ASYNC;
298 		/*
299 		 * If not updating name, process export requests.
300 		 */
301 		if (args.fspec == 0)
302 			return (vfs_export(mp, &args.export));
303 		/*
304 		 * If this is a snapshot request, take the snapshot.
305 		 */
306 		if (mp->mnt_flag & MNT_SNAPSHOT)
307 			return (ffs_snapshot(mp, args.fspec));
308 	}
309 
310 	/*
311 	 * Not an update, or updating the name: look up the name
312 	 * and verify that it refers to a sensible disk device.
313 	 */
314 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
315 	if ((error = namei(ndp)) != 0)
316 		return (error);
317 	NDFREE(ndp, NDF_ONLY_PNBUF);
318 	devvp = ndp->ni_vp;
319 	if (!vn_isdisk(devvp, &error)) {
320 		vrele(devvp);
321 		return (error);
322 	}
323 
324 	/*
325 	 * If mount by non-root, then verify that user has necessary
326 	 * permissions on the device.
327 	 */
328 	if (suser(td)) {
329 		accessmode = VREAD;
330 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
331 			accessmode |= VWRITE;
332 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
333 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
334 			vput(devvp);
335 			return (error);
336 		}
337 		VOP_UNLOCK(devvp, 0, td);
338 	}
339 
340 	if (mp->mnt_flag & MNT_UPDATE) {
341 		/*
342 		 * Update only
343 		 *
344 		 * If it's not the same vnode, or at least the same device
345 		 * then it's not correct.
346 		 */
347 
348 		if (devvp != ump->um_devvp &&
349 		    devvp->v_rdev != ump->um_devvp->v_rdev)
350 			error = EINVAL;	/* needs translation */
351 		vrele(devvp);
352 		if (error)
353 			return (error);
354 	} else {
355 		/*
356 		 * New mount
357 		 *
358 		 * We need the name for the mount point (also used for
359 		 * "last mounted on") copied in. If an error occurs,
360 		 * the mount point is discarded by the upper level code.
361 		 * Note that vfs_mount() populates f_mntonname for us.
362 		 */
363 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
364 			vrele(devvp);
365 			return (error);
366 		}
367 	}
368 	/*
369 	 * Save "mounted from" device name info for mount point (NULL pad).
370 	 */
371 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
372 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
373 	/*
374 	 * Initialize filesystem stat information in mount struct.
375 	 */
376 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
377 	return (0);
378 }
379 
380 /*
381  * Reload all incore data for a filesystem (used after running fsck on
382  * the root filesystem and finding things to fix). The filesystem must
383  * be mounted read-only.
384  *
385  * Things to do to update the mount:
386  *	1) invalidate all cached meta-data.
387  *	2) re-read superblock from disk.
388  *	3) re-read summary information from disk.
389  *	4) invalidate all inactive vnodes.
390  *	5) invalidate all cached file data.
391  *	6) re-read inode data for all active vnodes.
392  */
393 int
394 ffs_reload(mp, cred, td)
395 	struct mount *mp;
396 	struct ucred *cred;
397 	struct thread *td;
398 {
399 	struct vnode *vp, *nvp, *devvp;
400 	struct inode *ip;
401 	void *space;
402 	struct buf *bp;
403 	struct fs *fs, *newfs;
404 	ufs2_daddr_t sblockloc;
405 	int i, blks, size, error;
406 	int32_t *lp;
407 
408 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
409 		return (EINVAL);
410 	/*
411 	 * Step 1: invalidate all cached meta-data.
412 	 */
413 	devvp = VFSTOUFS(mp)->um_devvp;
414 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
415 	if (vinvalbuf(devvp, 0, cred, td, 0, 0) != 0)
416 		panic("ffs_reload: dirty1");
417 	/*
418 	 * Only VMIO the backing device if the backing device is a real
419 	 * disk device.  See ffs_mountfs() for more details.
420 	 */
421 	if (vn_isdisk(devvp, NULL))
422 		vfs_object_create(devvp, td, td->td_ucred);
423 	VOP_UNLOCK(devvp, 0, td);
424 
425 	/*
426 	 * Step 2: re-read superblock from disk.
427 	 */
428 	fs = VFSTOUFS(mp)->um_fs;
429 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
430 	    NOCRED, &bp)) != 0)
431 		return (error);
432 	newfs = (struct fs *)bp->b_data;
433 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
434 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
435 	    newfs->fs_bsize > MAXBSIZE ||
436 	    newfs->fs_bsize < sizeof(struct fs)) {
437 			brelse(bp);
438 			return (EIO);		/* XXX needs translation */
439 	}
440 	/*
441 	 * Copy pointer fields back into superblock before copying in	XXX
442 	 * new superblock. These should really be in the ufsmount.	XXX
443 	 * Note that important parameters (eg fs_ncg) are unchanged.
444 	 */
445 	newfs->fs_csp = fs->fs_csp;
446 	newfs->fs_maxcluster = fs->fs_maxcluster;
447 	newfs->fs_contigdirs = fs->fs_contigdirs;
448 	newfs->fs_active = fs->fs_active;
449 	/* The file system is still read-only. */
450 	newfs->fs_ronly = 1;
451 	sblockloc = fs->fs_sblockloc;
452 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
453 	brelse(bp);
454 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
455 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
456 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
457 		printf("%s: reload pending error: blocks %jd files %d\n",
458 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
459 		    fs->fs_pendinginodes);
460 		fs->fs_pendingblocks = 0;
461 		fs->fs_pendinginodes = 0;
462 	}
463 
464 	/*
465 	 * Step 3: re-read summary information from disk.
466 	 */
467 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
468 	space = fs->fs_csp;
469 	for (i = 0; i < blks; i += fs->fs_frag) {
470 		size = fs->fs_bsize;
471 		if (i + fs->fs_frag > blks)
472 			size = (blks - i) * fs->fs_fsize;
473 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
474 		    NOCRED, &bp);
475 		if (error)
476 			return (error);
477 		bcopy(bp->b_data, space, (u_int)size);
478 		space = (char *)space + size;
479 		brelse(bp);
480 	}
481 	/*
482 	 * We no longer know anything about clusters per cylinder group.
483 	 */
484 	if (fs->fs_contigsumsize > 0) {
485 		lp = fs->fs_maxcluster;
486 		for (i = 0; i < fs->fs_ncg; i++)
487 			*lp++ = fs->fs_contigsumsize;
488 	}
489 
490 loop:
491 	MNT_ILOCK(mp);
492 	MNT_VNODE_FOREACH(vp, mp, nvp) {
493 		VI_LOCK(vp);
494 		if (vp->v_iflag & VI_XLOCK) {
495 			VI_UNLOCK(vp);
496 			continue;
497 		}
498 		MNT_IUNLOCK(mp);
499 		/*
500 		 * Step 4: invalidate all inactive vnodes.
501 		 */
502 		if (vp->v_usecount == 0) {
503 			vgonel(vp, td);
504 			goto loop;
505 		}
506 		/*
507 		 * Step 5: invalidate all cached file data.
508 		 */
509 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
510 			goto loop;
511 		}
512 		if (vinvalbuf(vp, 0, cred, td, 0, 0))
513 			panic("ffs_reload: dirty2");
514 		/*
515 		 * Step 6: re-read inode data for all active vnodes.
516 		 */
517 		ip = VTOI(vp);
518 		error =
519 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
520 		    (int)fs->fs_bsize, NOCRED, &bp);
521 		if (error) {
522 			VOP_UNLOCK(vp, 0, td);
523 			vrele(vp);
524 			return (error);
525 		}
526 		ffs_load_inode(bp, ip, fs, ip->i_number);
527 		ip->i_effnlink = ip->i_nlink;
528 		brelse(bp);
529 		VOP_UNLOCK(vp, 0, td);
530 		vrele(vp);
531 		MNT_ILOCK(mp);
532 	}
533 	MNT_IUNLOCK(mp);
534 	return (0);
535 }
536 
537 /*
538  * Possible superblock locations ordered from most to least likely.
539  */
540 static int sblock_try[] = SBLOCKSEARCH;
541 
542 /*
543  * Common code for mount and mountroot
544  */
545 static int
546 ffs_mountfs(devvp, mp, td)
547 	struct vnode *devvp;
548 	struct mount *mp;
549 	struct thread *td;
550 {
551 	struct ufsmount *ump;
552 	struct buf *bp;
553 	struct fs *fs;
554 	struct cdev *dev;
555 	void *space;
556 	ufs2_daddr_t sblockloc;
557 	int error, i, blks, size, ronly;
558 	int32_t *lp;
559 	struct ucred *cred;
560 	size_t strsize;
561 
562 	dev = devvp->v_rdev;
563 	cred = td ? td->td_ucred : NOCRED;
564 	/*
565 	 * Disallow multiple mounts of the same device.
566 	 * Disallow mounting of a device that is currently in use
567 	 * (except for root, which might share swap device for miniroot).
568 	 * Flush out any old buffers remaining from a previous use.
569 	 */
570 	error = vfs_mountedon(devvp);
571 	if (error)
572 		return (error);
573 	if (vcount(devvp) > 1 && devvp != rootvp)
574 		return (EBUSY);
575 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
576 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
577 	if (error) {
578 		VOP_UNLOCK(devvp, 0, td);
579 		return (error);
580 	}
581 
582 	/*
583 	 * Only VMIO the backing device if the backing device is a real
584 	 * disk device.
585 	 * Note that it is optional that the backing device be VMIOed.  This
586 	 * increases the opportunity for metadata caching.
587 	 */
588 	if (vn_isdisk(devvp, NULL))
589 		vfs_object_create(devvp, td, cred);
590 
591 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
592 	/*
593 	 * XXX: open the device with read and write access even if only
594 	 * read access is needed now.  Write access is needed if the
595 	 * filesystem is ever mounted read/write, and we don't change the
596 	 * access mode for remounts.
597 	 */
598 #ifdef notyet
599 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
600 #else
601 	error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
602 #endif
603 	VOP_UNLOCK(devvp, 0, td);
604 	if (error)
605 		return (error);
606 	if (devvp->v_rdev->si_iosize_max != 0)
607 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
608 	if (mp->mnt_iosize_max > MAXPHYS)
609 		mp->mnt_iosize_max = MAXPHYS;
610 
611 	bp = NULL;
612 	ump = NULL;
613 	fs = NULL;
614 	sblockloc = 0;
615 	/*
616 	 * Try reading the superblock in each of its possible locations.
617 	 */
618 	for (i = 0; sblock_try[i] != -1; i++) {
619 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
620 		    cred, &bp)) != 0)
621 			goto out;
622 		fs = (struct fs *)bp->b_data;
623 		sblockloc = sblock_try[i];
624 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
625 		     (fs->fs_magic == FS_UFS2_MAGIC &&
626 		      (fs->fs_sblockloc == sblockloc ||
627 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
628 		    fs->fs_bsize <= MAXBSIZE &&
629 		    fs->fs_bsize >= sizeof(struct fs))
630 			break;
631 		brelse(bp);
632 		bp = NULL;
633 	}
634 	if (sblock_try[i] == -1) {
635 		error = EINVAL;		/* XXX needs translation */
636 		goto out;
637 	}
638 	fs->fs_fmod = 0;
639 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
640 	fs->fs_flags &= ~FS_UNCLEAN;
641 	if (fs->fs_clean == 0) {
642 		fs->fs_flags |= FS_UNCLEAN;
643 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
644 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
645 		     (fs->fs_flags & FS_DOSOFTDEP))) {
646 			printf(
647 "WARNING: %s was not properly dismounted\n",
648 			    fs->fs_fsmnt);
649 		} else {
650 			printf(
651 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
652 			    fs->fs_fsmnt);
653 			error = EPERM;
654 			goto out;
655 		}
656 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
657 		    (mp->mnt_flag & MNT_FORCE)) {
658 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
659 			    (intmax_t)fs->fs_pendingblocks,
660 			    fs->fs_pendinginodes);
661 			fs->fs_pendingblocks = 0;
662 			fs->fs_pendinginodes = 0;
663 		}
664 	}
665 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
666 		printf("%s: mount pending error: blocks %jd files %d\n",
667 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
668 		    fs->fs_pendinginodes);
669 		fs->fs_pendingblocks = 0;
670 		fs->fs_pendinginodes = 0;
671 	}
672 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
673 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
674 	    M_WAITOK);
675 	if (fs->fs_magic == FS_UFS1_MAGIC) {
676 		ump->um_fstype = UFS1;
677 		ump->um_balloc = ffs_balloc_ufs1;
678 	} else {
679 		ump->um_fstype = UFS2;
680 		ump->um_balloc = ffs_balloc_ufs2;
681 	}
682 	ump->um_blkatoff = ffs_blkatoff;
683 	ump->um_truncate = ffs_truncate;
684 	ump->um_update = ffs_update;
685 	ump->um_valloc = ffs_valloc;
686 	ump->um_vfree = ffs_vfree;
687 	ump->um_ifree = ffs_ifree;
688 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
689 	if (fs->fs_sbsize < SBLOCKSIZE)
690 		bp->b_flags |= B_INVAL | B_NOCACHE;
691 	brelse(bp);
692 	bp = NULL;
693 	fs = ump->um_fs;
694 	ffs_oldfscompat_read(fs, ump, sblockloc);
695 	fs->fs_ronly = ronly;
696 	size = fs->fs_cssize;
697 	blks = howmany(size, fs->fs_fsize);
698 	if (fs->fs_contigsumsize > 0)
699 		size += fs->fs_ncg * sizeof(int32_t);
700 	size += fs->fs_ncg * sizeof(u_int8_t);
701 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
702 	fs->fs_csp = space;
703 	for (i = 0; i < blks; i += fs->fs_frag) {
704 		size = fs->fs_bsize;
705 		if (i + fs->fs_frag > blks)
706 			size = (blks - i) * fs->fs_fsize;
707 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
708 		    cred, &bp)) != 0) {
709 			free(fs->fs_csp, M_UFSMNT);
710 			goto out;
711 		}
712 		bcopy(bp->b_data, space, (u_int)size);
713 		space = (char *)space + size;
714 		brelse(bp);
715 		bp = NULL;
716 	}
717 	if (fs->fs_contigsumsize > 0) {
718 		fs->fs_maxcluster = lp = space;
719 		for (i = 0; i < fs->fs_ncg; i++)
720 			*lp++ = fs->fs_contigsumsize;
721 		space = lp;
722 	}
723 	size = fs->fs_ncg * sizeof(u_int8_t);
724 	fs->fs_contigdirs = (u_int8_t *)space;
725 	bzero(fs->fs_contigdirs, size);
726 	fs->fs_active = NULL;
727 	mp->mnt_data = (qaddr_t)ump;
728 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
729 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
730 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
731 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
732 		vfs_getnewfsid(mp);
733 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
734 	mp->mnt_flag |= MNT_LOCAL;
735 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
736 #ifdef MAC
737 		mp->mnt_flag |= MNT_MULTILABEL;
738 #else
739 		printf(
740 "WARNING: %s: multilabel flag on fs but no MAC support\n",
741 		    fs->fs_fsmnt);
742 #endif
743 	if ((fs->fs_flags & FS_ACLS) != 0)
744 #ifdef UFS_ACL
745 		mp->mnt_flag |= MNT_ACLS;
746 #else
747 		printf(
748 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
749 		    fs->fs_fsmnt);
750 #endif
751 	ump->um_mountp = mp;
752 	ump->um_dev = dev;
753 	ump->um_devvp = devvp;
754 	ump->um_nindir = fs->fs_nindir;
755 	ump->um_bptrtodb = fs->fs_fsbtodb;
756 	ump->um_seqinc = fs->fs_frag;
757 	for (i = 0; i < MAXQUOTAS; i++)
758 		ump->um_quotas[i] = NULLVP;
759 #ifdef UFS_EXTATTR
760 	ufs_extattr_uepm_init(&ump->um_extattr);
761 #endif
762 	devvp->v_rdev->si_mountpoint = mp;
763 
764 	/*
765 	 * Set FS local "last mounted on" information (NULL pad)
766 	 */
767 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
768 			fs->fs_fsmnt,			/* copy area*/
769 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
770 			&strsize);			/* real size*/
771 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
772 
773 	if( mp->mnt_flag & MNT_ROOTFS) {
774 		/*
775 		 * Root mount; update timestamp in mount structure.
776 		 * this will be used by the common root mount code
777 		 * to update the system clock.
778 		 */
779 		mp->mnt_time = fs->fs_time;
780 	}
781 
782 	if (ronly == 0) {
783 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
784 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
785 			free(fs->fs_csp, M_UFSMNT);
786 			goto out;
787 		}
788 		if (fs->fs_snapinum[0] != 0)
789 			ffs_snapshot_mount(mp);
790 		fs->fs_fmod = 1;
791 		fs->fs_clean = 0;
792 		(void) ffs_sbupdate(ump, MNT_WAIT);
793 	}
794 #ifdef UFS_EXTATTR
795 #ifdef UFS_EXTATTR_AUTOSTART
796 	/*
797 	 *
798 	 * Auto-starting does the following:
799 	 *	- check for /.attribute in the fs, and extattr_start if so
800 	 *	- for each file in .attribute, enable that file with
801 	 * 	  an attribute of the same name.
802 	 * Not clear how to report errors -- probably eat them.
803 	 * This would all happen while the filesystem was busy/not
804 	 * available, so would effectively be "atomic".
805 	 */
806 	(void) ufs_extattr_autostart(mp, td);
807 #endif /* !UFS_EXTATTR_AUTOSTART */
808 #endif /* !UFS_EXTATTR */
809 	return (0);
810 out:
811 	devvp->v_rdev->si_mountpoint = NULL;
812 	if (bp)
813 		brelse(bp);
814 	/* XXX: see comment above VOP_OPEN. */
815 #ifdef notyet
816 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
817 #else
818 	(void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
819 #endif
820 	if (ump) {
821 		free(ump->um_fs, M_UFSMNT);
822 		free(ump, M_UFSMNT);
823 		mp->mnt_data = (qaddr_t)0;
824 	}
825 	return (error);
826 }
827 
828 #include <sys/sysctl.h>
829 int bigcgs = 0;
830 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
831 
832 /*
833  * Sanity checks for loading old filesystem superblocks.
834  * See ffs_oldfscompat_write below for unwound actions.
835  *
836  * XXX - Parts get retired eventually.
837  * Unfortunately new bits get added.
838  */
839 static void
840 ffs_oldfscompat_read(fs, ump, sblockloc)
841 	struct fs *fs;
842 	struct ufsmount *ump;
843 	ufs2_daddr_t sblockloc;
844 {
845 	off_t maxfilesize;
846 
847 	/*
848 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
849 	 */
850 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
851 		fs->fs_flags = fs->fs_old_flags;
852 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
853 		fs->fs_sblockloc = sblockloc;
854 	}
855 	/*
856 	 * If not yet done, update UFS1 superblock with new wider fields.
857 	 */
858 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
859 		fs->fs_maxbsize = fs->fs_bsize;
860 		fs->fs_time = fs->fs_old_time;
861 		fs->fs_size = fs->fs_old_size;
862 		fs->fs_dsize = fs->fs_old_dsize;
863 		fs->fs_csaddr = fs->fs_old_csaddr;
864 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
865 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
866 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
867 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
868 	}
869 	if (fs->fs_magic == FS_UFS1_MAGIC &&
870 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
871 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
872 		fs->fs_qbmask = ~fs->fs_bmask;
873 		fs->fs_qfmask = ~fs->fs_fmask;
874 	}
875 	if (fs->fs_magic == FS_UFS1_MAGIC) {
876 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
877 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
878 		if (fs->fs_maxfilesize > maxfilesize)
879 			fs->fs_maxfilesize = maxfilesize;
880 	}
881 	/* Compatibility for old filesystems */
882 	if (fs->fs_avgfilesize <= 0)
883 		fs->fs_avgfilesize = AVFILESIZ;
884 	if (fs->fs_avgfpdir <= 0)
885 		fs->fs_avgfpdir = AFPDIR;
886 	if (bigcgs) {
887 		fs->fs_save_cgsize = fs->fs_cgsize;
888 		fs->fs_cgsize = fs->fs_bsize;
889 	}
890 }
891 
892 /*
893  * Unwinding superblock updates for old filesystems.
894  * See ffs_oldfscompat_read above for details.
895  *
896  * XXX - Parts get retired eventually.
897  * Unfortunately new bits get added.
898  */
899 static void
900 ffs_oldfscompat_write(fs, ump)
901 	struct fs *fs;
902 	struct ufsmount *ump;
903 {
904 
905 	/*
906 	 * Copy back UFS2 updated fields that UFS1 inspects.
907 	 */
908 	if (fs->fs_magic == FS_UFS1_MAGIC) {
909 		fs->fs_old_time = fs->fs_time;
910 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
911 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
912 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
913 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
914 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
915 	}
916 	if (bigcgs) {
917 		fs->fs_cgsize = fs->fs_save_cgsize;
918 		fs->fs_save_cgsize = 0;
919 	}
920 }
921 
922 /*
923  * unmount system call
924  */
925 int
926 ffs_unmount(mp, mntflags, td)
927 	struct mount *mp;
928 	int mntflags;
929 	struct thread *td;
930 {
931 	struct ufsmount *ump = VFSTOUFS(mp);
932 	struct fs *fs;
933 	int error, flags;
934 
935 	flags = 0;
936 	if (mntflags & MNT_FORCE) {
937 		flags |= FORCECLOSE;
938 	}
939 #ifdef UFS_EXTATTR
940 	if ((error = ufs_extattr_stop(mp, td))) {
941 		if (error != EOPNOTSUPP)
942 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
943 			    error);
944 	} else {
945 		ufs_extattr_uepm_destroy(&ump->um_extattr);
946 	}
947 #endif
948 	if (mp->mnt_flag & MNT_SOFTDEP) {
949 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
950 			return (error);
951 	} else {
952 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
953 			return (error);
954 	}
955 	fs = ump->um_fs;
956 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
957 		printf("%s: unmount pending error: blocks %jd files %d\n",
958 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
959 		    fs->fs_pendinginodes);
960 		fs->fs_pendingblocks = 0;
961 		fs->fs_pendinginodes = 0;
962 	}
963 	if (fs->fs_ronly == 0) {
964 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
965 		error = ffs_sbupdate(ump, MNT_WAIT);
966 		if (error) {
967 			fs->fs_clean = 0;
968 			return (error);
969 		}
970 	}
971 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
972 
973 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
974 	/* XXX: see comment above VOP_OPEN. */
975 #ifdef notyet
976 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
977 	    NOCRED, td);
978 #else
979 	error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
980 #endif
981 	vrele(ump->um_devvp);
982 	free(fs->fs_csp, M_UFSMNT);
983 	free(fs, M_UFSMNT);
984 	free(ump, M_UFSMNT);
985 	mp->mnt_data = (qaddr_t)0;
986 	mp->mnt_flag &= ~MNT_LOCAL;
987 	return (error);
988 }
989 
990 /*
991  * Flush out all the files in a filesystem.
992  */
993 int
994 ffs_flushfiles(mp, flags, td)
995 	struct mount *mp;
996 	int flags;
997 	struct thread *td;
998 {
999 	struct ufsmount *ump;
1000 	int error;
1001 
1002 	ump = VFSTOUFS(mp);
1003 #ifdef QUOTA
1004 	if (mp->mnt_flag & MNT_QUOTA) {
1005 		int i;
1006 		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1007 		if (error)
1008 			return (error);
1009 		for (i = 0; i < MAXQUOTAS; i++) {
1010 			if (ump->um_quotas[i] == NULLVP)
1011 				continue;
1012 			quotaoff(td, mp, i);
1013 		}
1014 		/*
1015 		 * Here we fall through to vflush again to ensure
1016 		 * that we have gotten rid of all the system vnodes.
1017 		 */
1018 	}
1019 #endif
1020 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1021 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1022 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1023 			return (error);
1024 		ffs_snapshot_unmount(mp);
1025 		/*
1026 		 * Here we fall through to vflush again to ensure
1027 		 * that we have gotten rid of all the system vnodes.
1028 		 */
1029 	}
1030         /*
1031 	 * Flush all the files.
1032 	 */
1033 	if ((error = vflush(mp, 0, flags, td)) != 0)
1034 		return (error);
1035 	/*
1036 	 * Flush filesystem metadata.
1037 	 */
1038 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1039 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1040 	VOP_UNLOCK(ump->um_devvp, 0, td);
1041 	return (error);
1042 }
1043 
1044 /*
1045  * Get filesystem statistics.
1046  */
1047 int
1048 ffs_statfs(mp, sbp, td)
1049 	struct mount *mp;
1050 	struct statfs *sbp;
1051 	struct thread *td;
1052 {
1053 	struct ufsmount *ump;
1054 	struct fs *fs;
1055 
1056 	ump = VFSTOUFS(mp);
1057 	fs = ump->um_fs;
1058 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1059 		panic("ffs_statfs");
1060 	sbp->f_version = STATFS_VERSION;
1061 	sbp->f_bsize = fs->fs_fsize;
1062 	sbp->f_iosize = fs->fs_bsize;
1063 	sbp->f_blocks = fs->fs_dsize;
1064 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1065 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1066 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1067 	    dbtofsb(fs, fs->fs_pendingblocks);
1068 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1069 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1070 	sbp->f_namemax = NAME_MAX;
1071 	if (sbp != &mp->mnt_stat) {
1072 		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1073 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1074 		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1075 		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1076 		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1077 		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1078 		sbp->f_owner = mp->mnt_stat.f_owner;
1079 		sbp->f_fsid = mp->mnt_stat.f_fsid;
1080 		bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1081 			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1082 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1083 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1084 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1085 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1086 	}
1087 	return (0);
1088 }
1089 
1090 /*
1091  * Go through the disk queues to initiate sandbagged IO;
1092  * go through the inodes to write those that have been modified;
1093  * initiate the writing of the super block if it has been modified.
1094  *
1095  * Note: we are always called with the filesystem marked `MPBUSY'.
1096  */
1097 int
1098 ffs_sync(mp, waitfor, cred, td)
1099 	struct mount *mp;
1100 	int waitfor;
1101 	struct ucred *cred;
1102 	struct thread *td;
1103 {
1104 	struct vnode *nvp, *vp, *devvp;
1105 	struct inode *ip;
1106 	struct ufsmount *ump = VFSTOUFS(mp);
1107 	struct fs *fs;
1108 	int error, count, wait, lockreq, allerror = 0;
1109 
1110 	fs = ump->um_fs;
1111 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1112 		printf("fs = %s\n", fs->fs_fsmnt);
1113 		panic("ffs_sync: rofs mod");
1114 	}
1115 	/*
1116 	 * Write back each (modified) inode.
1117 	 */
1118 	wait = 0;
1119 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1120 	if (waitfor == MNT_WAIT) {
1121 		wait = 1;
1122 		lockreq = LK_EXCLUSIVE;
1123 	}
1124 	lockreq |= LK_INTERLOCK;
1125 	MNT_ILOCK(mp);
1126 loop:
1127 	MNT_VNODE_FOREACH(vp, mp, nvp) {
1128 		/*
1129 		 * Depend on the mntvnode_slock to keep things stable enough
1130 		 * for a quick test.  Since there might be hundreds of
1131 		 * thousands of vnodes, we cannot afford even a subroutine
1132 		 * call unless there's a good chance that we have work to do.
1133 		 */
1134 		VI_LOCK(vp);
1135 		if (vp->v_iflag & VI_XLOCK) {
1136 			VI_UNLOCK(vp);
1137 			continue;
1138 		}
1139 		ip = VTOI(vp);
1140 		if (vp->v_type == VNON || ((ip->i_flag &
1141 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1142 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1143 			VI_UNLOCK(vp);
1144 			continue;
1145 		}
1146 		MNT_IUNLOCK(mp);
1147 		if ((error = vget(vp, lockreq, td)) != 0) {
1148 			MNT_ILOCK(mp);
1149 			if (error == ENOENT)
1150 				goto loop;
1151 			continue;
1152 		}
1153 		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1154 			allerror = error;
1155 		VOP_UNLOCK(vp, 0, td);
1156 		vrele(vp);
1157 		MNT_ILOCK(mp);
1158 	}
1159 	MNT_IUNLOCK(mp);
1160 	/*
1161 	 * Force stale filesystem control information to be flushed.
1162 	 */
1163 	if (waitfor == MNT_WAIT) {
1164 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1165 			allerror = error;
1166 		/* Flushed work items may create new vnodes to clean */
1167 		if (allerror == 0 && count) {
1168 			MNT_ILOCK(mp);
1169 			goto loop;
1170 		}
1171 	}
1172 #ifdef QUOTA
1173 	qsync(mp);
1174 #endif
1175 	devvp = ump->um_devvp;
1176 	VI_LOCK(devvp);
1177 	if (waitfor != MNT_LAZY &&
1178 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1179 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1180 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1181 			allerror = error;
1182 		VOP_UNLOCK(devvp, 0, td);
1183 		if (allerror == 0 && waitfor == MNT_WAIT) {
1184 			MNT_ILOCK(mp);
1185 			goto loop;
1186 		}
1187 	} else
1188 		VI_UNLOCK(devvp);
1189 	/*
1190 	 * Write back modified superblock.
1191 	 */
1192 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1193 		allerror = error;
1194 	return (allerror);
1195 }
1196 
1197 int
1198 ffs_vget(mp, ino, flags, vpp)
1199 	struct mount *mp;
1200 	ino_t ino;
1201 	int flags;
1202 	struct vnode **vpp;
1203 {
1204 	struct thread *td = curthread; 		/* XXX */
1205 	struct fs *fs;
1206 	struct inode *ip;
1207 	struct ufsmount *ump;
1208 	struct buf *bp;
1209 	struct vnode *vp;
1210 	struct cdev *dev;
1211 	int error;
1212 
1213 	ump = VFSTOUFS(mp);
1214 	dev = ump->um_dev;
1215 
1216 	/*
1217 	 * We do not lock vnode creation as it is believed to be too
1218 	 * expensive for such rare case as simultaneous creation of vnode
1219 	 * for same ino by different processes. We just allow them to race
1220 	 * and check later to decide who wins. Let the race begin!
1221 	 */
1222 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1223 		return (error);
1224 	if (*vpp != NULL)
1225 		return (0);
1226 
1227 	/*
1228 	 * If this MALLOC() is performed after the getnewvnode()
1229 	 * it might block, leaving a vnode with a NULL v_data to be
1230 	 * found by ffs_sync() if a sync happens to fire right then,
1231 	 * which will cause a panic because ffs_sync() blindly
1232 	 * dereferences vp->v_data (as well it should).
1233 	 */
1234 	ip = uma_zalloc(uma_inode, M_WAITOK);
1235 
1236 	/* Allocate a new vnode/inode. */
1237 	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1238 	if (error) {
1239 		*vpp = NULL;
1240 		uma_zfree(uma_inode, ip);
1241 		return (error);
1242 	}
1243 	bzero((caddr_t)ip, sizeof(struct inode));
1244 	/*
1245 	 * FFS supports recursive locking.
1246 	 */
1247 	fs = ump->um_fs;
1248 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1249 	vp->v_data = ip;
1250 	vp->v_bsize = fs->fs_bsize;
1251 	ip->i_vnode = vp;
1252 	ip->i_ump = ump;
1253 	ip->i_fs = fs;
1254 	ip->i_dev = dev;
1255 	ip->i_number = ino;
1256 #ifdef QUOTA
1257 	{
1258 		int i;
1259 		for (i = 0; i < MAXQUOTAS; i++)
1260 			ip->i_dquot[i] = NODQUOT;
1261 	}
1262 #endif
1263 	/*
1264 	 * Exclusively lock the vnode before adding to hash. Note, that we
1265 	 * must not release nor downgrade the lock (despite flags argument
1266 	 * says) till it is fully initialized.
1267 	 */
1268 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1269 
1270 	/*
1271 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1272 	 * duplicate of vnode being created and add it to the hash. If a
1273 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1274 	 */
1275 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1276 		vput(vp);
1277 		*vpp = NULL;
1278 		return (error);
1279 	}
1280 
1281 	/* We lost the race, then throw away our vnode and return existing */
1282 	if (*vpp != NULL) {
1283 		vput(vp);
1284 		return (0);
1285 	}
1286 
1287 	/* Read in the disk contents for the inode, copy into the inode. */
1288 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1289 	    (int)fs->fs_bsize, NOCRED, &bp);
1290 	if (error) {
1291 		/*
1292 		 * The inode does not contain anything useful, so it would
1293 		 * be misleading to leave it on its hash chain. With mode
1294 		 * still zero, it will be unlinked and returned to the free
1295 		 * list by vput().
1296 		 */
1297 		brelse(bp);
1298 		vput(vp);
1299 		*vpp = NULL;
1300 		return (error);
1301 	}
1302 	if (ip->i_ump->um_fstype == UFS1)
1303 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1304 	else
1305 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1306 	ffs_load_inode(bp, ip, fs, ino);
1307 	if (DOINGSOFTDEP(vp))
1308 		softdep_load_inodeblock(ip);
1309 	else
1310 		ip->i_effnlink = ip->i_nlink;
1311 	bqrelse(bp);
1312 
1313 	/*
1314 	 * Initialize the vnode from the inode, check for aliases.
1315 	 * Note that the underlying vnode may have changed.
1316 	 */
1317 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1318 	if (error) {
1319 		vput(vp);
1320 		*vpp = NULL;
1321 		return (error);
1322 	}
1323 	/*
1324 	 * Finish inode initialization.
1325 	 */
1326 	VREF(ip->i_devvp);
1327 	/*
1328 	 * Set up a generation number for this inode if it does not
1329 	 * already have one. This should only happen on old filesystems.
1330 	 */
1331 	if (ip->i_gen == 0) {
1332 		ip->i_gen = arc4random() / 2 + 1;
1333 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1334 			ip->i_flag |= IN_MODIFIED;
1335 			DIP(ip, i_gen) = ip->i_gen;
1336 		}
1337 	}
1338 	/*
1339 	 * Ensure that uid and gid are correct. This is a temporary
1340 	 * fix until fsck has been changed to do the update.
1341 	 */
1342 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1343 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1344 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1345 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1346 	}						/* XXX */
1347 
1348 #ifdef MAC
1349 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1350 		/*
1351 		 * If this vnode is already allocated, and we're running
1352 		 * multi-label, attempt to perform a label association
1353 		 * from the extended attributes on the inode.
1354 		 */
1355 		error = mac_associate_vnode_extattr(mp, vp);
1356 		if (error) {
1357 			/* ufs_inactive will release ip->i_devvp ref. */
1358 			vput(vp);
1359 			*vpp = NULL;
1360 			return (error);
1361 		}
1362 	}
1363 #endif
1364 
1365 	*vpp = vp;
1366 	return (0);
1367 }
1368 
1369 /*
1370  * File handle to vnode
1371  *
1372  * Have to be really careful about stale file handles:
1373  * - check that the inode number is valid
1374  * - call ffs_vget() to get the locked inode
1375  * - check for an unallocated inode (i_mode == 0)
1376  * - check that the given client host has export rights and return
1377  *   those rights via. exflagsp and credanonp
1378  */
1379 int
1380 ffs_fhtovp(mp, fhp, vpp)
1381 	struct mount *mp;
1382 	struct fid *fhp;
1383 	struct vnode **vpp;
1384 {
1385 	struct ufid *ufhp;
1386 	struct fs *fs;
1387 
1388 	ufhp = (struct ufid *)fhp;
1389 	fs = VFSTOUFS(mp)->um_fs;
1390 	if (ufhp->ufid_ino < ROOTINO ||
1391 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1392 		return (ESTALE);
1393 	return (ufs_fhtovp(mp, ufhp, vpp));
1394 }
1395 
1396 /*
1397  * Vnode pointer to File handle
1398  */
1399 /* ARGSUSED */
1400 int
1401 ffs_vptofh(vp, fhp)
1402 	struct vnode *vp;
1403 	struct fid *fhp;
1404 {
1405 	struct inode *ip;
1406 	struct ufid *ufhp;
1407 
1408 	ip = VTOI(vp);
1409 	ufhp = (struct ufid *)fhp;
1410 	ufhp->ufid_len = sizeof(struct ufid);
1411 	ufhp->ufid_ino = ip->i_number;
1412 	ufhp->ufid_gen = ip->i_gen;
1413 	return (0);
1414 }
1415 
/*
 * Initialize the filesystem.
 *
 * Runs softdep_initialize() and then ufs_init(); the value returned by
 * ufs_init() is the result.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1427 
/*
 * Undo the work of ffs_init().
 *
 * Teardown runs in the reverse order of ffs_init(): ufs_uninit() first,
 * then softdep_uninitialize().  The value returned by ufs_uninit() is
 * the result; softdep_uninitialize() runs regardless of it.
 */
static int
ffs_uninit(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1441 
1442 /*
1443  * Write a superblock and associated information back to disk.
1444  */
1445 static int
1446 ffs_sbupdate(mp, waitfor)
1447 	struct ufsmount *mp;
1448 	int waitfor;
1449 {
1450 	struct fs *fs = mp->um_fs;
1451 	struct buf *bp;
1452 	int blks;
1453 	void *space;
1454 	int i, size, error, allerror = 0;
1455 
1456 	if (fs->fs_ronly == 1 &&
1457 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1458 	    (MNT_RDONLY | MNT_UPDATE))
1459 		panic("ffs_sbupdate: write read-only filesystem");
1460 	/*
1461 	 * First write back the summary information.
1462 	 */
1463 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1464 	space = fs->fs_csp;
1465 	for (i = 0; i < blks; i += fs->fs_frag) {
1466 		size = fs->fs_bsize;
1467 		if (i + fs->fs_frag > blks)
1468 			size = (blks - i) * fs->fs_fsize;
1469 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1470 		    size, 0, 0, 0);
1471 		bcopy(space, bp->b_data, (u_int)size);
1472 		space = (char *)space + size;
1473 		if (waitfor != MNT_WAIT)
1474 			bawrite(bp);
1475 		else if ((error = bwrite(bp)) != 0)
1476 			allerror = error;
1477 	}
1478 	/*
1479 	 * Now write back the superblock itself. If any errors occurred
1480 	 * up to this point, then fail so that the superblock avoids
1481 	 * being written out as clean.
1482 	 */
1483 	if (allerror)
1484 		return (allerror);
1485 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1486 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1487 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1488 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1489 		fs->fs_sblockloc = SBLOCK_UFS1;
1490 	}
1491 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1492 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1493 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1494 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1495 		fs->fs_sblockloc = SBLOCK_UFS2;
1496 	}
1497 	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1498 	    0, 0, 0);
1499 	fs->fs_fmod = 0;
1500 	fs->fs_time = time_second;
1501 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1502 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1503 	if (waitfor != MNT_WAIT)
1504 		bawrite(bp);
1505 	else if ((error = bwrite(bp)) != 0)
1506 		allerror = error;
1507 	return (allerror);
1508 }
1509 
/*
 * Extended attribute control entry point.
 *
 * Passes every argument through to ufs_extattrctl() when the kernel is
 * built with UFS_EXTATTR, and to vfs_stdextattrctl() otherwise, and
 * returns that routine's result.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1523 
1524 static void
1525 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1526 {
1527 
1528 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1529 		uma_zfree(uma_ufs1, ip->i_din1);
1530 	else if (ip->i_din2 != NULL)
1531 		uma_zfree(uma_ufs2, ip->i_din2);
1532 	uma_zfree(uma_inode, ip);
1533 }
1534