xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 8ab2f5ecc596131f6ca790d6ae35540c06ed7985)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_mac.h"
36 #include "opt_quota.h"
37 #include "opt_ufs.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/mac.h>
45 #include <sys/vnode.h>
46 #include <sys/mount.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/conf.h>
50 #include <sys/fcntl.h>
51 #include <sys/disk.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 
55 #include <ufs/ufs/extattr.h>
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 #include <vm/vm.h>
65 #include <vm/uma.h>
66 #include <vm/vm_page.h>
67 
/*
 * UMA zones used by this filesystem.  ffs_omount() creates them on the
 * first mount attempt, sized for struct inode, struct ufs1_dinode and
 * struct ufs2_dinode respectively.
 */
68 uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
69 
/* Forward declarations of the functions private to this file. */
70 static int	ffs_sbupdate(struct ufsmount *, int);
71 static int	ffs_reload(struct mount *, struct thread *);
72 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
73 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
74 		    ufs2_daddr_t);
75 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
76 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
77 static vfs_init_t ffs_init;
78 static vfs_uninit_t ffs_uninit;
79 static vfs_extattrctl_t ffs_extattrctl;
80 static vfs_omount_t ffs_omount;
81 
/*
 * VFS operations table for this filesystem.  Entries that are not
 * declared above are expected to come from the included ufs/ffs headers.
 */
82 static struct vfsops ufs_vfsops = {
83 	.vfs_extattrctl =	ffs_extattrctl,
84 	.vfs_fhtovp =		ffs_fhtovp,
85 	.vfs_init =		ffs_init,
86 	.vfs_omount =		ffs_omount,
87 	.vfs_quotactl =		ufs_quotactl,
88 	.vfs_root =		ufs_root,
89 	.vfs_start =		ufs_start,
90 	.vfs_statfs =		ffs_statfs,
91 	.vfs_sync =		ffs_sync,
92 	.vfs_uninit =		ffs_uninit,
93 	.vfs_unmount =		ffs_unmount,
94 	.vfs_vget =		ffs_vget,
95 	.vfs_vptofh =		ffs_vptofh,
96 };
97 
/* NOTE(review): presumably registers the table above under the name "ufs". */
98 VFS_SET(ufs_vfsops, ufs, 0);
99 
100 /*
101  * ffs_omount
102  *
103  * Called when mounting local physical media
104  *
105  * PARAMETERS:
106  *		mountroot
107  *			mp	mount point structure
108  *			path	NULL (flag for root mount!!!)
109  *			data	<unused>
110  *			ndp	<unused>
111  *			p	process (user credentials check [statfs])
112  *
113  *		mount
114  *			mp	mount point structure
115  *			path	path to mount point
116  *			data	pointer to argument struct in user space
117  *			ndp	mount point namei() return (used for
118  *				credentials on reload), reused to look
119  *				up block device.
120  *			p	process (user credentials check)
121  *
122  * RETURNS:	0	Success
123  *		!0	error number (errno.h)
124  *
125  * LOCK STATE:
126  *
127  *		ENTRY
128  *			mount point is locked
129  *		EXIT
130  *			mount point is locked
131  *
132  * NOTES:
133  *		A NULL path can be used for a flag since the mount
134  *		system call will fail with EFAULT in copyinstr in
135  *		namei() if it is a genuine NULL from the user.
136  */
137 static int
138 ffs_omount(struct mount *mp, char *path, caddr_t data, struct thread *td)
139 {
140 	size_t size;
141 	struct vnode *devvp, *rootvp;
142 	struct ufs_args args;
143 	struct ufsmount *ump = 0;
144 	struct fs *fs;
145 	int error, flags;
146 	mode_t accessmode;
147 	struct nameidata ndp;
148 
149 	if (uma_inode == NULL) {
150 		uma_inode = uma_zcreate("FFS inode",
151 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
152 		    UMA_ALIGN_PTR, 0);
153 		uma_ufs1 = uma_zcreate("FFS1 dinode",
154 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
155 		    UMA_ALIGN_PTR, 0);
156 		uma_ufs2 = uma_zcreate("FFS2 dinode",
157 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
158 		    UMA_ALIGN_PTR, 0);
159 	}
160 	/*
161 	 * Use NULL path to indicate we are mounting the root filesystem.
162 	 */
163 	if (path == NULL) {
164 		if ((error = bdevvp(rootdev, &rootvp))) {
165 			printf("ffs_mountroot: can't find rootvp\n");
166 			return (error);
167 		}
168 
169 		if ((error = ffs_mountfs(rootvp, mp, td)) != 0)
170 			return (error);
171 		return (0);
172 	}
173 
174 	/*
175 	 * Mounting non-root filesystem or updating a filesystem
176 	 */
177 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
178 		return (error);
179 
180 	/*
181 	 * If updating, check whether changing from read-only to
182 	 * read/write; if there is no device name, that's all we do.
183 	 */
184 	if (mp->mnt_flag & MNT_UPDATE) {
185 		ump = VFSTOUFS(mp);
186 		fs = ump->um_fs;
187 		devvp = ump->um_devvp;
188 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
189 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
190 				return (error);
191 			/*
192 			 * Flush any dirty data.
193 			 */
194 			if ((error = VFS_SYNC(mp, MNT_WAIT,
195 			    td->td_ucred, td)) != 0) {
196 				vn_finished_write(mp);
197 				return (error);
198 			}
199 			/*
200 			 * Check for and optionally get rid of files open
201 			 * for writing.
202 			 */
203 			flags = WRITECLOSE;
204 			if (mp->mnt_flag & MNT_FORCE)
205 				flags |= FORCECLOSE;
206 			if (mp->mnt_flag & MNT_SOFTDEP) {
207 				error = softdep_flushfiles(mp, flags, td);
208 			} else {
209 				error = ffs_flushfiles(mp, flags, td);
210 			}
211 			if (error) {
212 				vn_finished_write(mp);
213 				return (error);
214 			}
215 			if (fs->fs_pendingblocks != 0 ||
216 			    fs->fs_pendinginodes != 0) {
217 				printf("%s: %s: blocks %jd files %d\n",
218 				    fs->fs_fsmnt, "update error",
219 				    (intmax_t)fs->fs_pendingblocks,
220 				    fs->fs_pendinginodes);
221 				fs->fs_pendingblocks = 0;
222 				fs->fs_pendinginodes = 0;
223 			}
224 			fs->fs_ronly = 1;
225 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
226 				fs->fs_clean = 1;
227 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
228 				fs->fs_ronly = 0;
229 				fs->fs_clean = 0;
230 				vn_finished_write(mp);
231 				return (error);
232 			}
233 			vn_finished_write(mp);
234 		}
235 		if ((mp->mnt_flag & MNT_RELOAD) &&
236 		    (error = ffs_reload(mp, td)) != 0)
237 			return (error);
238 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
239 			/*
240 			 * If upgrade to read-write by non-root, then verify
241 			 * that user has necessary permissions on the device.
242 			 */
243 			if (suser(td)) {
244 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
245 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
246 				    td->td_ucred, td)) != 0) {
247 					VOP_UNLOCK(devvp, 0, td);
248 					return (error);
249 				}
250 				VOP_UNLOCK(devvp, 0, td);
251 			}
252 			fs->fs_flags &= ~FS_UNCLEAN;
253 			if (fs->fs_clean == 0) {
254 				fs->fs_flags |= FS_UNCLEAN;
255 				if ((mp->mnt_flag & MNT_FORCE) ||
256 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
257 				     (fs->fs_flags & FS_DOSOFTDEP))) {
258 					printf("WARNING: %s was not %s\n",
259 					   fs->fs_fsmnt, "properly dismounted");
260 				} else {
261 					printf(
262 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
263 					    fs->fs_fsmnt);
264 					return (EPERM);
265 				}
266 			}
267 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
268 				return (error);
269 			fs->fs_ronly = 0;
270 			fs->fs_clean = 0;
271 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
272 				vn_finished_write(mp);
273 				return (error);
274 			}
275 			/* check to see if we need to start softdep */
276 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
277 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
278 				vn_finished_write(mp);
279 				return (error);
280 			}
281 			if (fs->fs_snapinum[0] != 0)
282 				ffs_snapshot_mount(mp);
283 			vn_finished_write(mp);
284 		}
285 		/*
286 		 * Soft updates is incompatible with "async",
287 		 * so if we are doing softupdates stop the user
288 		 * from setting the async flag in an update.
289 		 * Softdep_mount() clears it in an initial mount
290 		 * or ro->rw remount.
291 		 */
292 		if (mp->mnt_flag & MNT_SOFTDEP)
293 			mp->mnt_flag &= ~MNT_ASYNC;
294 		/*
295 		 * If not updating name, process export requests.
296 		 */
297 		if (args.fspec == 0)
298 			return (vfs_export(mp, &args.export));
299 		/*
300 		 * If this is a snapshot request, take the snapshot.
301 		 */
302 		if (mp->mnt_flag & MNT_SNAPSHOT)
303 			return (ffs_snapshot(mp, args.fspec));
304 	}
305 
306 	/*
307 	 * Not an update, or updating the name: look up the name
308 	 * and verify that it refers to a sensible disk device.
309 	 */
310 	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, td);
311 	if ((error = namei(&ndp)) != 0)
312 		return (error);
313 	NDFREE(&ndp, NDF_ONLY_PNBUF);
314 	devvp = ndp.ni_vp;
315 	if (!vn_isdisk(devvp, &error)) {
316 		vrele(devvp);
317 		return (error);
318 	}
319 
320 	/*
321 	 * If mount by non-root, then verify that user has necessary
322 	 * permissions on the device.
323 	 */
324 	if (suser(td)) {
325 		accessmode = VREAD;
326 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
327 			accessmode |= VWRITE;
328 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
329 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
330 			vput(devvp);
331 			return (error);
332 		}
333 		VOP_UNLOCK(devvp, 0, td);
334 	}
335 
336 	if (mp->mnt_flag & MNT_UPDATE) {
337 		/*
338 		 * Update only
339 		 *
340 		 * If it's not the same vnode, or at least the same device
341 		 * then it's not correct.
342 		 */
343 
344 		if (devvp != ump->um_devvp &&
345 		    devvp->v_rdev != ump->um_devvp->v_rdev)
346 			error = EINVAL;	/* needs translation */
347 		vrele(devvp);
348 		if (error)
349 			return (error);
350 	} else {
351 		/*
352 		 * New mount
353 		 *
354 		 * We need the name for the mount point (also used for
355 		 * "last mounted on") copied in. If an error occurs,
356 		 * the mount point is discarded by the upper level code.
357 		 * Note that vfs_mount() populates f_mntonname for us.
358 		 */
359 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
360 			vrele(devvp);
361 			return (error);
362 		}
363 	}
364 	/*
365 	 * Save "mounted from" device name info for mount point (NULL pad).
366 	 */
367 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
368 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
369 	return (0);
370 }
371 
372 /*
373  * Reload all incore data for a filesystem (used after running fsck on
374  * the root filesystem and finding things to fix). The filesystem must
375  * be mounted read-only.
376  *
377  * Things to do to update the mount:
378  *	1) invalidate all cached meta-data.
379  *	2) re-read superblock from disk.
380  *	3) re-read summary information from disk.
381  *	4) invalidate all inactive vnodes.
382  *	5) invalidate all cached file data.
383  *	6) re-read inode data for all active vnodes.
384  */
385 static int
386 ffs_reload(struct mount *mp, struct thread *td)
387 {
388 	struct vnode *vp, *nvp, *devvp;
389 	struct inode *ip;
390 	void *space;
391 	struct buf *bp;
392 	struct fs *fs, *newfs;
393 	ufs2_daddr_t sblockloc;
394 	int i, blks, size, error;
395 	int32_t *lp;
396 
397 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
398 		return (EINVAL);
399 	/*
400 	 * Step 1: invalidate all cached meta-data.
401 	 */
402 	devvp = VFSTOUFS(mp)->um_devvp;
403 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
404 	if (vinvalbuf(devvp, 0, td->td_ucred, td, 0, 0) != 0)
405 		panic("ffs_reload: dirty1");
406 	/*
407 	 * Only VMIO the backing device if the backing device is a real
408 	 * disk device.  See ffs_mountfs() for more details.
409 	 */
410 	if (vn_isdisk(devvp, NULL))
411 		vfs_object_create(devvp, td, td->td_ucred);
412 	VOP_UNLOCK(devvp, 0, td);
413 
414 	/*
415 	 * Step 2: re-read superblock from disk.
416 	 */
417 	fs = VFSTOUFS(mp)->um_fs;
418 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
419 	    NOCRED, &bp)) != 0)
420 		return (error);
421 	newfs = (struct fs *)bp->b_data;
422 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
423 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
424 	    newfs->fs_bsize > MAXBSIZE ||
425 	    newfs->fs_bsize < sizeof(struct fs)) {
426 			brelse(bp);
427 			return (EIO);		/* XXX needs translation */
428 	}
429 	/*
430 	 * Copy pointer fields back into superblock before copying in	XXX
431 	 * new superblock. These should really be in the ufsmount.	XXX
432 	 * Note that important parameters (eg fs_ncg) are unchanged.
433 	 */
434 	newfs->fs_csp = fs->fs_csp;
435 	newfs->fs_maxcluster = fs->fs_maxcluster;
436 	newfs->fs_contigdirs = fs->fs_contigdirs;
437 	newfs->fs_active = fs->fs_active;
438 	/* The file system is still read-only. */
439 	newfs->fs_ronly = 1;
440 	sblockloc = fs->fs_sblockloc;
441 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
442 	brelse(bp);
443 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
444 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
445 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
446 		printf("%s: reload pending error: blocks %jd files %d\n",
447 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
448 		    fs->fs_pendinginodes);
449 		fs->fs_pendingblocks = 0;
450 		fs->fs_pendinginodes = 0;
451 	}
452 
453 	/*
454 	 * Step 3: re-read summary information from disk.
455 	 */
456 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
457 	space = fs->fs_csp;
458 	for (i = 0; i < blks; i += fs->fs_frag) {
459 		size = fs->fs_bsize;
460 		if (i + fs->fs_frag > blks)
461 			size = (blks - i) * fs->fs_fsize;
462 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
463 		    NOCRED, &bp);
464 		if (error)
465 			return (error);
466 		bcopy(bp->b_data, space, (u_int)size);
467 		space = (char *)space + size;
468 		brelse(bp);
469 	}
470 	/*
471 	 * We no longer know anything about clusters per cylinder group.
472 	 */
473 	if (fs->fs_contigsumsize > 0) {
474 		lp = fs->fs_maxcluster;
475 		for (i = 0; i < fs->fs_ncg; i++)
476 			*lp++ = fs->fs_contigsumsize;
477 	}
478 
479 loop:
480 	MNT_ILOCK(mp);
481 	MNT_VNODE_FOREACH(vp, mp, nvp) {
482 		VI_LOCK(vp);
483 		if (vp->v_iflag & VI_XLOCK) {
484 			VI_UNLOCK(vp);
485 			continue;
486 		}
487 		MNT_IUNLOCK(mp);
488 		/*
489 		 * Step 4: invalidate all inactive vnodes.
490 		 */
491 		if (vp->v_usecount == 0) {
492 			vgonel(vp, td);
493 			goto loop;
494 		}
495 		/*
496 		 * Step 5: invalidate all cached file data.
497 		 */
498 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
499 			goto loop;
500 		}
501 		if (vinvalbuf(vp, 0, td->td_ucred, td, 0, 0))
502 			panic("ffs_reload: dirty2");
503 		/*
504 		 * Step 6: re-read inode data for all active vnodes.
505 		 */
506 		ip = VTOI(vp);
507 		error =
508 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
509 		    (int)fs->fs_bsize, NOCRED, &bp);
510 		if (error) {
511 			VOP_UNLOCK(vp, 0, td);
512 			vrele(vp);
513 			return (error);
514 		}
515 		ffs_load_inode(bp, ip, fs, ip->i_number);
516 		ip->i_effnlink = ip->i_nlink;
517 		brelse(bp);
518 		VOP_UNLOCK(vp, 0, td);
519 		vrele(vp);
520 		MNT_ILOCK(mp);
521 	}
522 	MNT_IUNLOCK(mp);
523 	return (0);
524 }
525 
526 /*
527  * Possible superblock locations ordered from most to least likely.
528  */
529 static int sblock_try[] = SBLOCKSEARCH;
530 
531 /*
532  * Common code for mount and mountroot
533  */
534 static int
535 ffs_mountfs(devvp, mp, td)
536 	struct vnode *devvp;
537 	struct mount *mp;
538 	struct thread *td;
539 {
540 	struct ufsmount *ump;
541 	struct buf *bp;
542 	struct fs *fs;
543 	struct cdev *dev;
544 	void *space;
545 	ufs2_daddr_t sblockloc;
546 	int error, i, blks, size, ronly;
547 	int32_t *lp;
548 	struct ucred *cred;
549 	size_t strsize;
550 
551 	dev = devvp->v_rdev;
552 	cred = td ? td->td_ucred : NOCRED;
553 	/*
554 	 * Disallow multiple mounts of the same device.
555 	 * Disallow mounting of a device that is currently in use
556 	 * (except for root, which might share swap device for miniroot).
557 	 * Flush out any old buffers remaining from a previous use.
558 	 */
559 	error = vfs_mountedon(devvp);
560 	if (error)
561 		return (error);
562 	if (vcount(devvp) > 1)
563 		return (EBUSY);
564 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
565 	error = vinvalbuf(devvp, V_SAVE, cred, td, 0, 0);
566 	if (error) {
567 		VOP_UNLOCK(devvp, 0, td);
568 		return (error);
569 	}
570 
571 	/*
572 	 * Only VMIO the backing device if the backing device is a real
573 	 * disk device.
574 	 * Note that it is optional that the backing device be VMIOed.  This
575 	 * increases the opportunity for metadata caching.
576 	 */
577 	if (vn_isdisk(devvp, NULL))
578 		vfs_object_create(devvp, td, cred);
579 
580 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
581 	/*
582 	 * XXX: open the device with read and write access even if only
583 	 * read access is needed now.  Write access is needed if the
584 	 * filesystem is ever mounted read/write, and we don't change the
585 	 * access mode for remounts.
586 	 */
587 #ifdef notyet
588 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, td, -1);
589 #else
590 	error = VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, td, -1);
591 #endif
592 	VOP_UNLOCK(devvp, 0, td);
593 	if (error)
594 		return (error);
595 	if (devvp->v_rdev->si_iosize_max != 0)
596 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
597 	if (mp->mnt_iosize_max > MAXPHYS)
598 		mp->mnt_iosize_max = MAXPHYS;
599 
600 	bp = NULL;
601 	ump = NULL;
602 	fs = NULL;
603 	sblockloc = 0;
604 	/*
605 	 * Try reading the superblock in each of its possible locations.
606 	 */
607 	for (i = 0; sblock_try[i] != -1; i++) {
608 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
609 		    cred, &bp)) != 0)
610 			goto out;
611 		fs = (struct fs *)bp->b_data;
612 		sblockloc = sblock_try[i];
613 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
614 		     (fs->fs_magic == FS_UFS2_MAGIC &&
615 		      (fs->fs_sblockloc == sblockloc ||
616 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
617 		    fs->fs_bsize <= MAXBSIZE &&
618 		    fs->fs_bsize >= sizeof(struct fs))
619 			break;
620 		brelse(bp);
621 		bp = NULL;
622 	}
623 	if (sblock_try[i] == -1) {
624 		error = EINVAL;		/* XXX needs translation */
625 		goto out;
626 	}
627 	fs->fs_fmod = 0;
628 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
629 	fs->fs_flags &= ~FS_UNCLEAN;
630 	if (fs->fs_clean == 0) {
631 		fs->fs_flags |= FS_UNCLEAN;
632 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
633 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
634 		     (fs->fs_flags & FS_DOSOFTDEP))) {
635 			printf(
636 "WARNING: %s was not properly dismounted\n",
637 			    fs->fs_fsmnt);
638 		} else {
639 			printf(
640 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
641 			    fs->fs_fsmnt);
642 			error = EPERM;
643 			goto out;
644 		}
645 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
646 		    (mp->mnt_flag & MNT_FORCE)) {
647 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
648 			    (intmax_t)fs->fs_pendingblocks,
649 			    fs->fs_pendinginodes);
650 			fs->fs_pendingblocks = 0;
651 			fs->fs_pendinginodes = 0;
652 		}
653 	}
654 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
655 		printf("%s: mount pending error: blocks %jd files %d\n",
656 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
657 		    fs->fs_pendinginodes);
658 		fs->fs_pendingblocks = 0;
659 		fs->fs_pendinginodes = 0;
660 	}
661 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
662 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
663 	    M_WAITOK);
664 	if (fs->fs_magic == FS_UFS1_MAGIC) {
665 		ump->um_fstype = UFS1;
666 		ump->um_balloc = ffs_balloc_ufs1;
667 	} else {
668 		ump->um_fstype = UFS2;
669 		ump->um_balloc = ffs_balloc_ufs2;
670 	}
671 	ump->um_blkatoff = ffs_blkatoff;
672 	ump->um_truncate = ffs_truncate;
673 	ump->um_update = ffs_update;
674 	ump->um_valloc = ffs_valloc;
675 	ump->um_vfree = ffs_vfree;
676 	ump->um_ifree = ffs_ifree;
677 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
678 	if (fs->fs_sbsize < SBLOCKSIZE)
679 		bp->b_flags |= B_INVAL | B_NOCACHE;
680 	brelse(bp);
681 	bp = NULL;
682 	fs = ump->um_fs;
683 	ffs_oldfscompat_read(fs, ump, sblockloc);
684 	fs->fs_ronly = ronly;
685 	size = fs->fs_cssize;
686 	blks = howmany(size, fs->fs_fsize);
687 	if (fs->fs_contigsumsize > 0)
688 		size += fs->fs_ncg * sizeof(int32_t);
689 	size += fs->fs_ncg * sizeof(u_int8_t);
690 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
691 	fs->fs_csp = space;
692 	for (i = 0; i < blks; i += fs->fs_frag) {
693 		size = fs->fs_bsize;
694 		if (i + fs->fs_frag > blks)
695 			size = (blks - i) * fs->fs_fsize;
696 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
697 		    cred, &bp)) != 0) {
698 			free(fs->fs_csp, M_UFSMNT);
699 			goto out;
700 		}
701 		bcopy(bp->b_data, space, (u_int)size);
702 		space = (char *)space + size;
703 		brelse(bp);
704 		bp = NULL;
705 	}
706 	if (fs->fs_contigsumsize > 0) {
707 		fs->fs_maxcluster = lp = space;
708 		for (i = 0; i < fs->fs_ncg; i++)
709 			*lp++ = fs->fs_contigsumsize;
710 		space = lp;
711 	}
712 	size = fs->fs_ncg * sizeof(u_int8_t);
713 	fs->fs_contigdirs = (u_int8_t *)space;
714 	bzero(fs->fs_contigdirs, size);
715 	fs->fs_active = NULL;
716 	mp->mnt_data = (qaddr_t)ump;
717 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
718 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
719 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
720 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
721 		vfs_getnewfsid(mp);
722 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
723 	mp->mnt_flag |= MNT_LOCAL;
724 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
725 #ifdef MAC
726 		mp->mnt_flag |= MNT_MULTILABEL;
727 #else
728 		printf(
729 "WARNING: %s: multilabel flag on fs but no MAC support\n",
730 		    fs->fs_fsmnt);
731 #endif
732 	if ((fs->fs_flags & FS_ACLS) != 0)
733 #ifdef UFS_ACL
734 		mp->mnt_flag |= MNT_ACLS;
735 #else
736 		printf(
737 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
738 		    fs->fs_fsmnt);
739 #endif
740 	ump->um_mountp = mp;
741 	ump->um_dev = dev;
742 	ump->um_devvp = devvp;
743 	ump->um_nindir = fs->fs_nindir;
744 	ump->um_bptrtodb = fs->fs_fsbtodb;
745 	ump->um_seqinc = fs->fs_frag;
746 	for (i = 0; i < MAXQUOTAS; i++)
747 		ump->um_quotas[i] = NULLVP;
748 #ifdef UFS_EXTATTR
749 	ufs_extattr_uepm_init(&ump->um_extattr);
750 #endif
751 	devvp->v_rdev->si_mountpoint = mp;
752 
753 	/*
754 	 * Set FS local "last mounted on" information (NULL pad)
755 	 */
756 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
757 			fs->fs_fsmnt,			/* copy area*/
758 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
759 			&strsize);			/* real size*/
760 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
761 
762 	if( mp->mnt_flag & MNT_ROOTFS) {
763 		/*
764 		 * Root mount; update timestamp in mount structure.
765 		 * this will be used by the common root mount code
766 		 * to update the system clock.
767 		 */
768 		mp->mnt_time = fs->fs_time;
769 	}
770 
771 	if (ronly == 0) {
772 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
773 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
774 			free(fs->fs_csp, M_UFSMNT);
775 			goto out;
776 		}
777 		if (fs->fs_snapinum[0] != 0)
778 			ffs_snapshot_mount(mp);
779 		fs->fs_fmod = 1;
780 		fs->fs_clean = 0;
781 		(void) ffs_sbupdate(ump, MNT_WAIT);
782 	}
783 	/*
784 	 * Initialize filesystem stat information in mount struct.
785 	 */
786 	(void)VFS_STATFS(mp, &mp->mnt_stat, td);
787 #ifdef UFS_EXTATTR
788 #ifdef UFS_EXTATTR_AUTOSTART
789 	/*
790 	 *
791 	 * Auto-starting does the following:
792 	 *	- check for /.attribute in the fs, and extattr_start if so
793 	 *	- for each file in .attribute, enable that file with
794 	 * 	  an attribute of the same name.
795 	 * Not clear how to report errors -- probably eat them.
796 	 * This would all happen while the filesystem was busy/not
797 	 * available, so would effectively be "atomic".
798 	 */
799 	(void) ufs_extattr_autostart(mp, td);
800 #endif /* !UFS_EXTATTR_AUTOSTART */
801 #endif /* !UFS_EXTATTR */
802 	return (0);
803 out:
804 	devvp->v_rdev->si_mountpoint = NULL;
805 	if (bp)
806 		brelse(bp);
807 	/* XXX: see comment above VOP_OPEN. */
808 #ifdef notyet
809 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, cred, td);
810 #else
811 	(void)VOP_CLOSE(devvp, FREAD | FWRITE, cred, td);
812 #endif
813 	if (ump) {
814 		free(ump->um_fs, M_UFSMNT);
815 		free(ump, M_UFSMNT);
816 		mp->mnt_data = (qaddr_t)0;
817 	}
818 	return (error);
819 }
820 
821 #include <sys/sysctl.h>
822 int bigcgs = 0;
823 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
824 
825 /*
826  * Sanity checks for loading old filesystem superblocks.
827  * See ffs_oldfscompat_write below for unwound actions.
828  *
829  * XXX - Parts get retired eventually.
830  * Unfortunately new bits get added.
831  */
832 static void
833 ffs_oldfscompat_read(fs, ump, sblockloc)
834 	struct fs *fs;
835 	struct ufsmount *ump;
836 	ufs2_daddr_t sblockloc;
837 {
838 	off_t maxfilesize;
839 
840 	/*
841 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
842 	 */
843 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
844 		fs->fs_flags = fs->fs_old_flags;
845 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
846 		fs->fs_sblockloc = sblockloc;
847 	}
848 	/*
849 	 * If not yet done, update UFS1 superblock with new wider fields.
850 	 */
851 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
852 		fs->fs_maxbsize = fs->fs_bsize;
853 		fs->fs_time = fs->fs_old_time;
854 		fs->fs_size = fs->fs_old_size;
855 		fs->fs_dsize = fs->fs_old_dsize;
856 		fs->fs_csaddr = fs->fs_old_csaddr;
857 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
858 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
859 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
860 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
861 	}
862 	if (fs->fs_magic == FS_UFS1_MAGIC &&
863 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
864 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
865 		fs->fs_qbmask = ~fs->fs_bmask;
866 		fs->fs_qfmask = ~fs->fs_fmask;
867 	}
868 	if (fs->fs_magic == FS_UFS1_MAGIC) {
869 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
870 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
871 		if (fs->fs_maxfilesize > maxfilesize)
872 			fs->fs_maxfilesize = maxfilesize;
873 	}
874 	/* Compatibility for old filesystems */
875 	if (fs->fs_avgfilesize <= 0)
876 		fs->fs_avgfilesize = AVFILESIZ;
877 	if (fs->fs_avgfpdir <= 0)
878 		fs->fs_avgfpdir = AFPDIR;
879 	if (bigcgs) {
880 		fs->fs_save_cgsize = fs->fs_cgsize;
881 		fs->fs_cgsize = fs->fs_bsize;
882 	}
883 }
884 
885 /*
886  * Unwinding superblock updates for old filesystems.
887  * See ffs_oldfscompat_read above for details.
888  *
889  * XXX - Parts get retired eventually.
890  * Unfortunately new bits get added.
891  */
892 static void
893 ffs_oldfscompat_write(fs, ump)
894 	struct fs *fs;
895 	struct ufsmount *ump;
896 {
897 
898 	/*
899 	 * Copy back UFS2 updated fields that UFS1 inspects.
900 	 */
901 	if (fs->fs_magic == FS_UFS1_MAGIC) {
902 		fs->fs_old_time = fs->fs_time;
903 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
904 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
905 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
906 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
907 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
908 	}
909 	if (bigcgs) {
910 		fs->fs_cgsize = fs->fs_save_cgsize;
911 		fs->fs_save_cgsize = 0;
912 	}
913 }
914 
915 /*
916  * unmount system call
917  */
918 int
919 ffs_unmount(mp, mntflags, td)
920 	struct mount *mp;
921 	int mntflags;
922 	struct thread *td;
923 {
924 	struct ufsmount *ump = VFSTOUFS(mp);
925 	struct fs *fs;
926 	int error, flags;
927 
928 	flags = 0;
929 	if (mntflags & MNT_FORCE) {
930 		flags |= FORCECLOSE;
931 	}
932 #ifdef UFS_EXTATTR
933 	if ((error = ufs_extattr_stop(mp, td))) {
934 		if (error != EOPNOTSUPP)
935 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
936 			    error);
937 	} else {
938 		ufs_extattr_uepm_destroy(&ump->um_extattr);
939 	}
940 #endif
941 	if (mp->mnt_flag & MNT_SOFTDEP) {
942 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
943 			return (error);
944 	} else {
945 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
946 			return (error);
947 	}
948 	fs = ump->um_fs;
949 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
950 		printf("%s: unmount pending error: blocks %jd files %d\n",
951 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
952 		    fs->fs_pendinginodes);
953 		fs->fs_pendingblocks = 0;
954 		fs->fs_pendinginodes = 0;
955 	}
956 	if (fs->fs_ronly == 0) {
957 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
958 		error = ffs_sbupdate(ump, MNT_WAIT);
959 		if (error) {
960 			fs->fs_clean = 0;
961 			return (error);
962 		}
963 	}
964 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
965 
966 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, td, 0, 0);
967 	/* XXX: see comment above VOP_OPEN. */
968 #ifdef notyet
969 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
970 	    NOCRED, td);
971 #else
972 	error = VOP_CLOSE(ump->um_devvp, FREAD | FWRITE, NOCRED, td);
973 #endif
974 	vrele(ump->um_devvp);
975 	free(fs->fs_csp, M_UFSMNT);
976 	free(fs, M_UFSMNT);
977 	free(ump, M_UFSMNT);
978 	mp->mnt_data = (qaddr_t)0;
979 	mp->mnt_flag &= ~MNT_LOCAL;
980 	return (error);
981 }
982 
983 /*
984  * Flush out all the files in a filesystem.
985  */
986 int
987 ffs_flushfiles(mp, flags, td)
988 	struct mount *mp;
989 	int flags;
990 	struct thread *td;
991 {
992 	struct ufsmount *ump;
993 	int error;
994 
995 	ump = VFSTOUFS(mp);
996 #ifdef QUOTA
997 	if (mp->mnt_flag & MNT_QUOTA) {
998 		int i;
999 		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
1000 		if (error)
1001 			return (error);
1002 		for (i = 0; i < MAXQUOTAS; i++) {
1003 			if (ump->um_quotas[i] == NULLVP)
1004 				continue;
1005 			quotaoff(td, mp, i);
1006 		}
1007 		/*
1008 		 * Here we fall through to vflush again to ensure
1009 		 * that we have gotten rid of all the system vnodes.
1010 		 */
1011 	}
1012 #endif
1013 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1014 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1015 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1016 			return (error);
1017 		ffs_snapshot_unmount(mp);
1018 		/*
1019 		 * Here we fall through to vflush again to ensure
1020 		 * that we have gotten rid of all the system vnodes.
1021 		 */
1022 	}
1023         /*
1024 	 * Flush all the files.
1025 	 */
1026 	if ((error = vflush(mp, 0, flags, td)) != 0)
1027 		return (error);
1028 	/*
1029 	 * Flush filesystem metadata.
1030 	 */
1031 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1032 	error = VOP_FSYNC(ump->um_devvp, td->td_ucred, MNT_WAIT, td);
1033 	VOP_UNLOCK(ump->um_devvp, 0, td);
1034 	return (error);
1035 }
1036 
1037 /*
1038  * Get filesystem statistics.
1039  */
1040 int
1041 ffs_statfs(mp, sbp, td)
1042 	struct mount *mp;
1043 	struct statfs *sbp;
1044 	struct thread *td;
1045 {
1046 	struct ufsmount *ump;
1047 	struct fs *fs;
1048 
1049 	ump = VFSTOUFS(mp);
1050 	fs = ump->um_fs;
1051 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1052 		panic("ffs_statfs");
1053 	sbp->f_version = STATFS_VERSION;
1054 	sbp->f_bsize = fs->fs_fsize;
1055 	sbp->f_iosize = fs->fs_bsize;
1056 	sbp->f_blocks = fs->fs_dsize;
1057 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1058 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1059 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1060 	    dbtofsb(fs, fs->fs_pendingblocks);
1061 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1062 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1063 	sbp->f_namemax = NAME_MAX;
1064 	if (sbp != &mp->mnt_stat) {
1065 		sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
1066 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
1067 		sbp->f_syncwrites = mp->mnt_stat.f_syncwrites;
1068 		sbp->f_asyncwrites = mp->mnt_stat.f_asyncwrites;
1069 		sbp->f_syncreads = mp->mnt_stat.f_syncreads;
1070 		sbp->f_asyncreads = mp->mnt_stat.f_asyncreads;
1071 		sbp->f_owner = mp->mnt_stat.f_owner;
1072 		sbp->f_fsid = mp->mnt_stat.f_fsid;
1073 		bcopy((caddr_t)mp->mnt_stat.f_fstypename,
1074 			(caddr_t)&sbp->f_fstypename[0], MFSNAMELEN);
1075 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
1076 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
1077 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
1078 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
1079 	}
1080 	return (0);
1081 }
1082 
1083 /*
1084  * Go through the disk queues to initiate sandbagged IO;
1085  * go through the inodes to write those that have been modified;
1086  * initiate the writing of the super block if it has been modified.
1087  *
1088  * Note: we are always called with the filesystem marked `MPBUSY'.
1089  */
1090 int
1091 ffs_sync(mp, waitfor, cred, td)
1092 	struct mount *mp;
1093 	int waitfor;
1094 	struct ucred *cred;
1095 	struct thread *td;
1096 {
1097 	struct vnode *nvp, *vp, *devvp;
1098 	struct inode *ip;
1099 	struct ufsmount *ump = VFSTOUFS(mp);
1100 	struct fs *fs;
1101 	int error, count, wait, lockreq, allerror = 0;
1102 
1103 	fs = ump->um_fs;
1104 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1105 		printf("fs = %s\n", fs->fs_fsmnt);
1106 		panic("ffs_sync: rofs mod");
1107 	}
1108 	/*
1109 	 * Write back each (modified) inode.
1110 	 */
1111 	wait = 0;
1112 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1113 	if (waitfor == MNT_WAIT) {
1114 		wait = 1;
1115 		lockreq = LK_EXCLUSIVE;
1116 	}
1117 	lockreq |= LK_INTERLOCK;
1118 	MNT_ILOCK(mp);
1119 loop:
1120 	MNT_VNODE_FOREACH(vp, mp, nvp) {
1121 		/*
1122 		 * Depend on the mntvnode_slock to keep things stable enough
1123 		 * for a quick test.  Since there might be hundreds of
1124 		 * thousands of vnodes, we cannot afford even a subroutine
1125 		 * call unless there's a good chance that we have work to do.
1126 		 */
1127 		VI_LOCK(vp);
1128 		if (vp->v_iflag & VI_XLOCK) {
1129 			VI_UNLOCK(vp);
1130 			continue;
1131 		}
1132 		ip = VTOI(vp);
1133 		if (vp->v_type == VNON || ((ip->i_flag &
1134 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1135 		    TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
1136 			VI_UNLOCK(vp);
1137 			continue;
1138 		}
1139 		MNT_IUNLOCK(mp);
1140 		if ((error = vget(vp, lockreq, td)) != 0) {
1141 			MNT_ILOCK(mp);
1142 			if (error == ENOENT)
1143 				goto loop;
1144 			continue;
1145 		}
1146 		if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
1147 			allerror = error;
1148 		VOP_UNLOCK(vp, 0, td);
1149 		vrele(vp);
1150 		MNT_ILOCK(mp);
1151 	}
1152 	MNT_IUNLOCK(mp);
1153 	/*
1154 	 * Force stale filesystem control information to be flushed.
1155 	 */
1156 	if (waitfor == MNT_WAIT) {
1157 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1158 			allerror = error;
1159 		/* Flushed work items may create new vnodes to clean */
1160 		if (allerror == 0 && count) {
1161 			MNT_ILOCK(mp);
1162 			goto loop;
1163 		}
1164 	}
1165 #ifdef QUOTA
1166 	qsync(mp);
1167 #endif
1168 	devvp = ump->um_devvp;
1169 	VI_LOCK(devvp);
1170 	if (waitfor != MNT_LAZY &&
1171 	    (devvp->v_numoutput > 0 || TAILQ_FIRST(&devvp->v_dirtyblkhd))) {
1172 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1173 		if ((error = VOP_FSYNC(devvp, cred, waitfor, td)) != 0)
1174 			allerror = error;
1175 		VOP_UNLOCK(devvp, 0, td);
1176 		if (allerror == 0 && waitfor == MNT_WAIT) {
1177 			MNT_ILOCK(mp);
1178 			goto loop;
1179 		}
1180 	} else
1181 		VI_UNLOCK(devvp);
1182 	/*
1183 	 * Write back modified superblock.
1184 	 */
1185 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1186 		allerror = error;
1187 	return (allerror);
1188 }
1189 
1190 int
1191 ffs_vget(mp, ino, flags, vpp)
1192 	struct mount *mp;
1193 	ino_t ino;
1194 	int flags;
1195 	struct vnode **vpp;
1196 {
1197 	struct thread *td = curthread; 		/* XXX */
1198 	struct fs *fs;
1199 	struct inode *ip;
1200 	struct ufsmount *ump;
1201 	struct buf *bp;
1202 	struct vnode *vp;
1203 	struct cdev *dev;
1204 	int error;
1205 
1206 	ump = VFSTOUFS(mp);
1207 	dev = ump->um_dev;
1208 
1209 	/*
1210 	 * We do not lock vnode creation as it is believed to be too
1211 	 * expensive for such rare case as simultaneous creation of vnode
1212 	 * for same ino by different processes. We just allow them to race
1213 	 * and check later to decide who wins. Let the race begin!
1214 	 */
1215 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1216 		return (error);
1217 	if (*vpp != NULL)
1218 		return (0);
1219 
1220 	/*
1221 	 * If this MALLOC() is performed after the getnewvnode()
1222 	 * it might block, leaving a vnode with a NULL v_data to be
1223 	 * found by ffs_sync() if a sync happens to fire right then,
1224 	 * which will cause a panic because ffs_sync() blindly
1225 	 * dereferences vp->v_data (as well it should).
1226 	 */
1227 	ip = uma_zalloc(uma_inode, M_WAITOK);
1228 
1229 	/* Allocate a new vnode/inode. */
1230 	error = getnewvnode("ufs", mp, ffs_vnodeop_p, &vp);
1231 	if (error) {
1232 		*vpp = NULL;
1233 		uma_zfree(uma_inode, ip);
1234 		return (error);
1235 	}
1236 	bzero((caddr_t)ip, sizeof(struct inode));
1237 	/*
1238 	 * FFS supports recursive locking.
1239 	 */
1240 	fs = ump->um_fs;
1241 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1242 	vp->v_data = ip;
1243 	vp->v_bsize = fs->fs_bsize;
1244 	ip->i_vnode = vp;
1245 	ip->i_ump = ump;
1246 	ip->i_fs = fs;
1247 	ip->i_dev = dev;
1248 	ip->i_number = ino;
1249 #ifdef QUOTA
1250 	{
1251 		int i;
1252 		for (i = 0; i < MAXQUOTAS; i++)
1253 			ip->i_dquot[i] = NODQUOT;
1254 	}
1255 #endif
1256 	/*
1257 	 * Exclusively lock the vnode before adding to hash. Note, that we
1258 	 * must not release nor downgrade the lock (despite flags argument
1259 	 * says) till it is fully initialized.
1260 	 */
1261 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1262 
1263 	/*
1264 	 * Atomicaly (in terms of ufs_hash operations) check the hash for
1265 	 * duplicate of vnode being created and add it to the hash. If a
1266 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1267 	 */
1268 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1269 		vput(vp);
1270 		*vpp = NULL;
1271 		return (error);
1272 	}
1273 
1274 	/* We lost the race, then throw away our vnode and return existing */
1275 	if (*vpp != NULL) {
1276 		vput(vp);
1277 		return (0);
1278 	}
1279 
1280 	/* Read in the disk contents for the inode, copy into the inode. */
1281 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1282 	    (int)fs->fs_bsize, NOCRED, &bp);
1283 	if (error) {
1284 		/*
1285 		 * The inode does not contain anything useful, so it would
1286 		 * be misleading to leave it on its hash chain. With mode
1287 		 * still zero, it will be unlinked and returned to the free
1288 		 * list by vput().
1289 		 */
1290 		brelse(bp);
1291 		vput(vp);
1292 		*vpp = NULL;
1293 		return (error);
1294 	}
1295 	if (ip->i_ump->um_fstype == UFS1)
1296 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1297 	else
1298 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1299 	ffs_load_inode(bp, ip, fs, ino);
1300 	if (DOINGSOFTDEP(vp))
1301 		softdep_load_inodeblock(ip);
1302 	else
1303 		ip->i_effnlink = ip->i_nlink;
1304 	bqrelse(bp);
1305 
1306 	/*
1307 	 * Initialize the vnode from the inode, check for aliases.
1308 	 * Note that the underlying vnode may have changed.
1309 	 */
1310 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1311 	if (error) {
1312 		vput(vp);
1313 		*vpp = NULL;
1314 		return (error);
1315 	}
1316 	/*
1317 	 * Finish inode initialization.
1318 	 */
1319 	VREF(ip->i_devvp);
1320 	/*
1321 	 * Set up a generation number for this inode if it does not
1322 	 * already have one. This should only happen on old filesystems.
1323 	 */
1324 	if (ip->i_gen == 0) {
1325 		ip->i_gen = arc4random() / 2 + 1;
1326 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1327 			ip->i_flag |= IN_MODIFIED;
1328 			DIP_SET(ip, i_gen, ip->i_gen);
1329 		}
1330 	}
1331 	/*
1332 	 * Ensure that uid and gid are correct. This is a temporary
1333 	 * fix until fsck has been changed to do the update.
1334 	 */
1335 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1336 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1337 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1338 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1339 	}						/* XXX */
1340 
1341 #ifdef MAC
1342 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1343 		/*
1344 		 * If this vnode is already allocated, and we're running
1345 		 * multi-label, attempt to perform a label association
1346 		 * from the extended attributes on the inode.
1347 		 */
1348 		error = mac_associate_vnode_extattr(mp, vp);
1349 		if (error) {
1350 			/* ufs_inactive will release ip->i_devvp ref. */
1351 			vput(vp);
1352 			*vpp = NULL;
1353 			return (error);
1354 		}
1355 	}
1356 #endif
1357 
1358 	*vpp = vp;
1359 	return (0);
1360 }
1361 
1362 /*
1363  * File handle to vnode
1364  *
1365  * Have to be really careful about stale file handles:
1366  * - check that the inode number is valid
1367  * - call ffs_vget() to get the locked inode
1368  * - check for an unallocated inode (i_mode == 0)
1369  * - check that the given client host has export rights and return
1370  *   those rights via. exflagsp and credanonp
1371  */
1372 int
1373 ffs_fhtovp(mp, fhp, vpp)
1374 	struct mount *mp;
1375 	struct fid *fhp;
1376 	struct vnode **vpp;
1377 {
1378 	struct ufid *ufhp;
1379 	struct fs *fs;
1380 
1381 	ufhp = (struct ufid *)fhp;
1382 	fs = VFSTOUFS(mp)->um_fs;
1383 	if (ufhp->ufid_ino < ROOTINO ||
1384 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1385 		return (ESTALE);
1386 	return (ufs_fhtovp(mp, ufhp, vpp));
1387 }
1388 
1389 /*
1390  * Vnode pointer to File handle
1391  */
1392 /* ARGSUSED */
1393 int
1394 ffs_vptofh(vp, fhp)
1395 	struct vnode *vp;
1396 	struct fid *fhp;
1397 {
1398 	struct inode *ip;
1399 	struct ufid *ufhp;
1400 
1401 	ip = VTOI(vp);
1402 	ufhp = (struct ufid *)fhp;
1403 	ufhp->ufid_len = sizeof(struct ufid);
1404 	ufhp->ufid_ino = ip->i_number;
1405 	ufhp->ufid_gen = ip->i_gen;
1406 	return (0);
1407 }
1408 
/*
 * Initialize the filesystem: set up soft updates first, then run the
 * common UFS initialization and return its result.
 */
static int
ffs_init(struct vfsconf *vfsp)
{
	int error;

	softdep_initialize();
	error = ufs_init(vfsp);
	return (error);
}
1420 
/*
 * Undo the work of ffs_init(), in the reverse order: common UFS
 * teardown first, then soft updates.  The result of ufs_uninit() is
 * returned.
 */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error = ufs_uninit(vfsp);

	softdep_uninitialize();
	return (error);
}
1434 
1435 /*
1436  * Write a superblock and associated information back to disk.
1437  */
1438 static int
1439 ffs_sbupdate(mp, waitfor)
1440 	struct ufsmount *mp;
1441 	int waitfor;
1442 {
1443 	struct fs *fs = mp->um_fs;
1444 	struct buf *bp;
1445 	int blks;
1446 	void *space;
1447 	int i, size, error, allerror = 0;
1448 
1449 	if (fs->fs_ronly == 1 &&
1450 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1451 	    (MNT_RDONLY | MNT_UPDATE))
1452 		panic("ffs_sbupdate: write read-only filesystem");
1453 	/*
1454 	 * First write back the summary information.
1455 	 */
1456 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1457 	space = fs->fs_csp;
1458 	for (i = 0; i < blks; i += fs->fs_frag) {
1459 		size = fs->fs_bsize;
1460 		if (i + fs->fs_frag > blks)
1461 			size = (blks - i) * fs->fs_fsize;
1462 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1463 		    size, 0, 0, 0);
1464 		bcopy(space, bp->b_data, (u_int)size);
1465 		space = (char *)space + size;
1466 		if (waitfor != MNT_WAIT)
1467 			bawrite(bp);
1468 		else if ((error = bwrite(bp)) != 0)
1469 			allerror = error;
1470 	}
1471 	/*
1472 	 * Now write back the superblock itself. If any errors occurred
1473 	 * up to this point, then fail so that the superblock avoids
1474 	 * being written out as clean.
1475 	 */
1476 	if (allerror)
1477 		return (allerror);
1478 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1479 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1480 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1481 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1482 		fs->fs_sblockloc = SBLOCK_UFS1;
1483 	}
1484 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1485 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1486 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1487 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1488 		fs->fs_sblockloc = SBLOCK_UFS2;
1489 	}
1490 	bp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1491 	    0, 0, 0);
1492 	fs->fs_fmod = 0;
1493 	fs->fs_time = time_second;
1494 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1495 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1496 	if (waitfor != MNT_WAIT)
1497 		bawrite(bp);
1498 	else if ((error = bwrite(bp)) != 0)
1499 		allerror = error;
1500 	return (allerror);
1501 }
1502 
/*
 * Extended attribute control: forward the request to the UFS
 * implementation when the kernel is built with UFS_EXTATTR, and to the
 * VFS default otherwise.  The callee's result is returned unchanged.
 */
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname, struct thread *td)
{
	int error;

#ifdef UFS_EXTATTR
	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#else
	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname, td);
#endif
	return (error);
}
1516 
1517 static void
1518 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1519 {
1520 
1521 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1522 		uma_zfree(uma_ufs1, ip->i_din1);
1523 	else if (ip->i_din2 != NULL)
1524 		uma_zfree(uma_ufs2, ip->i_din2);
1525 	uma_zfree(uma_inode, ip);
1526 }
1527