xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 6b806d21d144c25f4fad714e1c0cf780f5e27d7e)
1 /*-
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_mac.h"
36 #include "opt_quota.h"
37 #include "opt_ufs.h"
38 #include "opt_ffs.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/mac.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 
55 #include <ufs/ufs/extattr.h>
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 #include <vm/vm.h>
65 #include <vm/uma.h>
66 #include <vm/vm_page.h>
67 
68 #include <geom/geom.h>
69 #include <geom/geom_vfs.h>
70 
71 uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
72 
73 static int	ffs_sbupdate(struct ufsmount *, int);
74 static int	ffs_reload(struct mount *, struct thread *);
75 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
76 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
77 		    ufs2_daddr_t);
78 static void	ffs_oldfscompat_write(struct fs *, struct ufsmount *);
79 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
80 static vfs_init_t ffs_init;
81 static vfs_uninit_t ffs_uninit;
82 static vfs_extattrctl_t ffs_extattrctl;
83 static vfs_cmount_t ffs_cmount;
84 static vfs_mount_t ffs_mount;
85 
86 static struct vfsops ufs_vfsops = {
87 	.vfs_extattrctl =	ffs_extattrctl,
88 	.vfs_fhtovp =		ffs_fhtovp,
89 	.vfs_init =		ffs_init,
90 	.vfs_mount =		ffs_mount,
91 	.vfs_cmount =		ffs_cmount,
92 	.vfs_quotactl =		ufs_quotactl,
93 	.vfs_root =		ufs_root,
94 	.vfs_statfs =		ffs_statfs,
95 	.vfs_sync =		ffs_sync,
96 	.vfs_uninit =		ffs_uninit,
97 	.vfs_unmount =		ffs_unmount,
98 	.vfs_vget =		ffs_vget,
99 	.vfs_vptofh =		ffs_vptofh,
100 };
101 
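/*
 * Register these operations under the filesystem name "ufs"; both
 * "mount -t ufs" and the root mount dispatch through this table.
 */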
102 VFS_SET(ufs_vfsops, ufs, 0);
103 
104 static b_strategy_t ffs_geom_strategy;
105 static b_write_t ffs_bufwrite;
106 
107 static struct buf_ops ffs_ops = {
108 	.bop_name =	"FFS",
109 	.bop_write =	ffs_bufwrite,
110 	.bop_strategy =	ffs_geom_strategy,
111 	.bop_sync =	bufsync,
112 };
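
/*
 * These buffer operations are installed on the device vnode's bufobj in
 * ffs_mountfs() (devvp->v_bufobj.bo_ops = &ffs_ops), so buffer writes
 * and strategy calls for the mounted device are routed through
 * ffs_bufwrite() and ffs_geom_strategy() below.
 */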
113 
114 static const char *ffs_opts[] = { "from", "export", NULL };
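
/*
 * vfs_filteropt() in ffs_mount() rejects any nmount(2) option that is
 * neither in this table nor in the VFS layer's global option list
 * (which is how generic options such as "ro" get through).
 */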
115 
116 static int
117 ffs_mount(struct mount *mp, struct thread *td)
118 {
119 	struct vnode *devvp;
120 	struct ufsmount *ump = NULL;
121 	struct fs *fs;
122 	int error, flags;
123 	mode_t accessmode;
124 	struct nameidata ndp;
125 	struct export_args export;
126 	char *fspec;
127 
128 	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
129 		return (EINVAL);
130 	if (uma_inode == NULL) {
131 		uma_inode = uma_zcreate("FFS inode",
132 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
133 		    UMA_ALIGN_PTR, 0);
134 		uma_ufs1 = uma_zcreate("FFS1 dinode",
135 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
136 		    UMA_ALIGN_PTR, 0);
137 		uma_ufs2 = uma_zcreate("FFS2 dinode",
138 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
139 		    UMA_ALIGN_PTR, 0);
140 	}
141 
142 	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
143 	if (error)
144 		return (error);
145 
146 	/*
147 	 * If updating, check whether changing from read-only to
148 	 * read/write; if there is no device name, that's all we do.
149 	 */
150 	if (mp->mnt_flag & MNT_UPDATE) {
151 		ump = VFSTOUFS(mp);
152 		fs = ump->um_fs;
153 		devvp = ump->um_devvp;
154 		if (fs->fs_ronly == 0 &&
155 		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
156 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
157 				return (error);
158 			/*
159 			 * Flush any dirty data.
160 			 */
161 			if ((error = ffs_sync(mp, MNT_WAIT, td)) != 0) {
162 				vn_finished_write(mp);
163 				return (error);
164 			}
165 			/*
166 			 * Check for and optionally get rid of files open
167 			 * for writing.
168 			 */
169 			flags = WRITECLOSE;
170 			if (mp->mnt_flag & MNT_FORCE)
171 				flags |= FORCECLOSE;
172 			if (mp->mnt_flag & MNT_SOFTDEP) {
173 				error = softdep_flushfiles(mp, flags, td);
174 			} else {
175 				error = ffs_flushfiles(mp, flags, td);
176 			}
177 			if (error) {
178 				vn_finished_write(mp);
179 				return (error);
180 			}
181 			if (fs->fs_pendingblocks != 0 ||
182 			    fs->fs_pendinginodes != 0) {
183 				printf("%s: %s: blocks %jd files %d\n",
184 				    fs->fs_fsmnt, "update error",
185 				    (intmax_t)fs->fs_pendingblocks,
186 				    fs->fs_pendinginodes);
187 				fs->fs_pendingblocks = 0;
188 				fs->fs_pendinginodes = 0;
189 			}
190 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
191 				fs->fs_clean = 1;
192 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
193 				fs->fs_ronly = 0;
194 				fs->fs_clean = 0;
195 				vn_finished_write(mp);
196 				return (error);
197 			}
198 			vn_finished_write(mp);
199 			DROP_GIANT();
200 			g_topology_lock();
201 			g_access(ump->um_cp, 0, -1, 0);
202 			g_topology_unlock();
203 			PICKUP_GIANT();
204 			fs->fs_ronly = 1;
205 			mp->mnt_flag |= MNT_RDONLY;
206 		}
207 		if ((mp->mnt_flag & MNT_RELOAD) &&
208 		    (error = ffs_reload(mp, td)) != 0)
209 			return (error);
210 		if (fs->fs_ronly &&
211 		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
212 			/*
213 			 * If upgrading to read-write as non-root, verify that
214 			 * the user has the necessary permissions on the device.
215 			 */
216 			if (suser(td)) {
217 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
218 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
219 				    td->td_ucred, td)) != 0) {
220 					VOP_UNLOCK(devvp, 0, td);
221 					return (error);
222 				}
223 				VOP_UNLOCK(devvp, 0, td);
224 			}
225 			fs->fs_flags &= ~FS_UNCLEAN;
226 			if (fs->fs_clean == 0) {
227 				fs->fs_flags |= FS_UNCLEAN;
228 				if ((mp->mnt_flag & MNT_FORCE) ||
229 				    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
230 				     (fs->fs_flags & FS_DOSOFTDEP))) {
231 					printf("WARNING: %s was not %s\n",
232 					   fs->fs_fsmnt, "properly dismounted");
233 				} else {
234 					printf(
235 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
236 					    fs->fs_fsmnt);
237 					return (EPERM);
238 				}
239 			}
240 			DROP_GIANT();
241 			g_topology_lock();
242 			/*
243 			 * If we're the root device, we may not have an exclusive
244 			 * ("e") access count yet; get it now.
245 			 */
246 			if (ump->um_cp->ace == 0)
247 				error = g_access(ump->um_cp, 0, 1, 1);
248 			else
249 				error = g_access(ump->um_cp, 0, 1, 0);
250 			g_topology_unlock();
251 			PICKUP_GIANT();
252 			if (error)
253 				return (error);
254 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
255 				return (error);
256 			fs->fs_ronly = 0;
257 			mp->mnt_flag &= ~MNT_RDONLY;
258 			fs->fs_clean = 0;
259 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
260 				vn_finished_write(mp);
261 				return (error);
262 			}
263 			/* check to see if we need to start softdep */
264 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
265 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
266 				vn_finished_write(mp);
267 				return (error);
268 			}
269 			if (fs->fs_snapinum[0] != 0)
270 				ffs_snapshot_mount(mp);
271 			vn_finished_write(mp);
272 		}
273 		/*
274 		 * Soft updates is incompatible with "async",
275 		 * so if we are doing softupdates stop the user
276 		 * from setting the async flag in an update.
277 		 * Softdep_mount() clears it in an initial mount
278 		 * or ro->rw remount.
279 		 */
280 		if (mp->mnt_flag & MNT_SOFTDEP)
281 			mp->mnt_flag &= ~MNT_ASYNC;
282 		/*
283 		 * Keep MNT_ACLS flag if it is stored in superblock.
284 		 */
285 		if ((fs->fs_flags & FS_ACLS) != 0)
286 			mp->mnt_flag |= MNT_ACLS;
287 		/*
288 		 * If not updating name, process export requests.
289 		 */
290 		error = vfs_copyopt(mp->mnt_optnew, "export", &export, sizeof export);
291 		if (error == 0 && export.ex_flags != 0)
292 			return (vfs_export(mp, &export));
293 		/*
294 		 * If this is a snapshot request, take the snapshot.
295 		 */
296 		if (mp->mnt_flag & MNT_SNAPSHOT)
297 			return (ffs_snapshot(mp, fspec));
298 	}
299 
300 	/*
301 	 * Not an update, or updating the name: look up the name
302 	 * and verify that it refers to a sensible disk device.
303 	 */
304 	NDINIT(&ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td);
305 	if ((error = namei(&ndp)) != 0)
306 		return (error);
307 	NDFREE(&ndp, NDF_ONLY_PNBUF);
308 	devvp = ndp.ni_vp;
309 	if (!vn_isdisk(devvp, &error)) {
310 		vrele(devvp);
311 		return (error);
312 	}
313 
314 	/*
315 	 * If mounted by non-root, verify that the user has the
316 	 * necessary permissions on the device.
317 	 */
318 	if (suser(td)) {
319 		accessmode = VREAD;
320 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
321 			accessmode |= VWRITE;
322 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
323 		if ((error = VOP_ACCESS(devvp, accessmode, td->td_ucred, td))!= 0){
324 			vput(devvp);
325 			return (error);
326 		}
327 		VOP_UNLOCK(devvp, 0, td);
328 	}
329 
330 	if (mp->mnt_flag & MNT_UPDATE) {
331 		/*
332 		 * Update only
333 		 *
334 		 * If it's not the same vnode, or at least not the same
335 		 * underlying device, the update request is invalid.
336 		 */
337 
338 		if (devvp->v_rdev != ump->um_devvp->v_rdev)
339 			error = EINVAL;	/* needs translation */
340 		vrele(devvp);
341 		if (error)
342 			return (error);
343 	} else {
344 		/*
345 		 * New mount
346 		 *
347 		 * We need the name for the mount point (also used for
348 		 * "last mounted on") copied in. If an error occurs,
349 		 * the mount point is discarded by the upper level code.
350 		 * Note that vfs_mount() populates f_mntonname for us.
351 		 */
352 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
353 			vrele(devvp);
354 			return (error);
355 		}
356 	}
357 	vfs_mountedfrom(mp, fspec);
358 	return (0);
359 }
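
/*
 * Editor's sketch (userland, excluded from compilation): one way the
 * options parsed by ffs_mount() above can be delivered from user space
 * through nmount(2).  The helper below and its option set are
 * illustrative assumptions, not part of this file.
 */
#if 0
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/uio.h>
#include <string.h>

static void
set_opt(struct iovec *iov, const char *name, const char *val)
{

	iov[0].iov_base = __DECONST(char *, name);
	iov[0].iov_len = strlen(name) + 1;	/* include the NUL */
	iov[1].iov_base = __DECONST(char *, val);
	iov[1].iov_len = strlen(val) + 1;
}

static int
mount_ffs_sketch(const char *dev, const char *dir)
{
	struct iovec iov[6];

	set_opt(&iov[0], "fstype", "ufs");	/* name given to VFS_SET() */
	set_opt(&iov[2], "fspath", dir);	/* mount point */
	set_opt(&iov[4], "from", dev);		/* consumed by ffs_mount() */
	return (nmount(iov, 6, 0));
}
#endif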
360 
361 /*
362  * Compatibility with old mount system call.
363  */
364 
365 static int
366 ffs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td)
367 {
368 	struct ufs_args args;
369 	int error;
370 
371 	if (data == NULL)
372 		return (EINVAL);
373 	error = copyin(data, &args, sizeof args);
374 	if (error)
375 		return (error);
376 
377 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
378 	ma = mount_arg(ma, "export", &args.export, sizeof args.export);
379 	error = kernel_mount(ma, flags);
380 
381 	return (error);
382 }
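
/*
 * Note: ffs_cmount() bridges the pre-nmount(2) mount(2) ABI.  The
 * userland struct ufs_args is copied in and re-expressed as the
 * "from"/"export" name-value pairs above before re-entering the VFS
 * through kernel_mount().
 */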
383 
384 /*
385  * Reload all incore data for a filesystem (used after running fsck on
386  * the root filesystem and finding things to fix). The filesystem must
387  * be mounted read-only.
388  *
389  * Things to do to update the mount:
390  *	1) invalidate all cached meta-data.
391  *	2) re-read superblock from disk.
392  *	3) re-read summary information from disk.
393  *	4) invalidate all inactive vnodes.
394  *	5) invalidate all cached file data.
395  *	6) re-read inode data for all active vnodes.
396  */
397 static int
398 ffs_reload(struct mount *mp, struct thread *td)
399 {
400 	struct vnode *vp, *nvp, *devvp;
401 	struct inode *ip;
402 	void *space;
403 	struct buf *bp;
404 	struct fs *fs, *newfs;
405 	struct ufsmount *ump;
406 	ufs2_daddr_t sblockloc;
407 	int i, blks, size, error;
408 	int32_t *lp;
409 
410 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
411 		return (EINVAL);
412 	ump = VFSTOUFS(mp);
413 	/*
414 	 * Step 1: invalidate all cached meta-data.
415 	 */
416 	devvp = VFSTOUFS(mp)->um_devvp;
417 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, td);
418 	if (vinvalbuf(devvp, 0, td, 0, 0) != 0)
419 		panic("ffs_reload: dirty1");
420 	VOP_UNLOCK(devvp, 0, td);
421 
422 	/*
423 	 * Step 2: re-read superblock from disk.
424 	 */
425 	fs = VFSTOUFS(mp)->um_fs;
426 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
427 	    NOCRED, &bp)) != 0)
428 		return (error);
429 	newfs = (struct fs *)bp->b_data;
430 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
431 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
432 	    newfs->fs_bsize > MAXBSIZE ||
433 	    newfs->fs_bsize < sizeof(struct fs)) {
434 		brelse(bp);
435 		return (EIO);		/* XXX needs translation */
436 	}
437 	/*
438 	 * Copy pointer fields back into superblock before copying in	XXX
439 	 * new superblock. These should really be in the ufsmount.	XXX
440 	 * Note that important parameters (e.g., fs_ncg) are unchanged.
441 	 */
442 	newfs->fs_csp = fs->fs_csp;
443 	newfs->fs_maxcluster = fs->fs_maxcluster;
444 	newfs->fs_contigdirs = fs->fs_contigdirs;
445 	newfs->fs_active = fs->fs_active;
446 	/* The file system is still read-only. */
447 	newfs->fs_ronly = 1;
448 	sblockloc = fs->fs_sblockloc;
449 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
450 	brelse(bp);
451 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
452 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
453 	UFS_LOCK(ump);
454 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
455 		printf("%s: reload pending error: blocks %jd files %d\n",
456 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
457 		    fs->fs_pendinginodes);
458 		fs->fs_pendingblocks = 0;
459 		fs->fs_pendinginodes = 0;
460 	}
461 	UFS_UNLOCK(ump);
462 
463 	/*
464 	 * Step 3: re-read summary information from disk.
465 	 */
466 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
467 	space = fs->fs_csp;
468 	for (i = 0; i < blks; i += fs->fs_frag) {
469 		size = fs->fs_bsize;
470 		if (i + fs->fs_frag > blks)
471 			size = (blks - i) * fs->fs_fsize;
472 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
473 		    NOCRED, &bp);
474 		if (error)
475 			return (error);
476 		bcopy(bp->b_data, space, (u_int)size);
477 		space = (char *)space + size;
478 		brelse(bp);
479 	}
480 	/*
481 	 * We no longer know anything about clusters per cylinder group.
482 	 */
483 	if (fs->fs_contigsumsize > 0) {
484 		lp = fs->fs_maxcluster;
485 		for (i = 0; i < fs->fs_ncg; i++)
486 			*lp++ = fs->fs_contigsumsize;
487 	}
488 
489 loop:
490 	MNT_ILOCK(mp);
491 	MNT_VNODE_FOREACH(vp, mp, nvp) {
492 		VI_LOCK(vp);
493 		if (vp->v_iflag & VI_XLOCK) {
494 			VI_UNLOCK(vp);
495 			continue;
496 		}
497 		MNT_IUNLOCK(mp);
498 		/*
499 		 * Step 4: invalidate all inactive vnodes.
500 		 */
501 		if (vp->v_usecount == 0) {
502 			vgonel(vp, td);
503 			goto loop;
504 		}
505 		/*
506 		 * Step 5: invalidate all cached file data.
507 		 */
508 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
509 			goto loop;
510 		}
511 		if (vinvalbuf(vp, 0, td, 0, 0))
512 			panic("ffs_reload: dirty2");
513 		/*
514 		 * Step 6: re-read inode data for all active vnodes.
515 		 */
516 		ip = VTOI(vp);
517 		error =
518 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
519 		    (int)fs->fs_bsize, NOCRED, &bp);
520 		if (error) {
521 			VOP_UNLOCK(vp, 0, td);
522 			vrele(vp);
523 			return (error);
524 		}
525 		ffs_load_inode(bp, ip, fs, ip->i_number);
526 		ip->i_effnlink = ip->i_nlink;
527 		brelse(bp);
528 		VOP_UNLOCK(vp, 0, td);
529 		vrele(vp);
530 		MNT_ILOCK(mp);
531 	}
532 	MNT_IUNLOCK(mp);
533 	return (0);
534 }
535 
536 /*
537  * Possible superblock locations ordered from most to least likely.
538  */
539 static int sblock_try[] = SBLOCKSEARCH;
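
/*
 * At this revision fs.h defines SBLOCKSEARCH as { SBLOCK_UFS2 (65536),
 * SBLOCK_UFS1 (8192), SBLOCK_FLOPPY (0), SBLOCK_PIGGY (262144), -1 },
 * so a UFS2 superblock is probed for before the traditional UFS1
 * location.
 */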
540 
541 /*
542  * Common code for mount and mountroot
543  */
544 static int
545 ffs_mountfs(devvp, mp, td)
546 	struct vnode *devvp;
547 	struct mount *mp;
548 	struct thread *td;
549 {
550 	struct ufsmount *ump;
551 	struct buf *bp;
552 	struct fs *fs;
553 	struct cdev *dev;
554 	void *space;
555 	ufs2_daddr_t sblockloc;
556 	int error, i, blks, size, ronly;
557 	int32_t *lp;
558 	struct ucred *cred;
559 	struct g_consumer *cp;
560 
561 	dev = devvp->v_rdev;
562 	cred = td ? td->td_ucred : NOCRED;
563 
564 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
565 	DROP_GIANT();
566 	g_topology_lock();
567 	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
568 
569 	/*
570 	 * If we are a root mount, drop the E flag so fsck can do its magic.
571 	 * We will pick it up again when we remount R/W.
572 	 */
573 	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
574 		error = g_access(cp, 0, 0, -1);
575 	g_topology_unlock();
576 	PICKUP_GIANT();
577 	VOP_UNLOCK(devvp, 0, td);
578 	if (error)
579 		return (error);
580 	if (devvp->v_rdev->si_iosize_max != 0)
581 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
582 	if (mp->mnt_iosize_max > MAXPHYS)
583 		mp->mnt_iosize_max = MAXPHYS;
584 
585 	devvp->v_bufobj.bo_private = cp;
586 	devvp->v_bufobj.bo_ops = &ffs_ops;
587 
588 	bp = NULL;
589 	ump = NULL;
590 	fs = NULL;
591 	sblockloc = 0;
592 	/*
593 	 * Try reading the superblock in each of its possible locations.
594 	 */
595 	for (i = 0; sblock_try[i] != -1; i++) {
596 		if ((error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
597 		    cred, &bp)) != 0)
598 			goto out;
599 		fs = (struct fs *)bp->b_data;
600 		sblockloc = sblock_try[i];
601 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
602 		     (fs->fs_magic == FS_UFS2_MAGIC &&
603 		      (fs->fs_sblockloc == sblockloc ||
604 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
605 		    fs->fs_bsize <= MAXBSIZE &&
606 		    fs->fs_bsize >= sizeof(struct fs))
607 			break;
608 		brelse(bp);
609 		bp = NULL;
610 	}
611 	if (sblock_try[i] == -1) {
612 		error = EINVAL;		/* XXX needs translation */
613 		goto out;
614 	}
615 	fs->fs_fmod = 0;
616 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indices */
617 	fs->fs_flags &= ~FS_UNCLEAN;
618 	if (fs->fs_clean == 0) {
619 		fs->fs_flags |= FS_UNCLEAN;
620 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
621 		    ((fs->fs_flags & FS_NEEDSFSCK) == 0 &&
622 		     (fs->fs_flags & FS_DOSOFTDEP))) {
623 			printf(
624 "WARNING: %s was not properly dismounted\n",
625 			    fs->fs_fsmnt);
626 		} else {
627 			printf(
628 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
629 			    fs->fs_fsmnt);
630 			error = EPERM;
631 			goto out;
632 		}
633 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
634 		    (mp->mnt_flag & MNT_FORCE)) {
635 			printf("%s: lost blocks %jd files %d\n", fs->fs_fsmnt,
636 			    (intmax_t)fs->fs_pendingblocks,
637 			    fs->fs_pendinginodes);
638 			fs->fs_pendingblocks = 0;
639 			fs->fs_pendinginodes = 0;
640 		}
641 	}
642 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
643 		printf("%s: mount pending error: blocks %jd files %d\n",
644 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
645 		    fs->fs_pendinginodes);
646 		fs->fs_pendingblocks = 0;
647 		fs->fs_pendinginodes = 0;
648 	}
649 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
650 	ump->um_cp = cp;
651 	ump->um_bo = &devvp->v_bufobj;
652 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
653 	if (fs->fs_magic == FS_UFS1_MAGIC) {
654 		ump->um_fstype = UFS1;
655 		ump->um_balloc = ffs_balloc_ufs1;
656 	} else {
657 		ump->um_fstype = UFS2;
658 		ump->um_balloc = ffs_balloc_ufs2;
659 	}
660 	ump->um_blkatoff = ffs_blkatoff;
661 	ump->um_truncate = ffs_truncate;
662 	ump->um_update = ffs_update;
663 	ump->um_valloc = ffs_valloc;
664 	ump->um_vfree = ffs_vfree;
665 	ump->um_ifree = ffs_ifree;
666 	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
667 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
668 	if (fs->fs_sbsize < SBLOCKSIZE)
669 		bp->b_flags |= B_INVAL | B_NOCACHE;
670 	brelse(bp);
671 	bp = NULL;
672 	fs = ump->um_fs;
673 	ffs_oldfscompat_read(fs, ump, sblockloc);
674 	fs->fs_ronly = ronly;
675 	size = fs->fs_cssize;
676 	blks = howmany(size, fs->fs_fsize);
677 	if (fs->fs_contigsumsize > 0)
678 		size += fs->fs_ncg * sizeof(int32_t);
679 	size += fs->fs_ncg * sizeof(u_int8_t);
680 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
681 	fs->fs_csp = space;
682 	for (i = 0; i < blks; i += fs->fs_frag) {
683 		size = fs->fs_bsize;
684 		if (i + fs->fs_frag > blks)
685 			size = (blks - i) * fs->fs_fsize;
686 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
687 		    cred, &bp)) != 0) {
688 			free(fs->fs_csp, M_UFSMNT);
689 			goto out;
690 		}
691 		bcopy(bp->b_data, space, (u_int)size);
692 		space = (char *)space + size;
693 		brelse(bp);
694 		bp = NULL;
695 	}
696 	if (fs->fs_contigsumsize > 0) {
697 		fs->fs_maxcluster = lp = space;
698 		for (i = 0; i < fs->fs_ncg; i++)
699 			*lp++ = fs->fs_contigsumsize;
700 		space = lp;
701 	}
702 	size = fs->fs_ncg * sizeof(u_int8_t);
703 	fs->fs_contigdirs = (u_int8_t *)space;
704 	bzero(fs->fs_contigdirs, size);
705 	fs->fs_active = NULL;
706 	mp->mnt_data = (qaddr_t)ump;
707 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
708 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
709 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
710 	    vfs_getvfs(&mp->mnt_stat.f_fsid))
711 		vfs_getnewfsid(mp);
712 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
713 	mp->mnt_flag |= MNT_LOCAL;
714 	if ((fs->fs_flags & FS_MULTILABEL) != 0)
715 #ifdef MAC
716 		mp->mnt_flag |= MNT_MULTILABEL;
717 #else
718 		printf(
719 "WARNING: %s: multilabel flag on fs but no MAC support\n",
720 		    fs->fs_fsmnt);
721 #endif
722 	if ((fs->fs_flags & FS_ACLS) != 0)
723 #ifdef UFS_ACL
724 		mp->mnt_flag |= MNT_ACLS;
725 #else
726 		printf(
727 "WARNING: %s: ACLs flag on fs but no ACLs support\n",
728 		    fs->fs_fsmnt);
729 #endif
730 	ump->um_mountp = mp;
731 	ump->um_dev = dev;
732 	ump->um_devvp = devvp;
733 	ump->um_nindir = fs->fs_nindir;
734 	ump->um_bptrtodb = fs->fs_fsbtodb;
735 	ump->um_seqinc = fs->fs_frag;
736 	for (i = 0; i < MAXQUOTAS; i++)
737 		ump->um_quotas[i] = NULLVP;
738 #ifdef UFS_EXTATTR
739 	ufs_extattr_uepm_init(&ump->um_extattr);
740 #endif
741 	/*
742 	 * Set FS local "last mounted on" information (NULL pad)
743 	 */
744 	vfs_mountedfrom(mp, fs->fs_fsmnt);
745 
746 	if (mp->mnt_flag & MNT_ROOTFS) {
747 		/*
748 		 * Root mount; update timestamp in mount structure.
749 		 * This will be used by the common root mount code
750 		 * to update the system clock.
751 		 */
752 		mp->mnt_time = fs->fs_time;
753 	}
754 
755 	if (ronly == 0) {
756 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
757 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
758 			free(fs->fs_csp, M_UFSMNT);
759 			goto out;
760 		}
761 		if (fs->fs_snapinum[0] != 0)
762 			ffs_snapshot_mount(mp);
763 		fs->fs_fmod = 1;
764 		fs->fs_clean = 0;
765 		(void) ffs_sbupdate(ump, MNT_WAIT);
766 	}
767 	/*
768 	 * Initialize filesystem stat information in mount struct.
769 	 */
770 #ifdef UFS_EXTATTR
771 #ifdef UFS_EXTATTR_AUTOSTART
772 	/*
773 	 *
774 	 * Auto-starting does the following:
775 	 *	- check for /.attribute in the fs, and extattr_start if so
776 	 *	- for each file in .attribute, enable that file with
777 	 * 	  an attribute of the same name.
778 	 * Not clear how to report errors -- probably eat them.
779 	 * This would all happen while the filesystem was busy/not
780 	 * available, so would effectively be "atomic".
781 	 */
782 	(void) ufs_extattr_autostart(mp, td);
783 #endif /* UFS_EXTATTR_AUTOSTART */
784 #endif /* UFS_EXTATTR */
785 #ifndef QUOTA
786 	mp->mnt_kern_flag |= MNTK_MPSAFE;
787 #endif
788 	return (0);
789 out:
790 	if (bp)
791 		brelse(bp);
792 	vinvalbuf(devvp, V_SAVE, td, 0, 0);
793 	if (cp != NULL) {
794 		DROP_GIANT();
795 		g_topology_lock();
796 		g_vfs_close(cp, td);
797 		g_topology_unlock();
798 		PICKUP_GIANT();
799 	}
800 	if (ump) {
801 		mtx_destroy(UFS_MTX(ump));
802 		free(ump->um_fs, M_UFSMNT);
803 		free(ump, M_UFSMNT);
804 		mp->mnt_data = (qaddr_t)0;
805 	}
806 	return (error);
807 }
808 
809 #include <sys/sysctl.h>
810 int bigcgs = 0;
811 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
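/* Exposed as debug.bigcgs; consumed by ffs_oldfscompat_read/write() below. */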
812 
813 /*
814  * Sanity checks for loading old filesystem superblocks.
815  * See ffs_oldfscompat_write below for unwound actions.
816  *
817  * XXX - Parts get retired eventually.
818  * Unfortunately new bits get added.
819  */
820 static void
821 ffs_oldfscompat_read(fs, ump, sblockloc)
822 	struct fs *fs;
823 	struct ufsmount *ump;
824 	ufs2_daddr_t sblockloc;
825 {
826 	off_t maxfilesize;
827 
828 	/*
829 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
830 	 */
831 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
832 		fs->fs_flags = fs->fs_old_flags;
833 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
834 		fs->fs_sblockloc = sblockloc;
835 	}
836 	/*
837 	 * If not yet done, update UFS1 superblock with new wider fields.
838 	 */
839 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
840 		fs->fs_maxbsize = fs->fs_bsize;
841 		fs->fs_time = fs->fs_old_time;
842 		fs->fs_size = fs->fs_old_size;
843 		fs->fs_dsize = fs->fs_old_dsize;
844 		fs->fs_csaddr = fs->fs_old_csaddr;
845 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
846 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
847 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
848 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
849 	}
850 	if (fs->fs_magic == FS_UFS1_MAGIC &&
851 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
852 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
853 		fs->fs_qbmask = ~fs->fs_bmask;
854 		fs->fs_qfmask = ~fs->fs_fmask;
855 	}
856 	if (fs->fs_magic == FS_UFS1_MAGIC) {
857 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
858 		maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;
859 		if (fs->fs_maxfilesize > maxfilesize)
860 			fs->fs_maxfilesize = maxfilesize;
861 	}
862 	/* Compatibility for old filesystems */
863 	if (fs->fs_avgfilesize <= 0)
864 		fs->fs_avgfilesize = AVFILESIZ;
865 	if (fs->fs_avgfpdir <= 0)
866 		fs->fs_avgfpdir = AFPDIR;
867 	if (bigcgs) {
868 		fs->fs_save_cgsize = fs->fs_cgsize;
869 		fs->fs_cgsize = fs->fs_bsize;
870 	}
871 }
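
/*
 * Worked example for the UFS1 clamp above: 0x40000000 is 2^30 blocks,
 * so with a 16 KB block size fs_maxfilesize is limited to
 * 2^30 * 2^14 - 1 = 2^44 - 1 bytes, one byte short of 16 TB.
 */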
872 
873 /*
874  * Unwinding superblock updates for old filesystems.
875  * See ffs_oldfscompat_read above for details.
876  *
877  * XXX - Parts get retired eventually.
878  * Unfortunately new bits get added.
879  */
880 static void
881 ffs_oldfscompat_write(fs, ump)
882 	struct fs *fs;
883 	struct ufsmount *ump;
884 {
885 
886 	/*
887 	 * Copy back UFS2 updated fields that UFS1 inspects.
888 	 */
889 	if (fs->fs_magic == FS_UFS1_MAGIC) {
890 		fs->fs_old_time = fs->fs_time;
891 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
892 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
893 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
894 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
895 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
896 	}
897 	if (bigcgs) {
898 		fs->fs_cgsize = fs->fs_save_cgsize;
899 		fs->fs_save_cgsize = 0;
900 	}
901 }
902 
903 /*
904  * unmount system call
905  */
906 int
907 ffs_unmount(mp, mntflags, td)
908 	struct mount *mp;
909 	int mntflags;
910 	struct thread *td;
911 {
912 	struct ufsmount *ump = VFSTOUFS(mp);
913 	struct fs *fs;
914 	int error, flags;
915 
916 	flags = 0;
917 	if (mntflags & MNT_FORCE) {
918 		flags |= FORCECLOSE;
919 	}
920 #ifdef UFS_EXTATTR
921 	if ((error = ufs_extattr_stop(mp, td))) {
922 		if (error != EOPNOTSUPP)
923 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
924 			    error);
925 	} else {
926 		ufs_extattr_uepm_destroy(&ump->um_extattr);
927 	}
928 #endif
929 	if (mp->mnt_flag & MNT_SOFTDEP) {
930 		if ((error = softdep_flushfiles(mp, flags, td)) != 0)
931 			return (error);
932 	} else {
933 		if ((error = ffs_flushfiles(mp, flags, td)) != 0)
934 			return (error);
935 	}
936 	fs = ump->um_fs;
937 	UFS_LOCK(ump);
938 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
939 		printf("%s: unmount pending error: blocks %jd files %d\n",
940 		    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
941 		    fs->fs_pendinginodes);
942 		fs->fs_pendingblocks = 0;
943 		fs->fs_pendinginodes = 0;
944 	}
945 	UFS_UNLOCK(ump);
946 	if (fs->fs_ronly == 0) {
947 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
948 		error = ffs_sbupdate(ump, MNT_WAIT);
949 		if (error) {
950 			fs->fs_clean = 0;
951 			return (error);
952 		}
953 	}
954 	vinvalbuf(ump->um_devvp, V_SAVE, td, 0, 0);
955 	DROP_GIANT();
956 	g_topology_lock();
957 	g_vfs_close(ump->um_cp, td);
958 	g_topology_unlock();
959 	PICKUP_GIANT();
960 	vrele(ump->um_devvp);
961 	mtx_destroy(UFS_MTX(ump));
962 	free(fs->fs_csp, M_UFSMNT);
963 	free(fs, M_UFSMNT);
964 	free(ump, M_UFSMNT);
965 	mp->mnt_data = (qaddr_t)0;
966 	mp->mnt_flag &= ~MNT_LOCAL;
967 	return (error);
968 }
969 
970 /*
971  * Flush out all the files in a filesystem.
972  */
973 int
974 ffs_flushfiles(mp, flags, td)
975 	struct mount *mp;
976 	int flags;
977 	struct thread *td;
978 {
979 	struct ufsmount *ump;
980 	int error;
981 
982 	ump = VFSTOUFS(mp);
983 #ifdef QUOTA
984 	if (mp->mnt_flag & MNT_QUOTA) {
985 		int i;
986 		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
987 		if (error)
988 			return (error);
989 		for (i = 0; i < MAXQUOTAS; i++) {
990 			if (ump->um_quotas[i] == NULLVP)
991 				continue;
992 			quotaoff(td, mp, i);
993 		}
994 		/*
995 		 * Here we fall through to vflush again to ensure
996 		 * that we have gotten rid of all the system vnodes.
997 		 */
998 	}
999 #endif
1000 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
1001 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
1002 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
1003 			return (error);
1004 		ffs_snapshot_unmount(mp);
1005 		/*
1006 		 * Here we fall through to vflush again to ensure
1007 		 * that we have gotten rid of all the system vnodes.
1008 		 */
1009 	}
1010 	/*
1011 	 * Flush all the files.
1012 	 */
1013 	if ((error = vflush(mp, 0, flags, td)) != 0)
1014 		return (error);
1015 	/*
1016 	 * Flush filesystem metadata.
1017 	 */
1018 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, td);
1019 	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
1020 	VOP_UNLOCK(ump->um_devvp, 0, td);
1021 	return (error);
1022 }
1023 
1024 /*
1025  * Get filesystem statistics.
1026  */
1027 int
1028 ffs_statfs(mp, sbp, td)
1029 	struct mount *mp;
1030 	struct statfs *sbp;
1031 	struct thread *td;
1032 {
1033 	struct ufsmount *ump;
1034 	struct fs *fs;
1035 
1036 	ump = VFSTOUFS(mp);
1037 	fs = ump->um_fs;
1038 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1039 		panic("ffs_statfs");
1040 	sbp->f_version = STATFS_VERSION;
1041 	sbp->f_bsize = fs->fs_fsize;
1042 	sbp->f_iosize = fs->fs_bsize;
1043 	sbp->f_blocks = fs->fs_dsize;
1044 	UFS_LOCK(ump);
1045 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1046 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1047 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1048 	    dbtofsb(fs, fs->fs_pendingblocks);
1049 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1050 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1051 	UFS_UNLOCK(ump);
1052 	sbp->f_namemax = NAME_MAX;
1053 	return (0);
1054 }
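
/*
 * Note that f_bavail above is computed against the fs_minfree reserve
 * (via freespace()), so it can legitimately go negative once privileged
 * writes dip into the reserved space; df(1) then shows negative
 * available space for the filesystem.
 */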
1055 
1056 /*
1057  * Go through the disk queues to initiate sandbagged IO;
1058  * go through the inodes to write those that have been modified;
1059  * initiate the writing of the super block if it has been modified.
1060  *
1061  * Note: we are always called with the filesystem marked `MPBUSY'.
1062  */
1063 int
1064 ffs_sync(mp, waitfor, td)
1065 	struct mount *mp;
1066 	int waitfor;
1067 	struct thread *td;
1068 {
1069 	struct vnode *nvp, *vp, *devvp;
1070 	struct inode *ip;
1071 	struct ufsmount *ump = VFSTOUFS(mp);
1072 	struct fs *fs;
1073 	int error, count, wait, lockreq, allerror = 0;
1074 	struct bufobj *bo;
1075 
1076 	fs = ump->um_fs;
1077 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1078 		printf("fs = %s\n", fs->fs_fsmnt);
1079 		panic("ffs_sync: rofs mod");
1080 	}
1081 	/*
1082 	 * Write back each (modified) inode.
1083 	 */
1084 	wait = 0;
1085 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
1086 	if (waitfor == MNT_WAIT) {
1087 		wait = 1;
1088 		lockreq = LK_EXCLUSIVE;
1089 	}
1090 	lockreq |= LK_INTERLOCK;
1091 	MNT_ILOCK(mp);
1092 loop:
1093 	MNT_VNODE_FOREACH(vp, mp, nvp) {
1094 		/*
1095 		 * Depend on the mount interlock to keep things stable enough
1096 		 * for a quick test.  Since there might be hundreds of
1097 		 * thousands of vnodes, we cannot afford even a subroutine
1098 		 * call unless there's a good chance that we have work to do.
1099 		 */
1100 		VI_LOCK(vp);
1101 		if (vp->v_iflag & VI_XLOCK) {
1102 			VI_UNLOCK(vp);
1103 			continue;
1104 		}
1105 		ip = VTOI(vp);
1106 		if (vp->v_type == VNON || ((ip->i_flag &
1107 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
1108 		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
1109 			VI_UNLOCK(vp);
1110 			continue;
1111 		}
1112 		MNT_IUNLOCK(mp);
1113 		if ((error = vget(vp, lockreq, td)) != 0) {
1114 			MNT_ILOCK(mp);
1115 			if (error == ENOENT)
1116 				goto loop;
1117 			continue;
1118 		}
1119 		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
1120 			allerror = error;
1121 		VOP_UNLOCK(vp, 0, td);
1122 		vrele(vp);
1123 		MNT_ILOCK(mp);
1124 	}
1125 	MNT_IUNLOCK(mp);
1126 	/*
1127 	 * Force stale filesystem control information to be flushed.
1128 	 */
1129 	if (waitfor == MNT_WAIT) {
1130 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
1131 			allerror = error;
1132 		/* Flushed work items may create new vnodes to clean */
1133 		if (allerror == 0 && count) {
1134 			MNT_ILOCK(mp);
1135 			goto loop;
1136 		}
1137 	}
1138 #ifdef QUOTA
1139 	qsync(mp);
1140 #endif
1141 	devvp = ump->um_devvp;
1142 	VI_LOCK(devvp);
1143 	bo = &devvp->v_bufobj;
1144 	if (waitfor != MNT_LAZY &&
1145 	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
1146 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, td);
1147 		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
1148 			allerror = error;
1149 		VOP_UNLOCK(devvp, 0, td);
1150 		if (allerror == 0 && waitfor == MNT_WAIT) {
1151 			MNT_ILOCK(mp);
1152 			goto loop;
1153 		}
1154 	} else
1155 		VI_UNLOCK(devvp);
1156 	/*
1157 	 * Write back modified superblock.
1158 	 */
1159 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1160 		allerror = error;
1161 	return (allerror);
1162 }
1163 
1164 int
1165 ffs_vget(mp, ino, flags, vpp)
1166 	struct mount *mp;
1167 	ino_t ino;
1168 	int flags;
1169 	struct vnode **vpp;
1170 {
1171 	struct thread *td = curthread; 		/* XXX */
1172 	struct fs *fs;
1173 	struct inode *ip;
1174 	struct ufsmount *ump;
1175 	struct buf *bp;
1176 	struct vnode *vp;
1177 	struct cdev *dev;
1178 	int error;
1179 
1180 	ump = VFSTOUFS(mp);
1181 	dev = ump->um_dev;
1182 	fs = ump->um_fs;
1183 
1184 	/*
1185 	 * We do not lock vnode creation as it is believed to be too
1186 	 * expensive for such a rare case as the simultaneous creation of
1187 	 * a vnode for the same ino by different processes. We just allow
1188 	 * them to race and check later to decide who wins. Let the race begin!
1189 	 */
1190 	if ((error = ufs_ihashget(dev, ino, flags, vpp)) != 0)
1191 		return (error);
1192 	if (*vpp != NULL)
1193 		return (0);
1194 
1195 	/*
1196 	 * If this allocation is performed after the getnewvnode()
1197 	 * it might block, leaving a vnode with a NULL v_data to be
1198 	 * found by ffs_sync() if a sync happens to fire right then,
1199 	 * which will cause a panic because ffs_sync() blindly
1200 	 * dereferences vp->v_data (as well it should).
1201 	 */
1202 	ip = uma_zalloc(uma_inode, M_WAITOK);
1203 
1204 	/* Allocate a new vnode/inode. */
1205 	if (fs->fs_magic == FS_UFS1_MAGIC)
1206 		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
1207 	else
1208 		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
1209 	if (error) {
1210 		*vpp = NULL;
1211 		uma_zfree(uma_inode, ip);
1212 		return (error);
1213 	}
1214 	bzero((caddr_t)ip, sizeof(struct inode));
1215 	/*
1216 	 * FFS supports recursive locking.
1217 	 */
1218 	vp->v_vnlock->lk_flags |= LK_CANRECURSE;
1219 	vp->v_data = ip;
1220 	vp->v_bufobj.bo_bsize = fs->fs_bsize;
1221 	ip->i_vnode = vp;
1222 	ip->i_ump = ump;
1223 	ip->i_fs = fs;
1224 	ip->i_dev = dev;
1225 	ip->i_number = ino;
1226 #ifdef QUOTA
1227 	{
1228 		int i;
1229 		for (i = 0; i < MAXQUOTAS; i++)
1230 			ip->i_dquot[i] = NODQUOT;
1231 	}
1232 #endif
1233 	/*
1234 	 * Exclusively lock the vnode before adding it to the hash. Note
1235 	 * that we must not release or downgrade the lock (regardless of
1236 	 * what the flags argument says) until it is fully initialized.
1237 	 */
1238 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, (struct mtx *)0, td);
1239 
1240 	/*
1241 	 * Atomically (in terms of ufs_hash operations) check the hash for
1242 	 * a duplicate of the vnode being created and add it to the hash. If a
1243 	 * duplicate vnode was found, it will be vget()ed from hash for us.
1244 	 */
1245 	if ((error = ufs_ihashins(ip, flags, vpp)) != 0) {
1246 		vput(vp);
1247 		*vpp = NULL;
1248 		return (error);
1249 	}
1250 
1251 	/* If we lost the race, throw away our vnode and return the existing one. */
1252 	if (*vpp != NULL) {
1253 		vput(vp);
1254 		return (0);
1255 	}
1256 
1257 	/* Read in the disk contents for the inode, copy into the inode. */
1258 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1259 	    (int)fs->fs_bsize, NOCRED, &bp);
1260 	if (error) {
1261 		/*
1262 		 * The inode does not contain anything useful, so it would
1263 		 * be misleading to leave it on its hash chain. With mode
1264 		 * still zero, it will be unlinked and returned to the free
1265 		 * list by vput().
1266 		 */
1267 		brelse(bp);
1268 		vput(vp);
1269 		*vpp = NULL;
1270 		return (error);
1271 	}
1272 	if (ip->i_ump->um_fstype == UFS1)
1273 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
1274 	else
1275 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
1276 	ffs_load_inode(bp, ip, fs, ino);
1277 	if (DOINGSOFTDEP(vp))
1278 		softdep_load_inodeblock(ip);
1279 	else
1280 		ip->i_effnlink = ip->i_nlink;
1281 	bqrelse(bp);
1282 
1283 	/*
1284 	 * Initialize the vnode from the inode, check for aliases.
1285 	 * Note that the underlying vnode may have changed.
1286 	 */
1287 	if (ip->i_ump->um_fstype == UFS1)
1288 		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
1289 	else
1290 		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
1291 	if (error) {
1292 		vput(vp);
1293 		*vpp = NULL;
1294 		return (error);
1295 	}
1296 	/*
1297 	 * Finish inode initialization.
1298 	 */
1299 	VREF(ip->i_devvp);
1300 	/*
1301 	 * Set up a generation number for this inode if it does not
1302 	 * already have one. This should only happen on old filesystems.
1303 	 */
1304 	if (ip->i_gen == 0) {
1305 		ip->i_gen = arc4random() / 2 + 1;
1306 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
1307 			ip->i_flag |= IN_MODIFIED;
1308 			DIP_SET(ip, i_gen, ip->i_gen);
1309 		}
1310 	}
1311 	/*
1312 	 * Ensure that uid and gid are correct. This is a temporary
1313 	 * fix until fsck has been changed to do the update.
1314 	 */
1315 	if (fs->fs_magic == FS_UFS1_MAGIC &&		/* XXX */
1316 	    fs->fs_old_inodefmt < FS_44INODEFMT) {	/* XXX */
1317 		ip->i_uid = ip->i_din1->di_ouid;	/* XXX */
1318 		ip->i_gid = ip->i_din1->di_ogid;	/* XXX */
1319 	}						/* XXX */
1320 
1321 #ifdef MAC
1322 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
1323 		/*
1324 		 * If this vnode is already allocated, and we're running
1325 		 * multi-label, attempt to perform a label association
1326 		 * from the extended attributes on the inode.
1327 		 */
1328 		error = mac_associate_vnode_extattr(mp, vp);
1329 		if (error) {
1330 			/* ufs_inactive will release ip->i_devvp ref. */
1331 			vput(vp);
1332 			*vpp = NULL;
1333 			return (error);
1334 		}
1335 	}
1336 #endif
1337 
1338 	*vpp = vp;
1339 	return (0);
1340 }
1341 
1342 /*
1343  * File handle to vnode
1344  *
1345  * Have to be really careful about stale file handles:
1346  * - check that the inode number is valid
1347  * - call ffs_vget() to get the locked inode
1348  * - check for an unallocated inode (i_mode == 0)
1349  * - check that the given client host has export rights and return
1350  *   those rights via exflagsp and credanonp
1351  */
1352 int
1353 ffs_fhtovp(mp, fhp, vpp)
1354 	struct mount *mp;
1355 	struct fid *fhp;
1356 	struct vnode **vpp;
1357 {
1358 	struct ufid *ufhp;
1359 	struct fs *fs;
1360 
1361 	ufhp = (struct ufid *)fhp;
1362 	fs = VFSTOUFS(mp)->um_fs;
1363 	if (ufhp->ufid_ino < ROOTINO ||
1364 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1365 		return (ESTALE);
1366 	return (ufs_fhtovp(mp, ufhp, vpp));
1367 }
1368 
1369 /*
1370  * Vnode pointer to File handle
1371  */
1372 /* ARGSUSED */
1373 int
1374 ffs_vptofh(vp, fhp)
1375 	struct vnode *vp;
1376 	struct fid *fhp;
1377 {
1378 	struct inode *ip;
1379 	struct ufid *ufhp;
1380 
1381 	ip = VTOI(vp);
1382 	ufhp = (struct ufid *)fhp;
1383 	ufhp->ufid_len = sizeof(struct ufid);
1384 	ufhp->ufid_ino = ip->i_number;
1385 	ufhp->ufid_gen = ip->i_gen;
1386 	return (0);
1387 }
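
/*
 * The generation number copied into the handle above is what makes
 * stale handles detectable: if the inode is freed and reused, i_gen
 * changes, so a handle carrying the old ufid_gen is refused with
 * ESTALE when presented to ffs_fhtovp()/ufs_fhtovp().
 */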
1388 
1389 /*
1390  * Initialize the filesystem.
1391  */
1392 static int
1393 ffs_init(vfsp)
1394 	struct vfsconf *vfsp;
1395 {
1396 
1397 	softdep_initialize();
1398 	return (ufs_init(vfsp));
1399 }
1400 
1401 /*
1402  * Undo the work of ffs_init().
1403  */
1404 static int
1405 ffs_uninit(vfsp)
1406 	struct vfsconf *vfsp;
1407 {
1408 	int ret;
1409 
1410 	ret = ufs_uninit(vfsp);
1411 	softdep_uninitialize();
1412 	return (ret);
1413 }
1414 
1415 /*
1416  * Write a superblock and associated information back to disk.
1417  */
1418 static int
1419 ffs_sbupdate(mp, waitfor)
1420 	struct ufsmount *mp;
1421 	int waitfor;
1422 {
1423 	struct fs *fs = mp->um_fs;
1424 	struct buf *sbbp;
1425 	struct buf *bp;
1426 	int blks;
1427 	void *space;
1428 	int i, size, error, allerror = 0;
1429 
1430 	if (fs->fs_ronly == 1 &&
1431 	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1432 	    (MNT_RDONLY | MNT_UPDATE))
1433 		panic("ffs_sbupdate: write read-only filesystem");
1434 	/*
1435 	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1436 	 */
1437 	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
1438 	    0, 0, 0);
1439 	/*
1440 	 * First write back the summary information.
1441 	 */
1442 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1443 	space = fs->fs_csp;
1444 	for (i = 0; i < blks; i += fs->fs_frag) {
1445 		size = fs->fs_bsize;
1446 		if (i + fs->fs_frag > blks)
1447 			size = (blks - i) * fs->fs_fsize;
1448 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1449 		    size, 0, 0, 0);
1450 		bcopy(space, bp->b_data, (u_int)size);
1451 		space = (char *)space + size;
1452 		if (waitfor != MNT_WAIT)
1453 			bawrite(bp);
1454 		else if ((error = bwrite(bp)) != 0)
1455 			allerror = error;
1456 	}
1457 	/*
1458 	 * Now write back the superblock itself. If any errors occurred
1459 	 * up to this point, then fail so that the superblock avoids
1460 	 * being written out as clean.
1461 	 */
1462 	if (allerror) {
1463 		brelse(sbbp);
1464 		return (allerror);
1465 	}
1466 	bp = sbbp;
1467 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1468 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1469 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1470 		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS1);
1471 		fs->fs_sblockloc = SBLOCK_UFS1;
1472 	}
1473 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1474 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1475 		printf("%s: correcting fs_sblockloc from %jd to %d\n",
1476 		    fs->fs_fsmnt, (intmax_t)fs->fs_sblockloc, SBLOCK_UFS2);
1477 		fs->fs_sblockloc = SBLOCK_UFS2;
1478 	}
1479 	fs->fs_fmod = 0;
1480 	fs->fs_time = time_second;
1481 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1482 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
1483 	if (waitfor != MNT_WAIT)
1484 		bawrite(bp);
1485 	else if ((error = bwrite(bp)) != 0)
1486 		allerror = error;
1487 	return (allerror);
1488 }
1489 
1490 static int
1491 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
1492 	int attrnamespace, const char *attrname, struct thread *td)
1493 {
1494 
1495 #ifdef UFS_EXTATTR
1496 	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
1497 	    attrname, td));
1498 #else
1499 	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
1500 	    attrname, td));
1501 #endif
1502 }
1503 
1504 static void
1505 ffs_ifree(struct ufsmount *ump, struct inode *ip)
1506 {
1507 
1508 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
1509 		uma_zfree(uma_ufs1, ip->i_din1);
1510 	else if (ip->i_din2 != NULL)
1511 		uma_zfree(uma_ufs2, ip->i_din2);
1512 	uma_zfree(uma_inode, ip);
1513 }
1514 
1515 static int dobkgrdwrite = 1;
1516 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
1517     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1518 
1519 /*
1520  * Complete a background write started from bwrite.
1521  */
1522 static void
1523 ffs_backgroundwritedone(struct buf *bp)
1524 {
1525 	struct buf *origbp;
1526 
1527 	/*
1528 	 * Find the original buffer that we are writing.
1529 	 */
1530 	BO_LOCK(bp->b_bufobj);
1531 	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
1532 		panic("backgroundwritedone: lost buffer");
1533 	BO_UNLOCK(bp->b_bufobj);
1534 	/*
1535 	 * Process dependencies then return any unfinished ones.
1536 	 */
1537 	if (LIST_FIRST(&bp->b_dep) != NULL)
1538 		buf_complete(bp);
1539 #ifdef SOFTUPDATES
1540 	if (LIST_FIRST(&bp->b_dep) != NULL)
1541 		softdep_move_dependencies(bp, origbp);
1542 #endif
1543 
1544 	/*
1545 	 * This buffer is marked B_NOCACHE, so when it is released
1546 	 * by biodone, it will be tossed. We mark it with BIO_READ
1547 	 * to avoid biodone doing a second bufobj_wdrop.
1548 	 */
1549 	bp->b_flags |= B_NOCACHE;
1550 	bp->b_iocmd = BIO_READ;
1551 	bp->b_flags &= ~(B_CACHE | B_DONE);
1552 	bp->b_iodone = 0;
1553 	bufdone(bp);
1554 	BO_LOCK(origbp->b_bufobj);
1555 	/*
1556 	 * Clear the BV_BKGRDINPROG flag in the original buffer
1557 	 * and awaken it if it is waiting for the write to complete.
1558 	 * If BV_BKGRDINPROG is not set in the original buffer it must
1559 	 * have been released and re-instantiated - which is not legal.
1560 	 */
1561 	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
1562 	    ("backgroundwritedone: lost buffer2"));
1563 	origbp->b_vflags &= ~BV_BKGRDINPROG;
1564 	if (origbp->b_vflags & BV_BKGRDWAIT) {
1565 		origbp->b_vflags &= ~BV_BKGRDWAIT;
1566 		wakeup(&origbp->b_xflags);
1567 	}
1568 	BO_UNLOCK(origbp->b_bufobj);
1569 }
1570 
1571 
1572 /*
1573  * Write, release buffer on completion.  (Done by iodone
1574  * if async).  Do not bother writing anything if the buffer
1575  * is invalid.
1576  *
1577  * Note that we set B_CACHE here, indicating that buffer is
1578  * fully valid and thus cacheable.  This is true even of NFS
1579  * now so we set it generally.  This could be set either here
1580  * or in biodone() since the I/O is synchronous.  We put it
1581  * here.
1582  */
1583 static int
1584 ffs_bufwrite(struct buf *bp)
1585 {
1586 	int oldflags, s;
1587 	struct buf *newbp;
1588 
1589 	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
1590 	if (bp->b_flags & B_INVAL) {
1591 		brelse(bp);
1592 		return (0);
1593 	}
1594 
1595 	oldflags = bp->b_flags;
1596 
1597 	if (BUF_REFCNT(bp) == 0)
1598 		panic("bufwrite: buffer is not busy???");
1599 	s = splbio();
1600 	/*
1601 	 * If a background write is already in progress, delay
1602 	 * writing this block if it is asynchronous. Otherwise
1603 	 * wait for the background write to complete.
1604 	 */
1605 	BO_LOCK(bp->b_bufobj);
1606 	if (bp->b_vflags & BV_BKGRDINPROG) {
1607 		if (bp->b_flags & B_ASYNC) {
1608 			BO_UNLOCK(bp->b_bufobj);
1609 			splx(s);
1610 			bdwrite(bp);
1611 			return (0);
1612 		}
1613 		bp->b_vflags |= BV_BKGRDWAIT;
1614 		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
1615 		if (bp->b_vflags & BV_BKGRDINPROG)
1616 			panic("bufwrite: still writing");
1617 	}
1618 	BO_UNLOCK(bp->b_bufobj);
1619 
1620 	/* Mark the buffer clean */
1621 	bundirty(bp);
1622 
1623 	/*
1624 	 * If this buffer is marked for background writing and we
1625 	 * do not have to wait for it, make a copy and write the
1626 	 * copy so as to leave this buffer ready for further use.
1627 	 *
1628 	 * This optimization eats a lot of memory.  If we have a page
1629 	 * or buffer shortfall we can't do it.
1630 	 */
1631 	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
1632 	    (bp->b_flags & B_ASYNC) &&
1633 	    !vm_page_count_severe() &&
1634 	    !buf_dirty_count_severe()) {
1635 		KASSERT(bp->b_iodone == NULL,
1636 		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
1637 
1638 		/* get a new block */
1639 		newbp = geteblk(bp->b_bufsize);
1640 
1641 		/*
1642 		 * set it to be identical to the old block.  We have to
1643 		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
1644 		 * to avoid confusing the splay tree and gbincore().
1645 		 */
1646 		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
1647 		newbp->b_lblkno = bp->b_lblkno;
1648 		newbp->b_xflags |= BX_BKGRDMARKER;
1649 		BO_LOCK(bp->b_bufobj);
1650 		bp->b_vflags |= BV_BKGRDINPROG;
1651 		bgetvp(bp->b_vp, newbp);
1652 		BO_UNLOCK(bp->b_bufobj);
1653 		newbp->b_bufobj = &bp->b_vp->v_bufobj;
1654 		newbp->b_blkno = bp->b_blkno;
1655 		newbp->b_offset = bp->b_offset;
1656 		newbp->b_iodone = ffs_backgroundwritedone;
1657 		newbp->b_flags |= B_ASYNC;
1658 		newbp->b_flags &= ~B_INVAL;
1659 
1660 #ifdef SOFTUPDATES
1661 		/* move over the dependencies */
1662 		if (LIST_FIRST(&bp->b_dep) != NULL)
1663 			softdep_move_dependencies(bp, newbp);
1664 #endif
1665 
1666 		/*
1667 		 * Initiate write on the copy, release the original to
1668 		 * the B_LOCKED queue so that it cannot go away until
1669 		 * the background write completes. If not locked it could go
1670 		 * away and then be reconstituted while it was being written.
1671 		 * If the reconstituted buffer were written, we could end up
1672 		 * with two background copies being written at the same time.
1673 		 */
1674 		bqrelse(bp);
1675 		bp = newbp;
1676 	}
1677 
1678 	/* Let the normal bufwrite do the rest for us */
1679 	bufwrite(bp);
1680 
1681 	return (0);
1682 }
1683 
1684 
1685 static void
1686 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
1687 {
1688 
1689 #ifdef SOFTUPDATES
1690 	if (bp->b_iocmd == BIO_WRITE && softdep_disk_prewrite(bp))
1691 		return;
1692 #endif
1693 	g_vfs_strategy(bo, bp);
1694 }
1695