xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /*-
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_quota.h"
36 #include "opt_ufs.h"
37 #include "opt_ffs.h"
38 #include "opt_ddb.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/mount.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 
55 #include <security/mac/mac_framework.h>
56 
57 #include <ufs/ufs/extattr.h>
58 #include <ufs/ufs/gjournal.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66 
67 #include <vm/vm.h>
68 #include <vm/uma.h>
69 #include <vm/vm_page.h>
70 
71 #include <geom/geom.h>
72 #include <geom/geom_vfs.h>
73 
74 #include <ddb/ddb.h>
75 
76 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
77 
78 static int	ffs_reload(struct mount *, struct thread *);
79 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
80 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
81 		    ufs2_daddr_t);
82 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
83 static vfs_init_t ffs_init;
84 static vfs_uninit_t ffs_uninit;
85 static vfs_extattrctl_t ffs_extattrctl;
86 static vfs_cmount_t ffs_cmount;
87 static vfs_unmount_t ffs_unmount;
88 static vfs_mount_t ffs_mount;
89 static vfs_statfs_t ffs_statfs;
90 static vfs_fhtovp_t ffs_fhtovp;
91 static vfs_sync_t ffs_sync;
92 
/*
 * VFS operations vector for FFS.  Note that quotactl and root are
 * serviced directly by the generic UFS routines (ufs_quotactl,
 * ufs_root); everything else is FFS-specific.
 */
static struct vfsops ufs_vfsops = {
	.vfs_extattrctl =	ffs_extattrctl,
	.vfs_fhtovp =		ffs_fhtovp,
	.vfs_init =		ffs_init,
	.vfs_mount =		ffs_mount,
	.vfs_cmount =		ffs_cmount,
	.vfs_quotactl =		ufs_quotactl,
	.vfs_root =		ufs_root,
	.vfs_statfs =		ffs_statfs,
	.vfs_sync =		ffs_sync,
	.vfs_uninit =		ffs_uninit,
	.vfs_unmount =		ffs_unmount,
	.vfs_vget =		ffs_vget,
	.vfs_susp_clean =	process_deferred_inactive,
};

/* Register the filesystem with the kernel under the name "ufs". */
VFS_SET(ufs_vfsops, ufs, 0);
MODULE_VERSION(ufs, 1);
111 
static b_strategy_t ffs_geom_strategy;
static b_write_t ffs_bufwrite;

/*
 * Buffer operations installed on the device vnode of every FFS mount
 * (see the bo_ops assignment in ffs_mountfs()).  When snapshot support
 * is compiled out (NO_FFS_SNAPSHOT), the generic bufbdflush routine is
 * used in place of ffs_bdflush.
 */
static struct buf_ops ffs_ops = {
	.bop_name =	"FFS",
	.bop_write =	ffs_bufwrite,
	.bop_strategy =	ffs_geom_strategy,
	.bop_sync =	bufsync,
#ifdef NO_FFS_SNAPSHOT
	.bop_bdflush =	bufbdflush,
#else
	.bop_bdflush =	ffs_bdflush,
#endif
};
126 
/*
 * Note that userquota and groupquota options are not currently used
 * by UFS/FFS code and generally mount(8) does not pass those options
 * from userland, but they can be passed by loader(8) via
 * vfs.root.mountfrom.options.
 *
 * Any mount option not present in this NULL-terminated list is
 * rejected by the vfs_filteropt() check at the top of ffs_mount().
 */
static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
    "noclusterw", "noexec", "export", "force", "from", "groupquota",
    "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
    "nosymfollow", "sync", "union", "userquota", NULL };
137 
/*
 * VFS mount entry point for FFS.
 *
 * Handles three classes of requests:
 *   - a new mount: look up and validate the device name given in the
 *     "from" option, then call ffs_mountfs() to do the real work;
 *   - an update (MNT_UPDATE): r/w -> r/o downgrade, r/o -> r/w
 *     upgrade, MNT_RELOAD of on-disk state, and snapshot creation;
 *   - fsck "checker" control via the "fsckpid" option, which grants a
 *     specific pid write access to an otherwise read-only filesystem
 *     (fsckpid == -1 after parsing means "disable the checker").
 *
 * Returns 0 on success or an errno value; vfs_mount_error() supplies a
 * more descriptive message to userland where possible.
 */
static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp;
	struct thread *td;
	struct ufsmount *ump = 0;
	struct fs *fs;
	pid_t fsckpid = 0;
	int error, flags;
	u_int mntorflags;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;

	td = curthread;
	/* Reject any option that is not in the ffs_opts whitelist. */
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	/*
	 * First FFS mount on this system: create the UMA zones that are
	 * shared by all FFS mounts.
	 */
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
	}

	/* Quota options are accepted but unused; see comment on ffs_opts. */
	vfs_deleteopt(mp->mnt_optnew, "groupquota");
	vfs_deleteopt(mp->mnt_optnew, "userquota");

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	mntorflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
	    vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
		/*
		 * Once we have set the restricted PID, do not
		 * persist "fsckpid" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "fsckpid");
		vfs_deleteopt(mp->mnt_opt, "fsckpid");
		/*
		 * The checker may only be enabled on (or together with)
		 * a read-only mount.
		 */
		if (mp->mnt_flag & MNT_UPDATE) {
			if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
			     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
				vfs_mount_error(mp,
				    "Checker enable: Must be read-only");
				return (EINVAL);
			}
		} else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
			vfs_mount_error(mp,
			    "Checker enable: Must be read-only");
			return (EINVAL);
		}
		/* Set to -1 if we are done */
		if (fsckpid == 0)
			fsckpid = -1;
	}

	/* "acls" (POSIX.1e) and "nfsv4acls" cannot both be requested. */
	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
			vfs_mount_error(mp,
			    "\"acls\" and \"nfsv4acls\" options "
			    "are mutually exclusive");
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

	MNT_ILOCK(mp);
	mp->mnt_flag |= mntorflags;
	MNT_IUNLOCK(mp);
	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		devvp = ump->um_devvp;
		/*
		 * Checker disable request: flush everything, write the
		 * superblock, and give back the extra GEOM write count.
		 */
		if (fsckpid == -1 && ump->um_fsckpid > 0) {
			if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
			    (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
				return (error);
			DROP_GIANT();
			g_topology_lock();
			/*
			 * Return to normal read-only mode.
			 */
			error = g_access(ump->um_cp, 0, -1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			/*
			 * NOTE(review): any error from g_access() above is
			 * ignored here; um_fsckpid is cleared regardless.
			 */
			ump->um_fsckpid = 0;
		}
		/* R/W -> R/O downgrade requested. */
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			/*
			 * Loop until we win the race to observe the
			 * filesystem in the fully suspended state.
			 */
			for (;;) {
				vn_finished_write(mp);
				if ((error = vfs_write_suspend(mp)) != 0)
					return (error);
				MNT_ILOCK(mp);
				if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
					/*
					 * Allow the secondary writes
					 * to proceed.
					 */
					mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
					    MNTK_SUSPEND2);
					wakeup(&mp->mnt_flag);
					MNT_IUNLOCK(mp);
					/*
					 * Allow the curthread to
					 * ignore the suspension to
					 * synchronize on-disk state.
					 */
					td->td_pflags |= TDP_IGNSUSP;
					break;
				}
				MNT_IUNLOCK(mp);
				vn_start_write(NULL, &mp, V_WAIT);
			}
			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (MOUNTEDSOFTDEP(mp)) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				vfs_write_resume(mp);
				return (error);
			}
			/*
			 * Pending block/inode counts should have drained;
			 * warn and reset them if they did not.
			 */
			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("WARNING: %s Update error: blocks %jd "
				    "files %d\n", fs->fs_fsmnt,
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				vfs_write_resume(mp);
				return (error);
			}
			if (MOUNTEDSOFTDEP(mp))
				softdep_unmount(mp);
			DROP_GIANT();
			g_topology_lock();
			/*
			 * Drop our write and exclusive access.
			 */
			g_access(ump->um_cp, 0, -1, -1);
			g_topology_unlock();
			PICKUP_GIANT();
			fs->fs_ronly = 1;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, td)) != 0)
			return (error);
		/* R/O -> R/W upgrade requested. */
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If we are running a checker, do not allow upgrade.
			 */
			if (ump->um_fsckpid > 0) {
				vfs_mount_error(mp,
				    "Active checker, cannot upgrade to write");
				return (EINVAL);
			}
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			/*
			 * An unclean filesystem may only be upgraded when
			 * forced, or when journaling/fsck is not required
			 * and soft updates are in use.
			 */
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags &
				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not properly "
					   "dismounted\n", fs->fs_fsmnt);
				} else {
					vfs_mount_error(mp,
					   "R/W mount of %s denied. %s.%s",
					   fs->fs_fsmnt,
					   "Filesystem is not clean - run fsck",
					   (fs->fs_flags & FS_SUJ) == 0 ? "" :
					   " Forced mount will invalidate"
					   " journal contents");
					return (EPERM);
				}
			}
			DROP_GIANT();
			g_topology_lock();
			/*
			 * Request exclusive write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 1);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			fs->fs_ronly = 0;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);
			fs->fs_mtime = time_second;
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				vn_finished_write(mp);
				return (error);
			}
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				vn_finished_write(mp);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vn_finished_write(mp);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (MOUNTEDSOFTDEP(mp)) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}
		/*
		 * If this is a request from fsck to clean up the filesystem,
		 * then allow the specified pid to proceed.
		 */
		if (fsckpid > 0) {
			if (ump->um_fsckpid != 0) {
				vfs_mount_error(mp,
				    "Active checker already running on %s",
				    fs->fs_fsmnt);
				return (EINVAL);
			}
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			DROP_GIANT();
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error) {
				vfs_mount_error(mp,
				    "Checker activation failed on %s",
				    fs->fs_fsmnt);
				return (error);
			}
			ump->um_fsckpid = fsckpid;
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			fs->fs_mtime = time_second;
			fs->fs_fmod = 1;
			fs->fs_clean = 0;
			(void) ffs_sbupdate(ump, MNT_WAIT, 0);
		}

		/*
		 * If this is a snapshot request, take the snapshot.
		 */
		if (mp->mnt_flag & MNT_SNAPSHOT)
			return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(&ndp)) != 0)
		return (error);
	NDFREE(&ndp, NDF_ONLY_PNBUF);
	devvp = ndp.ni_vp;
	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update only
		 *
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 */

		if (devvp->v_rdev != ump->um_devvp->v_rdev)
			error = EINVAL;	/* needs translation */
		vput(devvp);
		if (error)
			return (error);
	} else {
		/*
		 * New mount
		 *
		 * We need the name for the mount point (also used for
		 * "last mounted on") copied in. If an error occurs,
		 * the mount point is discarded by the upper level code.
		 * Note that vfs_mount() populates f_mntonname for us.
		 */
		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
			vrele(devvp);
			return (error);
		}
		/* Checker activation on a freshly mounted filesystem. */
		if (fsckpid > 0) {
			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
			    ("soft updates enabled on read-only file system"));
			ump = VFSTOUFS(mp);
			fs = ump->um_fs;
			DROP_GIANT();
			g_topology_lock();
			/*
			 * Request write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 0);
			g_topology_unlock();
			PICKUP_GIANT();
			if (error) {
				printf("WARNING: %s: Checker activation "
				    "failed\n", fs->fs_fsmnt);
			} else {
				ump->um_fsckpid = fsckpid;
				if (fs->fs_snapinum[0] != 0)
					ffs_snapshot_mount(mp);
				fs->fs_mtime = time_second;
				fs->fs_clean = 0;
				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
			}
		}
	}
	/* Record the device name for statfs ("mounted from"). */
	vfs_mountedfrom(mp, fspec);
	return (0);
}
568 
569 /*
570  * Compatibility with old mount system call.
571  */
572 
573 static int
574 ffs_cmount(struct mntarg *ma, void *data, int flags)
575 {
576 	struct ufs_args args;
577 	struct export_args exp;
578 	int error;
579 
580 	if (data == NULL)
581 		return (EINVAL);
582 	error = copyin(data, &args, sizeof args);
583 	if (error)
584 		return (error);
585 	vfs_oexport_conv(&args.export, &exp);
586 
587 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
588 	ma = mount_arg(ma, "export", &exp, sizeof(exp));
589 	error = kernel_mount(ma, flags);
590 
591 	return (error);
592 }
593 
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 *
 * Returns 0 on success or an errno value; panics if dirty buffers are
 * found on a filesystem that is supposed to be read-only.
 */
static int
ffs_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	void *space;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
	int i, blks, size, error;
	int32_t *lp;

	/* Reload is only meaningful (and safe) on a read-only mount. */
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
	    NOCRED, &bp)) != 0)
		return (error);
	/* Sanity-check the freshly read superblock before trusting it. */
	newfs = (struct fs *)bp->b_data;
	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
	     newfs->fs_magic != FS_UFS2_MAGIC) ||
	    newfs->fs_bsize > MAXBSIZE ||
	    newfs->fs_bsize < sizeof(struct fs)) {
			brelse(bp);
			return (EIO);		/* XXX needs translation */
	}
	/*
	 * Copy pointer fields back into superblock before copying in	XXX
	 * new superblock. These should really be in the ufsmount.	XXX
	 * Note that important parameters (eg fs_ncg) are unchanged.
	 */
	newfs->fs_csp = fs->fs_csp;
	newfs->fs_maxcluster = fs->fs_maxcluster;
	newfs->fs_contigdirs = fs->fs_contigdirs;
	newfs->fs_active = fs->fs_active;
	/* The file system is still read-only. */
	newfs->fs_ronly = 1;
	sblockloc = fs->fs_sblockloc;
	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
	brelse(bp);
	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
	UFS_LOCK(ump);
	/* Pending counts should be zero after fsck; warn and clear. */
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: reload pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);

	/*
	 * Step 3: re-read summary information from disk.
	 */
	blks = howmany(fs->fs_cssize, fs->fs_fsize);
	space = fs->fs_csp;
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
		    NOCRED, &bp);
		if (error)
			return (error);
		bcopy(bp->b_data, space, (u_int)size);
		space = (char *)space + size;
		brelse(bp);
	}
	/*
	 * We no longer know anything about clusters per cylinder group.
	 */
	if (fs->fs_contigsumsize > 0) {
		lp = fs->fs_maxcluster;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
	}

	/*
	 * Steps 4-6: walk every vnode on the mount, flushing cached file
	 * data and re-reading inode data.  If vget() fails the iteration
	 * is aborted and restarted from scratch ("goto loop").
	 */
loop:
	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ABORT(mp, mvp);
			return (error);
		}
		ffs_load_inode(bp, ip, fs, ip->i_number);
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (0);
}
740 
/*
 * Possible superblock locations ordered from most to least likely,
 * terminated by -1 (see the sblock_try[i] != -1 loop in ffs_mountfs()).
 * SBLOCKSEARCH is presumably defined in <ufs/ffs/fs.h> -- verify there.
 */
static int sblock_try[] = SBLOCKSEARCH;
745 
746 /*
747  * Common code for mount and mountroot
748  */
749 static int
750 ffs_mountfs(devvp, mp, td)
751 	struct vnode *devvp;
752 	struct mount *mp;
753 	struct thread *td;
754 {
755 	struct ufsmount *ump;
756 	struct buf *bp;
757 	struct fs *fs;
758 	struct cdev *dev;
759 	void *space;
760 	ufs2_daddr_t sblockloc;
761 	int error, i, blks, size, ronly;
762 	int32_t *lp;
763 	struct ucred *cred;
764 	struct g_consumer *cp;
765 	struct mount *nmp;
766 
767 	bp = NULL;
768 	ump = NULL;
769 	cred = td ? td->td_ucred : NOCRED;
770 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
771 
772 	dev = devvp->v_rdev;
773 	dev_ref(dev);
774 	DROP_GIANT();
775 	g_topology_lock();
776 	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
777 	g_topology_unlock();
778 	PICKUP_GIANT();
779 	VOP_UNLOCK(devvp, 0);
780 	if (error)
781 		goto out;
782 	if (devvp->v_rdev->si_iosize_max != 0)
783 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
784 	if (mp->mnt_iosize_max > MAXPHYS)
785 		mp->mnt_iosize_max = MAXPHYS;
786 
787 	devvp->v_bufobj.bo_ops = &ffs_ops;
788 
789 	fs = NULL;
790 	sblockloc = 0;
791 	/*
792 	 * Try reading the superblock in each of its possible locations.
793 	 */
794 	for (i = 0; sblock_try[i] != -1; i++) {
795 		if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
796 			error = EINVAL;
797 			vfs_mount_error(mp,
798 			    "Invalid sectorsize %d for superblock size %d",
799 			    cp->provider->sectorsize, SBLOCKSIZE);
800 			goto out;
801 		}
802 		if ((error = bread(devvp, btodb(sblock_try[i]), SBLOCKSIZE,
803 		    cred, &bp)) != 0)
804 			goto out;
805 		fs = (struct fs *)bp->b_data;
806 		sblockloc = sblock_try[i];
807 		if ((fs->fs_magic == FS_UFS1_MAGIC ||
808 		     (fs->fs_magic == FS_UFS2_MAGIC &&
809 		      (fs->fs_sblockloc == sblockloc ||
810 		       (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0))) &&
811 		    fs->fs_bsize <= MAXBSIZE &&
812 		    fs->fs_bsize >= sizeof(struct fs))
813 			break;
814 		brelse(bp);
815 		bp = NULL;
816 	}
817 	if (sblock_try[i] == -1) {
818 		error = EINVAL;		/* XXX needs translation */
819 		goto out;
820 	}
821 	fs->fs_fmod = 0;
822 	fs->fs_flags &= ~FS_INDEXDIRS;	/* no support for directory indicies */
823 	fs->fs_flags &= ~FS_UNCLEAN;
824 	if (fs->fs_clean == 0) {
825 		fs->fs_flags |= FS_UNCLEAN;
826 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
827 		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
828 		     (fs->fs_flags & FS_DOSOFTDEP))) {
829 			printf("WARNING: %s was not properly dismounted\n",
830 			    fs->fs_fsmnt);
831 		} else {
832 			vfs_mount_error(mp, "R/W mount of %s denied. %s%s",
833 			    fs->fs_fsmnt, "Filesystem is not clean - run fsck.",
834 			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
835 			    " Forced mount will invalidate journal contents");
836 			error = EPERM;
837 			goto out;
838 		}
839 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
840 		    (mp->mnt_flag & MNT_FORCE)) {
841 			printf("WARNING: %s: lost blocks %jd files %d\n",
842 			    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
843 			    fs->fs_pendinginodes);
844 			fs->fs_pendingblocks = 0;
845 			fs->fs_pendinginodes = 0;
846 		}
847 	}
848 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
849 		printf("WARNING: %s: mount pending error: blocks %jd "
850 		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
851 		    fs->fs_pendinginodes);
852 		fs->fs_pendingblocks = 0;
853 		fs->fs_pendinginodes = 0;
854 	}
855 	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
856 #ifdef UFS_GJOURNAL
857 		/*
858 		 * Get journal provider name.
859 		 */
860 		size = 1024;
861 		mp->mnt_gjprovider = malloc(size, M_UFSMNT, M_WAITOK);
862 		if (g_io_getattr("GJOURNAL::provider", cp, &size,
863 		    mp->mnt_gjprovider) == 0) {
864 			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, size,
865 			    M_UFSMNT, M_WAITOK);
866 			MNT_ILOCK(mp);
867 			mp->mnt_flag |= MNT_GJOURNAL;
868 			MNT_IUNLOCK(mp);
869 		} else {
870 			printf("WARNING: %s: GJOURNAL flag on fs "
871 			    "but no gjournal provider below\n",
872 			    mp->mnt_stat.f_mntonname);
873 			free(mp->mnt_gjprovider, M_UFSMNT);
874 			mp->mnt_gjprovider = NULL;
875 		}
876 #else
877 		printf("WARNING: %s: GJOURNAL flag on fs but no "
878 		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
879 #endif
880 	} else {
881 		mp->mnt_gjprovider = NULL;
882 	}
883 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
884 	ump->um_cp = cp;
885 	ump->um_bo = &devvp->v_bufobj;
886 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK);
887 	if (fs->fs_magic == FS_UFS1_MAGIC) {
888 		ump->um_fstype = UFS1;
889 		ump->um_balloc = ffs_balloc_ufs1;
890 	} else {
891 		ump->um_fstype = UFS2;
892 		ump->um_balloc = ffs_balloc_ufs2;
893 	}
894 	ump->um_blkatoff = ffs_blkatoff;
895 	ump->um_truncate = ffs_truncate;
896 	ump->um_update = ffs_update;
897 	ump->um_valloc = ffs_valloc;
898 	ump->um_vfree = ffs_vfree;
899 	ump->um_ifree = ffs_ifree;
900 	ump->um_rdonly = ffs_rdonly;
901 	ump->um_snapgone = ffs_snapgone;
902 	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
903 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
904 	if (fs->fs_sbsize < SBLOCKSIZE)
905 		bp->b_flags |= B_INVAL | B_NOCACHE;
906 	brelse(bp);
907 	bp = NULL;
908 	fs = ump->um_fs;
909 	ffs_oldfscompat_read(fs, ump, sblockloc);
910 	fs->fs_ronly = ronly;
911 	size = fs->fs_cssize;
912 	blks = howmany(size, fs->fs_fsize);
913 	if (fs->fs_contigsumsize > 0)
914 		size += fs->fs_ncg * sizeof(int32_t);
915 	size += fs->fs_ncg * sizeof(u_int8_t);
916 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
917 	fs->fs_csp = space;
918 	for (i = 0; i < blks; i += fs->fs_frag) {
919 		size = fs->fs_bsize;
920 		if (i + fs->fs_frag > blks)
921 			size = (blks - i) * fs->fs_fsize;
922 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
923 		    cred, &bp)) != 0) {
924 			free(fs->fs_csp, M_UFSMNT);
925 			goto out;
926 		}
927 		bcopy(bp->b_data, space, (u_int)size);
928 		space = (char *)space + size;
929 		brelse(bp);
930 		bp = NULL;
931 	}
932 	if (fs->fs_contigsumsize > 0) {
933 		fs->fs_maxcluster = lp = space;
934 		for (i = 0; i < fs->fs_ncg; i++)
935 			*lp++ = fs->fs_contigsumsize;
936 		space = lp;
937 	}
938 	size = fs->fs_ncg * sizeof(u_int8_t);
939 	fs->fs_contigdirs = (u_int8_t *)space;
940 	bzero(fs->fs_contigdirs, size);
941 	fs->fs_active = NULL;
942 	mp->mnt_data = ump;
943 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
944 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
945 	nmp = NULL;
946 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
947 	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
948 		if (nmp)
949 			vfs_rel(nmp);
950 		vfs_getnewfsid(mp);
951 	}
952 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
953 	MNT_ILOCK(mp);
954 	mp->mnt_flag |= MNT_LOCAL;
955 	MNT_IUNLOCK(mp);
956 	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
957 #ifdef MAC
958 		MNT_ILOCK(mp);
959 		mp->mnt_flag |= MNT_MULTILABEL;
960 		MNT_IUNLOCK(mp);
961 #else
962 		printf("WARNING: %s: multilabel flag on fs but "
963 		    "no MAC support\n", mp->mnt_stat.f_mntonname);
964 #endif
965 	}
966 	if ((fs->fs_flags & FS_ACLS) != 0) {
967 #ifdef UFS_ACL
968 		MNT_ILOCK(mp);
969 
970 		if (mp->mnt_flag & MNT_NFS4ACLS)
971 			printf("WARNING: %s: ACLs flag on fs conflicts with "
972 			    "\"nfsv4acls\" mount option; option ignored\n",
973 			    mp->mnt_stat.f_mntonname);
974 		mp->mnt_flag &= ~MNT_NFS4ACLS;
975 		mp->mnt_flag |= MNT_ACLS;
976 
977 		MNT_IUNLOCK(mp);
978 #else
979 		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
980 		    mp->mnt_stat.f_mntonname);
981 #endif
982 	}
983 	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
984 #ifdef UFS_ACL
985 		MNT_ILOCK(mp);
986 
987 		if (mp->mnt_flag & MNT_ACLS)
988 			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
989 			    "with \"acls\" mount option; option ignored\n",
990 			    mp->mnt_stat.f_mntonname);
991 		mp->mnt_flag &= ~MNT_ACLS;
992 		mp->mnt_flag |= MNT_NFS4ACLS;
993 
994 		MNT_IUNLOCK(mp);
995 #else
996 		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
997 		    "ACLs support\n", mp->mnt_stat.f_mntonname);
998 #endif
999 	}
1000 	if ((fs->fs_flags & FS_TRIM) != 0) {
1001 		size = sizeof(int);
1002 		if (g_io_getattr("GEOM::candelete", cp, &size,
1003 		    &ump->um_candelete) == 0) {
1004 			if (!ump->um_candelete)
1005 				printf("WARNING: %s: TRIM flag on fs but disk "
1006 				    "does not support TRIM\n",
1007 				    mp->mnt_stat.f_mntonname);
1008 		} else {
1009 			printf("WARNING: %s: TRIM flag on fs but disk does "
1010 			    "not confirm that it supports TRIM\n",
1011 			    mp->mnt_stat.f_mntonname);
1012 			ump->um_candelete = 0;
1013 		}
1014 	}
1015 
1016 	ump->um_mountp = mp;
1017 	ump->um_dev = dev;
1018 	ump->um_devvp = devvp;
1019 	ump->um_nindir = fs->fs_nindir;
1020 	ump->um_bptrtodb = fs->fs_fsbtodb;
1021 	ump->um_seqinc = fs->fs_frag;
1022 	for (i = 0; i < MAXQUOTAS; i++)
1023 		ump->um_quotas[i] = NULLVP;
1024 #ifdef UFS_EXTATTR
1025 	ufs_extattr_uepm_init(&ump->um_extattr);
1026 #endif
1027 	/*
1028 	 * Set FS local "last mounted on" information (NULL pad)
1029 	 */
1030 	bzero(fs->fs_fsmnt, MAXMNTLEN);
1031 	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
1032 	mp->mnt_stat.f_iosize = fs->fs_bsize;
1033 
1034 	if (mp->mnt_flag & MNT_ROOTFS) {
1035 		/*
1036 		 * Root mount; update timestamp in mount structure.
1037 		 * this will be used by the common root mount code
1038 		 * to update the system clock.
1039 		 */
1040 		mp->mnt_time = fs->fs_time;
1041 	}
1042 
1043 	if (ronly == 0) {
1044 		fs->fs_mtime = time_second;
1045 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
1046 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
1047 			free(fs->fs_csp, M_UFSMNT);
1048 			ffs_flushfiles(mp, FORCECLOSE, td);
1049 			goto out;
1050 		}
1051 		if (fs->fs_snapinum[0] != 0)
1052 			ffs_snapshot_mount(mp);
1053 		fs->fs_fmod = 1;
1054 		fs->fs_clean = 0;
1055 		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
1056 	}
1057 	/*
1058 	 * Initialize filesystem stat information in mount struct.
1059 	 */
1060 	MNT_ILOCK(mp);
1061 	mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED |
1062 	    MNTK_EXTENDED_SHARED;
1063 	MNT_IUNLOCK(mp);
1064 #ifdef UFS_EXTATTR
1065 #ifdef UFS_EXTATTR_AUTOSTART
1066 	/*
1067 	 *
1068 	 * Auto-starting does the following:
1069 	 *	- check for /.attribute in the fs, and extattr_start if so
1070 	 *	- for each file in .attribute, enable that file with
1071 	 * 	  an attribute of the same name.
1072 	 * Not clear how to report errors -- probably eat them.
1073 	 * This would all happen while the filesystem was busy/not
1074 	 * available, so would effectively be "atomic".
1075 	 */
1076 	(void) ufs_extattr_autostart(mp, td);
1077 #endif /* !UFS_EXTATTR_AUTOSTART */
1078 #endif /* !UFS_EXTATTR */
1079 	return (0);
1080 out:
1081 	if (bp)
1082 		brelse(bp);
1083 	if (cp != NULL) {
1084 		DROP_GIANT();
1085 		g_topology_lock();
1086 		g_vfs_close(cp);
1087 		g_topology_unlock();
1088 		PICKUP_GIANT();
1089 	}
1090 	if (ump) {
1091 		mtx_destroy(UFS_MTX(ump));
1092 		if (mp->mnt_gjprovider != NULL) {
1093 			free(mp->mnt_gjprovider, M_UFSMNT);
1094 			mp->mnt_gjprovider = NULL;
1095 		}
1096 		free(ump->um_fs, M_UFSMNT);
1097 		free(ump, M_UFSMNT);
1098 		mp->mnt_data = NULL;
1099 	}
1100 	dev_rel(dev);
1101 	return (error);
1102 }
1103 
#include <sys/sysctl.h>
/*
 * Debug knob (debug.bigcgs sysctl): when set, ffs_oldfscompat_read()
 * substitutes fs_bsize for fs_cgsize (saving the real value in
 * fs_save_cgsize) and ffs_oldfscompat_write() restores it before the
 * superblock is written back.
 */
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
1107 
1108 /*
1109  * Sanity checks for loading old filesystem superblocks.
1110  * See ffs_oldfscompat_write below for unwound actions.
1111  *
1112  * XXX - Parts get retired eventually.
1113  * Unfortunately new bits get added.
1114  */
static void
ffs_oldfscompat_read(fs, ump, sblockloc)
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t sblockloc;
{
	off_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields:
	 * copy the narrow "old" fields into their wide UFS2 locations.
	 * fs_maxbsize != fs_bsize is used here as the "not yet converted"
	 * indicator.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	/*
	 * Pre-4.4BSD (pre-FS_44INODEFMT) UFS1 filesystems: cap the file
	 * size at 2^31-1 and derive the quick masks that such old
	 * superblocks did not carry.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	/*
	 * Clamp fs_maxfilesize for UFS1 to 2^31 blocks worth of data.
	 * The incoming value is saved so that ffs_oldfscompat_write()
	 * can restore it before the superblock goes back to disk.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems: supply default layout hints. */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
	/*
	 * debug.bigcgs knob: pretend cylinder groups occupy a full block;
	 * undone in ffs_oldfscompat_write().
	 */
	if (bigcgs) {
		fs->fs_save_cgsize = fs->fs_cgsize;
		fs->fs_cgsize = fs->fs_bsize;
	}
}
1167 
1168 /*
1169  * Unwinding superblock updates for old filesystems.
1170  * See ffs_oldfscompat_read above for details.
1171  *
1172  * XXX - Parts get retired eventually.
1173  * Unfortunately new bits get added.
1174  */
1175 void
1176 ffs_oldfscompat_write(fs, ump)
1177 	struct fs *fs;
1178 	struct ufsmount *ump;
1179 {
1180 
1181 	/*
1182 	 * Copy back UFS2 updated fields that UFS1 inspects.
1183 	 */
1184 	if (fs->fs_magic == FS_UFS1_MAGIC) {
1185 		fs->fs_old_time = fs->fs_time;
1186 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1187 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1188 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1189 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1190 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
1191 	}
1192 	if (bigcgs) {
1193 		fs->fs_cgsize = fs->fs_save_cgsize;
1194 		fs->fs_save_cgsize = 0;
1195 	}
1196 }
1197 
1198 /*
1199  * unmount system call
1200  */
1201 static int
1202 ffs_unmount(mp, mntflags)
1203 	struct mount *mp;
1204 	int mntflags;
1205 {
1206 	struct thread *td;
1207 	struct ufsmount *ump = VFSTOUFS(mp);
1208 	struct fs *fs;
1209 	int error, flags, susp;
1210 #ifdef UFS_EXTATTR
1211 	int e_restart;
1212 #endif
1213 
1214 	flags = 0;
1215 	td = curthread;
1216 	fs = ump->um_fs;
1217 	susp = 0;
1218 	if (mntflags & MNT_FORCE) {
1219 		flags |= FORCECLOSE;
1220 		susp = fs->fs_ronly != 0;
1221 	}
1222 #ifdef UFS_EXTATTR
1223 	if ((error = ufs_extattr_stop(mp, td))) {
1224 		if (error != EOPNOTSUPP)
1225 			printf("WARNING: unmount %s: ufs_extattr_stop "
1226 			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
1227 			    error);
1228 		e_restart = 0;
1229 	} else {
1230 		ufs_extattr_uepm_destroy(&ump->um_extattr);
1231 		e_restart = 1;
1232 	}
1233 #endif
1234 	if (susp) {
1235 		/*
1236 		 * dounmount already called vn_start_write().
1237 		 */
1238 		for (;;) {
1239 			vn_finished_write(mp);
1240 			if ((error = vfs_write_suspend(mp)) != 0)
1241 				return (error);
1242 			MNT_ILOCK(mp);
1243 			if (mp->mnt_kern_flag & MNTK_SUSPENDED) {
1244 				mp->mnt_kern_flag &= ~(MNTK_SUSPENDED |
1245 				    MNTK_SUSPEND2);
1246 				wakeup(&mp->mnt_flag);
1247 				MNT_IUNLOCK(mp);
1248 				td->td_pflags |= TDP_IGNSUSP;
1249 				break;
1250 			}
1251 			MNT_IUNLOCK(mp);
1252 			vn_start_write(NULL, &mp, V_WAIT);
1253 		}
1254 	}
1255 	if (MOUNTEDSOFTDEP(mp))
1256 		error = softdep_flushfiles(mp, flags, td);
1257 	else
1258 		error = ffs_flushfiles(mp, flags, td);
1259 	if (error != 0 && error != ENXIO)
1260 		goto fail;
1261 
1262 	UFS_LOCK(ump);
1263 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1264 		printf("WARNING: unmount %s: pending error: blocks %jd "
1265 		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
1266 		    fs->fs_pendinginodes);
1267 		fs->fs_pendingblocks = 0;
1268 		fs->fs_pendinginodes = 0;
1269 	}
1270 	UFS_UNLOCK(ump);
1271 	softdep_unmount(mp);
1272 	if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
1273 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
1274 		error = ffs_sbupdate(ump, MNT_WAIT, 0);
1275 		if (error && error != ENXIO) {
1276 			fs->fs_clean = 0;
1277 			goto fail;
1278 		}
1279 	}
1280 	if (susp) {
1281 		vfs_write_resume(mp);
1282 		vn_start_write(NULL, &mp, V_WAIT);
1283 	}
1284 	DROP_GIANT();
1285 	g_topology_lock();
1286 	if (ump->um_fsckpid > 0) {
1287 		/*
1288 		 * Return to normal read-only mode.
1289 		 */
1290 		error = g_access(ump->um_cp, 0, -1, 0);
1291 		ump->um_fsckpid = 0;
1292 	}
1293 	g_vfs_close(ump->um_cp);
1294 	g_topology_unlock();
1295 	PICKUP_GIANT();
1296 	vrele(ump->um_devvp);
1297 	dev_rel(ump->um_dev);
1298 	mtx_destroy(UFS_MTX(ump));
1299 	if (mp->mnt_gjprovider != NULL) {
1300 		free(mp->mnt_gjprovider, M_UFSMNT);
1301 		mp->mnt_gjprovider = NULL;
1302 	}
1303 	free(fs->fs_csp, M_UFSMNT);
1304 	free(fs, M_UFSMNT);
1305 	free(ump, M_UFSMNT);
1306 	mp->mnt_data = NULL;
1307 	MNT_ILOCK(mp);
1308 	mp->mnt_flag &= ~MNT_LOCAL;
1309 	MNT_IUNLOCK(mp);
1310 	return (error);
1311 
1312 fail:
1313 	if (susp) {
1314 		vfs_write_resume(mp);
1315 		vn_start_write(NULL, &mp, V_WAIT);
1316 	}
1317 #ifdef UFS_EXTATTR
1318 	if (e_restart) {
1319 		ufs_extattr_uepm_init(&ump->um_extattr);
1320 #ifdef UFS_EXTATTR_AUTOSTART
1321 		(void) ufs_extattr_autostart(mp, td);
1322 #endif
1323 	}
1324 #endif
1325 
1326 	return (error);
1327 }
1328 
1329 /*
1330  * Flush out all the files in a filesystem.
1331  */
int
ffs_flushfiles(mp, flags, td)
	struct mount *mp;
	int flags;
	struct thread *td;
{
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		/* First pass: flush everything except system vnodes. */
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		/* Turn quotas off, releasing the quota file vnodes. */
		for (i = 0; i < MAXQUOTAS; i++) {
			quotaoff(td, mp, i);
		}
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
#endif
	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		/* Unmount snapshots; afterwards everything must go. */
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}
	/*
	 * Flush all the files.
	 */
	if ((error = vflush(mp, 0, flags, td)) != 0)
		return (error);
	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp, 0);
	return (error);
}
1381 
1382 /*
1383  * Get filesystem statistics.
1384  */
1385 static int
1386 ffs_statfs(mp, sbp)
1387 	struct mount *mp;
1388 	struct statfs *sbp;
1389 {
1390 	struct ufsmount *ump;
1391 	struct fs *fs;
1392 
1393 	ump = VFSTOUFS(mp);
1394 	fs = ump->um_fs;
1395 	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
1396 		panic("ffs_statfs");
1397 	sbp->f_version = STATFS_VERSION;
1398 	sbp->f_bsize = fs->fs_fsize;
1399 	sbp->f_iosize = fs->fs_bsize;
1400 	sbp->f_blocks = fs->fs_dsize;
1401 	UFS_LOCK(ump);
1402 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
1403 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
1404 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
1405 	    dbtofsb(fs, fs->fs_pendingblocks);
1406 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
1407 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1408 	UFS_UNLOCK(ump);
1409 	sbp->f_namemax = NAME_MAX;
1410 	return (0);
1411 }
1412 
1413 /*
1414  * Go through the disk queues to initiate sandbagged IO;
1415  * go through the inodes to write those that have been modified;
1416  * initiate the writing of the super block if it has been modified.
1417  *
1418  * Note: we are always called with the filesystem marked `MPBUSY'.
1419  */
static int
ffs_sync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, wait, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	td = curthread;
	fs = ump->um_fs;
	/* A modified superblock on a read-only filesystem is a bug. */
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
		panic("%s: ffs_sync: modification on read-only filesystem",
		    fs->fs_fsmnt);
	/*
	 * Write back each (modified) inode.
	 */
	wait = 0;
	suspend = 0;
	suspended = 0;
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	/* MNT_SUSPEND is handled as a synchronous sync plus suspension. */
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	if (waitfor == MNT_WAIT) {
		wait = 1;
		lockreq = LK_EXCLUSIVE;
	}
	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
	MNT_ILOCK(mp);
loop:
	/* Grab snapshot of secondary write counts */
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;

	/* Grab snapshot of softdep dependency counts */
	MNT_IUNLOCK(mp);
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
	MNT_ILOCK(mp);

	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/*
		 * Depend on the mntvnode_slock to keep things stable enough
		 * for a quick test.  Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			continue;
		}
		/* Skip vnodes with neither dirty inode flags nor buffers. */
		ip = VTOI(vp);
		if (vp->v_type == VNON || ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0)) {
			VI_UNLOCK(vp);
			continue;
		}
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			/* Vnode was recycled or lock lost: restart scan. */
			if (error == ENOENT || error == ENOLCK) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = ffs_syncvnode(vp, waitfor)) != 0)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count) {
			MNT_ILOCK(mp);
			goto loop;
		}
	}
#ifdef QUOTA
	qsync(mp);
#endif
	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (waitfor != MNT_LAZY &&
	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
		/* Device has pending I/O: fsync it and rescan if waiting. */
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(devvp, 0);
		if (allerror == 0 && waitfor == MNT_WAIT) {
			MNT_ILOCK(mp);
			goto loop;
		}
	} else if (suspend != 0) {
		/*
		 * Device is clean; check whether the counts snapshotted
		 * at "loop" changed during the scan.  If so, more work
		 * is needed before the filesystem can be suspended.
		 */
		if (softdep_check_suspend(mp,
					  devvp,
					  softdep_deps,
					  softdep_accdeps,
					  secondary_writes,
					  secondary_accwrites) != 0)
			goto loop;	/* More work needed */
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	return (allerror);
}
1556 
1557 int
1558 ffs_vget(mp, ino, flags, vpp)
1559 	struct mount *mp;
1560 	ino_t ino;
1561 	int flags;
1562 	struct vnode **vpp;
1563 {
1564 	return (ffs_vgetf(mp, ino, flags, vpp, 0));
1565 }
1566 
/*
 * Look up or create the vnode for inode number "ino" on mount "mp",
 * returning it locked in *vpp.  "ffs_flags" carries FFS-private flags
 * (e.g. FFSV_FORCEINSMQ).  Returns 0 or an errno; on error *vpp is NULL.
 */
int
ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
	int ffs_flags;
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	int error;

	/* Fast path: the vnode may already be in the hash. */
	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	fs = ump->um_fs;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ffs_sync() if a sync happens to fire right then,
	 * which will cause a panic because ffs_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if (fs->fs_magic == FS_UFS1_MAGIC)
		error = getnewvnode("ufs", mp, &ffs_vnodeops1, &vp);
	else
		error = getnewvnode("ufs", mp, &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_fs = fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		/* insmntque() disposed of the vnode; free only the inode. */
		uma_zfree(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	/* If we lost the creation race, return the winner's vnode. */
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	if (ip->i_ump->um_fstype == UFS1)
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
	ffs_load_inode(bp, ip, fs, ino);
	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if (ip->i_ump->um_fstype == UFS1)
		error = ufs_vinit(mp, &ffs_fifoops1, &vp);
	else
		error = ufs_vinit(mp, &ffs_fifoops2, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = arc4random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			ip->i_flag |= IN_MODIFIED;
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	*vpp = vp;
	return (0);
}
1739 
1740 /*
1741  * File handle to vnode
1742  *
1743  * Have to be really careful about stale file handles:
1744  * - check that the inode number is valid
1745  * - call ffs_vget() to get the locked inode
1746  * - check for an unallocated inode (i_mode == 0)
1747  * - check that the given client host has export rights and return
1748  *   those rights via. exflagsp and credanonp
1749  */
1750 static int
1751 ffs_fhtovp(mp, fhp, flags, vpp)
1752 	struct mount *mp;
1753 	struct fid *fhp;
1754 	int flags;
1755 	struct vnode **vpp;
1756 {
1757 	struct ufid *ufhp;
1758 	struct fs *fs;
1759 
1760 	ufhp = (struct ufid *)fhp;
1761 	fs = VFSTOUFS(mp)->um_fs;
1762 	if (ufhp->ufid_ino < ROOTINO ||
1763 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1764 		return (ESTALE);
1765 	return (ufs_fhtovp(mp, ufhp, flags, vpp));
1766 }
1767 
1768 /*
1769  * Initialize the filesystem.
1770  */
static int
ffs_init(struct vfsconf *vfsp)
{

	/* Bring up soft updates first, then the generic UFS layer. */
	softdep_initialize();
	return (ufs_init(vfsp));
}
1779 
1780 /*
1781  * Undo the work of ffs_init().
1782  */
static int
ffs_uninit(struct vfsconf *vfsp)
{
	int error;

	/* Tear down in the reverse order of ffs_init(). */
	error = ufs_uninit(vfsp);
	softdep_uninitialize();
	return (error);
}
1793 
1794 /*
1795  * Write a superblock and associated information back to disk.
1796  */
1797 int
1798 ffs_sbupdate(ump, waitfor, suspended)
1799 	struct ufsmount *ump;
1800 	int waitfor;
1801 	int suspended;
1802 {
1803 	struct fs *fs = ump->um_fs;
1804 	struct buf *sbbp;
1805 	struct buf *bp;
1806 	int blks;
1807 	void *space;
1808 	int i, size, error, allerror = 0;
1809 
1810 	if (fs->fs_ronly == 1 &&
1811 	    (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
1812 	    (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
1813 		panic("ffs_sbupdate: write read-only filesystem");
1814 	/*
1815 	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
1816 	 */
1817 	sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
1818 	    (int)fs->fs_sbsize, 0, 0, 0);
1819 	/*
1820 	 * First write back the summary information.
1821 	 */
1822 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1823 	space = fs->fs_csp;
1824 	for (i = 0; i < blks; i += fs->fs_frag) {
1825 		size = fs->fs_bsize;
1826 		if (i + fs->fs_frag > blks)
1827 			size = (blks - i) * fs->fs_fsize;
1828 		bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1829 		    size, 0, 0, 0);
1830 		bcopy(space, bp->b_data, (u_int)size);
1831 		space = (char *)space + size;
1832 		if (suspended)
1833 			bp->b_flags |= B_VALIDSUSPWRT;
1834 		if (waitfor != MNT_WAIT)
1835 			bawrite(bp);
1836 		else if ((error = bwrite(bp)) != 0)
1837 			allerror = error;
1838 	}
1839 	/*
1840 	 * Now write back the superblock itself. If any errors occurred
1841 	 * up to this point, then fail so that the superblock avoids
1842 	 * being written out as clean.
1843 	 */
1844 	if (allerror) {
1845 		brelse(sbbp);
1846 		return (allerror);
1847 	}
1848 	bp = sbbp;
1849 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
1850 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1851 		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
1852 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
1853 		fs->fs_sblockloc = SBLOCK_UFS1;
1854 	}
1855 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
1856 	    (fs->fs_flags & FS_FLAGS_UPDATED) == 0) {
1857 		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
1858 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
1859 		fs->fs_sblockloc = SBLOCK_UFS2;
1860 	}
1861 	fs->fs_fmod = 0;
1862 	fs->fs_time = time_second;
1863 	if (fs->fs_flags & FS_DOSOFTDEP)
1864 		softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
1865 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1866 	ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
1867 	if (suspended)
1868 		bp->b_flags |= B_VALIDSUSPWRT;
1869 	if (waitfor != MNT_WAIT)
1870 		bawrite(bp);
1871 	else if ((error = bwrite(bp)) != 0)
1872 		allerror = error;
1873 	return (allerror);
1874 }
1875 
static int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
	int attrnamespace, const char *attrname)
{

	/*
	 * Extended attribute control: dispatch to the UFS implementation
	 * when UFS_EXTATTR is compiled in, otherwise fall back to the
	 * generic VFS handler.
	 */
#ifdef UFS_EXTATTR
	return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#else
	return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
	    attrname));
#endif
}
1889 
static void
ffs_ifree(struct ufsmount *ump, struct inode *ip)
{

	/*
	 * Return the dinode to its zone: the UFS1 dinode when this is a
	 * UFS1 mount and one is attached, otherwise the UFS2 dinode if
	 * attached.  Then free the in-core inode itself.
	 */
	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL)
		uma_zfree(uma_ufs1, ip->i_din1);
	else if (ip->i_din2 != NULL)
		uma_zfree(uma_ufs2, ip->i_din2);
	uma_zfree(uma_inode, ip);
}
1900 
/* Knob to disable the background-write optimization in ffs_bufwrite(). */
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
    "Do background writes (honoring the BV_BKGRDWRITE flag)?");
1904 
1905 /*
1906  * Complete a background write started from bwrite.
1907  */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

	/*
	 * bp is the temporary copy created by ffs_bufwrite().
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");
	/* Grab an extra reference to be dropped by the bufdone() below. */
	bufobj_wrefl(bufobj);
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep))
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~B_CACHE;
	bufdone(bp);
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}
1956 
1957 
1958 /*
1959  * Write, release buffer on completion.  (Done by iodone
1960  * if async).  Do not bother writing anything if the buffer
1961  * is invalid.
1962  *
1963  * Note that we set B_CACHE here, indicating that buffer is
1964  * fully valid and thus cacheable.  This is true even of NFS
1965  * now so we set it generally.  This could be set either here
1966  * or in biodone() since the I/O is synchronous.  We put it
1967  * here.
1968  */
static int
ffs_bufwrite(struct buf *bp)
{
	int oldflags, s;
	struct buf *newbp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	oldflags = bp->b_flags;

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	s = splbio();
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			splx(s);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		msleep(&bp->b_xflags, BO_MTX(bp->b_bufobj), PRIBIO, "bwrbg", 0);
		/* ffs_backgroundwritedone() must have cleared the flag. */
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	BO_UNLOCK(bp->b_bufobj);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory.  If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
		if (newbp == NULL)
			goto normal_write;

		/*
		 * set it to be identical to the old block.  We have to
		 * set b_lblkno and BKGRDMARKER before calling bgetvp()
		 * to avoid confusing the splay tree and gbincore().
		 */
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_xflags |= BX_BKGRDMARKER;
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		bgetvp(bp->b_vp, newbp);
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_bufobj = &bp->b_vp->v_bufobj;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		/* The copy cleans up after itself via this completion hook. */
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;

#ifdef SOFTUPDATES
		/*
		 * Move over the dependencies.  If there are rollbacks,
		 * leave the parent buffer dirtied as it will need to
		 * be written again.
		 */
		if (LIST_EMPTY(&bp->b_dep) ||
		    softdep_move_dependencies(bp, newbp) == 0)
			bundirty(bp);
#else
		bundirty(bp);
#endif

		/*
		 * Initiate write on the copy, release the original to
		 * the B_LOCKED queue so that it cannot go away until
		 * the background write completes. If not locked it could go
		 * away and then be reconstituted while it was being written.
		 * If the reconstituted buffer were written, we could end up
		 * with two background copies being written at the same time.
		 */
		bqrelse(bp);
		bp = newbp;
	} else
		/* Mark the buffer clean */
		bundirty(bp);


	/* Let the normal bufwrite do the rest for us */
normal_write:
	return (bufwrite(bp));
}
2077 
2078 
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	int error;
	struct buf *tbp;
	int nocopy;

	vp = bo->__bo_vnode;
	if (bp->b_iocmd == BIO_WRITE) {
		/*
		 * Writes to a suspended filesystem must carry
		 * B_VALIDSUSPWRT; anything else indicates a bug.
		 */
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		nocopy = bp->b_flags & B_NOCOPY;
		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
		/*
		 * If snapshots exist, give each buffer (or each member of
		 * a cluster) a chance to be copied-on-write before it is
		 * overwritten on disk.
		 */
		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				runningbufwakeup(bp);
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bufdone(bp);
						return;
					}
				}
				bp->b_runningbufspace = bp->b_bufsize;
				atomic_add_long(&runningbufspace,
					       bp->b_runningbufspace);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		/* Let soft updates prepare (roll back) dependent buffers. */
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}

#endif
	}
	g_vfs_strategy(bo, bp);
}
2139 
2140 #ifdef	DDB
2141 
2142 static void
2143 db_print_ffs(struct ufsmount *ump)
2144 {
2145 	db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
2146 	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
2147 	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
2148 	    ump->softdep_deps, ump->softdep_req);
2149 }
2150 
2151 DB_SHOW_COMMAND(ffs, db_show_ffs)
2152 {
2153 	struct mount *mp;
2154 	struct ufsmount *ump;
2155 
2156 	if (have_addr) {
2157 		ump = VFSTOUFS((struct mount *)addr);
2158 		db_print_ffs(ump);
2159 		return;
2160 	}
2161 
2162 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2163 		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
2164 			db_print_ffs(VFSTOUFS(mp));
2165 	}
2166 }
2167 
2168 #endif	/* DDB */
2169