xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision d877622a0d93fa5f92d91a667ab79cfcc47b647d)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $Id: ffs_vfsops.c,v 1.60 1997/10/16 10:49:33 phk Exp $
35  */
36 
37 #include "opt_quota.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
46 #include <sys/buf.h>
47 #include <sys/conf.h>
48 #include <sys/fcntl.h>
49 #include <sys/disklabel.h>
50 #include <sys/malloc.h>
51 
52 #include <miscfs/specfs/specdev.h>
53 
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/ufsmount.h>
56 #include <ufs/ufs/inode.h>
57 #include <ufs/ufs/ufs_extern.h>
58 
59 #include <ufs/ffs/fs.h>
60 #include <ufs/ffs/ffs_extern.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_prot.h>
64 #include <vm/vm_page.h>
65 #include <vm/vm_extern.h>
66 
/*
 * Malloc type that ffs_mount() hands to ffs_mountfs(); ffs_mountfs() stores
 * it in the ufsmount and ffs_vget() uses it when allocating in-core inodes.
 */
static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");

/* Forward declarations of the file-local routines defined below. */
static int	ffs_sbupdate __P((struct ufsmount *, int));
static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
static int	ffs_oldfscompat __P((struct fs *));
static int	ffs_mount __P((struct mount *, char *, caddr_t,
				struct nameidata *, struct proc *));
static int	ffs_init __P((struct vfsconf *));

/*
 * VFS operations vector for this filesystem.  The initializer is
 * positional, so the entries must stay in the member order of
 * struct vfsops (declared outside this file).  The ffs_* entries are
 * defined in this file; the ufs_* entries come from elsewhere.
 */
struct vfsops ufs_vfsops = {
	ffs_mount,
	ufs_start,
	ffs_unmount,
	ufs_root,
	ufs_quotactl,
	ffs_statfs,
	ffs_sync,
	ffs_vget,
	ffs_fhtovp,
	ffs_vptofh,
	ffs_init,
};

/*
 * NOTE(review): presumably registers the vector above as the "ufs"
 * filesystem type; the exact meaning of the arguments is defined by the
 * VFS_SET macro, which is not visible here.
 */
VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
91 
92 /*
93  * ffs_mount
94  *
95  * Called when mounting local physical media
96  *
97  * PARAMETERS:
98  *		mountroot
99  *			mp	mount point structure
100  *			path	NULL (flag for root mount!!!)
101  *			data	<unused>
102  *			ndp	<unused>
103  *			p	process (user credentials check [statfs])
104  *
105  *		mount
106  *			mp	mount point structure
107  *			path	path to mount point
108  *			data	pointer to argument struct in user space
109  *			ndp	mount point namei() return (used for
110  *				credentials on reload), reused to look
111  *				up block device.
112  *			p	process (user credentials check)
113  *
114  * RETURNS:	0	Success
115  *		!0	error number (errno.h)
116  *
117  * LOCK STATE:
118  *
119  *		ENTRY
120  *			mount point is locked
121  *		EXIT
122  *			mount point is locked
123  *
124  * NOTES:
125  *		A NULL path can be used for a flag since the mount
126  *		system call will fail with EFAULT in copyinstr in
127  *		namei() if it is a genuine NULL from the user.
128  */
129 static int
130 ffs_mount( mp, path, data, ndp, p)
131         struct mount		*mp;	/* mount struct pointer*/
132         char			*path;	/* path to mount point*/
133         caddr_t			data;	/* arguments to FS specific mount*/
134         struct nameidata	*ndp;	/* mount point credentials*/
135         struct proc		*p;	/* process requesting mount*/
136 {
137 	u_int		size;
138 	int		err = 0;
139 	struct vnode	*devvp;
140 
141 	struct ufs_args args;
142 	struct ufsmount *ump = 0;
143 	register struct fs *fs;
144 	int flags;
145 
146 	/*
147 	 * Use NULL path to flag a root mount
148 	 */
149 	if( path == NULL) {
150 		/*
151 		 ***
152 		 * Mounting root file system
153 		 ***
154 		 */
155 
156 		if ((err = bdevvp(rootdev, &rootvp))) {
157 			printf("ffs_mountroot: can't find rootvp");
158 			return (err);
159 		}
160 
161 		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
162 			mp->mnt_flag |= MNT_NOCLUSTERR;
163 		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW)
164 			mp->mnt_flag |= MNT_NOCLUSTERW;
165 		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
166 			/* fs specific cleanup (if any)*/
167 			goto error_1;
168 		}
169 
170 		goto dostatfs;		/* success*/
171 
172 	}
173 
174 	/*
175 	 ***
176 	 * Mounting non-root file system or updating a file system
177 	 ***
178 	 */
179 
180 	/* copy in user arguments*/
181 	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
182 	if (err)
183 		goto error_1;		/* can't get arguments*/
184 
185 	/*
186 	 * If updating, check whether changing from read-only to
187 	 * read/write; if there is no device name, that's all we do.
188 	 * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags,
189 	 * if block device requests.
190 	 */
191 	if (mp->mnt_flag & MNT_UPDATE) {
192 		ump = VFSTOUFS(mp);
193 		fs = ump->um_fs;
194 		err = 0;
195 		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR)
196 			mp->mnt_flag |= MNT_NOCLUSTERR;
197 		if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW)
198 			mp->mnt_flag |= MNT_NOCLUSTERW;
199 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
200 			flags = WRITECLOSE;
201 			if (mp->mnt_flag & MNT_FORCE)
202 				flags |= FORCECLOSE;
203 			err = ffs_flushfiles(mp, flags, p);
204 		}
205 		if (!err && (mp->mnt_flag & MNT_RELOAD))
206 			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
207 		if (err) {
208 			goto error_1;
209 		}
210 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
211 			if (!fs->fs_clean) {
212 				if (mp->mnt_flag & MNT_FORCE) {
213 					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
214 				} else {
215 					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
216 					    fs->fs_fsmnt);
217 					err = EPERM;
218 					goto error_1;
219 				}
220 			}
221 			fs->fs_ronly = 0;
222 		}
223 		if (fs->fs_ronly == 0) {
224 			fs->fs_clean = 0;
225 			ffs_sbupdate(ump, MNT_WAIT);
226 		}
227 		/* if not updating name...*/
228 		if (args.fspec == 0) {
229 			/*
230 			 * Process export requests.  Jumping to "success"
231 			 * will return the vfs_export() error code.
232 			 */
233 			err = vfs_export(mp, &ump->um_export, &args.export);
234 			goto success;
235 		}
236 	}
237 
238 	/*
239 	 * Not an update, or updating the name: look up the name
240 	 * and verify that it refers to a sensible block device.
241 	 */
242 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
243 	err = namei(ndp);
244 	if (err) {
245 		/* can't get devvp!*/
246 		goto error_1;
247 	}
248 
249 	devvp = ndp->ni_vp;
250 
251 	if (devvp->v_type != VBLK) {
252 		err = ENOTBLK;
253 		goto error_2;
254 	}
255 	if (major(devvp->v_rdev) >= nblkdev) {
256 		err = ENXIO;
257 		goto error_2;
258 	}
259 	if (mp->mnt_flag & MNT_UPDATE) {
260 		/*
261 		 ********************
262 		 * UPDATE
263 		 ********************
264 		 */
265 
266 		if (devvp != ump->um_devvp)
267 			err = EINVAL;	/* needs translation */
268 		else
269 			vrele(devvp);
270 		/*
271 		 * Update device name only on success
272 		 */
273 		if( !err) {
274 			/* Save "mounted from" info for mount point (NULL pad)*/
275 			copyinstr(	args.fspec,
276 					mp->mnt_stat.f_mntfromname,
277 					MNAMELEN - 1,
278 					&size);
279 			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
280 		}
281 	} else {
282 		/*
283 		 ********************
284 		 * NEW MOUNT
285 		 ********************
286 		 */
287 
288 		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
289 			mp->mnt_flag |= MNT_NOCLUSTERR;
290 		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW)
291 			mp->mnt_flag |= MNT_NOCLUSTERW;
292 
293 		/*
294 		 * Since this is a new mount, we want the names for
295 		 * the device and the mount point copied in.  If an
296 		 * error occurs,  the mountpoint is discarded by the
297 		 * upper level code.
298 		 */
299 		/* Save "last mounted on" info for mount point (NULL pad)*/
300 		copyinstr(	path,				/* mount point*/
301 				mp->mnt_stat.f_mntonname,	/* save area*/
302 				MNAMELEN - 1,			/* max size*/
303 				&size);				/* real size*/
304 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
305 
306 		/* Save "mounted from" info for mount point (NULL pad)*/
307 		copyinstr(	args.fspec,			/* device name*/
308 				mp->mnt_stat.f_mntfromname,	/* save area*/
309 				MNAMELEN - 1,			/* max size*/
310 				&size);				/* real size*/
311 		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
312 
313 		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
314 	}
315 	if (err) {
316 		goto error_2;
317 	}
318 
319 dostatfs:
320 	/*
321 	 * Initialize FS stat information in mount struct; uses both
322 	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
323 	 *
324 	 * This code is common to root and non-root mounts
325 	 */
326 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
327 
328 	goto success;
329 
330 
331 error_2:	/* error with devvp held*/
332 
333 	/* release devvp before failing*/
334 	vrele(devvp);
335 
336 error_1:	/* no state to back out*/
337 
338 success:
339 	return( err);
340 }
341 
342 /*
343  * Reload all incore data for a filesystem (used after running fsck on
344  * the root filesystem and finding things to fix). The filesystem must
345  * be mounted read-only.
346  *
347  * Things to do to update the mount:
348  *	1) invalidate all cached meta-data.
349  *	2) re-read superblock from disk.
350  *	3) re-read summary information from disk.
351  *	4) invalidate all inactive vnodes.
352  *	5) invalidate all cached file data.
353  *	6) re-read inode data for all active vnodes.
354  */
355 static int
356 ffs_reload(mp, cred, p)
357 	register struct mount *mp;
358 	struct ucred *cred;
359 	struct proc *p;
360 {
361 	register struct vnode *vp, *nvp, *devvp;
362 	struct inode *ip;
363 	struct csum *space;
364 	struct buf *bp;
365 	struct fs *fs, *newfs;
366 	struct partinfo dpart;
367 	int i, blks, size, error;
368 	int32_t *lp;
369 
370 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
371 		return (EINVAL);
372 	/*
373 	 * Step 1: invalidate all cached meta-data.
374 	 */
375 	devvp = VFSTOUFS(mp)->um_devvp;
376 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
377 		panic("ffs_reload: dirty1");
378 	/*
379 	 * Step 2: re-read superblock from disk.
380 	 */
381 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
382 		size = DEV_BSIZE;
383 	else
384 		size = dpart.disklab->d_secsize;
385 	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
386 		return (error);
387 	newfs = (struct fs *)bp->b_data;
388 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
389 		newfs->fs_bsize < sizeof(struct fs)) {
390 			brelse(bp);
391 			return (EIO);		/* XXX needs translation */
392 	}
393 	fs = VFSTOUFS(mp)->um_fs;
394 	/*
395 	 * Copy pointer fields back into superblock before copying in	XXX
396 	 * new superblock. These should really be in the ufsmount.	XXX
397 	 * Note that important parameters (eg fs_ncg) are unchanged.
398 	 */
399 	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
400 	newfs->fs_maxcluster = fs->fs_maxcluster;
401 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
402 	if (fs->fs_sbsize < SBSIZE)
403 		bp->b_flags |= B_INVAL;
404 	brelse(bp);
405 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
406 	ffs_oldfscompat(fs);
407 
408 	/*
409 	 * Step 3: re-read summary information from disk.
410 	 */
411 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
412 	space = fs->fs_csp[0];
413 	for (i = 0; i < blks; i += fs->fs_frag) {
414 		size = fs->fs_bsize;
415 		if (i + fs->fs_frag > blks)
416 			size = (blks - i) * fs->fs_fsize;
417 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
418 		    NOCRED, &bp);
419 		if (error)
420 			return (error);
421 		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
422 		brelse(bp);
423 	}
424 	/*
425 	 * We no longer know anything about clusters per cylinder group.
426 	 */
427 	if (fs->fs_contigsumsize > 0) {
428 		lp = fs->fs_maxcluster;
429 		for (i = 0; i < fs->fs_ncg; i++)
430 			*lp++ = fs->fs_contigsumsize;
431 	}
432 
433 loop:
434 	simple_lock(&mntvnode_slock);
435 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
436 		if (vp->v_mount != mp) {
437 			simple_unlock(&mntvnode_slock);
438 			goto loop;
439 		}
440 		nvp = vp->v_mntvnodes.le_next;
441 		/*
442 		 * Step 4: invalidate all inactive vnodes.
443 		 */
444 		if (vrecycle(vp, &mntvnode_slock, p))
445 			goto loop;
446 		/*
447 		 * Step 5: invalidate all cached file data.
448 		 */
449 		simple_lock(&vp->v_interlock);
450 		simple_unlock(&mntvnode_slock);
451 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
452 			goto loop;
453 		}
454 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
455 			panic("ffs_reload: dirty2");
456 		/*
457 		 * Step 6: re-read inode data for all active vnodes.
458 		 */
459 		ip = VTOI(vp);
460 		error =
461 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
462 		    (int)fs->fs_bsize, NOCRED, &bp);
463 		if (error) {
464 			vput(vp);
465 			return (error);
466 		}
467 		ip->i_din = *((struct dinode *)bp->b_data +
468 		    ino_to_fsbo(fs, ip->i_number));
469 		brelse(bp);
470 		vput(vp);
471 		simple_lock(&mntvnode_slock);
472 	}
473 	simple_unlock(&mntvnode_slock);
474 	return (0);
475 }
476 
477 /*
478  * Common code for mount and mountroot
479  */
480 int
481 ffs_mountfs(devvp, mp, p, malloctype)
482 	register struct vnode *devvp;
483 	struct mount *mp;
484 	struct proc *p;
485 	struct malloc_type *malloctype;
486 {
487 	register struct ufsmount *ump;
488 	struct buf *bp;
489 	register struct fs *fs;
490 	dev_t dev;
491 	struct partinfo dpart;
492 	caddr_t base, space;
493 	int error, i, blks, size, ronly;
494 	int32_t *lp;
495 	struct ucred *cred;
496 	u_int64_t maxfilesize;					/* XXX */
497 	u_int strsize;
498 	int ncount;
499 
500 	dev = devvp->v_rdev;
501 	cred = p ? p->p_ucred : NOCRED;
502 	/*
503 	 * Disallow multiple mounts of the same device.
504 	 * Disallow mounting of a device that is currently in use
505 	 * (except for root, which might share swap device for miniroot).
506 	 * Flush out any old buffers remaining from a previous use.
507 	 */
508 	error = vfs_mountedon(devvp);
509 	if (error)
510 		return (error);
511 	ncount = vcount(devvp);
512 	if (devvp->v_object)
513 		ncount -= 1;
514 	if (ncount > 1 && devvp != rootvp)
515 		return (EBUSY);
516 	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
517 		return (error);
518 
519 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
520 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
521 	if (error)
522 		return (error);
523 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
524 		size = DEV_BSIZE;
525 	else
526 		size = dpart.disklab->d_secsize;
527 
528 	bp = NULL;
529 	ump = NULL;
530 	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
531 		goto out;
532 	fs = (struct fs *)bp->b_data;
533 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
534 	    fs->fs_bsize < sizeof(struct fs)) {
535 		error = EINVAL;		/* XXX needs translation */
536 		goto out;
537 	}
538 	fs->fs_fmod = 0;
539 	if (!fs->fs_clean) {
540 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
541 			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
542 		} else {
543 			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
544 			error = EPERM;
545 			goto out;
546 		}
547 	}
548 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
549 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
550 		error = EROFS;          /* needs translation */
551 		goto out;
552 	}
553 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
554 	bzero((caddr_t)ump, sizeof *ump);
555 	ump->um_malloctype = malloctype;
556 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
557 	    M_WAITOK);
558 	ump->um_blkatoff = ffs_blkatoff;
559 	ump->um_truncate = ffs_truncate;
560 	ump->um_update = ffs_update;
561 	ump->um_valloc = ffs_valloc;
562 	ump->um_vfree = ffs_vfree;
563 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
564 	if (fs->fs_sbsize < SBSIZE)
565 		bp->b_flags |= B_INVAL;
566 	brelse(bp);
567 	bp = NULL;
568 	fs = ump->um_fs;
569 	fs->fs_ronly = ronly;
570 	if (ronly == 0) {
571 		fs->fs_fmod = 1;
572 		fs->fs_clean = 0;
573 	}
574 	size = fs->fs_cssize;
575 	blks = howmany(size, fs->fs_fsize);
576 	if (fs->fs_contigsumsize > 0)
577 		size += fs->fs_ncg * sizeof(int32_t);
578 	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
579 	for (i = 0; i < blks; i += fs->fs_frag) {
580 		size = fs->fs_bsize;
581 		if (i + fs->fs_frag > blks)
582 			size = (blks - i) * fs->fs_fsize;
583 		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
584 		    cred, &bp)) {
585 			free(base, M_UFSMNT);
586 			goto out;
587 		}
588 		bcopy(bp->b_data, space, (u_int)size);
589 		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
590 		space += size;
591 		brelse(bp);
592 		bp = NULL;
593 	}
594 	if (fs->fs_contigsumsize > 0) {
595 		fs->fs_maxcluster = lp = (int32_t *)space;
596 		for (i = 0; i < fs->fs_ncg; i++)
597 			*lp++ = fs->fs_contigsumsize;
598 	}
599 	mp->mnt_data = (qaddr_t)ump;
600 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
601 	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
602 		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
603 	else
604 		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
605 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
606 	mp->mnt_flag |= MNT_LOCAL;
607 	ump->um_mountp = mp;
608 	ump->um_dev = dev;
609 	ump->um_devvp = devvp;
610 	ump->um_nindir = fs->fs_nindir;
611 	ump->um_bptrtodb = fs->fs_fsbtodb;
612 	ump->um_seqinc = fs->fs_frag;
613 	for (i = 0; i < MAXQUOTAS; i++)
614 		ump->um_quotas[i] = NULLVP;
615 	devvp->v_specflags |= SI_MOUNTEDON;
616 	ffs_oldfscompat(fs);
617 
618 	/*
619 	 * Set FS local "last mounted on" information (NULL pad)
620 	 */
621 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
622 			fs->fs_fsmnt,			/* copy area*/
623 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
624 			&strsize);			/* real size*/
625 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
626 
627 	if( mp->mnt_flag & MNT_ROOTFS) {
628 		/*
629 		 * Root mount; update timestamp in mount structure.
630 		 * this will be used by the common root mount code
631 		 * to update the system clock.
632 		 */
633 		mp->mnt_time = fs->fs_time;
634 	}
635 
636 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
637 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
638 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
639 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
640 	if (ronly == 0) {
641 		fs->fs_clean = 0;
642 		(void) ffs_sbupdate(ump, MNT_WAIT);
643 	}
644 	/*
645 	 * Only VMIO the backing device if the backing device is a real
646 	 * block device.  This excludes the original MFS implementation.
647 	 * Note that it is optional that the backing device be VMIOed.  This
648 	 * increases the opportunity for metadata caching.
649 	 */
650 	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
651 		vfs_object_create(devvp, p, p->p_ucred, 0);
652 	}
653 	return (0);
654 out:
655 	if (bp)
656 		brelse(bp);
657 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
658 	if (ump) {
659 		free(ump->um_fs, M_UFSMNT);
660 		free(ump, M_UFSMNT);
661 		mp->mnt_data = (qaddr_t)0;
662 	}
663 	return (error);
664 }
665 
666 /*
667  * Sanity checks for old file systems.
668  *
669  * XXX - goes away some day.
670  */
671 static int
672 ffs_oldfscompat(fs)
673 	struct fs *fs;
674 {
675 
676 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
677 	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
678 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
679 		fs->fs_nrpos = 8;				/* XXX */
680 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
681 #if 0
682 		int i;						/* XXX */
683 		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
684 								/* XXX */
685 		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
686 		for (i = 0; i < NIADDR; i++) {			/* XXX */
687 			sizepb *= NINDIR(fs);			/* XXX */
688 			fs->fs_maxfilesize += sizepb;		/* XXX */
689 		}						/* XXX */
690 #endif
691 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
692 		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
693 		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
694 	}							/* XXX */
695 	return (0);
696 }
697 
698 /*
699  * unmount system call
700  */
701 int
702 ffs_unmount(mp, mntflags, p)
703 	struct mount *mp;
704 	int mntflags;
705 	struct proc *p;
706 {
707 	register struct ufsmount *ump;
708 	register struct fs *fs;
709 	int error, flags;
710 
711 	flags = 0;
712 	if (mntflags & MNT_FORCE) {
713 		flags |= FORCECLOSE;
714 	}
715 	error = ffs_flushfiles(mp, flags, p);
716 	if (error)
717 		return (error);
718 	ump = VFSTOUFS(mp);
719 	fs = ump->um_fs;
720 	if (fs->fs_ronly == 0) {
721 		fs->fs_clean = 1;
722 		error = ffs_sbupdate(ump, MNT_WAIT);
723 		if (error) {
724 			fs->fs_clean = 0;
725 			return (error);
726 		}
727 	}
728 	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
729 
730 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
731 	vnode_pager_uncache(ump->um_devvp, p);
732 	VOP_UNLOCK(ump->um_devvp, 0, p);
733 
734 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
735 		NOCRED, p);
736 
737 	vrele(ump->um_devvp);
738 
739 	free(fs->fs_csp[0], M_UFSMNT);
740 	free(fs, M_UFSMNT);
741 	free(ump, M_UFSMNT);
742 	mp->mnt_data = (qaddr_t)0;
743 	mp->mnt_flag &= ~MNT_LOCAL;
744 	return (error);
745 }
746 
747 /*
748  * Flush out all the files in a filesystem.
749  */
750 int
751 ffs_flushfiles(mp, flags, p)
752 	register struct mount *mp;
753 	int flags;
754 	struct proc *p;
755 {
756 	register struct ufsmount *ump;
757 	int error;
758 
759 	ump = VFSTOUFS(mp);
760 #ifdef QUOTA
761 	if (mp->mnt_flag & MNT_QUOTA) {
762 		int i;
763 		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
764 		if (error)
765 			return (error);
766 		for (i = 0; i < MAXQUOTAS; i++) {
767 			if (ump->um_quotas[i] == NULLVP)
768 				continue;
769 			quotaoff(p, mp, i);
770 		}
771 		/*
772 		 * Here we fall through to vflush again to ensure
773 		 * that we have gotten rid of all the system vnodes.
774 		 */
775 	}
776 #endif
777 	error = vflush(mp, NULLVP, flags);
778 	return (error);
779 }
780 
781 /*
782  * Get file system statistics.
783  */
784 int
785 ffs_statfs(mp, sbp, p)
786 	struct mount *mp;
787 	register struct statfs *sbp;
788 	struct proc *p;
789 {
790 	register struct ufsmount *ump;
791 	register struct fs *fs;
792 
793 	ump = VFSTOUFS(mp);
794 	fs = ump->um_fs;
795 	if (fs->fs_magic != FS_MAGIC)
796 		panic("ffs_statfs");
797 	sbp->f_bsize = fs->fs_fsize;
798 	sbp->f_iosize = fs->fs_bsize;
799 	sbp->f_blocks = fs->fs_dsize;
800 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
801 		fs->fs_cstotal.cs_nffree;
802 	sbp->f_bavail = freespace(fs, fs->fs_minfree);
803 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
804 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
805 	if (sbp != &mp->mnt_stat) {
806 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
807 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
808 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
809 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
810 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
811 	}
812 	return (0);
813 }
814 
815 /*
816  * Go through the disk queues to initiate sandbagged IO;
817  * go through the inodes to write those that have been modified;
818  * initiate the writing of the super block if it has been modified.
819  *
820  * Note: we are always called with the filesystem marked `MPBUSY'.
821  */
822 int
823 ffs_sync(mp, waitfor, cred, p)
824 	struct mount *mp;
825 	int waitfor;
826 	struct ucred *cred;
827 	struct proc *p;
828 {
829 	struct vnode *nvp, *vp;
830 	struct inode *ip;
831 	struct ufsmount *ump = VFSTOUFS(mp);
832 	struct fs *fs;
833 	struct timeval tv;
834 	int error, allerror = 0;
835 
836 	fs = ump->um_fs;
837 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
838 		printf("fs = %s\n", fs->fs_fsmnt);
839 		panic("ffs_sync: rofs mod");
840 	}
841 	/*
842 	 * Write back each (modified) inode.
843 	 */
844 	simple_lock(&mntvnode_slock);
845 loop:
846 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
847 		/*
848 		 * If the vnode that we are about to sync is no longer
849 		 * associated with this mount point, start over.
850 		 */
851 		if (vp->v_mount != mp)
852 			goto loop;
853 		simple_lock(&vp->v_interlock);
854 		nvp = vp->v_mntvnodes.le_next;
855 		ip = VTOI(vp);
856 		if (((ip->i_flag &
857 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
858 		    vp->v_dirtyblkhd.lh_first == NULL) {
859 			simple_unlock(&vp->v_interlock);
860 			continue;
861 		}
862 		if (vp->v_type != VCHR) {
863 			simple_unlock(&mntvnode_slock);
864 			error =
865 			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
866 			if (error) {
867 				simple_lock(&mntvnode_slock);
868 				if (error == ENOENT)
869 					goto loop;
870 				continue;
871 			}
872 			if (error = VOP_FSYNC(vp, cred, waitfor, p))
873 				allerror = error;
874 			VOP_UNLOCK(vp, 0, p);
875 			vrele(vp);
876 			simple_lock(&mntvnode_slock);
877 		} else {
878 			simple_unlock(&mntvnode_slock);
879 			simple_unlock(&vp->v_interlock);
880 			gettime(&tv);
881 			/* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
882 			UFS_UPDATE(vp, &tv, &tv, 0);
883 			simple_lock(&mntvnode_slock);
884 		}
885 	}
886 	simple_unlock(&mntvnode_slock);
887 	/*
888 	 * Force stale file system control information to be flushed.
889 	 */
890 	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
891 	if (error)
892 		allerror = error;
893 #ifdef QUOTA
894 	qsync(mp);
895 #endif
896 	/*
897 	 * Write back modified superblock.
898 	 */
899 	if (fs->fs_fmod != 0) {
900 		fs->fs_fmod = 0;
901 		fs->fs_time = time.tv_sec;
902 		if (error = ffs_sbupdate(ump, waitfor))
903 			allerror = error;
904 	}
905 	return (allerror);
906 }
907 
908 /*
909  * Look up a FFS dinode number to find its incore vnode, otherwise read it
910  * in from disk.  If it is in core, wait for the lock bit to clear, then
911  * return the inode locked.  Detection and handling of mount points must be
912  * done by the calling routine.
913  */
914 static int ffs_inode_hash_lock;
915 
916 int
917 ffs_vget(mp, ino, vpp)
918 	struct mount *mp;
919 	ino_t ino;
920 	struct vnode **vpp;
921 {
922 	struct fs *fs;
923 	struct inode *ip;
924 	struct ufsmount *ump;
925 	struct buf *bp;
926 	struct vnode *vp;
927 	dev_t dev;
928 	int error;
929 
930 	ump = VFSTOUFS(mp);
931 	dev = ump->um_dev;
932 restart:
933 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
934 		return (0);
935 
936 	/*
937 	 * Lock out the creation of new entries in the FFS hash table in
938 	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
939 	 * may occur!
940 	 */
941 	if (ffs_inode_hash_lock) {
942 		while (ffs_inode_hash_lock) {
943 			ffs_inode_hash_lock = -1;
944 			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
945 		}
946 		goto restart;
947 	}
948 	ffs_inode_hash_lock = 1;
949 
950 	/*
951 	 * If this MALLOC() is performed after the getnewvnode()
952 	 * it might block, leaving a vnode with a NULL v_data to be
953 	 * found by ffs_sync() if a sync happens to fire right then,
954 	 * which will cause a panic because ffs_sync() blindly
955 	 * dereferences vp->v_data (as well it should).
956 	 */
957 	MALLOC(ip, struct inode *, sizeof(struct inode),
958 	    ump->um_malloctype, M_WAITOK);
959 
960 	/* Allocate a new vnode/inode. */
961 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
962 	if (error) {
963 		if (ffs_inode_hash_lock < 0)
964 			wakeup(&ffs_inode_hash_lock);
965 		ffs_inode_hash_lock = 0;
966 		*vpp = NULL;
967 		FREE(ip, ump->um_malloctype);
968 		return (error);
969 	}
970 	bzero((caddr_t)ip, sizeof(struct inode));
971 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
972 	vp->v_data = ip;
973 	ip->i_vnode = vp;
974 	ip->i_fs = fs = ump->um_fs;
975 	ip->i_dev = dev;
976 	ip->i_number = ino;
977 #ifdef QUOTA
978 	{
979 		int i;
980 		for (i = 0; i < MAXQUOTAS; i++)
981 			ip->i_dquot[i] = NODQUOT;
982 	}
983 #endif
984 	/*
985 	 * Put it onto its hash chain and lock it so that other requests for
986 	 * this inode will block if they arrive while we are sleeping waiting
987 	 * for old data structures to be purged or for the contents of the
988 	 * disk portion of this inode to be read.
989 	 */
990 	ufs_ihashins(ip);
991 
992 	if (ffs_inode_hash_lock < 0)
993 		wakeup(&ffs_inode_hash_lock);
994 	ffs_inode_hash_lock = 0;
995 
996 	/* Read in the disk contents for the inode, copy into the inode. */
997 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
998 	    (int)fs->fs_bsize, NOCRED, &bp);
999 	if (error) {
1000 		/*
1001 		 * The inode does not contain anything useful, so it would
1002 		 * be misleading to leave it on its hash chain. With mode
1003 		 * still zero, it will be unlinked and returned to the free
1004 		 * list by vput().
1005 		 */
1006 		brelse(bp);
1007 		vput(vp);
1008 		*vpp = NULL;
1009 		return (error);
1010 	}
1011 	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
1012 	bqrelse(bp);
1013 
1014 	/*
1015 	 * Initialize the vnode from the inode, check for aliases.
1016 	 * Note that the underlying vnode may have changed.
1017 	 */
1018 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1019 	if (error) {
1020 		vput(vp);
1021 		*vpp = NULL;
1022 		return (error);
1023 	}
1024 	/*
1025 	 * Finish inode initialization now that aliasing has been resolved.
1026 	 */
1027 	ip->i_devvp = ump->um_devvp;
1028 	VREF(ip->i_devvp);
1029 	/*
1030 	 * Set up a generation number for this inode if it does not
1031 	 * already have one. This should only happen on old filesystems.
1032 	 */
1033 	if (ip->i_gen == 0) {
1034 		ip->i_gen = random() / 2 + 1;
1035 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1036 			ip->i_flag |= IN_MODIFIED;
1037 	}
1038 	/*
1039 	 * Ensure that uid and gid are correct. This is a temporary
1040 	 * fix until fsck has been changed to do the update.
1041 	 */
1042 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1043 		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1044 		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1045 	}						/* XXX */
1046 
1047 	*vpp = vp;
1048 	return (0);
1049 }
1050 
1051 /*
1052  * File handle to vnode
1053  *
1054  * Have to be really careful about stale file handles:
1055  * - check that the inode number is valid
1056  * - call ffs_vget() to get the locked inode
1057  * - check for an unallocated inode (i_mode == 0)
1058  * - check that the given client host has export rights and return
1059  *   those rights via. exflagsp and credanonp
1060  */
1061 int
1062 ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1063 	register struct mount *mp;
1064 	struct fid *fhp;
1065 	struct sockaddr *nam;
1066 	struct vnode **vpp;
1067 	int *exflagsp;
1068 	struct ucred **credanonp;
1069 {
1070 	register struct ufid *ufhp;
1071 	struct fs *fs;
1072 
1073 	ufhp = (struct ufid *)fhp;
1074 	fs = VFSTOUFS(mp)->um_fs;
1075 	if (ufhp->ufid_ino < ROOTINO ||
1076 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1077 		return (ESTALE);
1078 	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1079 }
1080 
1081 /*
1082  * Vnode pointer to File handle
1083  */
1084 /* ARGSUSED */
1085 int
1086 ffs_vptofh(vp, fhp)
1087 	struct vnode *vp;
1088 	struct fid *fhp;
1089 {
1090 	register struct inode *ip;
1091 	register struct ufid *ufhp;
1092 
1093 	ip = VTOI(vp);
1094 	ufhp = (struct ufid *)fhp;
1095 	ufhp->ufid_len = sizeof(struct ufid);
1096 	ufhp->ufid_ino = ip->i_number;
1097 	ufhp->ufid_gen = ip->i_gen;
1098 	return (0);
1099 }
1100 
/*
 * Initialize the filesystem.  FFS has no setup of its own here: the
 * call is forwarded to ufs_init() and its result returned.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1111 
1112 /*
1113  * Write a superblock and associated information back to disk.
1114  */
1115 static int
1116 ffs_sbupdate(mp, waitfor)
1117 	struct ufsmount *mp;
1118 	int waitfor;
1119 {
1120 	register struct fs *dfs, *fs = mp->um_fs;
1121 	register struct buf *bp;
1122 	int blks;
1123 	caddr_t space;
1124 	int i, size, error, allerror = 0;
1125 
1126 	/*
1127 	 * First write back the summary information.
1128 	 */
1129 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1130 	space = (caddr_t)fs->fs_csp[0];
1131 	for (i = 0; i < blks; i += fs->fs_frag) {
1132 		size = fs->fs_bsize;
1133 		if (i + fs->fs_frag > blks)
1134 			size = (blks - i) * fs->fs_fsize;
1135 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1136 		    size, 0, 0);
1137 		bcopy(space, bp->b_data, (u_int)size);
1138 		space += size;
1139 		if (waitfor != MNT_WAIT)
1140 			bawrite(bp);
1141 		else if (error = bwrite(bp))
1142 			allerror = error;
1143 	}
1144 	/*
1145 	 * Now write back the superblock itself. If any errors occurred
1146 	 * up to this point, then fail so that the superblock avoids
1147 	 * being written out as clean.
1148 	 */
1149 	if (allerror)
1150 		return (allerror);
1151 	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1152 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1153 	/* Restore compatibility to old file systems.		   XXX */
1154 	dfs = (struct fs *)bp->b_data;				/* XXX */
1155 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1156 		dfs->fs_nrpos = -1;				/* XXX */
1157 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1158 		int32_t *lp, tmp;				/* XXX */
1159 								/* XXX */
1160 		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1161 		tmp = lp[4];					/* XXX */
1162 		for (i = 4; i > 0; i--)				/* XXX */
1163 			lp[i] = lp[i-1];			/* XXX */
1164 		lp[0] = tmp;					/* XXX */
1165 	}							/* XXX */
1166 	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1167 	if (waitfor != MNT_WAIT)
1168 		bawrite(bp);
1169 	else if (error = bwrite(bp))
1170 		allerror = error;
1171 	return (allerror);
1172 }
1173