xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision df7f5d4de4592a8948a25ce01e5bddfbb7ce39dc)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $Id: ffs_vfsops.c,v 1.47 1997/03/15 18:58:10 sos Exp $
35  */
36 
37 #include "opt_quota.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/socket.h>
46 #include <sys/mount.h>
47 #include <sys/buf.h>
48 #include <sys/mbuf.h>
49 #include <sys/file.h>
50 #include <sys/disklabel.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/malloc.h>
54 
55 #include <miscfs/specfs/specdev.h>
56 
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/ufsmount.h>
59 #include <ufs/ufs/inode.h>
60 #include <ufs/ufs/ufs_extern.h>
61 
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_prot.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_extern.h>
71 
72 static int	ffs_sbupdate __P((struct ufsmount *, int));
73 static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
74 static int	ffs_oldfscompat __P((struct fs *));
75 static int	ffs_mount __P((struct mount *, char *, caddr_t,
76 				struct nameidata *, struct proc *));
77 static int	ffs_init __P((struct vfsconf *));
78 
79 struct vfsops ufs_vfsops = {
80 	ffs_mount,
81 	ufs_start,
82 	ffs_unmount,
83 	ufs_root,
84 	ufs_quotactl,
85 	ffs_statfs,
86 	ffs_sync,
87 	ffs_vget,
88 	ffs_fhtovp,
89 	ffs_vptofh,
90 	ffs_init,
91 };
92 
93 VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
94 
95 extern u_long nextgennumber;
96 
97 /*
98  * ffs_mount
99  *
100  * Called when mounting local physical media
101  *
102  * PARAMETERS:
103  *		mountroot
104  *			mp	mount point structure
105  *			path	NULL (flag for root mount!!!)
106  *			data	<unused>
107  *			ndp	<unused>
108  *			p	process (user credentials check [statfs])
109  *
110  *		mount
111  *			mp	mount point structure
112  *			path	path to mount point
113  *			data	pointer to argument struct in user space
114  *			ndp	mount point namei() return (used for
115  *				credentials on reload), reused to look
116  *				up block device.
117  *			p	process (user credentials check)
118  *
119  * RETURNS:	0	Success
120  *		!0	error number (errno.h)
121  *
122  * LOCK STATE:
123  *
124  *		ENTRY
125  *			mount point is locked
126  *		EXIT
127  *			mount point is locked
128  *
129  * NOTES:
130  *		A NULL path can be used for a flag since the mount
131  *		system call will fail with EFAULT in copyinstr in
132  *		namei() if it is a genuine NULL from the user.
133  */
134 static int
135 ffs_mount( mp, path, data, ndp, p)
136         struct mount		*mp;	/* mount struct pointer*/
137         char			*path;	/* path to mount point*/
138         caddr_t			data;	/* arguments to FS specific mount*/
139         struct nameidata	*ndp;	/* mount point credentials*/
140         struct proc		*p;	/* process requesting mount*/
141 {
142 	u_int		size;
143 	int		err = 0;
144 	struct vnode	*devvp;
145 
146 	struct ufs_args args;
147 	struct ufsmount *ump = 0;
148 	register struct fs *fs;
149 	int flags;
150 
151 	/*
152 	 * Use NULL path to flag a root mount
153 	 */
154 	if( path == NULL) {
155 		/*
156 		 ***
157 		 * Mounting root file system
158 		 ***
159 		 */
160 
161 		/* Get vnode for root device*/
162 		if ((err = bdevvp( rootdev, &rootvp))) {
163 			printf("ffs_mountroot: can't setup bdevvp for root");
164 			return (err);
165 		}
166 
167 		/*
168 		 * Attempt mount
169 		 */
170 		if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) {
171 			/* fs specific cleanup (if any)*/
172 			goto error_1;
173 		}
174 
175 		goto dostatfs;		/* success*/
176 
177 	}
178 
179 	/*
180 	 ***
181 	 * Mounting non-root file system or updating a file system
182 	 ***
183 	 */
184 
185 	/* copy in user arguments*/
186 	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
187 	if (err)
188 		goto error_1;		/* can't get arguments*/
189 
190 	/*
191 	 * If updating, check whether changing from read-only to
192 	 * read/write; if there is no device name, that's all we do.
193 	 */
194 	if (mp->mnt_flag & MNT_UPDATE) {
195 		ump = VFSTOUFS(mp);
196 		fs = ump->um_fs;
197 		err = 0;
198 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
199 			flags = WRITECLOSE;
200 			if (mp->mnt_flag & MNT_FORCE)
201 				flags |= FORCECLOSE;
202 			err = ffs_flushfiles(mp, flags, p);
203 		}
204 		if (!err && (mp->mnt_flag & MNT_RELOAD))
205 			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
206 		if (err) {
207 			goto error_1;
208 		}
209 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
210 			if (!fs->fs_clean) {
211 				if (mp->mnt_flag & MNT_FORCE) {
212 					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
213 				} else {
214 					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
215 					    fs->fs_fsmnt);
216 					err = EPERM;
217 					goto error_1;
218 				}
219 			}
220 			fs->fs_ronly = 0;
221 		}
222 		if (fs->fs_ronly == 0) {
223 			fs->fs_clean = 0;
224 			ffs_sbupdate(ump, MNT_WAIT);
225 		}
226 		/* if not updating name...*/
227 		if (args.fspec == 0) {
228 			/*
229 			 * Process export requests.  Jumping to "success"
230 			 * will return the vfs_export() error code.
231 			 */
232 			err = vfs_export(mp, &ump->um_export, &args.export);
233 			goto success;
234 		}
235 	}
236 
237 	/*
238 	 * Not an update, or updating the name: look up the name
239 	 * and verify that it refers to a sensible block device.
240 	 */
241 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
242 	err = namei(ndp);
243 	if (err) {
244 		/* can't get devvp!*/
245 		goto error_1;
246 	}
247 
248 	devvp = ndp->ni_vp;
249 
250 	if (devvp->v_type != VBLK) {
251 		err = ENOTBLK;
252 		goto error_2;
253 	}
254 	if (major(devvp->v_rdev) >= nblkdev) {
255 		err = ENXIO;
256 		goto error_2;
257 	}
258 	if (mp->mnt_flag & MNT_UPDATE) {
259 		/*
260 		 ********************
261 		 * UPDATE
262 		 ********************
263 		 */
264 
265 		if (devvp != ump->um_devvp)
266 			err = EINVAL;	/* needs translation */
267 		else
268 			vrele(devvp);
269 		/*
270 		 * Update device name only on success
271 		 */
272 		if( !err) {
273 			/* Save "mounted from" info for mount point (NULL pad)*/
274 			copyinstr(	args.fspec,
275 					mp->mnt_stat.f_mntfromname,
276 					MNAMELEN - 1,
277 					&size);
278 			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
279 		}
280 	} else {
281 		/*
282 		 ********************
283 		 * NEW MOUNT
284 		 ********************
285 		 */
286 
287 		/*
288 		 * Since this is a new mount, we want the names for
289 		 * the device and the mount point copied in.  If an
290 		 * error occurs,  the mountpoint is discarded by the
291 		 * upper level code.
292 		 */
293 		/* Save "last mounted on" info for mount point (NULL pad)*/
294 		copyinstr(	path,				/* mount point*/
295 				mp->mnt_stat.f_mntonname,	/* save area*/
296 				MNAMELEN - 1,			/* max size*/
297 				&size);				/* real size*/
298 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
299 
300 		/* Save "mounted from" info for mount point (NULL pad)*/
301 		copyinstr(	args.fspec,			/* device name*/
302 				mp->mnt_stat.f_mntfromname,	/* save area*/
303 				MNAMELEN - 1,			/* max size*/
304 				&size);				/* real size*/
305 		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
306 
307 		err = ffs_mountfs(devvp, mp, p);
308 	}
309 	if (err) {
310 		goto error_2;
311 	}
312 
313 dostatfs:
314 	/*
315 	 * Initialize FS stat information in mount struct; uses both
316 	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
317 	 *
318 	 * This code is common to root and non-root mounts
319 	 */
320 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
321 
322 	goto success;
323 
324 
325 error_2:	/* error with devvp held*/
326 
327 	/* release devvp before failing*/
328 	vrele(devvp);
329 
330 error_1:	/* no state to back out*/
331 
332 success:
333 	return( err);
334 }
335 
336 /*
337  * Reload all incore data for a filesystem (used after running fsck on
338  * the root filesystem and finding things to fix). The filesystem must
339  * be mounted read-only.
340  *
341  * Things to do to update the mount:
342  *	1) invalidate all cached meta-data.
343  *	2) re-read superblock from disk.
344  *	3) re-read summary information from disk.
345  *	4) invalidate all inactive vnodes.
346  *	5) invalidate all cached file data.
347  *	6) re-read inode data for all active vnodes.
348  */
349 static int
350 ffs_reload(mp, cred, p)
351 	register struct mount *mp;
352 	struct ucred *cred;
353 	struct proc *p;
354 {
355 	register struct vnode *vp, *nvp, *devvp;
356 	struct inode *ip;
357 	struct csum *space;
358 	struct buf *bp;
359 	struct fs *fs, *newfs;
360 	struct partinfo dpart;
361 	int i, blks, size, error;
362 	int32_t *lp;
363 
364 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
365 		return (EINVAL);
366 	/*
367 	 * Step 1: invalidate all cached meta-data.
368 	 */
369 	devvp = VFSTOUFS(mp)->um_devvp;
370 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
371 		panic("ffs_reload: dirty1");
372 	/*
373 	 * Step 2: re-read superblock from disk.
374 	 */
375 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
376 		size = DEV_BSIZE;
377 	else
378 		size = dpart.disklab->d_secsize;
379 	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
380 		return (error);
381 	newfs = (struct fs *)bp->b_data;
382 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
383 		newfs->fs_bsize < sizeof(struct fs)) {
384 			brelse(bp);
385 			return (EIO);		/* XXX needs translation */
386 	}
387 	fs = VFSTOUFS(mp)->um_fs;
388 	/*
389 	 * Copy pointer fields back into superblock before copying in	XXX
390 	 * new superblock. These should really be in the ufsmount.	XXX
391 	 * Note that important parameters (eg fs_ncg) are unchanged.
392 	 */
393 	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
394 	newfs->fs_maxcluster = fs->fs_maxcluster;
395 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
396 	if (fs->fs_sbsize < SBSIZE)
397 		bp->b_flags |= B_INVAL;
398 	brelse(bp);
399 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
400 	ffs_oldfscompat(fs);
401 
402 	/*
403 	 * Step 3: re-read summary information from disk.
404 	 */
405 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
406 	space = fs->fs_csp[0];
407 	for (i = 0; i < blks; i += fs->fs_frag) {
408 		size = fs->fs_bsize;
409 		if (i + fs->fs_frag > blks)
410 			size = (blks - i) * fs->fs_fsize;
411 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
412 		    NOCRED, &bp);
413 		if (error)
414 			return (error);
415 		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
416 		brelse(bp);
417 	}
418 	/*
419 	 * We no longer know anything about clusters per cylinder group.
420 	 */
421 	if (fs->fs_contigsumsize > 0) {
422 		lp = fs->fs_maxcluster;
423 		for (i = 0; i < fs->fs_ncg; i++)
424 			*lp++ = fs->fs_contigsumsize;
425 	}
426 
427 loop:
428 	simple_lock(&mntvnode_slock);
429 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
430 		if (vp->v_mount != mp) {
431 			simple_unlock(&mntvnode_slock);
432 			goto loop;
433 		}
434 		nvp = vp->v_mntvnodes.le_next;
435 		/*
436 		 * Step 4: invalidate all inactive vnodes.
437 		 */
438 		if (vrecycle(vp, &mntvnode_slock, p))
439 			goto loop;
440 		/*
441 		 * Step 5: invalidate all cached file data.
442 		 */
443 		simple_lock(&vp->v_interlock);
444 		simple_unlock(&mntvnode_slock);
445 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
446 			goto loop;
447 		}
448 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
449 			panic("ffs_reload: dirty2");
450 		/*
451 		 * Step 6: re-read inode data for all active vnodes.
452 		 */
453 		ip = VTOI(vp);
454 		error =
455 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
456 		    (int)fs->fs_bsize, NOCRED, &bp);
457 		if (error) {
458 			vput(vp);
459 			return (error);
460 		}
461 		ip->i_din = *((struct dinode *)bp->b_data +
462 		    ino_to_fsbo(fs, ip->i_number));
463 		brelse(bp);
464 		vput(vp);
465 		simple_lock(&mntvnode_slock);
466 	}
467 	simple_unlock(&mntvnode_slock);
468 	return (0);
469 }
470 
471 /*
472  * Common code for mount and mountroot
473  */
474 int
475 ffs_mountfs(devvp, mp, p)
476 	register struct vnode *devvp;
477 	struct mount *mp;
478 	struct proc *p;
479 {
480 	register struct ufsmount *ump;
481 	struct buf *bp;
482 	register struct fs *fs;
483 	dev_t dev;
484 	struct partinfo dpart;
485 	caddr_t base, space;
486 	int error, i, blks, size, ronly;
487 	int32_t *lp;
488 	struct ucred *cred;
489 	u_int64_t maxfilesize;					/* XXX */
490 	u_int strsize;
491 	int ncount;
492 
493 	dev = devvp->v_rdev;
494 	cred = p ? p->p_ucred : NOCRED;
495 	/*
496 	 * Disallow multiple mounts of the same device.
497 	 * Disallow mounting of a device that is currently in use
498 	 * (except for root, which might share swap device for miniroot).
499 	 * Flush out any old buffers remaining from a previous use.
500 	 */
501 	error = vfs_mountedon(devvp);
502 	if (error)
503 		return (error);
504 	ncount = vcount(devvp);
505 	if (devvp->v_object)
506 		ncount -= 1;
507 	if (ncount > 1 && devvp != rootvp)
508 		return (EBUSY);
509 	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
510 		return (error);
511 
512 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
513 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
514 	if (error)
515 		return (error);
516 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
517 		size = DEV_BSIZE;
518 	else
519 		size = dpart.disklab->d_secsize;
520 
521 	bp = NULL;
522 	ump = NULL;
523 	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
524 		goto out;
525 	fs = (struct fs *)bp->b_data;
526 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
527 	    fs->fs_bsize < sizeof(struct fs)) {
528 		error = EINVAL;		/* XXX needs translation */
529 		goto out;
530 	}
531 	fs->fs_fmod = 0;
532 	if (!fs->fs_clean) {
533 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
534 			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
535 		} else {
536 			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
537 			error = EPERM;
538 			goto out;
539 		}
540 	}
541 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
542 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
543 		error = EROFS;          /* needs translation */
544 		goto out;
545 	}
546 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
547 	bzero((caddr_t)ump, sizeof *ump);
548 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
549 	    M_WAITOK);
550 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
551 	if (fs->fs_sbsize < SBSIZE)
552 		bp->b_flags |= B_INVAL;
553 	brelse(bp);
554 	bp = NULL;
555 	fs = ump->um_fs;
556 	fs->fs_ronly = ronly;
557 	if (ronly == 0) {
558 		fs->fs_fmod = 1;
559 		fs->fs_clean = 0;
560 	}
561 	size = fs->fs_cssize;
562 	blks = howmany(size, fs->fs_fsize);
563 	if (fs->fs_contigsumsize > 0)
564 		size += fs->fs_ncg * sizeof(int32_t);
565 	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
566 	for (i = 0; i < blks; i += fs->fs_frag) {
567 		size = fs->fs_bsize;
568 		if (i + fs->fs_frag > blks)
569 			size = (blks - i) * fs->fs_fsize;
570 		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
571 		    cred, &bp)) {
572 			free(base, M_UFSMNT);
573 			goto out;
574 		}
575 		bcopy(bp->b_data, space, (u_int)size);
576 		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
577 		space += size;
578 		brelse(bp);
579 		bp = NULL;
580 	}
581 	if (fs->fs_contigsumsize > 0) {
582 		fs->fs_maxcluster = lp = (int32_t *)space;
583 		for (i = 0; i < fs->fs_ncg; i++)
584 			*lp++ = fs->fs_contigsumsize;
585 	}
586 	mp->mnt_data = (qaddr_t)ump;
587 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
588 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
589 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
590 	mp->mnt_flag |= MNT_LOCAL;
591 	ump->um_mountp = mp;
592 	ump->um_dev = dev;
593 	ump->um_devvp = devvp;
594 	ump->um_nindir = fs->fs_nindir;
595 	ump->um_bptrtodb = fs->fs_fsbtodb;
596 	ump->um_seqinc = fs->fs_frag;
597 	for (i = 0; i < MAXQUOTAS; i++)
598 		ump->um_quotas[i] = NULLVP;
599 	devvp->v_specflags |= SI_MOUNTEDON;
600 	ffs_oldfscompat(fs);
601 
602 	/*
603 	 * Set FS local "last mounted on" information (NULL pad)
604 	 */
605 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
606 			fs->fs_fsmnt,			/* copy area*/
607 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
608 			&strsize);			/* real size*/
609 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
610 
611 	if( mp->mnt_flag & MNT_ROOTFS) {
612 		/*
613 		 * Root mount; update timestamp in mount structure.
614 		 * this will be used by the common root mount code
615 		 * to update the system clock.
616 		 */
617 		mp->mnt_time = fs->fs_time;
618 	}
619 
620 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
621 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
622 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
623 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
624 	if (ronly == 0) {
625 		fs->fs_clean = 0;
626 		(void) ffs_sbupdate(ump, MNT_WAIT);
627 	}
628 	/*
629 	 * Only VMIO the backing device if the backing device is a real
630 	 * block device.  This excludes the original MFS implementation.
631 	 * Note that it is optional that the backing device be VMIOed.  This
632 	 * increases the opportunity for metadata caching.
633 	 */
634 	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
635 		vfs_object_create(devvp, p, p->p_ucred, 0);
636 	}
637 	return (0);
638 out:
639 	if (bp)
640 		brelse(bp);
641 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
642 	if (ump) {
643 		free(ump->um_fs, M_UFSMNT);
644 		free(ump, M_UFSMNT);
645 		mp->mnt_data = (qaddr_t)0;
646 	}
647 	return (error);
648 }
649 
650 /*
651  * Sanity checks for old file systems.
652  *
653  * XXX - goes away some day.
654  */
655 static int
656 ffs_oldfscompat(fs)
657 	struct fs *fs;
658 {
659 
660 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
661 	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
662 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
663 		fs->fs_nrpos = 8;				/* XXX */
664 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
665 #if 0
666 		int i;						/* XXX */
667 		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
668 								/* XXX */
669 		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
670 		for (i = 0; i < NIADDR; i++) {			/* XXX */
671 			sizepb *= NINDIR(fs);			/* XXX */
672 			fs->fs_maxfilesize += sizepb;		/* XXX */
673 		}						/* XXX */
674 #endif
675 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
676 		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
677 		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
678 	}							/* XXX */
679 	return (0);
680 }
681 
682 /*
683  * unmount system call
684  */
685 int
686 ffs_unmount(mp, mntflags, p)
687 	struct mount *mp;
688 	int mntflags;
689 	struct proc *p;
690 {
691 	register struct ufsmount *ump;
692 	register struct fs *fs;
693 	int error, flags;
694 
695 	flags = 0;
696 	if (mntflags & MNT_FORCE) {
697 		flags |= FORCECLOSE;
698 	}
699 	error = ffs_flushfiles(mp, flags, p);
700 	if (error)
701 		return (error);
702 	ump = VFSTOUFS(mp);
703 	fs = ump->um_fs;
704 	if (fs->fs_ronly == 0) {
705 		fs->fs_clean = 1;
706 		error = ffs_sbupdate(ump, MNT_WAIT);
707 		if (error) {
708 			fs->fs_clean = 0;
709 			return (error);
710 		}
711 	}
712 	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
713 
714 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
715 	vnode_pager_uncache(ump->um_devvp, p);
716 	VOP_UNLOCK(ump->um_devvp, 0, p);
717 
718 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
719 		NOCRED, p);
720 
721 	vrele(ump->um_devvp);
722 
723 	free(fs->fs_csp[0], M_UFSMNT);
724 	free(fs, M_UFSMNT);
725 	free(ump, M_UFSMNT);
726 	mp->mnt_data = (qaddr_t)0;
727 	mp->mnt_flag &= ~MNT_LOCAL;
728 	return (error);
729 }
730 
731 /*
732  * Flush out all the files in a filesystem.
733  */
734 int
735 ffs_flushfiles(mp, flags, p)
736 	register struct mount *mp;
737 	int flags;
738 	struct proc *p;
739 {
740 	register struct ufsmount *ump;
741 	int error;
742 
743 	ump = VFSTOUFS(mp);
744 #ifdef QUOTA
745 	if (mp->mnt_flag & MNT_QUOTA) {
746 		int i;
747 		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
748 		if (error)
749 			return (error);
750 		for (i = 0; i < MAXQUOTAS; i++) {
751 			if (ump->um_quotas[i] == NULLVP)
752 				continue;
753 			quotaoff(p, mp, i);
754 		}
755 		/*
756 		 * Here we fall through to vflush again to ensure
757 		 * that we have gotten rid of all the system vnodes.
758 		 */
759 	}
760 #endif
761 	error = vflush(mp, NULLVP, flags);
762 	return (error);
763 }
764 
765 /*
766  * Get file system statistics.
767  */
768 int
769 ffs_statfs(mp, sbp, p)
770 	struct mount *mp;
771 	register struct statfs *sbp;
772 	struct proc *p;
773 {
774 	register struct ufsmount *ump;
775 	register struct fs *fs;
776 
777 	ump = VFSTOUFS(mp);
778 	fs = ump->um_fs;
779 	if (fs->fs_magic != FS_MAGIC)
780 		panic("ffs_statfs");
781 	sbp->f_bsize = fs->fs_fsize;
782 	sbp->f_iosize = fs->fs_bsize;
783 	sbp->f_blocks = fs->fs_dsize;
784 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
785 		fs->fs_cstotal.cs_nffree;
786 	sbp->f_bavail = freespace(fs, fs->fs_minfree);
787 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
788 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
789 	if (sbp != &mp->mnt_stat) {
790 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
791 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
792 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
793 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
794 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
795 	}
796 	return (0);
797 }
798 
799 /*
800  * Go through the disk queues to initiate sandbagged IO;
801  * go through the inodes to write those that have been modified;
802  * initiate the writing of the super block if it has been modified.
803  *
804  * Note: we are always called with the filesystem marked `MPBUSY'.
805  */
806 int
807 ffs_sync(mp, waitfor, cred, p)
808 	struct mount *mp;
809 	int waitfor;
810 	struct ucred *cred;
811 	struct proc *p;
812 {
813 	struct vnode *nvp, *vp;
814 	struct inode *ip;
815 	struct ufsmount *ump = VFSTOUFS(mp);
816 	struct fs *fs;
817 	struct timeval tv;
818 	int error, allerror = 0;
819 
820 	fs = ump->um_fs;
821 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
822 		printf("fs = %s\n", fs->fs_fsmnt);
823 		panic("ffs_sync: rofs mod");
824 	}
825 	/*
826 	 * Write back each (modified) inode.
827 	 */
828 	simple_lock(&mntvnode_slock);
829 loop:
830 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
831 		/*
832 		 * If the vnode that we are about to sync is no longer
833 		 * associated with this mount point, start over.
834 		 */
835 		if (vp->v_mount != mp)
836 			goto loop;
837 		simple_lock(&vp->v_interlock);
838 		nvp = vp->v_mntvnodes.le_next;
839 		ip = VTOI(vp);
840 		if (((ip->i_flag &
841 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
842 		    vp->v_dirtyblkhd.lh_first == NULL) {
843 			simple_unlock(&vp->v_interlock);
844 			continue;
845 		}
846 		if (vp->v_type != VCHR) {
847 			simple_unlock(&mntvnode_slock);
848 			error =
849 			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
850 			if (error) {
851 				simple_lock(&mntvnode_slock);
852 				if (error == ENOENT)
853 					goto loop;
854 				continue;
855 			}
856 			if (error = VOP_FSYNC(vp, cred, waitfor, p))
857 				allerror = error;
858 			VOP_UNLOCK(vp, 0, p);
859 			vrele(vp);
860 			simple_lock(&mntvnode_slock);
861 		} else {
862 			simple_unlock(&mntvnode_slock);
863 			simple_unlock(&vp->v_interlock);
864 			tv = time;
865 			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
866 			VOP_UPDATE(vp, &tv, &tv, 0);
867 			simple_lock(&mntvnode_slock);
868 		}
869 	}
870 	simple_unlock(&mntvnode_slock);
871 	/*
872 	 * Force stale file system control information to be flushed.
873 	 */
874 	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
875 	if (error)
876 		allerror = error;
877 #ifdef QUOTA
878 	qsync(mp);
879 #endif
880 	/*
881 	 * Write back modified superblock.
882 	 */
883 	if (fs->fs_fmod != 0) {
884 		fs->fs_fmod = 0;
885 		fs->fs_time = time.tv_sec;
886 		if (error = ffs_sbupdate(ump, waitfor))
887 			allerror = error;
888 	}
889 	return (allerror);
890 }
891 
892 /*
893  * Look up a FFS dinode number to find its incore vnode, otherwise read it
894  * in from disk.  If it is in core, wait for the lock bit to clear, then
895  * return the inode locked.  Detection and handling of mount points must be
896  * done by the calling routine.
897  */
898 static int ffs_inode_hash_lock;
899 
900 int
901 ffs_vget(mp, ino, vpp)
902 	struct mount *mp;
903 	ino_t ino;
904 	struct vnode **vpp;
905 {
906 	struct fs *fs;
907 	struct inode *ip;
908 	struct ufsmount *ump;
909 	struct buf *bp;
910 	struct vnode *vp;
911 	dev_t dev;
912 	int type, error;
913 
914 	ump = VFSTOUFS(mp);
915 	dev = ump->um_dev;
916 restart:
917 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
918 		return (0);
919 
920 	/*
921 	 * Lock out the creation of new entries in the FFS hash table in
922 	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
923 	 * may occur!
924 	 */
925 	if (ffs_inode_hash_lock) {
926 		while (ffs_inode_hash_lock) {
927 			ffs_inode_hash_lock = -1;
928 			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
929 		}
930 		goto restart;
931 	}
932 	ffs_inode_hash_lock = 1;
933 
934 	/*
935 	 * If this MALLOC() is performed after the getnewvnode()
936 	 * it might block, leaving a vnode with a NULL v_data to be
937 	 * found by ffs_sync() if a sync happens to fire right then,
938 	 * which will cause a panic because ffs_sync() blindly
939 	 * dereferences vp->v_data (as well it should).
940 	 */
941 	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
942 	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
943 
944 	/* Allocate a new vnode/inode. */
945 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
946 	if (error) {
947 		if (ffs_inode_hash_lock < 0)
948 			wakeup(&ffs_inode_hash_lock);
949 		ffs_inode_hash_lock = 0;
950 		*vpp = NULL;
951 		FREE(ip, type);
952 		return (error);
953 	}
954 	bzero((caddr_t)ip, sizeof(struct inode));
955 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
956 	vp->v_data = ip;
957 	ip->i_vnode = vp;
958 	ip->i_fs = fs = ump->um_fs;
959 	ip->i_dev = dev;
960 	ip->i_number = ino;
961 #ifdef QUOTA
962 	{
963 		int i;
964 		for (i = 0; i < MAXQUOTAS; i++)
965 			ip->i_dquot[i] = NODQUOT;
966 	}
967 #endif
968 	/*
969 	 * Put it onto its hash chain and lock it so that other requests for
970 	 * this inode will block if they arrive while we are sleeping waiting
971 	 * for old data structures to be purged or for the contents of the
972 	 * disk portion of this inode to be read.
973 	 */
974 	ufs_ihashins(ip);
975 
976 	if (ffs_inode_hash_lock < 0)
977 		wakeup(&ffs_inode_hash_lock);
978 	ffs_inode_hash_lock = 0;
979 
980 	/* Read in the disk contents for the inode, copy into the inode. */
981 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
982 	    (int)fs->fs_bsize, NOCRED, &bp);
983 	if (error) {
984 		/*
985 		 * The inode does not contain anything useful, so it would
986 		 * be misleading to leave it on its hash chain. With mode
987 		 * still zero, it will be unlinked and returned to the free
988 		 * list by vput().
989 		 */
990 		brelse(bp);
991 		vput(vp);
992 		*vpp = NULL;
993 		return (error);
994 	}
995 	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
996 	bqrelse(bp);
997 
998 	/*
999 	 * Initialize the vnode from the inode, check for aliases.
1000 	 * Note that the underlying vnode may have changed.
1001 	 */
1002 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1003 	if (error) {
1004 		vput(vp);
1005 		*vpp = NULL;
1006 		return (error);
1007 	}
1008 	/*
1009 	 * Finish inode initialization now that aliasing has been resolved.
1010 	 */
1011 	ip->i_devvp = ump->um_devvp;
1012 	VREF(ip->i_devvp);
1013 	/*
1014 	 * Set up a generation number for this inode if it does not
1015 	 * already have one. This should only happen on old filesystems.
1016 	 */
1017 	if (ip->i_gen == 0) {
1018 		if (++nextgennumber < (u_long)time.tv_sec)
1019 			nextgennumber = time.tv_sec;
1020 		ip->i_gen = nextgennumber;
1021 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1022 			ip->i_flag |= IN_MODIFIED;
1023 	}
1024 	/*
1025 	 * Ensure that uid and gid are correct. This is a temporary
1026 	 * fix until fsck has been changed to do the update.
1027 	 */
1028 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1029 		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1030 		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1031 	}						/* XXX */
1032 
1033 	*vpp = vp;
1034 	return (0);
1035 }
1036 
1037 /*
1038  * File handle to vnode
1039  *
1040  * Have to be really careful about stale file handles:
1041  * - check that the inode number is valid
1042  * - call ffs_vget() to get the locked inode
1043  * - check for an unallocated inode (i_mode == 0)
1044  * - check that the given client host has export rights and return
1045  *   those rights via. exflagsp and credanonp
1046  */
1047 int
1048 ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1049 	register struct mount *mp;
1050 	struct fid *fhp;
1051 	struct mbuf *nam;
1052 	struct vnode **vpp;
1053 	int *exflagsp;
1054 	struct ucred **credanonp;
1055 {
1056 	register struct ufid *ufhp;
1057 	struct fs *fs;
1058 
1059 	ufhp = (struct ufid *)fhp;
1060 	fs = VFSTOUFS(mp)->um_fs;
1061 	if (ufhp->ufid_ino < ROOTINO ||
1062 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1063 		return (ESTALE);
1064 	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1065 }
1066 
1067 /*
1068  * Vnode pointer to File handle
1069  */
1070 /* ARGSUSED */
1071 int
1072 ffs_vptofh(vp, fhp)
1073 	struct vnode *vp;
1074 	struct fid *fhp;
1075 {
1076 	register struct inode *ip;
1077 	register struct ufid *ufhp;
1078 
1079 	ip = VTOI(vp);
1080 	ufhp = (struct ufid *)fhp;
1081 	ufhp->ufid_len = sizeof(struct ufid);
1082 	ufhp->ufid_ino = ip->i_number;
1083 	ufhp->ufid_gen = ip->i_gen;
1084 	return (0);
1085 }
1086 
/*
 * Initialize the filesystem; just use ufs_init.
 *
 * Returns whatever ufs_init() returns.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1097 
1098 /*
1099  * Write a superblock and associated information back to disk.
1100  */
1101 static int
1102 ffs_sbupdate(mp, waitfor)
1103 	struct ufsmount *mp;
1104 	int waitfor;
1105 {
1106 	register struct fs *dfs, *fs = mp->um_fs;
1107 	register struct buf *bp;
1108 	int blks;
1109 	caddr_t space;
1110 	int i, size, error, allerror = 0;
1111 
1112 	/*
1113 	 * First write back the summary information.
1114 	 */
1115 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1116 	space = (caddr_t)fs->fs_csp[0];
1117 	for (i = 0; i < blks; i += fs->fs_frag) {
1118 		size = fs->fs_bsize;
1119 		if (i + fs->fs_frag > blks)
1120 			size = (blks - i) * fs->fs_fsize;
1121 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1122 		    size, 0, 0);
1123 		bcopy(space, bp->b_data, (u_int)size);
1124 		space += size;
1125 		if (waitfor != MNT_WAIT)
1126 			bawrite(bp);
1127 		else if (error = bwrite(bp))
1128 			allerror = error;
1129 	}
1130 	/*
1131 	 * Now write back the superblock itself. If any errors occurred
1132 	 * up to this point, then fail so that the superblock avoids
1133 	 * being written out as clean.
1134 	 */
1135 	if (allerror)
1136 		return (allerror);
1137 	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1138 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1139 	/* Restore compatibility to old file systems.		   XXX */
1140 	dfs = (struct fs *)bp->b_data;				/* XXX */
1141 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1142 		dfs->fs_nrpos = -1;				/* XXX */
1143 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1144 		int32_t *lp, tmp;				/* XXX */
1145 								/* XXX */
1146 		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1147 		tmp = lp[4];					/* XXX */
1148 		for (i = 4; i > 0; i--)				/* XXX */
1149 			lp[i] = lp[i-1];			/* XXX */
1150 		lp[0] = tmp;					/* XXX */
1151 	}							/* XXX */
1152 	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1153 	if (waitfor != MNT_WAIT)
1154 		bawrite(bp);
1155 	else if (error = bwrite(bp))
1156 		allerror = error;
1157 	return (allerror);
1158 }
1159