xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 952d112864d8008aa87278a30a539d888a8493cd)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $Id: ffs_vfsops.c,v 1.51 1997/03/23 20:08:19 guido Exp $
35  */
36 
37 #include "opt_quota.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/socket.h>
46 #include <sys/mount.h>
47 #include <sys/buf.h>
48 #include <sys/mbuf.h>
49 #include <sys/fcntl.h>
50 #include <sys/disklabel.h>
51 #include <sys/errno.h>
52 #include <sys/malloc.h>
53 
54 #include <miscfs/specfs/specdev.h>
55 
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/ufs_extern.h>
60 
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
63 
64 #include <vm/vm.h>
65 #include <vm/vm_param.h>
66 #include <vm/vm_prot.h>
67 #include <vm/vm_page.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_extern.h>
70 
71 static int	ffs_sbupdate __P((struct ufsmount *, int));
72 static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
73 static int	ffs_oldfscompat __P((struct fs *));
74 static int	ffs_mount __P((struct mount *, char *, caddr_t,
75 				struct nameidata *, struct proc *));
76 static int	ffs_init __P((struct vfsconf *));
77 
78 struct vfsops ufs_vfsops = {
79 	ffs_mount,
80 	ufs_start,
81 	ffs_unmount,
82 	ufs_root,
83 	ufs_quotactl,
84 	ffs_statfs,
85 	ffs_sync,
86 	ffs_vget,
87 	ffs_fhtovp,
88 	ffs_vptofh,
89 	ffs_init,
90 };
91 
92 VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
93 
94 /*
95  * ffs_mount
96  *
97  * Called when mounting local physical media
98  *
99  * PARAMETERS:
100  *		mountroot
101  *			mp	mount point structure
102  *			path	NULL (flag for root mount!!!)
103  *			data	<unused>
104  *			ndp	<unused>
105  *			p	process (user credentials check [statfs])
106  *
107  *		mount
108  *			mp	mount point structure
109  *			path	path to mount point
110  *			data	pointer to argument struct in user space
111  *			ndp	mount point namei() return (used for
112  *				credentials on reload), reused to look
113  *				up block device.
114  *			p	process (user credentials check)
115  *
116  * RETURNS:	0	Success
117  *		!0	error number (errno.h)
118  *
119  * LOCK STATE:
120  *
121  *		ENTRY
122  *			mount point is locked
123  *		EXIT
124  *			mount point is locked
125  *
126  * NOTES:
127  *		A NULL path can be used for a flag since the mount
128  *		system call will fail with EFAULT in copyinstr in
129  *		namei() if it is a genuine NULL from the user.
130  */
131 static int
132 ffs_mount( mp, path, data, ndp, p)
133         struct mount		*mp;	/* mount struct pointer*/
134         char			*path;	/* path to mount point*/
135         caddr_t			data;	/* arguments to FS specific mount*/
136         struct nameidata	*ndp;	/* mount point credentials*/
137         struct proc		*p;	/* process requesting mount*/
138 {
139 	u_int		size;
140 	int		err = 0;
141 	struct vnode	*devvp;
142 
143 	struct ufs_args args;
144 	struct ufsmount *ump = 0;
145 	register struct fs *fs;
146 	int flags;
147 
148 	/*
149 	 * Use NULL path to flag a root mount
150 	 */
151 	if( path == NULL) {
152 		/*
153 		 ***
154 		 * Mounting root file system
155 		 ***
156 		 */
157 
158 		/* Get vnode for root device*/
159 		if ((err = bdevvp( rootdev, &rootvp))) {
160 			printf("ffs_mountroot: can't setup bdevvp for root");
161 			return (err);
162 		}
163 
164 		/*
165 		 * Attempt mount
166 		 */
167 		if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) {
168 			/* fs specific cleanup (if any)*/
169 			goto error_1;
170 		}
171 
172 		goto dostatfs;		/* success*/
173 
174 	}
175 
176 	/*
177 	 ***
178 	 * Mounting non-root file system or updating a file system
179 	 ***
180 	 */
181 
182 	/* copy in user arguments*/
183 	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
184 	if (err)
185 		goto error_1;		/* can't get arguments*/
186 
187 	/*
188 	 * If updating, check whether changing from read-only to
189 	 * read/write; if there is no device name, that's all we do.
190 	 */
191 	if (mp->mnt_flag & MNT_UPDATE) {
192 		ump = VFSTOUFS(mp);
193 		fs = ump->um_fs;
194 		err = 0;
195 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
196 			flags = WRITECLOSE;
197 			if (mp->mnt_flag & MNT_FORCE)
198 				flags |= FORCECLOSE;
199 			err = ffs_flushfiles(mp, flags, p);
200 		}
201 		if (!err && (mp->mnt_flag & MNT_RELOAD))
202 			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
203 		if (err) {
204 			goto error_1;
205 		}
206 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
207 			if (!fs->fs_clean) {
208 				if (mp->mnt_flag & MNT_FORCE) {
209 					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
210 				} else {
211 					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
212 					    fs->fs_fsmnt);
213 					err = EPERM;
214 					goto error_1;
215 				}
216 			}
217 			fs->fs_ronly = 0;
218 		}
219 		if (fs->fs_ronly == 0) {
220 			fs->fs_clean = 0;
221 			ffs_sbupdate(ump, MNT_WAIT);
222 		}
223 		/* if not updating name...*/
224 		if (args.fspec == 0) {
225 			/*
226 			 * Process export requests.  Jumping to "success"
227 			 * will return the vfs_export() error code.
228 			 */
229 			err = vfs_export(mp, &ump->um_export, &args.export);
230 			goto success;
231 		}
232 	}
233 
234 	/*
235 	 * Not an update, or updating the name: look up the name
236 	 * and verify that it refers to a sensible block device.
237 	 */
238 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
239 	err = namei(ndp);
240 	if (err) {
241 		/* can't get devvp!*/
242 		goto error_1;
243 	}
244 
245 	devvp = ndp->ni_vp;
246 
247 	if (devvp->v_type != VBLK) {
248 		err = ENOTBLK;
249 		goto error_2;
250 	}
251 	if (major(devvp->v_rdev) >= nblkdev) {
252 		err = ENXIO;
253 		goto error_2;
254 	}
255 	if (mp->mnt_flag & MNT_UPDATE) {
256 		/*
257 		 ********************
258 		 * UPDATE
259 		 ********************
260 		 */
261 
262 		if (devvp != ump->um_devvp)
263 			err = EINVAL;	/* needs translation */
264 		else
265 			vrele(devvp);
266 		/*
267 		 * Update device name only on success
268 		 */
269 		if( !err) {
270 			/* Save "mounted from" info for mount point (NULL pad)*/
271 			copyinstr(	args.fspec,
272 					mp->mnt_stat.f_mntfromname,
273 					MNAMELEN - 1,
274 					&size);
275 			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
276 		}
277 	} else {
278 		/*
279 		 ********************
280 		 * NEW MOUNT
281 		 ********************
282 		 */
283 
284 		/*
285 		 * Since this is a new mount, we want the names for
286 		 * the device and the mount point copied in.  If an
287 		 * error occurs,  the mountpoint is discarded by the
288 		 * upper level code.
289 		 */
290 		/* Save "last mounted on" info for mount point (NULL pad)*/
291 		copyinstr(	path,				/* mount point*/
292 				mp->mnt_stat.f_mntonname,	/* save area*/
293 				MNAMELEN - 1,			/* max size*/
294 				&size);				/* real size*/
295 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
296 
297 		/* Save "mounted from" info for mount point (NULL pad)*/
298 		copyinstr(	args.fspec,			/* device name*/
299 				mp->mnt_stat.f_mntfromname,	/* save area*/
300 				MNAMELEN - 1,			/* max size*/
301 				&size);				/* real size*/
302 		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
303 
304 		err = ffs_mountfs(devvp, mp, p);
305 	}
306 	if (err) {
307 		goto error_2;
308 	}
309 
310 dostatfs:
311 	/*
312 	 * Initialize FS stat information in mount struct; uses both
313 	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
314 	 *
315 	 * This code is common to root and non-root mounts
316 	 */
317 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
318 
319 	goto success;
320 
321 
322 error_2:	/* error with devvp held*/
323 
324 	/* release devvp before failing*/
325 	vrele(devvp);
326 
327 error_1:	/* no state to back out*/
328 
329 success:
330 	return( err);
331 }
332 
333 /*
334  * Reload all incore data for a filesystem (used after running fsck on
335  * the root filesystem and finding things to fix). The filesystem must
336  * be mounted read-only.
337  *
338  * Things to do to update the mount:
339  *	1) invalidate all cached meta-data.
340  *	2) re-read superblock from disk.
341  *	3) re-read summary information from disk.
342  *	4) invalidate all inactive vnodes.
343  *	5) invalidate all cached file data.
344  *	6) re-read inode data for all active vnodes.
345  */
346 static int
347 ffs_reload(mp, cred, p)
348 	register struct mount *mp;
349 	struct ucred *cred;
350 	struct proc *p;
351 {
352 	register struct vnode *vp, *nvp, *devvp;
353 	struct inode *ip;
354 	struct csum *space;
355 	struct buf *bp;
356 	struct fs *fs, *newfs;
357 	struct partinfo dpart;
358 	int i, blks, size, error;
359 	int32_t *lp;
360 
361 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
362 		return (EINVAL);
363 	/*
364 	 * Step 1: invalidate all cached meta-data.
365 	 */
366 	devvp = VFSTOUFS(mp)->um_devvp;
367 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
368 		panic("ffs_reload: dirty1");
369 	/*
370 	 * Step 2: re-read superblock from disk.
371 	 */
372 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
373 		size = DEV_BSIZE;
374 	else
375 		size = dpart.disklab->d_secsize;
376 	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
377 		return (error);
378 	newfs = (struct fs *)bp->b_data;
379 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
380 		newfs->fs_bsize < sizeof(struct fs)) {
381 			brelse(bp);
382 			return (EIO);		/* XXX needs translation */
383 	}
384 	fs = VFSTOUFS(mp)->um_fs;
385 	/*
386 	 * Copy pointer fields back into superblock before copying in	XXX
387 	 * new superblock. These should really be in the ufsmount.	XXX
388 	 * Note that important parameters (eg fs_ncg) are unchanged.
389 	 */
390 	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
391 	newfs->fs_maxcluster = fs->fs_maxcluster;
392 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
393 	if (fs->fs_sbsize < SBSIZE)
394 		bp->b_flags |= B_INVAL;
395 	brelse(bp);
396 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
397 	ffs_oldfscompat(fs);
398 
399 	/*
400 	 * Step 3: re-read summary information from disk.
401 	 */
402 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
403 	space = fs->fs_csp[0];
404 	for (i = 0; i < blks; i += fs->fs_frag) {
405 		size = fs->fs_bsize;
406 		if (i + fs->fs_frag > blks)
407 			size = (blks - i) * fs->fs_fsize;
408 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
409 		    NOCRED, &bp);
410 		if (error)
411 			return (error);
412 		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
413 		brelse(bp);
414 	}
415 	/*
416 	 * We no longer know anything about clusters per cylinder group.
417 	 */
418 	if (fs->fs_contigsumsize > 0) {
419 		lp = fs->fs_maxcluster;
420 		for (i = 0; i < fs->fs_ncg; i++)
421 			*lp++ = fs->fs_contigsumsize;
422 	}
423 
424 loop:
425 	simple_lock(&mntvnode_slock);
426 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
427 		if (vp->v_mount != mp) {
428 			simple_unlock(&mntvnode_slock);
429 			goto loop;
430 		}
431 		nvp = vp->v_mntvnodes.le_next;
432 		/*
433 		 * Step 4: invalidate all inactive vnodes.
434 		 */
435 		if (vrecycle(vp, &mntvnode_slock, p))
436 			goto loop;
437 		/*
438 		 * Step 5: invalidate all cached file data.
439 		 */
440 		simple_lock(&vp->v_interlock);
441 		simple_unlock(&mntvnode_slock);
442 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
443 			goto loop;
444 		}
445 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
446 			panic("ffs_reload: dirty2");
447 		/*
448 		 * Step 6: re-read inode data for all active vnodes.
449 		 */
450 		ip = VTOI(vp);
451 		error =
452 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
453 		    (int)fs->fs_bsize, NOCRED, &bp);
454 		if (error) {
455 			vput(vp);
456 			return (error);
457 		}
458 		ip->i_din = *((struct dinode *)bp->b_data +
459 		    ino_to_fsbo(fs, ip->i_number));
460 		brelse(bp);
461 		vput(vp);
462 		simple_lock(&mntvnode_slock);
463 	}
464 	simple_unlock(&mntvnode_slock);
465 	return (0);
466 }
467 
468 /*
469  * Common code for mount and mountroot
470  */
471 int
472 ffs_mountfs(devvp, mp, p)
473 	register struct vnode *devvp;
474 	struct mount *mp;
475 	struct proc *p;
476 {
477 	register struct ufsmount *ump;
478 	struct buf *bp;
479 	register struct fs *fs;
480 	dev_t dev;
481 	struct partinfo dpart;
482 	caddr_t base, space;
483 	int error, i, blks, size, ronly;
484 	int32_t *lp;
485 	struct ucred *cred;
486 	u_int64_t maxfilesize;					/* XXX */
487 	u_int strsize;
488 	int ncount;
489 
490 	dev = devvp->v_rdev;
491 	cred = p ? p->p_ucred : NOCRED;
492 	/*
493 	 * Disallow multiple mounts of the same device.
494 	 * Disallow mounting of a device that is currently in use
495 	 * (except for root, which might share swap device for miniroot).
496 	 * Flush out any old buffers remaining from a previous use.
497 	 */
498 	error = vfs_mountedon(devvp);
499 	if (error)
500 		return (error);
501 	ncount = vcount(devvp);
502 	if (devvp->v_object)
503 		ncount -= 1;
504 	if (ncount > 1 && devvp != rootvp)
505 		return (EBUSY);
506 	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
507 		return (error);
508 
509 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
510 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
511 	if (error)
512 		return (error);
513 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
514 		size = DEV_BSIZE;
515 	else
516 		size = dpart.disklab->d_secsize;
517 
518 	bp = NULL;
519 	ump = NULL;
520 	if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp))
521 		goto out;
522 	fs = (struct fs *)bp->b_data;
523 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
524 	    fs->fs_bsize < sizeof(struct fs)) {
525 		error = EINVAL;		/* XXX needs translation */
526 		goto out;
527 	}
528 	fs->fs_fmod = 0;
529 	if (!fs->fs_clean) {
530 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
531 			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
532 		} else {
533 			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
534 			error = EPERM;
535 			goto out;
536 		}
537 	}
538 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
539 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
540 		error = EROFS;          /* needs translation */
541 		goto out;
542 	}
543 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
544 	bzero((caddr_t)ump, sizeof *ump);
545 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
546 	    M_WAITOK);
547 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
548 	if (fs->fs_sbsize < SBSIZE)
549 		bp->b_flags |= B_INVAL;
550 	brelse(bp);
551 	bp = NULL;
552 	fs = ump->um_fs;
553 	fs->fs_ronly = ronly;
554 	if (ronly == 0) {
555 		fs->fs_fmod = 1;
556 		fs->fs_clean = 0;
557 	}
558 	size = fs->fs_cssize;
559 	blks = howmany(size, fs->fs_fsize);
560 	if (fs->fs_contigsumsize > 0)
561 		size += fs->fs_ncg * sizeof(int32_t);
562 	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
563 	for (i = 0; i < blks; i += fs->fs_frag) {
564 		size = fs->fs_bsize;
565 		if (i + fs->fs_frag > blks)
566 			size = (blks - i) * fs->fs_fsize;
567 		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
568 		    cred, &bp)) {
569 			free(base, M_UFSMNT);
570 			goto out;
571 		}
572 		bcopy(bp->b_data, space, (u_int)size);
573 		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
574 		space += size;
575 		brelse(bp);
576 		bp = NULL;
577 	}
578 	if (fs->fs_contigsumsize > 0) {
579 		fs->fs_maxcluster = lp = (int32_t *)space;
580 		for (i = 0; i < fs->fs_ncg; i++)
581 			*lp++ = fs->fs_contigsumsize;
582 	}
583 	mp->mnt_data = (qaddr_t)ump;
584 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
585 	if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
586 		mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
587 	else
588 		mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
589 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
590 	mp->mnt_flag |= MNT_LOCAL;
591 	ump->um_mountp = mp;
592 	ump->um_dev = dev;
593 	ump->um_devvp = devvp;
594 	ump->um_nindir = fs->fs_nindir;
595 	ump->um_bptrtodb = fs->fs_fsbtodb;
596 	ump->um_seqinc = fs->fs_frag;
597 	for (i = 0; i < MAXQUOTAS; i++)
598 		ump->um_quotas[i] = NULLVP;
599 	devvp->v_specflags |= SI_MOUNTEDON;
600 	ffs_oldfscompat(fs);
601 
602 	/*
603 	 * Set FS local "last mounted on" information (NULL pad)
604 	 */
605 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
606 			fs->fs_fsmnt,			/* copy area*/
607 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
608 			&strsize);			/* real size*/
609 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
610 
611 	if( mp->mnt_flag & MNT_ROOTFS) {
612 		/*
613 		 * Root mount; update timestamp in mount structure.
614 		 * this will be used by the common root mount code
615 		 * to update the system clock.
616 		 */
617 		mp->mnt_time = fs->fs_time;
618 	}
619 
620 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
621 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
622 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
623 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
624 	if (ronly == 0) {
625 		fs->fs_clean = 0;
626 		(void) ffs_sbupdate(ump, MNT_WAIT);
627 	}
628 	/*
629 	 * Only VMIO the backing device if the backing device is a real
630 	 * block device.  This excludes the original MFS implementation.
631 	 * Note that it is optional that the backing device be VMIOed.  This
632 	 * increases the opportunity for metadata caching.
633 	 */
634 	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
635 		vfs_object_create(devvp, p, p->p_ucred, 0);
636 	}
637 	return (0);
638 out:
639 	if (bp)
640 		brelse(bp);
641 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
642 	if (ump) {
643 		free(ump->um_fs, M_UFSMNT);
644 		free(ump, M_UFSMNT);
645 		mp->mnt_data = (qaddr_t)0;
646 	}
647 	return (error);
648 }
649 
650 /*
651  * Sanity checks for old file systems.
652  *
653  * XXX - goes away some day.
654  */
655 static int
656 ffs_oldfscompat(fs)
657 	struct fs *fs;
658 {
659 
660 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
661 	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
662 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
663 		fs->fs_nrpos = 8;				/* XXX */
664 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
665 #if 0
666 		int i;						/* XXX */
667 		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
668 								/* XXX */
669 		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
670 		for (i = 0; i < NIADDR; i++) {			/* XXX */
671 			sizepb *= NINDIR(fs);			/* XXX */
672 			fs->fs_maxfilesize += sizepb;		/* XXX */
673 		}						/* XXX */
674 #endif
675 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
676 		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
677 		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
678 	}							/* XXX */
679 	return (0);
680 }
681 
682 /*
683  * unmount system call
684  */
685 int
686 ffs_unmount(mp, mntflags, p)
687 	struct mount *mp;
688 	int mntflags;
689 	struct proc *p;
690 {
691 	register struct ufsmount *ump;
692 	register struct fs *fs;
693 	int error, flags;
694 
695 	flags = 0;
696 	if (mntflags & MNT_FORCE) {
697 		flags |= FORCECLOSE;
698 	}
699 	error = ffs_flushfiles(mp, flags, p);
700 	if (error)
701 		return (error);
702 	ump = VFSTOUFS(mp);
703 	fs = ump->um_fs;
704 	if (fs->fs_ronly == 0) {
705 		fs->fs_clean = 1;
706 		error = ffs_sbupdate(ump, MNT_WAIT);
707 		if (error) {
708 			fs->fs_clean = 0;
709 			return (error);
710 		}
711 	}
712 	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
713 
714 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
715 	vnode_pager_uncache(ump->um_devvp, p);
716 	VOP_UNLOCK(ump->um_devvp, 0, p);
717 
718 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
719 		NOCRED, p);
720 
721 	vrele(ump->um_devvp);
722 
723 	free(fs->fs_csp[0], M_UFSMNT);
724 	free(fs, M_UFSMNT);
725 	free(ump, M_UFSMNT);
726 	mp->mnt_data = (qaddr_t)0;
727 	mp->mnt_flag &= ~MNT_LOCAL;
728 	return (error);
729 }
730 
731 /*
732  * Flush out all the files in a filesystem.
733  */
734 int
735 ffs_flushfiles(mp, flags, p)
736 	register struct mount *mp;
737 	int flags;
738 	struct proc *p;
739 {
740 	register struct ufsmount *ump;
741 	int error;
742 
743 	ump = VFSTOUFS(mp);
744 #ifdef QUOTA
745 	if (mp->mnt_flag & MNT_QUOTA) {
746 		int i;
747 		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
748 		if (error)
749 			return (error);
750 		for (i = 0; i < MAXQUOTAS; i++) {
751 			if (ump->um_quotas[i] == NULLVP)
752 				continue;
753 			quotaoff(p, mp, i);
754 		}
755 		/*
756 		 * Here we fall through to vflush again to ensure
757 		 * that we have gotten rid of all the system vnodes.
758 		 */
759 	}
760 #endif
761 	error = vflush(mp, NULLVP, flags);
762 	return (error);
763 }
764 
765 /*
766  * Get file system statistics.
767  */
768 int
769 ffs_statfs(mp, sbp, p)
770 	struct mount *mp;
771 	register struct statfs *sbp;
772 	struct proc *p;
773 {
774 	register struct ufsmount *ump;
775 	register struct fs *fs;
776 
777 	ump = VFSTOUFS(mp);
778 	fs = ump->um_fs;
779 	if (fs->fs_magic != FS_MAGIC)
780 		panic("ffs_statfs");
781 	sbp->f_bsize = fs->fs_fsize;
782 	sbp->f_iosize = fs->fs_bsize;
783 	sbp->f_blocks = fs->fs_dsize;
784 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
785 		fs->fs_cstotal.cs_nffree;
786 	sbp->f_bavail = freespace(fs, fs->fs_minfree);
787 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
788 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
789 	if (sbp != &mp->mnt_stat) {
790 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
791 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
792 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
793 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
794 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
795 	}
796 	return (0);
797 }
798 
799 /*
800  * Go through the disk queues to initiate sandbagged IO;
801  * go through the inodes to write those that have been modified;
802  * initiate the writing of the super block if it has been modified.
803  *
804  * Note: we are always called with the filesystem marked `MPBUSY'.
805  */
806 int
807 ffs_sync(mp, waitfor, cred, p)
808 	struct mount *mp;
809 	int waitfor;
810 	struct ucred *cred;
811 	struct proc *p;
812 {
813 	struct vnode *nvp, *vp;
814 	struct inode *ip;
815 	struct ufsmount *ump = VFSTOUFS(mp);
816 	struct fs *fs;
817 	struct timeval tv;
818 	int error, allerror = 0;
819 
820 	fs = ump->um_fs;
821 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
822 		printf("fs = %s\n", fs->fs_fsmnt);
823 		panic("ffs_sync: rofs mod");
824 	}
825 	/*
826 	 * Write back each (modified) inode.
827 	 */
828 	simple_lock(&mntvnode_slock);
829 loop:
830 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
831 		/*
832 		 * If the vnode that we are about to sync is no longer
833 		 * associated with this mount point, start over.
834 		 */
835 		if (vp->v_mount != mp)
836 			goto loop;
837 		simple_lock(&vp->v_interlock);
838 		nvp = vp->v_mntvnodes.le_next;
839 		ip = VTOI(vp);
840 		if (((ip->i_flag &
841 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
842 		    vp->v_dirtyblkhd.lh_first == NULL) {
843 			simple_unlock(&vp->v_interlock);
844 			continue;
845 		}
846 		if (vp->v_type != VCHR) {
847 			simple_unlock(&mntvnode_slock);
848 			error =
849 			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
850 			if (error) {
851 				simple_lock(&mntvnode_slock);
852 				if (error == ENOENT)
853 					goto loop;
854 				continue;
855 			}
856 			if (error = VOP_FSYNC(vp, cred, waitfor, p))
857 				allerror = error;
858 			VOP_UNLOCK(vp, 0, p);
859 			vrele(vp);
860 			simple_lock(&mntvnode_slock);
861 		} else {
862 			simple_unlock(&mntvnode_slock);
863 			simple_unlock(&vp->v_interlock);
864 			gettime(&tv);
865 			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
866 			VOP_UPDATE(vp, &tv, &tv, 0);
867 			simple_lock(&mntvnode_slock);
868 		}
869 	}
870 	simple_unlock(&mntvnode_slock);
871 	/*
872 	 * Force stale file system control information to be flushed.
873 	 */
874 	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
875 	if (error)
876 		allerror = error;
877 #ifdef QUOTA
878 	qsync(mp);
879 #endif
880 	/*
881 	 * Write back modified superblock.
882 	 */
883 	if (fs->fs_fmod != 0) {
884 		fs->fs_fmod = 0;
885 		fs->fs_time = time.tv_sec;
886 		if (error = ffs_sbupdate(ump, waitfor))
887 			allerror = error;
888 	}
889 	return (allerror);
890 }
891 
892 /*
893  * Look up a FFS dinode number to find its incore vnode, otherwise read it
894  * in from disk.  If it is in core, wait for the lock bit to clear, then
895  * return the inode locked.  Detection and handling of mount points must be
896  * done by the calling routine.
897  */
898 static int ffs_inode_hash_lock;
899 
900 int
901 ffs_vget(mp, ino, vpp)
902 	struct mount *mp;
903 	ino_t ino;
904 	struct vnode **vpp;
905 {
906 	struct fs *fs;
907 	struct inode *ip;
908 	struct ufsmount *ump;
909 	struct buf *bp;
910 	struct vnode *vp;
911 	dev_t dev;
912 	int type, error;
913 
914 	ump = VFSTOUFS(mp);
915 	dev = ump->um_dev;
916 restart:
917 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
918 		return (0);
919 
920 	/*
921 	 * Lock out the creation of new entries in the FFS hash table in
922 	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
923 	 * may occur!
924 	 */
925 	if (ffs_inode_hash_lock) {
926 		while (ffs_inode_hash_lock) {
927 			ffs_inode_hash_lock = -1;
928 			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
929 		}
930 		goto restart;
931 	}
932 	ffs_inode_hash_lock = 1;
933 
934 	/*
935 	 * If this MALLOC() is performed after the getnewvnode()
936 	 * it might block, leaving a vnode with a NULL v_data to be
937 	 * found by ffs_sync() if a sync happens to fire right then,
938 	 * which will cause a panic because ffs_sync() blindly
939 	 * dereferences vp->v_data (as well it should).
940 	 */
941 	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
942 	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
943 
944 	/* Allocate a new vnode/inode. */
945 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
946 	if (error) {
947 		if (ffs_inode_hash_lock < 0)
948 			wakeup(&ffs_inode_hash_lock);
949 		ffs_inode_hash_lock = 0;
950 		*vpp = NULL;
951 		FREE(ip, type);
952 		return (error);
953 	}
954 	bzero((caddr_t)ip, sizeof(struct inode));
955 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
956 	vp->v_data = ip;
957 	ip->i_vnode = vp;
958 	ip->i_fs = fs = ump->um_fs;
959 	ip->i_dev = dev;
960 	ip->i_number = ino;
961 #ifdef QUOTA
962 	{
963 		int i;
964 		for (i = 0; i < MAXQUOTAS; i++)
965 			ip->i_dquot[i] = NODQUOT;
966 	}
967 #endif
968 	/*
969 	 * Put it onto its hash chain and lock it so that other requests for
970 	 * this inode will block if they arrive while we are sleeping waiting
971 	 * for old data structures to be purged or for the contents of the
972 	 * disk portion of this inode to be read.
973 	 */
974 	ufs_ihashins(ip);
975 
976 	if (ffs_inode_hash_lock < 0)
977 		wakeup(&ffs_inode_hash_lock);
978 	ffs_inode_hash_lock = 0;
979 
980 	/* Read in the disk contents for the inode, copy into the inode. */
981 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
982 	    (int)fs->fs_bsize, NOCRED, &bp);
983 	if (error) {
984 		/*
985 		 * The inode does not contain anything useful, so it would
986 		 * be misleading to leave it on its hash chain. With mode
987 		 * still zero, it will be unlinked and returned to the free
988 		 * list by vput().
989 		 */
990 		brelse(bp);
991 		vput(vp);
992 		*vpp = NULL;
993 		return (error);
994 	}
995 	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
996 	bqrelse(bp);
997 
998 	/*
999 	 * Initialize the vnode from the inode, check for aliases.
1000 	 * Note that the underlying vnode may have changed.
1001 	 */
1002 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1003 	if (error) {
1004 		vput(vp);
1005 		*vpp = NULL;
1006 		return (error);
1007 	}
1008 	/*
1009 	 * Finish inode initialization now that aliasing has been resolved.
1010 	 */
1011 	ip->i_devvp = ump->um_devvp;
1012 	VREF(ip->i_devvp);
1013 	/*
1014 	 * Set up a generation number for this inode if it does not
1015 	 * already have one. This should only happen on old filesystems.
1016 	 */
1017 	if (ip->i_gen == 0) {
1018 		ip->i_gen = random() / 2 + 1;
1019 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1020 			ip->i_flag |= IN_MODIFIED;
1021 	}
1022 	/*
1023 	 * Ensure that uid and gid are correct. This is a temporary
1024 	 * fix until fsck has been changed to do the update.
1025 	 */
1026 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1027 		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1028 		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1029 	}						/* XXX */
1030 
1031 	*vpp = vp;
1032 	return (0);
1033 }
1034 
1035 /*
1036  * File handle to vnode
1037  *
1038  * Have to be really careful about stale file handles:
1039  * - check that the inode number is valid
1040  * - call ffs_vget() to get the locked inode
1041  * - check for an unallocated inode (i_mode == 0)
1042  * - check that the given client host has export rights and return
1043  *   those rights via. exflagsp and credanonp
1044  */
1045 int
1046 ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1047 	register struct mount *mp;
1048 	struct fid *fhp;
1049 	struct mbuf *nam;
1050 	struct vnode **vpp;
1051 	int *exflagsp;
1052 	struct ucred **credanonp;
1053 {
1054 	register struct ufid *ufhp;
1055 	struct fs *fs;
1056 
1057 	ufhp = (struct ufid *)fhp;
1058 	fs = VFSTOUFS(mp)->um_fs;
1059 	if (ufhp->ufid_ino < ROOTINO ||
1060 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1061 		return (ESTALE);
1062 	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1063 }
1064 
1065 /*
1066  * Vnode pointer to File handle
1067  */
1068 /* ARGSUSED */
1069 int
1070 ffs_vptofh(vp, fhp)
1071 	struct vnode *vp;
1072 	struct fid *fhp;
1073 {
1074 	register struct inode *ip;
1075 	register struct ufid *ufhp;
1076 
1077 	ip = VTOI(vp);
1078 	ufhp = (struct ufid *)fhp;
1079 	ufhp->ufid_len = sizeof(struct ufid);
1080 	ufhp->ufid_ino = ip->i_number;
1081 	ufhp->ufid_gen = ip->i_gen;
1082 	return (0);
1083 }
1084 
/*
 * Initialize the filesystem; just use ufs_init.
 *
 * Returns whatever ufs_init() returns.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1095 
1096 /*
1097  * Write a superblock and associated information back to disk.
1098  */
1099 static int
1100 ffs_sbupdate(mp, waitfor)
1101 	struct ufsmount *mp;
1102 	int waitfor;
1103 {
1104 	register struct fs *dfs, *fs = mp->um_fs;
1105 	register struct buf *bp;
1106 	int blks;
1107 	caddr_t space;
1108 	int i, size, error, allerror = 0;
1109 
1110 	/*
1111 	 * First write back the summary information.
1112 	 */
1113 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1114 	space = (caddr_t)fs->fs_csp[0];
1115 	for (i = 0; i < blks; i += fs->fs_frag) {
1116 		size = fs->fs_bsize;
1117 		if (i + fs->fs_frag > blks)
1118 			size = (blks - i) * fs->fs_fsize;
1119 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1120 		    size, 0, 0);
1121 		bcopy(space, bp->b_data, (u_int)size);
1122 		space += size;
1123 		if (waitfor != MNT_WAIT)
1124 			bawrite(bp);
1125 		else if (error = bwrite(bp))
1126 			allerror = error;
1127 	}
1128 	/*
1129 	 * Now write back the superblock itself. If any errors occurred
1130 	 * up to this point, then fail so that the superblock avoids
1131 	 * being written out as clean.
1132 	 */
1133 	if (allerror)
1134 		return (allerror);
1135 	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1136 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1137 	/* Restore compatibility to old file systems.		   XXX */
1138 	dfs = (struct fs *)bp->b_data;				/* XXX */
1139 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1140 		dfs->fs_nrpos = -1;				/* XXX */
1141 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1142 		int32_t *lp, tmp;				/* XXX */
1143 								/* XXX */
1144 		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1145 		tmp = lp[4];					/* XXX */
1146 		for (i = 4; i > 0; i--)				/* XXX */
1147 			lp[i] = lp[i-1];			/* XXX */
1148 		lp[0] = tmp;					/* XXX */
1149 	}							/* XXX */
1150 	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1151 	if (waitfor != MNT_WAIT)
1152 		bawrite(bp);
1153 	else if (error = bwrite(bp))
1154 		allerror = error;
1155 	return (allerror);
1156 }
1157