xref: /freebsd/sys/ufs/ffs/ffs_vfsops.c (revision 3e0f6b97b257a96f7275e4442204263e44b16686)
1 /*
2  * Copyright (c) 1989, 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
34  * $FreeBSD$
35  */
36 
37 #include "opt_quota.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/socket.h>
46 #include <sys/mount.h>
47 #include <sys/buf.h>
48 #include <sys/mbuf.h>
49 #include <sys/file.h>
50 #include <sys/disklabel.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/malloc.h>
54 
55 #include <miscfs/specfs/specdev.h>
56 
57 #include <ufs/ufs/quota.h>
58 #include <ufs/ufs/ufsmount.h>
59 #include <ufs/ufs/inode.h>
60 #include <ufs/ufs/ufs_extern.h>
61 
62 #include <ufs/ffs/fs.h>
63 #include <ufs/ffs/ffs_extern.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_prot.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_extern.h>
71 
72 static int	ffs_sbupdate __P((struct ufsmount *, int));
73 static int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
74 static int	ffs_oldfscompat __P((struct fs *));
75 static int	ffs_mount __P((struct mount *, char *, caddr_t,
76 				struct nameidata *, struct proc *));
77 static int	ffs_init __P((struct vfsconf *));
78 
79 struct vfsops ufs_vfsops = {
80 	ffs_mount,
81 	ufs_start,
82 	ffs_unmount,
83 	ufs_root,
84 	ufs_quotactl,
85 	ffs_statfs,
86 	ffs_sync,
87 	ffs_vget,
88 	ffs_fhtovp,
89 	ffs_vptofh,
90 	ffs_init,
91 };
92 
93 VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0);
94 
95 extern u_long nextgennumber;
96 
97 /*
98  * ffs_mount
99  *
100  * Called when mounting local physical media
101  *
102  * PARAMETERS:
103  *		mountroot
104  *			mp	mount point structure
105  *			path	NULL (flag for root mount!!!)
106  *			data	<unused>
107  *			ndp	<unused>
108  *			p	process (user credentials check [statfs])
109  *
110  *		mount
111  *			mp	mount point structure
112  *			path	path to mount point
113  *			data	pointer to argument struct in user space
114  *			ndp	mount point namei() return (used for
115  *				credentials on reload), reused to look
116  *				up block device.
117  *			p	process (user credentials check)
118  *
119  * RETURNS:	0	Success
120  *		!0	error number (errno.h)
121  *
122  * LOCK STATE:
123  *
124  *		ENTRY
125  *			mount point is locked
126  *		EXIT
127  *			mount point is locked
128  *
129  * NOTES:
130  *		A NULL path can be used for a flag since the mount
131  *		system call will fail with EFAULT in copyinstr in
132  *		namei() if it is a genuine NULL from the user.
133  */
134 static int
135 ffs_mount( mp, path, data, ndp, p)
136         struct mount		*mp;	/* mount struct pointer*/
137         char			*path;	/* path to mount point*/
138         caddr_t			data;	/* arguments to FS specific mount*/
139         struct nameidata	*ndp;	/* mount point credentials*/
140         struct proc		*p;	/* process requesting mount*/
141 {
142 	u_int		size;
143 	int		err = 0;
144 	struct vnode	*devvp;
145 
146 	struct ufs_args args;
147 	struct ufsmount *ump = 0;
148 	register struct fs *fs;
149 	int flags;
150 
151 	/*
152 	 * Use NULL path to flag a root mount
153 	 */
154 	if( path == NULL) {
155 		/*
156 		 ***
157 		 * Mounting root file system
158 		 ***
159 		 */
160 
161 		/* Get vnode for root device*/
162 		if ((err = bdevvp( rootdev, &rootvp))) {
163 			printf("ffs_mountroot: can't setup bdevvp for root");
164 			return (err);
165 		}
166 
167 		/*
168 		 * Attempt mount
169 		 */
170 		if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) {
171 			/* fs specific cleanup (if any)*/
172 			goto error_1;
173 		}
174 
175 		goto dostatfs;		/* success*/
176 
177 	}
178 
179 	/*
180 	 ***
181 	 * Mounting non-root file system or updating a file system
182 	 ***
183 	 */
184 
185 	/* copy in user arguments*/
186 	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
187 	if (err)
188 		goto error_1;		/* can't get arguments*/
189 
190 	/*
191 	 * If updating, check whether changing from read-only to
192 	 * read/write; if there is no device name, that's all we do.
193 	 */
194 	if (mp->mnt_flag & MNT_UPDATE) {
195 		ump = VFSTOUFS(mp);
196 		fs = ump->um_fs;
197 		err = 0;
198 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
199 			flags = WRITECLOSE;
200 			if (mp->mnt_flag & MNT_FORCE)
201 				flags |= FORCECLOSE;
202 			err = ffs_flushfiles(mp, flags, p);
203 		}
204 		if (!err && (mp->mnt_flag & MNT_RELOAD))
205 			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
206 		if (err) {
207 			goto error_1;
208 		}
209 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
210 			if (!fs->fs_clean) {
211 				if (mp->mnt_flag & MNT_FORCE) {
212 					printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
213 				} else {
214 					printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",
215 					    fs->fs_fsmnt);
216 					err = EPERM;
217 					goto error_1;
218 				}
219 			}
220 			fs->fs_ronly = 0;
221 		}
222 		if (fs->fs_ronly == 0) {
223 			fs->fs_clean = 0;
224 			ffs_sbupdate(ump, MNT_WAIT);
225 		}
226 		/* if not updating name...*/
227 		if (args.fspec == 0) {
228 			/*
229 			 * Process export requests.  Jumping to "success"
230 			 * will return the vfs_export() error code.
231 			 */
232 			err = vfs_export(mp, &ump->um_export, &args.export);
233 			goto success;
234 		}
235 	}
236 
237 	/*
238 	 * Not an update, or updating the name: look up the name
239 	 * and verify that it refers to a sensible block device.
240 	 */
241 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
242 	err = namei(ndp);
243 	if (err) {
244 		/* can't get devvp!*/
245 		goto error_1;
246 	}
247 
248 	devvp = ndp->ni_vp;
249 
250 	if (devvp->v_type != VBLK) {
251 		err = ENOTBLK;
252 		goto error_2;
253 	}
254 	if (major(devvp->v_rdev) >= nblkdev) {
255 		err = ENXIO;
256 		goto error_2;
257 	}
258 	if (mp->mnt_flag & MNT_UPDATE) {
259 		/*
260 		 ********************
261 		 * UPDATE
262 		 ********************
263 		 */
264 
265 		if (devvp != ump->um_devvp)
266 			err = EINVAL;	/* needs translation */
267 		else
268 			vrele(devvp);
269 		/*
270 		 * Update device name only on success
271 		 */
272 		if( !err) {
273 			/* Save "mounted from" info for mount point (NULL pad)*/
274 			copyinstr(	args.fspec,
275 					mp->mnt_stat.f_mntfromname,
276 					MNAMELEN - 1,
277 					&size);
278 			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
279 		}
280 	} else {
281 		/*
282 		 ********************
283 		 * NEW MOUNT
284 		 ********************
285 		 */
286 
287 		/*
288 		 * Since this is a new mount, we want the names for
289 		 * the device and the mount point copied in.  If an
290 		 * error occurs,  the mountpoint is discarded by the
291 		 * upper level code.
292 		 */
293 		/* Save "last mounted on" info for mount point (NULL pad)*/
294 		copyinstr(	path,				/* mount point*/
295 				mp->mnt_stat.f_mntonname,	/* save area*/
296 				MNAMELEN - 1,			/* max size*/
297 				&size);				/* real size*/
298 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
299 
300 		/* Save "mounted from" info for mount point (NULL pad)*/
301 		copyinstr(	args.fspec,			/* device name*/
302 				mp->mnt_stat.f_mntfromname,	/* save area*/
303 				MNAMELEN - 1,			/* max size*/
304 				&size);				/* real size*/
305 		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
306 
307 		err = ffs_mountfs(devvp, mp, p);
308 	}
309 	if (err) {
310 		goto error_2;
311 	}
312 
313 dostatfs:
314 	/*
315 	 * Initialize FS stat information in mount struct; uses both
316 	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
317 	 *
318 	 * This code is common to root and non-root mounts
319 	 */
320 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
321 
322 	goto success;
323 
324 
325 error_2:	/* error with devvp held*/
326 
327 	/* release devvp before failing*/
328 	vrele(devvp);
329 
330 error_1:	/* no state to back out*/
331 
332 success:
333 	return( err);
334 }
335 
336 /*
337  * Reload all incore data for a filesystem (used after running fsck on
338  * the root filesystem and finding things to fix). The filesystem must
339  * be mounted read-only.
340  *
341  * Things to do to update the mount:
342  *	1) invalidate all cached meta-data.
343  *	2) re-read superblock from disk.
344  *	3) re-read summary information from disk.
345  *	4) invalidate all inactive vnodes.
346  *	5) invalidate all cached file data.
347  *	6) re-read inode data for all active vnodes.
348  */
349 static int
350 ffs_reload(mp, cred, p)
351 	register struct mount *mp;
352 	struct ucred *cred;
353 	struct proc *p;
354 {
355 	register struct vnode *vp, *nvp, *devvp;
356 	struct inode *ip;
357 	struct csum *space;
358 	struct buf *bp;
359 	struct fs *fs, *newfs;
360 	struct partinfo dpart;
361 	int i, blks, size, error;
362 	int32_t *lp;
363 
364 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
365 		return (EINVAL);
366 	/*
367 	 * Step 1: invalidate all cached meta-data.
368 	 */
369 	devvp = VFSTOUFS(mp)->um_devvp;
370 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
371 		panic("ffs_reload: dirty1");
372 	/*
373 	 * Step 2: re-read superblock from disk.
374 	 */
375 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
376 		size = DEV_BSIZE;
377 	else
378 		size = dpart.disklab->d_secsize;
379 	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp))
380 		return (error);
381 	newfs = (struct fs *)bp->b_data;
382 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
383 		newfs->fs_bsize < sizeof(struct fs)) {
384 			brelse(bp);
385 			return (EIO);		/* XXX needs translation */
386 	}
387 	fs = VFSTOUFS(mp)->um_fs;
388 	/*
389 	 * Copy pointer fields back into superblock before copying in	XXX
390 	 * new superblock. These should really be in the ufsmount.	XXX
391 	 * Note that important parameters (eg fs_ncg) are unchanged.
392 	 */
393 	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
394 	newfs->fs_maxcluster = fs->fs_maxcluster;
395 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
396 	if (fs->fs_sbsize < SBSIZE)
397 		bp->b_flags |= B_INVAL;
398 	brelse(bp);
399 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
400 	ffs_oldfscompat(fs);
401 
402 	/*
403 	 * Step 3: re-read summary information from disk.
404 	 */
405 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
406 	space = fs->fs_csp[0];
407 	for (i = 0; i < blks; i += fs->fs_frag) {
408 		size = fs->fs_bsize;
409 		if (i + fs->fs_frag > blks)
410 			size = (blks - i) * fs->fs_fsize;
411 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
412 		    NOCRED, &bp);
413 		if (error)
414 			return (error);
415 		bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
416 		brelse(bp);
417 	}
418 	/*
419 	 * We no longer know anything about clusters per cylinder group.
420 	 */
421 	if (fs->fs_contigsumsize > 0) {
422 		lp = fs->fs_maxcluster;
423 		for (i = 0; i < fs->fs_ncg; i++)
424 			*lp++ = fs->fs_contigsumsize;
425 	}
426 
427 loop:
428 	simple_lock(&mntvnode_slock);
429 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
430 		if (vp->v_mount != mp) {
431 			simple_unlock(&mntvnode_slock);
432 			goto loop;
433 		}
434 		nvp = vp->v_mntvnodes.le_next;
435 		/*
436 		 * Step 4: invalidate all inactive vnodes.
437 		 */
438 		if (vrecycle(vp, &mntvnode_slock, p))
439 			goto loop;
440 		/*
441 		 * Step 5: invalidate all cached file data.
442 		 */
443 		simple_lock(&vp->v_interlock);
444 		simple_unlock(&mntvnode_slock);
445 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
446 			goto loop;
447 		}
448 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
449 			panic("ffs_reload: dirty2");
450 		/*
451 		 * Step 6: re-read inode data for all active vnodes.
452 		 */
453 		ip = VTOI(vp);
454 		error =
455 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
456 		    (int)fs->fs_bsize, NOCRED, &bp);
457 		if (error) {
458 			vput(vp);
459 			return (error);
460 		}
461 		ip->i_din = *((struct dinode *)bp->b_data +
462 		    ino_to_fsbo(fs, ip->i_number));
463 		brelse(bp);
464 		vput(vp);
465 		simple_lock(&mntvnode_slock);
466 	}
467 	simple_unlock(&mntvnode_slock);
468 	return (0);
469 }
470 
471 /*
472  * Common code for mount and mountroot
473  */
474 int
475 ffs_mountfs(devvp, mp, p)
476 	register struct vnode *devvp;
477 	struct mount *mp;
478 	struct proc *p;
479 {
480 	register struct ufsmount *ump;
481 	struct buf *bp;
482 	register struct fs *fs;
483 	dev_t dev;
484 	struct partinfo dpart;
485 	caddr_t base, space;
486 	int error, i, blks, size, ronly;
487 	int32_t *lp;
488 	struct ucred *cred;
489 	u_int64_t maxfilesize;					/* XXX */
490 	u_int strsize;
491 	int ncount;
492 
493 	dev = devvp->v_rdev;
494 	cred = p ? p->p_ucred : NOCRED;
495 	/*
496 	 * Disallow multiple mounts of the same device.
497 	 * Disallow mounting of a device that is currently in use
498 	 * (except for root, which might share swap device for miniroot).
499 	 * Flush out any old buffers remaining from a previous use.
500 	 */
501 	error = vfs_mountedon(devvp);
502 	if (error)
503 		return (error);
504 	ncount = vcount(devvp);
505 	if (devvp->v_object)
506 		ncount -= 1;
507 	if (ncount > 1 && devvp != rootvp)
508 		return (EBUSY);
509 	if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0))
510 		return (error);
511 
512 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
513 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
514 	if (error)
515 		return (error);
516 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
517 		size = DEV_BSIZE;
518 	else
519 		size = dpart.disklab->d_secsize;
520 
521 	bp = NULL;
522 	ump = NULL;
523 	if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, cred, &bp))
524 		goto out;
525 	fs = (struct fs *)bp->b_data;
526 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
527 	    fs->fs_bsize < sizeof(struct fs)) {
528 		error = EINVAL;		/* XXX needs translation */
529 		goto out;
530 	}
531 	fs->fs_fmod = 0;
532 	if (!fs->fs_clean) {
533 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
534 			printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt);
535 		} else {
536 			printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n",fs->fs_fsmnt);
537 			error = EPERM;
538 			goto out;
539 		}
540 	}
541 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
542 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
543 		error = EROFS;          /* needs translation */
544 		goto out;
545 	}
546 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
547 	bzero((caddr_t)ump, sizeof *ump);
548 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
549 	    M_WAITOK);
550 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
551 	if (fs->fs_sbsize < SBSIZE)
552 		bp->b_flags |= B_INVAL;
553 	brelse(bp);
554 	bp = NULL;
555 	fs = ump->um_fs;
556 	fs->fs_ronly = ronly;
557 	if (ronly == 0) {
558 		fs->fs_fmod = 1;
559 		fs->fs_clean = 0;
560 	}
561 	size = fs->fs_cssize;
562 	blks = howmany(size, fs->fs_fsize);
563 	if (fs->fs_contigsumsize > 0)
564 		size += fs->fs_ncg * sizeof(int32_t);
565 	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
566 	for (i = 0; i < blks; i += fs->fs_frag) {
567 		size = fs->fs_bsize;
568 		if (i + fs->fs_frag > blks)
569 			size = (blks - i) * fs->fs_fsize;
570 		if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
571 		    cred, &bp)) {
572 			free(base, M_UFSMNT);
573 			goto out;
574 		}
575 		bcopy(bp->b_data, space, (u_int)size);
576 		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
577 		space += size;
578 		brelse(bp);
579 		bp = NULL;
580 	}
581 	if (fs->fs_contigsumsize > 0) {
582 		fs->fs_maxcluster = lp = (int32_t *)space;
583 		for (i = 0; i < fs->fs_ncg; i++)
584 			*lp++ = fs->fs_contigsumsize;
585 	}
586 	mp->mnt_data = (qaddr_t)ump;
587 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
588 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
589 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
590 	ump->um_mountp = mp;
591 	ump->um_dev = dev;
592 	ump->um_devvp = devvp;
593 	ump->um_nindir = fs->fs_nindir;
594 	ump->um_bptrtodb = fs->fs_fsbtodb;
595 	ump->um_seqinc = fs->fs_frag;
596 	for (i = 0; i < MAXQUOTAS; i++)
597 		ump->um_quotas[i] = NULLVP;
598 	devvp->v_specflags |= SI_MOUNTEDON;
599 	ffs_oldfscompat(fs);
600 
601 	/*
602 	 * Set FS local "last mounted on" information (NULL pad)
603 	 */
604 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
605 			fs->fs_fsmnt,			/* copy area*/
606 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
607 			&strsize);			/* real size*/
608 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
609 
610 	if( mp->mnt_flag & MNT_ROOTFS) {
611 		/*
612 		 * Root mount; update timestamp in mount structure.
613 		 * this will be used by the common root mount code
614 		 * to update the system clock.
615 		 */
616 		mp->mnt_time = fs->fs_time;
617 	}
618 
619 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
620 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
621 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
622 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
623 	if (ronly == 0) {
624 		fs->fs_clean = 0;
625 		(void) ffs_sbupdate(ump, MNT_WAIT);
626 	}
627 	/*
628 	 * Only VMIO the backing device if the backing device is a real
629 	 * block device.  This excludes the original MFS implementation.
630 	 * Note that it is optional that the backing device be VMIOed.  This
631 	 * increases the opportunity for metadata caching.
632 	 */
633 	if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) {
634 		vfs_object_create(devvp, p, p->p_ucred, 0);
635 	}
636 	return (0);
637 out:
638 	if (bp)
639 		brelse(bp);
640 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
641 	if (ump) {
642 		free(ump->um_fs, M_UFSMNT);
643 		free(ump, M_UFSMNT);
644 		mp->mnt_data = (qaddr_t)0;
645 	}
646 	return (error);
647 }
648 
649 /*
650  * Sanity checks for old file systems.
651  *
652  * XXX - goes away some day.
653  */
654 static int
655 ffs_oldfscompat(fs)
656 	struct fs *fs;
657 {
658 
659 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
660 	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
661 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
662 		fs->fs_nrpos = 8;				/* XXX */
663 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
664 #if 0
665 		int i;						/* XXX */
666 		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
667 								/* XXX */
668 		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
669 		for (i = 0; i < NIADDR; i++) {			/* XXX */
670 			sizepb *= NINDIR(fs);			/* XXX */
671 			fs->fs_maxfilesize += sizepb;		/* XXX */
672 		}						/* XXX */
673 #endif
674 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
675 		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
676 		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
677 	}							/* XXX */
678 	return (0);
679 }
680 
681 /*
682  * unmount system call
683  */
684 int
685 ffs_unmount(mp, mntflags, p)
686 	struct mount *mp;
687 	int mntflags;
688 	struct proc *p;
689 {
690 	register struct ufsmount *ump;
691 	register struct fs *fs;
692 	int error, flags;
693 
694 	flags = 0;
695 	if (mntflags & MNT_FORCE) {
696 		flags |= FORCECLOSE;
697 	}
698 	error = ffs_flushfiles(mp, flags, p);
699 	if (error)
700 		return (error);
701 	ump = VFSTOUFS(mp);
702 	fs = ump->um_fs;
703 	if (fs->fs_ronly == 0) {
704 		fs->fs_clean = 1;
705 		error = ffs_sbupdate(ump, MNT_WAIT);
706 		if (error) {
707 			fs->fs_clean = 0;
708 			return (error);
709 		}
710 	}
711 	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
712 
713 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
714 	vnode_pager_uncache(ump->um_devvp, p);
715 	VOP_UNLOCK(ump->um_devvp, 0, p);
716 
717 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
718 		NOCRED, p);
719 
720 	vrele(ump->um_devvp);
721 
722 	free(fs->fs_csp[0], M_UFSMNT);
723 	free(fs, M_UFSMNT);
724 	free(ump, M_UFSMNT);
725 	mp->mnt_data = (qaddr_t)0;
726 	return (error);
727 }
728 
729 /*
730  * Flush out all the files in a filesystem.
731  */
732 int
733 ffs_flushfiles(mp, flags, p)
734 	register struct mount *mp;
735 	int flags;
736 	struct proc *p;
737 {
738 	register struct ufsmount *ump;
739 	int error;
740 
741 	ump = VFSTOUFS(mp);
742 #ifdef QUOTA
743 	if (mp->mnt_flag & MNT_QUOTA) {
744 		int i;
745 		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
746 		if (error)
747 			return (error);
748 		for (i = 0; i < MAXQUOTAS; i++) {
749 			if (ump->um_quotas[i] == NULLVP)
750 				continue;
751 			quotaoff(p, mp, i);
752 		}
753 		/*
754 		 * Here we fall through to vflush again to ensure
755 		 * that we have gotten rid of all the system vnodes.
756 		 */
757 	}
758 #endif
759 	error = vflush(mp, NULLVP, flags);
760 	return (error);
761 }
762 
763 /*
764  * Get file system statistics.
765  */
766 int
767 ffs_statfs(mp, sbp, p)
768 	struct mount *mp;
769 	register struct statfs *sbp;
770 	struct proc *p;
771 {
772 	register struct ufsmount *ump;
773 	register struct fs *fs;
774 
775 	ump = VFSTOUFS(mp);
776 	fs = ump->um_fs;
777 	if (fs->fs_magic != FS_MAGIC)
778 		panic("ffs_statfs");
779 	sbp->f_bsize = fs->fs_fsize;
780 	sbp->f_iosize = fs->fs_bsize;
781 	sbp->f_blocks = fs->fs_dsize;
782 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
783 		fs->fs_cstotal.cs_nffree;
784 	sbp->f_bavail = freespace(fs, fs->fs_minfree);
785 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
786 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
787 	if (sbp != &mp->mnt_stat) {
788 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
789 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
790 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
791 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
792 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
793 	}
794 	return (0);
795 }
796 
797 /*
798  * Go through the disk queues to initiate sandbagged IO;
799  * go through the inodes to write those that have been modified;
800  * initiate the writing of the super block if it has been modified.
801  *
802  * Note: we are always called with the filesystem marked `MPBUSY'.
803  */
804 int
805 ffs_sync(mp, waitfor, cred, p)
806 	struct mount *mp;
807 	int waitfor;
808 	struct ucred *cred;
809 	struct proc *p;
810 {
811 	struct vnode *nvp, *vp;
812 	struct inode *ip;
813 	struct ufsmount *ump = VFSTOUFS(mp);
814 	struct fs *fs;
815 	struct timeval tv;
816 	int error, allerror = 0;
817 
818 	fs = ump->um_fs;
819 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
820 		printf("fs = %s\n", fs->fs_fsmnt);
821 		panic("update: rofs mod");
822 	}
823 	/*
824 	 * Write back each (modified) inode.
825 	 */
826 	simple_lock(&mntvnode_slock);
827 loop:
828 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
829 		/*
830 		 * If the vnode that we are about to sync is no longer
831 		 * associated with this mount point, start over.
832 		 */
833 		if (vp->v_mount != mp)
834 			goto loop;
835 		simple_lock(&vp->v_interlock);
836 		nvp = vp->v_mntvnodes.le_next;
837 		ip = VTOI(vp);
838 		if (((ip->i_flag &
839 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
840 		    vp->v_dirtyblkhd.lh_first == NULL) {
841 			simple_unlock(&vp->v_interlock);
842 			continue;
843 		}
844 		if (vp->v_type != VCHR) {
845 			simple_unlock(&mntvnode_slock);
846 			error =
847 			  vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
848 			if (error) {
849 				simple_lock(&mntvnode_slock);
850 				if (error == ENOENT)
851 					goto loop;
852 				continue;
853 			}
854 			if (error = VOP_FSYNC(vp, cred, waitfor, p))
855 				allerror = error;
856 			VOP_UNLOCK(vp, 0, p);
857 			vrele(vp);
858 			simple_lock(&mntvnode_slock);
859 		} else {
860 			simple_unlock(&mntvnode_slock);
861 			simple_unlock(&vp->v_interlock);
862 			tv = time;
863 			/* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */
864 			VOP_UPDATE(vp, &tv, &tv, 0);
865 			simple_lock(&mntvnode_slock);
866 		}
867 	}
868 	simple_unlock(&mntvnode_slock);
869 	/*
870 	 * Force stale file system control information to be flushed.
871 	 */
872 	error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p);
873 	if (error)
874 		allerror = error;
875 #ifdef QUOTA
876 	qsync(mp);
877 #endif
878 	/*
879 	 * Write back modified superblock.
880 	 */
881 	if (fs->fs_fmod != 0) {
882 		fs->fs_fmod = 0;
883 		fs->fs_time = time.tv_sec;
884 		if (error = ffs_sbupdate(ump, waitfor))
885 			allerror = error;
886 	}
887 	return (allerror);
888 }
889 
890 /*
891  * Look up a FFS dinode number to find its incore vnode, otherwise read it
892  * in from disk.  If it is in core, wait for the lock bit to clear, then
893  * return the inode locked.  Detection and handling of mount points must be
894  * done by the calling routine.
895  */
896 static int ffs_inode_hash_lock;
897 
898 int
899 ffs_vget(mp, ino, vpp)
900 	struct mount *mp;
901 	ino_t ino;
902 	struct vnode **vpp;
903 {
904 	struct fs *fs;
905 	struct inode *ip;
906 	struct ufsmount *ump;
907 	struct buf *bp;
908 	struct vnode *vp;
909 	dev_t dev;
910 	int type, error;
911 
912 	ump = VFSTOUFS(mp);
913 	dev = ump->um_dev;
914 restart:
915 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
916 		return (0);
917 
918 	/*
919 	 * Lock out the creation of new entries in the FFS hash table in
920 	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
921 	 * may occur!
922 	 */
923 	if (ffs_inode_hash_lock) {
924 		while (ffs_inode_hash_lock) {
925 			ffs_inode_hash_lock = -1;
926 			tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
927 		}
928 		goto restart;
929 	}
930 	ffs_inode_hash_lock = 1;
931 
932 	/*
933 	 * If this MALLOC() is performed after the getnewvnode()
934 	 * it might block, leaving a vnode with a NULL v_data to be
935 	 * found by ffs_sync() if a sync happens to fire right then,
936 	 * which will cause a panic because ffs_sync() blindly
937 	 * dereferences vp->v_data (as well it should).
938 	 */
939 	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
940 	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
941 
942 	/* Allocate a new vnode/inode. */
943 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
944 	if (error) {
945 		if (ffs_inode_hash_lock < 0)
946 			wakeup(&ffs_inode_hash_lock);
947 		ffs_inode_hash_lock = 0;
948 		*vpp = NULL;
949 		FREE(ip, type);
950 		return (error);
951 	}
952 	bzero((caddr_t)ip, sizeof(struct inode));
953 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
954 	vp->v_data = ip;
955 	ip->i_vnode = vp;
956 	ip->i_fs = fs = ump->um_fs;
957 	ip->i_dev = dev;
958 	ip->i_number = ino;
959 #ifdef QUOTA
960 	{
961 		int i;
962 		for (i = 0; i < MAXQUOTAS; i++)
963 			ip->i_dquot[i] = NODQUOT;
964 	}
965 #endif
966 	/*
967 	 * Put it onto its hash chain and lock it so that other requests for
968 	 * this inode will block if they arrive while we are sleeping waiting
969 	 * for old data structures to be purged or for the contents of the
970 	 * disk portion of this inode to be read.
971 	 */
972 	ufs_ihashins(ip);
973 
974 	if (ffs_inode_hash_lock < 0)
975 		wakeup(&ffs_inode_hash_lock);
976 	ffs_inode_hash_lock = 0;
977 
978 	/* Read in the disk contents for the inode, copy into the inode. */
979 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
980 	    (int)fs->fs_bsize, NOCRED, &bp);
981 	if (error) {
982 		/*
983 		 * The inode does not contain anything useful, so it would
984 		 * be misleading to leave it on its hash chain. With mode
985 		 * still zero, it will be unlinked and returned to the free
986 		 * list by vput().
987 		 */
988 		brelse(bp);
989 		vput(vp);
990 		*vpp = NULL;
991 		return (error);
992 	}
993 	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
994 	bqrelse(bp);
995 
996 	/*
997 	 * Initialize the vnode from the inode, check for aliases.
998 	 * Note that the underlying vnode may have changed.
999 	 */
1000 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1001 	if (error) {
1002 		vput(vp);
1003 		*vpp = NULL;
1004 		return (error);
1005 	}
1006 	/*
1007 	 * Finish inode initialization now that aliasing has been resolved.
1008 	 */
1009 	ip->i_devvp = ump->um_devvp;
1010 	VREF(ip->i_devvp);
1011 	/*
1012 	 * Set up a generation number for this inode if it does not
1013 	 * already have one. This should only happen on old filesystems.
1014 	 */
1015 	if (ip->i_gen == 0) {
1016 		if (++nextgennumber < (u_long)time.tv_sec)
1017 			nextgennumber = time.tv_sec;
1018 		ip->i_gen = nextgennumber;
1019 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1020 			ip->i_flag |= IN_MODIFIED;
1021 	}
1022 	/*
1023 	 * Ensure that uid and gid are correct. This is a temporary
1024 	 * fix until fsck has been changed to do the update.
1025 	 */
1026 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
1027 		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
1028 		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
1029 	}						/* XXX */
1030 
1031 	*vpp = vp;
1032 	return (0);
1033 }
1034 
1035 /*
1036  * File handle to vnode
1037  *
1038  * Have to be really careful about stale file handles:
1039  * - check that the inode number is valid
1040  * - call ffs_vget() to get the locked inode
1041  * - check for an unallocated inode (i_mode == 0)
1042  * - check that the given client host has export rights and return
1043  *   those rights via. exflagsp and credanonp
1044  */
1045 int
1046 ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
1047 	register struct mount *mp;
1048 	struct fid *fhp;
1049 	struct mbuf *nam;
1050 	struct vnode **vpp;
1051 	int *exflagsp;
1052 	struct ucred **credanonp;
1053 {
1054 	register struct ufid *ufhp;
1055 	struct fs *fs;
1056 
1057 	ufhp = (struct ufid *)fhp;
1058 	fs = VFSTOUFS(mp)->um_fs;
1059 	if (ufhp->ufid_ino < ROOTINO ||
1060 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1061 		return (ESTALE);
1062 	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
1063 }
1064 
1065 /*
1066  * Vnode pointer to File handle
1067  */
1068 /* ARGSUSED */
1069 int
1070 ffs_vptofh(vp, fhp)
1071 	struct vnode *vp;
1072 	struct fid *fhp;
1073 {
1074 	register struct inode *ip;
1075 	register struct ufid *ufhp;
1076 
1077 	ip = VTOI(vp);
1078 	ufhp = (struct ufid *)fhp;
1079 	ufhp->ufid_len = sizeof(struct ufid);
1080 	ufhp->ufid_ino = ip->i_number;
1081 	ufhp->ufid_gen = ip->i_gen;
1082 	return (0);
1083 }
1084 
/*
 * Initialize the filesystem.  FFS has nothing of its own to set up, so
 * the work is delegated to ufs_init() and its result is returned.
 */
static int
ffs_init(vfsp)
	struct vfsconf *vfsp;
{
	int error;

	error = ufs_init(vfsp);
	return (error);
}
1095 
1096 /*
1097  * Write a superblock and associated information back to disk.
1098  */
1099 static int
1100 ffs_sbupdate(mp, waitfor)
1101 	struct ufsmount *mp;
1102 	int waitfor;
1103 {
1104 	register struct fs *dfs, *fs = mp->um_fs;
1105 	register struct buf *bp;
1106 	int blks;
1107 	caddr_t space;
1108 	int i, size, error, allerror = 0;
1109 
1110 	/*
1111 	 * First write back the summary information.
1112 	 */
1113 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
1114 	space = (caddr_t)fs->fs_csp[0];
1115 	for (i = 0; i < blks; i += fs->fs_frag) {
1116 		size = fs->fs_bsize;
1117 		if (i + fs->fs_frag > blks)
1118 			size = (blks - i) * fs->fs_fsize;
1119 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1120 		    size, 0, 0);
1121 		bcopy(space, bp->b_data, (u_int)size);
1122 		space += size;
1123 		if (waitfor != MNT_WAIT)
1124 			bawrite(bp);
1125 		else if (error = bwrite(bp))
1126 			allerror = error;
1127 	}
1128 	/*
1129 	 * Now write back the superblock itself. If any errors occurred
1130 	 * up to this point, then fail so that the superblock avoids
1131 	 * being written out as clean.
1132 	 */
1133 	if (allerror)
1134 		return (allerror);
1135 	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1136 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1137 	/* Restore compatibility to old file systems.		   XXX */
1138 	dfs = (struct fs *)bp->b_data;				/* XXX */
1139 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
1140 		dfs->fs_nrpos = -1;				/* XXX */
1141 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
1142 		int32_t *lp, tmp;				/* XXX */
1143 								/* XXX */
1144 		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
1145 		tmp = lp[4];					/* XXX */
1146 		for (i = 4; i > 0; i--)				/* XXX */
1147 			lp[i] = lp[i-1];			/* XXX */
1148 		lp[0] = tmp;					/* XXX */
1149 	}							/* XXX */
1150 	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
1151 	if (waitfor != MNT_WAIT)
1152 		bawrite(bp);
1153 	else if (error = bwrite(bp))
1154 		allerror = error;
1155 	return (allerror);
1156 }
1157