xref: /freebsd/sys/kern/vfs_mount.c (revision b52b9d56d4e96089873a75f9e29062eec19fabba)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  * Copyright (c) 1999 Michael Smith
39  * All rights reserved.
40  * Copyright (c) 1999 Poul-Henning Kamp
41  * All rights reserved.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  * $FreeBSD$
65  */
66 
67 #include <sys/param.h>
68 #include <sys/conf.h>
69 #include <sys/cons.h>
70 #include <sys/kernel.h>
71 #include <sys/linker.h>
72 #include <sys/malloc.h>
73 #include <sys/mount.h>
74 #include <sys/mutex.h>
75 #include <sys/namei.h>
76 #include <sys/proc.h>
77 #include <sys/reboot.h>
78 #include <sys/sysproto.h>
79 #include <sys/sx.h>
80 #include <sys/sysctl.h>
81 #include <sys/sysent.h>
82 #include <sys/systm.h>
83 #include <sys/vnode.h>
84 
85 #include <machine/stdarg.h>
86 
87 #include "opt_rootdevname.h"
88 #include "opt_ddb.h"
89 
90 #ifdef DDB
91 #include <ddb/ddb.h>
92 #endif
93 
/* Name macro for the root device; not referenced in this part of the file. */
#define ROOTNAME	"root_device"

/* Forward declarations of helpers that are private to this file. */
static void	checkdirs(struct vnode *olddp, struct vnode *newdp);
static int	vfs_nmount(struct thread *td, int, struct uio *);
static int	vfs_mountroot_try(char *mountfrom);
static int	vfs_mountroot_ask(void);
/* File-local (static) routine; defined further down in this file. */
static void	gets(char *cp);

/*
 * Mount policy knob, exported read/write through SYSCTL_INT under the
 * _vfs node.  While it is 0 the mount entry points in this file require
 * suser() to succeed before doing anything else.
 */
static int	usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Malloc type used in this file for struct mount, the mount option
 * lists and their name/value buffers.
 */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");

/* List of mounted filesystems. */
struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);

/* For any iteration/modification of mountlist */
struct mtx mountlist_mtx;

/* For any iteration/modification of mnt_vnodelist */
struct mtx mntvnode_mtx;

/*
 * The vnode of the system's root (/ in the filesystem, without chroot
 * active.)  checkdirs() below replaces it when a filesystem is mounted
 * on top of the vnode it currently refers to.
 */
struct vnode	*rootvnode;
121 
/*
 * The root filesystem is detailed in the kernel environment variable
 * vfs.root.mountfrom, which is expected to be in the general format
 *
 * <vfsname>:[<path>]
 * vfsname   := the name of a VFS known to the kernel and capable
 *              of being mounted as root
 * path      := disk device name or other data used by the filesystem
 *              to locate its physical store
 */

/*
 * The root specifiers we will try if RB_CDROM is specified.
 * Each entry uses the <vfsname>:<path> format described above and the
 * table is terminated by a NULL pointer.
 */
static char *cdrom_rootdevnames[] = {
	"cd9660:cd0a",
	"cd9660:acd0a",
	"cd9660:wcd0a",
	NULL
};

/* legacy find-root code */
/* Two root device name slots, both empty (NULL) until something sets them. */
char		*rootdevnames[2] = {NULL, NULL};
/* NOTE(review): presumably resolves `name' into rootdev -- defined later. */
static int	setrootbyname(char *name);
/* Root device; starts out as NODEV, i.e. not yet determined. */
dev_t		rootdev = NODEV;
147 
148 /* Remove one mount option. */
149 static void
150 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
151 {
152 
153 	TAILQ_REMOVE(opts, opt, link);
154 	free(opt->name, M_MOUNT);
155 	if (opt->value != NULL)
156 		free(opt->value, M_MOUNT);
157 #ifdef INVARIANTS
158 	else if (opt->len != 0)
159 		panic("%s: mount option with NULL value but length != 0",
160 		    __func__);
161 #endif
162 	free(opt, M_MOUNT);
163 }
164 
165 /* Release all resources related to the mount options. */
166 static void
167 vfs_freeopts(struct vfsoptlist *opts)
168 {
169 	struct vfsopt *opt;
170 
171 	while (!TAILQ_EMPTY(opts)) {
172 		opt = TAILQ_FIRST(opts);
173 		vfs_freeopt(opts, opt);
174 	}
175 	free(opts, M_MOUNT);
176 }
177 
178 /*
179  * If a mount option is specified several times,
180  * (with or without the "no" prefix) only keep
181  * the last occurence of it.
182  */
183 static void
184 vfs_sanitizeopts(struct vfsoptlist *opts)
185 {
186 	struct vfsopt *opt, *opt2, *tmp;
187 	int noopt;
188 
189 	TAILQ_FOREACH_REVERSE(opt, opts, vfsoptlist, link) {
190 		if (strncmp(opt->name, "no", 2) == 0)
191 			noopt = 1;
192 		else
193 			noopt = 0;
194 		opt2 = TAILQ_PREV(opt, vfsoptlist, link);
195 		while (opt2 != NULL) {
196 			if (strcmp(opt2->name, opt->name) == 0 ||
197 			    (noopt && strcmp(opt->name + 2, opt2->name) == 0) ||
198 			    (!noopt && strncmp(opt2->name, "no", 2) == 0 &&
199 			    strcmp(opt2->name + 2, opt->name) == 0)) {
200 				tmp = TAILQ_PREV(opt2, vfsoptlist, link);
201 				vfs_freeopt(opts, opt2);
202 				opt2 = tmp;
203 			} else {
204 				opt2 = TAILQ_PREV(opt2, vfsoptlist, link);
205 			}
206 		}
207 	}
208 }
209 
210 /*
211  * Build a linked list of mount options from a struct uio.
212  */
213 static int
214 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
215 {
216 	struct vfsoptlist *opts;
217 	struct vfsopt *opt;
218 	unsigned int i, iovcnt;
219 	int error, namelen, optlen;
220 
221 	iovcnt = auio->uio_iovcnt;
222 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
223 	TAILQ_INIT(opts);
224 	for (i = 0; i < iovcnt; i += 2) {
225 		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
226 		namelen = auio->uio_iov[i].iov_len;
227 		optlen = auio->uio_iov[i + 1].iov_len;
228 		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
229 		opt->value = NULL;
230 		if (auio->uio_segflg == UIO_SYSSPACE) {
231 			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
232 		} else {
233 			error = copyin(auio->uio_iov[i].iov_base, opt->name,
234 			    namelen);
235 			if (error)
236 				goto bad;
237 		}
238 		opt->len = optlen;
239 		if (optlen != 0) {
240 			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
241 			if (auio->uio_segflg == UIO_SYSSPACE) {
242 				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
243 				    optlen);
244 			} else {
245 				error = copyin(auio->uio_iov[i + 1].iov_base,
246 				    opt->value, optlen);
247 				if (error)
248 					goto bad;
249 			}
250 		}
251 		TAILQ_INSERT_TAIL(opts, opt, link);
252 	}
253 	vfs_sanitizeopts(opts);
254 	*options = opts;
255 	return (0);
256 bad:
257 	vfs_freeopts(opts);
258 	return (error);
259 }
260 
261 /*
262  * Merge the old mount options with the new ones passed
263  * in the MNT_UPDATE case.
264  */
265 static void
266 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *opts)
267 {
268 	struct vfsopt *opt, *opt2, *new;
269 
270 	TAILQ_FOREACH(opt, opts, link) {
271 		/*
272 		 * Check that this option hasn't been redefined
273 		 * nor cancelled with a "no" mount option.
274 		 */
275 		opt2 = TAILQ_FIRST(toopts);
276 		while (opt2 != NULL) {
277 			if (strcmp(opt2->name, opt->name) == 0)
278 				goto next;
279 			if (strncmp(opt2->name, "no", 2) == 0 &&
280 			    strcmp(opt2->name + 2, opt->name) == 0) {
281 				vfs_freeopt(toopts, opt2);
282 				goto next;
283 			}
284 			opt2 = TAILQ_NEXT(opt2, link);
285 		}
286 		/* We want this option, duplicate it. */
287 		new = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
288 		new->name = malloc(strlen(opt->name) + 1, M_MOUNT, M_WAITOK);
289 		strcpy(new->name, opt->name);
290 		if (opt->len != 0) {
291 			new->value = malloc(opt->len, M_MOUNT, M_WAITOK);
292 			bcopy(opt->value, new->value, opt->len);
293 		} else {
294 			new->value = NULL;
295 		}
296 		new->len = opt->len;
297 		TAILQ_INSERT_TAIL(toopts, new, link);
298 next:
299 		continue;
300 	}
301 }
302 
303 /*
304  * New mount API.
305  */
306 int
307 nmount(td, uap)
308 	struct thread *td;
309 	struct nmount_args /* {
310 		syscallarg(struct iovec *) iovp;
311 		syscallarg(unsigned int) iovcnt;
312 		syscallarg(int) flags;
313 	} */ *uap;
314 {
315 	struct uio auio;
316 	struct iovec *iov, *needfree;
317 	struct iovec aiov[UIO_SMALLIOV];
318 	unsigned int i;
319 	int error;
320 	u_int iovlen, iovcnt;
321 
322 	iovcnt = SCARG(uap, iovcnt);
323 	iovlen = iovcnt * sizeof (struct iovec);
324 	/*
325 	 * Check that we have an even number of iovec's
326 	 * and that we have at least two options.
327 	 */
328 	if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV))
329 		return (EINVAL);
330 
331 	if (iovcnt > UIO_SMALLIOV) {
332 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
333 		needfree = iov;
334 	} else {
335 		iov = aiov;
336 		needfree = NULL;
337 	}
338 	auio.uio_iov = iov;
339 	auio.uio_iovcnt = iovcnt;
340 	auio.uio_segflg = UIO_USERSPACE;
341 	if ((error = copyin(uap->iovp, iov, iovlen)))
342 		goto finish;
343 
344 	for (i = 0; i < iovcnt; i++) {
345 		if (iov->iov_len > MMAXOPTIONLEN) {
346 			error = EINVAL;
347 			goto finish;
348 		}
349 		iov++;
350 	}
351 	error = vfs_nmount(td, SCARG(uap, flags), &auio);
352 finish:
353 	if (needfree != NULL)
354 		free(needfree, M_TEMP);
355 	return (error);
356 }
357 
358 int
359 kernel_mount(iovp, iovcnt, flags)
360 	struct iovec *iovp;
361 	unsigned int iovcnt;
362 	int flags;
363 {
364 	struct uio auio;
365 	int error;
366 
367 	/*
368 	 * Check that we have an even number of iovec's
369 	 * and that we have at least two options.
370 	 */
371 	if ((iovcnt & 1) || (iovcnt < 4))
372 		return (EINVAL);
373 
374 	auio.uio_iov = iovp;
375 	auio.uio_iovcnt = iovcnt;
376 	auio.uio_segflg = UIO_SYSSPACE;
377 
378 	error = vfs_nmount(curthread, flags, &auio);
379 	return (error);
380 }
381 
382 int
383 kernel_vmount(int flags, ...)
384 {
385 	struct iovec *iovp;
386 	struct uio auio;
387 	va_list ap;
388 	unsigned int iovcnt, iovlen, len;
389 	const char *cp;
390 	char *buf, *pos;
391 	size_t n;
392 	int error, i;
393 
394 	len = 0;
395 	va_start(ap, flags);
396 	for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++)
397 		len += strlen(cp) + 1;
398 	va_end(ap);
399 
400 	if (iovcnt < 4 || iovcnt & 1)
401 		return (EINVAL);
402 
403 	iovlen = iovcnt * sizeof (struct iovec);
404 	MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK);
405 	MALLOC(buf, char *, len, M_MOUNT, M_WAITOK);
406 	pos = buf;
407 	va_start(ap, flags);
408 	for (i = 0; i < iovcnt; i++) {
409 		cp = va_arg(ap, const char *);
410 		copystr(cp, pos, len - (pos - buf), &n);
411 		iovp[i].iov_base = pos;
412 		iovp[i].iov_len = n;
413 		pos += n;
414 	}
415 	va_end(ap);
416 
417 	auio.uio_iov = iovp;
418 	auio.uio_iovcnt = iovcnt;
419 	auio.uio_segflg = UIO_SYSSPACE;
420 
421 	error = vfs_nmount(curthread, flags, &auio);
422 	FREE(iovp, M_MOUNT);
423 	FREE(buf, M_MOUNT);
424 	return (error);
425 }
426 
427 /*
428  * vfs_nmount(): actually attempt a filesystem mount.
429  */
430 static int
431 vfs_nmount(td, fsflags, fsoptions)
432 	struct thread *td;
433 	int fsflags;		/* Flags common to all filesystems. */
434 	struct uio *fsoptions;	/* Options local to the filesystem. */
435 {
436 	linker_file_t lf;
437 	struct vnode *vp;
438 	struct mount *mp;
439 	struct vfsconf *vfsp;
440 	struct vfsoptlist *optlist;
441 	char *fstype, *fspath;
442 	int error, flag = 0, kern_flag = 0;
443 	int fstypelen, fspathlen;
444 	struct vattr va;
445 	struct nameidata nd;
446 
447 	error = vfs_buildopts(fsoptions, &optlist);
448 	if (error)
449 		return (error);
450 
451 	/*
452 	 * We need these two options before the others,
453 	 * and they are mandatory for any filesystem.
454 	 * Ensure they are NUL terminated as well.
455 	 */
456 	fstypelen = 0;
457 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
458 	if (error || fstype[fstypelen - 1] != '\0') {
459 		error = EINVAL;
460 		goto bad;
461 	}
462 	fspathlen = 0;
463 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
464 	if (error || fspath[fspathlen - 1] != '\0') {
465 		error = EINVAL;
466 		goto bad;
467 	}
468 
469 	/*
470 	 * Be ultra-paranoid about making sure the type and fspath
471 	 * variables will fit in our mp buffers, including the
472 	 * terminating NUL.
473 	 */
474 	if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) {
475 		error = ENAMETOOLONG;
476 		goto bad;
477 	}
478 
479 	if (usermount == 0) {
480 	       	error = suser(td);
481 		if (error)
482 			goto bad;
483 	}
484 	/*
485 	 * Do not allow NFS export by non-root users.
486 	 */
487 	if (fsflags & MNT_EXPORTED) {
488 		error = suser(td);
489 		if (error)
490 			goto bad;
491 	}
492 	/*
493 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
494 	 */
495 	if (suser(td))
496 		fsflags |= MNT_NOSUID | MNT_NODEV;
497 	/*
498 	 * Get vnode to be covered
499 	 */
500 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
501 	if ((error = namei(&nd)) != 0)
502 		goto bad;
503 	NDFREE(&nd, NDF_ONLY_PNBUF);
504 	vp = nd.ni_vp;
505 	if (fsflags & MNT_UPDATE) {
506 		if ((vp->v_flag & VROOT) == 0) {
507 			vput(vp);
508 			error = EINVAL;
509 			goto bad;
510 		}
511 		mp = vp->v_mount;
512 		flag = mp->mnt_flag;
513 		kern_flag = mp->mnt_kern_flag;
514 		/*
515 		 * We only allow the filesystem to be reloaded if it
516 		 * is currently mounted read-only.
517 		 */
518 		if ((fsflags & MNT_RELOAD) &&
519 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
520 			vput(vp);
521 			error = EOPNOTSUPP;	/* Needs translation */
522 			goto bad;
523 		}
524 		/*
525 		 * Only root, or the user that did the original mount is
526 		 * permitted to update it.
527 		 */
528 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
529 			error = suser(td);
530 			if (error) {
531 				vput(vp);
532 				goto bad;
533 			}
534 		}
535 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
536 			vput(vp);
537 			error = EBUSY;
538 			goto bad;
539 		}
540 		mtx_lock(&vp->v_interlock);
541 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
542 			mtx_unlock(&vp->v_interlock);
543 			vfs_unbusy(mp, td);
544 			vput(vp);
545 			error = EBUSY;
546 			goto bad;
547 		}
548 		vp->v_flag |= VMOUNT;
549 		mtx_unlock(&vp->v_interlock);
550 		mp->mnt_flag |= fsflags &
551 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
552 		VOP_UNLOCK(vp, 0, td);
553 		mp->mnt_optnew = optlist;
554 		vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
555 		goto update;
556 	}
557 	/*
558 	 * If the user is not root, ensure that they own the directory
559 	 * onto which we are attempting to mount.
560 	 */
561 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
562 	if (error) {
563 		vput(vp);
564 		goto bad;
565 	}
566 	if (va.va_uid != td->td_ucred->cr_uid) {
567 		error = suser(td);
568 		if (error) {
569 			vput(vp);
570 			goto bad;
571 		}
572 	}
573 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
574 		vput(vp);
575 		goto bad;
576 	}
577 	if (vp->v_type != VDIR) {
578 		vput(vp);
579 		error = ENOTDIR;
580 		goto bad;
581 	}
582 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
583 		if (!strcmp(vfsp->vfc_name, fstype))
584 			break;
585 	if (vfsp == NULL) {
586 		/* Only load modules for root (very important!). */
587 		error = suser(td);
588 		if (error) {
589 			vput(vp);
590 			goto bad;
591 		}
592 		error = securelevel_gt(td->td_ucred, 0);
593 		if (error) {
594 			vput(vp);
595 			goto bad;
596 		}
597 		error = linker_load_file(fstype, &lf);
598 		if (error || lf == NULL) {
599 			vput(vp);
600 			if (lf == NULL)
601 				error = ENODEV;
602 			goto bad;
603 		}
604 		lf->userrefs++;
605 		/* Look up again to see if the VFS was loaded. */
606 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
607 			if (!strcmp(vfsp->vfc_name, fstype))
608 				break;
609 		if (vfsp == NULL) {
610 			lf->userrefs--;
611 			linker_file_unload(lf);
612 			vput(vp);
613 			error = ENODEV;
614 			goto bad;
615 		}
616 	}
617 	mtx_lock(&vp->v_interlock);
618 	if ((vp->v_flag & VMOUNT) != 0 ||
619 	    vp->v_mountedhere != NULL) {
620 		mtx_unlock(&vp->v_interlock);
621 		vput(vp);
622 		error = EBUSY;
623 		goto bad;
624 	}
625 	vp->v_flag |= VMOUNT;
626 	mtx_unlock(&vp->v_interlock);
627 
628 	/*
629 	 * Allocate and initialize the filesystem.
630 	 */
631 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
632 	TAILQ_INIT(&mp->mnt_nvnodelist);
633 	TAILQ_INIT(&mp->mnt_reservedvnlist);
634 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
635 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
636 	mp->mnt_op = vfsp->vfc_vfsops;
637 	mp->mnt_vfc = vfsp;
638 	vfsp->vfc_refcount++;
639 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
640 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
641 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
642 	mp->mnt_vnodecovered = vp;
643 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
644 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
645 	mp->mnt_iosize_max = DFLTPHYS;
646 	VOP_UNLOCK(vp, 0, td);
647 	mp->mnt_optnew = optlist;
648 
649 update:
650 	/*
651 	 * Check if the fs implements the new VFS_NMOUNT()
652 	 * function, since the new system call was used.
653 	 */
654 	if (mp->mnt_op->vfs_mount != NULL) {
655 		printf("%s doesn't support the new mount syscall\n",
656 		    mp->mnt_vfc->vfc_name);
657 		mtx_lock(&vp->v_interlock);
658 		vp->v_flag &= ~VMOUNT;
659 		mtx_unlock(&vp->v_interlock);
660 		if (mp->mnt_flag & MNT_UPDATE)
661 			vfs_unbusy(mp, td);
662 		else {
663 			mp->mnt_vfc->vfc_refcount--;
664 			vfs_unbusy(mp, td);
665 			free(mp, M_MOUNT);
666 		}
667 		vrele(vp);
668 		error = EOPNOTSUPP;
669 		goto bad;
670 	}
671 
672 	/*
673 	 * Set the mount level flags.
674 	 */
675 	if (fsflags & MNT_RDONLY)
676 		mp->mnt_flag |= MNT_RDONLY;
677 	else if (mp->mnt_flag & MNT_RDONLY)
678 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
679 	mp->mnt_flag &=~ MNT_UPDATEMASK;
680 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
681 	/*
682 	 * Mount the filesystem.
683 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
684 	 * get.  No freeing of cn_pnbuf.
685 	 */
686 	error = VFS_NMOUNT(mp, &nd, td);
687 	if (!error) {
688 		if (mp->mnt_opt != NULL)
689 			vfs_freeopts(mp->mnt_opt);
690 		mp->mnt_opt = mp->mnt_optnew;
691 	}
692 	/*
693 	 * Prevent external consumers of mount
694 	 * options to read mnt_optnew.
695 	 */
696 	mp->mnt_optnew = NULL;
697 	if (mp->mnt_flag & MNT_UPDATE) {
698 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
699 			mp->mnt_flag &= ~MNT_RDONLY;
700 		mp->mnt_flag &=~
701 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
702 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
703 		if (error) {
704 			mp->mnt_flag = flag;
705 			mp->mnt_kern_flag = kern_flag;
706 		}
707 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
708 			if (mp->mnt_syncer == NULL)
709 				error = vfs_allocate_syncvnode(mp);
710 		} else {
711 			if (mp->mnt_syncer != NULL)
712 				vput(mp->mnt_syncer);
713 			mp->mnt_syncer = NULL;
714 		}
715 		vfs_unbusy(mp, td);
716 		mtx_lock(&vp->v_interlock);
717 		vp->v_flag &= ~VMOUNT;
718 		mtx_unlock(&vp->v_interlock);
719 		vrele(vp);
720 		return (error);
721 	}
722 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
723 	/*
724 	 * Put the new filesystem on the mount list after root.
725 	 */
726 	cache_purge(vp);
727 	if (!error) {
728 		struct vnode *newdp;
729 
730 		mtx_lock(&vp->v_interlock);
731 		vp->v_flag &= ~VMOUNT;
732 		vp->v_mountedhere = mp;
733 		mtx_unlock(&vp->v_interlock);
734 		mtx_lock(&mountlist_mtx);
735 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
736 		mtx_unlock(&mountlist_mtx);
737 		if (VFS_ROOT(mp, &newdp))
738 			panic("mount: lost mount");
739 		checkdirs(vp, newdp);
740 		vput(newdp);
741 		VOP_UNLOCK(vp, 0, td);
742 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
743 			error = vfs_allocate_syncvnode(mp);
744 		vfs_unbusy(mp, td);
745 		if ((error = VFS_START(mp, 0, td)) != 0) {
746 			vrele(vp);
747 			goto bad;
748 		}
749 	} else {
750 		mtx_lock(&vp->v_interlock);
751 		vp->v_flag &= ~VMOUNT;
752 		mtx_unlock(&vp->v_interlock);
753 		mp->mnt_vfc->vfc_refcount--;
754 		vfs_unbusy(mp, td);
755 		free(mp, M_MOUNT);
756 		vput(vp);
757 		goto bad;
758 	}
759 	return (0);
760 bad:
761 	vfs_freeopts(optlist);
762 	return (error);
763 }
764 
765 /*
766  * Old mount API.
767  */
768 #ifndef _SYS_SYSPROTO_H_
769 struct mount_args {
770 	char	*type;
771 	char	*path;
772 	int	flags;
773 	caddr_t	data;
774 };
775 #endif
776 /* ARGSUSED */
777 int
778 mount(td, uap)
779 	struct thread *td;
780 	struct mount_args /* {
781 		syscallarg(char *) type;
782 		syscallarg(char *) path;
783 		syscallarg(int) flags;
784 		syscallarg(caddr_t) data;
785 	} */ *uap;
786 {
787 	char *fstype;
788 	char *fspath;
789 	int error;
790 
791 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
792 	fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK);
793 
794 	/*
795 	 * vfs_mount() actually takes a kernel string for `type' and
796 	 * `path' now, so extract them.
797 	 */
798 	error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL);
799 	if (error)
800 		goto finish;
801 	error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL);
802 	if (error)
803 		goto finish;
804 	error = vfs_mount(td, fstype, fspath, SCARG(uap, flags),
805 	    SCARG(uap, data));
806 finish:
807 	free(fstype, M_TEMP);
808 	free(fspath, M_TEMP);
809 	return (error);
810 }
811 
812 /*
813  * vfs_mount(): actually attempt a filesystem mount.
814  *
815  * This routine is designed to be a "generic" entry point for routines
816  * that wish to mount a filesystem. All parameters except `fsdata' are
817  * pointers into kernel space. `fsdata' is currently still a pointer
818  * into userspace.
819  */
820 int
821 vfs_mount(td, fstype, fspath, fsflags, fsdata)
822 	struct thread *td;
823 	const char *fstype;
824 	char *fspath;
825 	int fsflags;
826 	void *fsdata;
827 {
828 	linker_file_t lf;
829 	struct vnode *vp;
830 	struct mount *mp;
831 	struct vfsconf *vfsp;
832 	int error, flag = 0, kern_flag = 0;
833 	struct vattr va;
834 	struct nameidata nd;
835 
836 	/*
837 	 * Be ultra-paranoid about making sure the type and fspath
838 	 * variables will fit in our mp buffers, including the
839 	 * terminating NUL.
840 	 */
841 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
842 		return (ENAMETOOLONG);
843 
844 	if (usermount == 0) {
845 		error = suser(td);
846 		if (error)
847 			return (error);
848 	}
849 	/*
850 	 * Do not allow NFS export by non-root users.
851 	 */
852 	if (fsflags & MNT_EXPORTED) {
853 		error = suser(td);
854 		if (error)
855 			return (error);
856 	}
857 	/*
858 	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users.
859 	 */
860 	if (suser(td))
861 		fsflags |= MNT_NOSUID | MNT_NODEV;
862 	/*
863 	 * Get vnode to be covered
864 	 */
865 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td);
866 	if ((error = namei(&nd)) != 0)
867 		return (error);
868 	NDFREE(&nd, NDF_ONLY_PNBUF);
869 	vp = nd.ni_vp;
870 	if (fsflags & MNT_UPDATE) {
871 		if ((vp->v_flag & VROOT) == 0) {
872 			vput(vp);
873 			return (EINVAL);
874 		}
875 		mp = vp->v_mount;
876 		flag = mp->mnt_flag;
877 		kern_flag = mp->mnt_kern_flag;
878 		/*
879 		 * We only allow the filesystem to be reloaded if it
880 		 * is currently mounted read-only.
881 		 */
882 		if ((fsflags & MNT_RELOAD) &&
883 		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
884 			vput(vp);
885 			return (EOPNOTSUPP);	/* Needs translation */
886 		}
887 		/*
888 		 * Only root, or the user that did the original mount is
889 		 * permitted to update it.
890 		 */
891 		if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
892 			error = suser(td);
893 			if (error) {
894 				vput(vp);
895 				return (error);
896 			}
897 		}
898 		if (vfs_busy(mp, LK_NOWAIT, 0, td)) {
899 			vput(vp);
900 			return (EBUSY);
901 		}
902 		mtx_lock(&vp->v_interlock);
903 		if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) {
904 			mtx_unlock(&vp->v_interlock);
905 			vfs_unbusy(mp, td);
906 			vput(vp);
907 			return (EBUSY);
908 		}
909 		vp->v_flag |= VMOUNT;
910 		mtx_unlock(&vp->v_interlock);
911 		mp->mnt_flag |= fsflags &
912 		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
913 		VOP_UNLOCK(vp, 0, td);
914 		goto update;
915 	}
916 	/*
917 	 * If the user is not root, ensure that they own the directory
918 	 * onto which we are attempting to mount.
919 	 */
920 	error = VOP_GETATTR(vp, &va, td->td_ucred, td);
921 	if (error) {
922 		vput(vp);
923 		return (error);
924 	}
925 	if (va.va_uid != td->td_ucred->cr_uid) {
926 		error = suser(td);
927 		if (error) {
928 			vput(vp);
929 			return (error);
930 		}
931 	}
932 	if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) {
933 		vput(vp);
934 		return (error);
935 	}
936 	if (vp->v_type != VDIR) {
937 		vput(vp);
938 		return (ENOTDIR);
939 	}
940 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
941 		if (!strcmp(vfsp->vfc_name, fstype))
942 			break;
943 	if (vfsp == NULL) {
944 		/* Only load modules for root (very important!). */
945 		error = suser(td);
946 		if (error) {
947 			vput(vp);
948 			return (error);
949 		}
950 		error = securelevel_gt(td->td_ucred, 0);
951 		if (error) {
952 			vput(vp);
953 			return (error);
954 		}
955 		error = linker_load_file(fstype, &lf);
956 		if (error || lf == NULL) {
957 			vput(vp);
958 			if (lf == NULL)
959 				error = ENODEV;
960 			return (error);
961 		}
962 		lf->userrefs++;
963 		/* Look up again to see if the VFS was loaded. */
964 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
965 			if (!strcmp(vfsp->vfc_name, fstype))
966 				break;
967 		if (vfsp == NULL) {
968 			lf->userrefs--;
969 			linker_file_unload(lf);
970 			vput(vp);
971 			return (ENODEV);
972 		}
973 	}
974 	mtx_lock(&vp->v_interlock);
975 	if ((vp->v_flag & VMOUNT) != 0 ||
976 	    vp->v_mountedhere != NULL) {
977 		mtx_unlock(&vp->v_interlock);
978 		vput(vp);
979 		return (EBUSY);
980 	}
981 	vp->v_flag |= VMOUNT;
982 	mtx_unlock(&vp->v_interlock);
983 
984 	/*
985 	 * Allocate and initialize the filesystem.
986 	 */
987 	mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
988 	TAILQ_INIT(&mp->mnt_nvnodelist);
989 	TAILQ_INIT(&mp->mnt_reservedvnlist);
990 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
991 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
992 	mp->mnt_op = vfsp->vfc_vfsops;
993 	mp->mnt_vfc = vfsp;
994 	vfsp->vfc_refcount++;
995 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
996 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
997 	strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN);
998 	mp->mnt_vnodecovered = vp;
999 	mp->mnt_stat.f_owner = td->td_ucred->cr_uid;
1000 	strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
1001 	mp->mnt_iosize_max = DFLTPHYS;
1002 	VOP_UNLOCK(vp, 0, td);
1003 update:
1004 	/*
1005 	 * Check if the fs implements the old VFS_MOUNT()
1006 	 * function, since the old system call was used.
1007 	 */
1008 	if (mp->mnt_op->vfs_mount == NULL) {
1009 		printf("%s doesn't support the old mount syscall\n",
1010 		    mp->mnt_vfc->vfc_name);
1011 		mtx_lock(&vp->v_interlock);
1012 		vp->v_flag &= ~VMOUNT;
1013 		mtx_unlock(&vp->v_interlock);
1014 		if (mp->mnt_flag & MNT_UPDATE)
1015 			vfs_unbusy(mp, td);
1016 		else {
1017 			mp->mnt_vfc->vfc_refcount--;
1018 			vfs_unbusy(mp, td);
1019 			free(mp, M_MOUNT);
1020 		}
1021 		vrele(vp);
1022 		return (EOPNOTSUPP);
1023 	}
1024 
1025 	/*
1026 	 * Set the mount level flags.
1027 	 */
1028 	if (fsflags & MNT_RDONLY)
1029 		mp->mnt_flag |= MNT_RDONLY;
1030 	else if (mp->mnt_flag & MNT_RDONLY)
1031 		mp->mnt_kern_flag |= MNTK_WANTRDWR;
1032 	mp->mnt_flag &=~ MNT_UPDATEMASK;
1033 	mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE);
1034 	/*
1035 	 * Mount the filesystem.
1036 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
1037 	 * get.  No freeing of cn_pnbuf.
1038 	 */
1039 	error = VFS_MOUNT(mp, fspath, fsdata, &nd, td);
1040 	if (mp->mnt_flag & MNT_UPDATE) {
1041 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
1042 			mp->mnt_flag &= ~MNT_RDONLY;
1043 		mp->mnt_flag &=~
1044 		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
1045 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
1046 		if (error) {
1047 			mp->mnt_flag = flag;
1048 			mp->mnt_kern_flag = kern_flag;
1049 		}
1050 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1051 			if (mp->mnt_syncer == NULL)
1052 				error = vfs_allocate_syncvnode(mp);
1053 		} else {
1054 			if (mp->mnt_syncer != NULL)
1055 				vput(mp->mnt_syncer);
1056 			mp->mnt_syncer = NULL;
1057 		}
1058 		vfs_unbusy(mp, td);
1059 		mtx_lock(&vp->v_interlock);
1060 		vp->v_flag &= ~VMOUNT;
1061 		mtx_unlock(&vp->v_interlock);
1062 		vrele(vp);
1063 		return (error);
1064 	}
1065 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1066 	/*
1067 	 * Put the new filesystem on the mount list after root.
1068 	 */
1069 	cache_purge(vp);
1070 	if (!error) {
1071 		struct vnode *newdp;
1072 
1073 		mtx_lock(&vp->v_interlock);
1074 		vp->v_flag &= ~VMOUNT;
1075 		vp->v_mountedhere = mp;
1076 		mtx_unlock(&vp->v_interlock);
1077 		mtx_lock(&mountlist_mtx);
1078 		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1079 		mtx_unlock(&mountlist_mtx);
1080 		if (VFS_ROOT(mp, &newdp))
1081 			panic("mount: lost mount");
1082 		checkdirs(vp, newdp);
1083 		vput(newdp);
1084 		VOP_UNLOCK(vp, 0, td);
1085 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
1086 			error = vfs_allocate_syncvnode(mp);
1087 		vfs_unbusy(mp, td);
1088 		if ((error = VFS_START(mp, 0, td)) != 0)
1089 			vrele(vp);
1090 	} else {
1091 		mtx_lock(&vp->v_interlock);
1092 		vp->v_flag &= ~VMOUNT;
1093 		mtx_unlock(&vp->v_interlock);
1094 		mp->mnt_vfc->vfc_refcount--;
1095 		vfs_unbusy(mp, td);
1096 		free(mp, M_MOUNT);
1097 		vput(vp);
1098 	}
1099 	return (error);
1100 }
1101 
1102 /*
1103  * Scan all active processes to see if any of them have a current
1104  * or root directory of `olddp'. If so, replace them with the new
1105  * mount point.
1106  */
1107 static void
1108 checkdirs(olddp, newdp)
1109 	struct vnode *olddp, *newdp;
1110 {
1111 	struct filedesc *fdp;
1112 	struct proc *p;
1113 	int nrele;
1114 
1115 	if (olddp->v_usecount == 1)
1116 		return;
1117 	sx_slock(&allproc_lock);
1118 	LIST_FOREACH(p, &allproc, p_list) {
1119 		PROC_LOCK(p);
1120 		fdp = p->p_fd;
1121 		if (fdp == NULL) {
1122 			PROC_UNLOCK(p);
1123 			continue;
1124 		}
1125 		nrele = 0;
1126 		FILEDESC_LOCK(fdp);
1127 		if (fdp->fd_cdir == olddp) {
1128 			VREF(newdp);
1129 			fdp->fd_cdir = newdp;
1130 			nrele++;
1131 		}
1132 		if (fdp->fd_rdir == olddp) {
1133 			VREF(newdp);
1134 			fdp->fd_rdir = newdp;
1135 			nrele++;
1136 		}
1137 		FILEDESC_UNLOCK(fdp);
1138 		PROC_UNLOCK(p);
1139 		while (nrele--)
1140 			vrele(olddp);
1141 	}
1142 	sx_sunlock(&allproc_lock);
1143 	if (rootvnode == olddp) {
1144 		vrele(rootvnode);
1145 		VREF(newdp);
1146 		rootvnode = newdp;
1147 	}
1148 }
1149 
1150 /*
1151  * Unmount a filesystem.
1152  *
1153  * Note: unmount takes a path to the vnode mounted on as argument,
1154  * not special file (as before).
1155  */
1156 #ifndef _SYS_SYSPROTO_H_
1157 struct unmount_args {
1158 	char	*path;
1159 	int	flags;
1160 };
1161 #endif
1162 /* ARGSUSED */
1163 int
1164 unmount(td, uap)
1165 	struct thread *td;
1166 	register struct unmount_args /* {
1167 		syscallarg(char *) path;
1168 		syscallarg(int) flags;
1169 	} */ *uap;
1170 {
1171 	register struct vnode *vp;
1172 	struct mount *mp;
1173 	int error;
1174 	struct nameidata nd;
1175 
1176 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1177 	    SCARG(uap, path), td);
1178 	if ((error = namei(&nd)) != 0)
1179 		return (error);
1180 	vp = nd.ni_vp;
1181 	NDFREE(&nd, NDF_ONLY_PNBUF);
1182 	mp = vp->v_mount;
1183 
1184 	/*
1185 	 * Only root, or the user that did the original mount is
1186 	 * permitted to unmount this filesystem.
1187 	 */
1188 	if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) {
1189 		error = suser(td);
1190 		if (error) {
1191 			vput(vp);
1192 			return (error);
1193 		}
1194 	}
1195 
1196 	/*
1197 	 * Don't allow unmounting the root filesystem.
1198 	 */
1199 	if (mp->mnt_flag & MNT_ROOTFS) {
1200 		vput(vp);
1201 		return (EINVAL);
1202 	}
1203 
1204 	/*
1205 	 * Must be the root of the filesystem
1206 	 */
1207 	if ((vp->v_flag & VROOT) == 0) {
1208 		vput(vp);
1209 		return (EINVAL);
1210 	}
1211 	vput(vp);
1212 	return (dounmount(mp, SCARG(uap, flags), td));
1213 }
1214 
1215 /*
1216  * Do the actual filesystem unmount.
1217  */
1218 int
1219 dounmount(mp, flags, td)
1220 	struct mount *mp;
1221 	int flags;
1222 	struct thread *td;
1223 {
1224 	struct vnode *coveredvp, *fsrootvp;
1225 	int error;
1226 	int async_flag;
1227 
1228 	mtx_lock(&mountlist_mtx);
1229 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1230 		mtx_unlock(&mountlist_mtx);
1231 		return (EBUSY);
1232 	}
1233 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
1234 	/* Allow filesystems to detect that a forced unmount is in progress. */
1235 	if (flags & MNT_FORCE)
1236 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
1237 	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
1238 	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td);
1239 	if (error) {
1240 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1241 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1242 			wakeup(mp);
1243 		return (error);
1244 	}
1245 	vn_start_write(NULL, &mp, V_WAIT);
1246 
1247 	if (mp->mnt_flag & MNT_EXPUBLIC)
1248 		vfs_setpublicfs(NULL, NULL, NULL);
1249 
1250 	vfs_msync(mp, MNT_WAIT);
1251 	async_flag = mp->mnt_flag & MNT_ASYNC;
1252 	mp->mnt_flag &=~ MNT_ASYNC;
1253 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
1254 	if (mp->mnt_syncer != NULL)
1255 		vput(mp->mnt_syncer);
1256 	/* Move process cdir/rdir refs on fs root to underlying vnode. */
1257 	if (VFS_ROOT(mp, &fsrootvp) == 0) {
1258 		if (mp->mnt_vnodecovered != NULL)
1259 			checkdirs(fsrootvp, mp->mnt_vnodecovered);
1260 		if (fsrootvp == rootvnode) {
1261 			vrele(rootvnode);
1262 			rootvnode = NULL;
1263 		}
1264 		vput(fsrootvp);
1265 	}
1266 	if (((mp->mnt_flag & MNT_RDONLY) ||
1267 	     (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) ||
1268 	    (flags & MNT_FORCE)) {
1269 		error = VFS_UNMOUNT(mp, flags, td);
1270 	}
1271 	vn_finished_write(mp);
1272 	if (error) {
1273 		/* Undo cdir/rdir and rootvnode changes made above. */
1274 		if (VFS_ROOT(mp, &fsrootvp) == 0) {
1275 			if (mp->mnt_vnodecovered != NULL)
1276 				checkdirs(mp->mnt_vnodecovered, fsrootvp);
1277 			if (rootvnode == NULL) {
1278 				rootvnode = fsrootvp;
1279 				vref(rootvnode);
1280 			}
1281 			vput(fsrootvp);
1282 		}
1283 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
1284 			(void) vfs_allocate_syncvnode(mp);
1285 		mtx_lock(&mountlist_mtx);
1286 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
1287 		mp->mnt_flag |= async_flag;
1288 		lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK,
1289 		    &mountlist_mtx, td);
1290 		if (mp->mnt_kern_flag & MNTK_MWAIT)
1291 			wakeup(mp);
1292 		return (error);
1293 	}
1294 	mtx_lock(&mountlist_mtx);
1295 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
1296 	if ((coveredvp = mp->mnt_vnodecovered) != NULL)
1297 		coveredvp->v_mountedhere = NULL;
1298 	mp->mnt_vfc->vfc_refcount--;
1299 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
1300 		panic("unmount: dangling vnode");
1301 	lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td);
1302 	lockdestroy(&mp->mnt_lock);
1303 	if (coveredvp != NULL)
1304 		vrele(coveredvp);
1305 	if (mp->mnt_kern_flag & MNTK_MWAIT)
1306 		wakeup(mp);
1307 	if (mp->mnt_op->vfs_mount == NULL)
1308 		vfs_freeopts(mp->mnt_opt);
1309 	free(mp, M_MOUNT);
1310 	return (0);
1311 }
1312 
1313 /*
1314  * Lookup a filesystem type, and if found allocate and initialize
1315  * a mount structure for it.
1316  *
1317  * Devname is usually updated by mount(8) after booting.
1318  */
1319 int
1320 vfs_rootmountalloc(fstypename, devname, mpp)
1321 	char *fstypename;
1322 	char *devname;
1323 	struct mount **mpp;
1324 {
1325 	struct thread *td = curthread;	/* XXX */
1326 	struct vfsconf *vfsp;
1327 	struct mount *mp;
1328 
1329 	if (fstypename == NULL)
1330 		return (ENODEV);
1331 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
1332 		if (!strcmp(vfsp->vfc_name, fstypename))
1333 			break;
1334 	if (vfsp == NULL)
1335 		return (ENODEV);
1336 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
1337 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
1338 	(void)vfs_busy(mp, LK_NOWAIT, 0, td);
1339 	TAILQ_INIT(&mp->mnt_nvnodelist);
1340 	TAILQ_INIT(&mp->mnt_reservedvnlist);
1341 	mp->mnt_vfc = vfsp;
1342 	mp->mnt_op = vfsp->vfc_vfsops;
1343 	mp->mnt_flag = MNT_RDONLY;
1344 	mp->mnt_vnodecovered = NULLVP;
1345 	vfsp->vfc_refcount++;
1346 	mp->mnt_iosize_max = DFLTPHYS;
1347 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
1348 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
1349 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
1350 	mp->mnt_stat.f_mntonname[0] = '/';
1351 	mp->mnt_stat.f_mntonname[1] = 0;
1352 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
1353 	*mpp = mp;
1354 	return (0);
1355 }
1356 
1357 /*
1358  * Find and mount the root filesystem
1359  */
1360 void
1361 vfs_mountroot(void)
1362 {
1363 	char		*cp;
1364 	int		i, error;
1365 
1366 	/*
1367 	 * The root filesystem information is compiled in, and we are
1368 	 * booted with instructions to use it.
1369 	 */
1370 #ifdef ROOTDEVNAME
1371 	if ((boothowto & RB_DFLTROOT) &&
1372 	    !vfs_mountroot_try(ROOTDEVNAME))
1373 		return;
1374 #endif
1375 	/*
1376 	 * We are booted with instructions to prompt for the root filesystem,
1377 	 * or to use the compiled-in default when it doesn't exist.
1378 	 */
1379 	if (boothowto & (RB_DFLTROOT | RB_ASKNAME)) {
1380 		if (!vfs_mountroot_ask())
1381 			return;
1382 	}
1383 
1384 	/*
1385 	 * We've been given the generic "use CDROM as root" flag.  This is
1386 	 * necessary because one media may be used in many different
1387 	 * devices, so we need to search for them.
1388 	 */
1389 	if (boothowto & RB_CDROM) {
1390 		for (i = 0; cdrom_rootdevnames[i] != NULL; i++) {
1391 			if (!vfs_mountroot_try(cdrom_rootdevnames[i]))
1392 				return;
1393 		}
1394 	}
1395 
1396 	/*
1397 	 * Try to use the value read by the loader from /etc/fstab, or
1398 	 * supplied via some other means.  This is the preferred
1399 	 * mechanism.
1400 	 */
1401 	if ((cp = getenv("vfs.root.mountfrom")) != NULL) {
1402 		error = vfs_mountroot_try(cp);
1403 		freeenv(cp);
1404 		if (!error)
1405 			return;
1406 	}
1407 
1408 	/*
1409 	 * Try values that may have been computed by the machine-dependant
1410 	 * legacy code.
1411 	 */
1412 	if (!vfs_mountroot_try(rootdevnames[0]))
1413 		return;
1414 	if (!vfs_mountroot_try(rootdevnames[1]))
1415 		return;
1416 
1417 	/*
1418 	 * If we have a compiled-in default, and haven't already tried it, try
1419 	 * it now.
1420 	 */
1421 #ifdef ROOTDEVNAME
1422 	if (!(boothowto & RB_DFLTROOT))
1423 		if (!vfs_mountroot_try(ROOTDEVNAME))
1424 			return;
1425 #endif
1426 
1427 	/*
1428 	 * Everything so far has failed, prompt on the console if we haven't
1429 	 * already tried that.
1430 	 */
1431 	if (!(boothowto & (RB_DFLTROOT | RB_ASKNAME)) && !vfs_mountroot_ask())
1432 		return;
1433 	panic("Root mount failed, startup aborted.");
1434 }
1435 
1436 /*
1437  * Mount (mountfrom) as the root filesystem.
1438  */
1439 static int
1440 vfs_mountroot_try(char *mountfrom)
1441 {
1442         struct mount	*mp;
1443 	char		*vfsname, *path;
1444 	int		error;
1445 	char		patt[32];
1446 	int		s;
1447 
1448 	vfsname = NULL;
1449 	path    = NULL;
1450 	mp      = NULL;
1451 	error   = EINVAL;
1452 
1453 	if (mountfrom == NULL)
1454 		return(error);		/* don't complain */
1455 
1456 	s = splcam();			/* Overkill, but annoying without it */
1457 	printf("Mounting root from %s\n", mountfrom);
1458 	splx(s);
1459 
1460 	/* parse vfs name and path */
1461 	vfsname = malloc(MFSNAMELEN, M_MOUNT, M_WAITOK);
1462 	path = malloc(MNAMELEN, M_MOUNT, M_WAITOK);
1463 	vfsname[0] = path[0] = 0;
1464 	sprintf(patt, "%%%d[a-z0-9]:%%%ds", MFSNAMELEN, MNAMELEN);
1465 	if (sscanf(mountfrom, patt, vfsname, path) < 1)
1466 		goto done;
1467 
1468 	/* allocate a root mount */
1469 	error = vfs_rootmountalloc(vfsname, path[0] != 0 ? path : ROOTNAME,
1470 				   &mp);
1471 	if (error != 0) {
1472 		printf("Can't allocate root mount for filesystem '%s': %d\n",
1473 		       vfsname, error);
1474 		goto done;
1475 	}
1476 	mp->mnt_flag |= MNT_ROOTFS;
1477 
1478 	/* do our best to set rootdev */
1479 	if ((path[0] != 0) && setrootbyname(path))
1480 		printf("setrootbyname failed\n");
1481 
1482 	/* If the root device is a type "memory disk", mount RW */
1483 	if (rootdev != NODEV && devsw(rootdev) &&
1484 	    (devsw(rootdev)->d_flags & D_MEMDISK))
1485 		mp->mnt_flag &= ~MNT_RDONLY;
1486 
1487 	/*
1488 	 * Set the mount path to be something useful, because the
1489 	 * filesystem code isn't responsible now for initialising
1490 	 * f_mntonname unless they want to override the default
1491 	 * (which is `path'.)
1492 	 */
1493 	strncpy(mp->mnt_stat.f_mntonname, "/", MNAMELEN);
1494 
1495 	error = VFS_MOUNT(mp, NULL, NULL, NULL, curthread);
1496 
1497 done:
1498 	if (vfsname != NULL)
1499 		free(vfsname, M_MOUNT);
1500 	if (path != NULL)
1501 		free(path, M_MOUNT);
1502 	if (error != 0) {
1503 		if (mp != NULL) {
1504 			vfs_unbusy(mp, curthread);
1505 			free(mp, M_MOUNT);
1506 		}
1507 		printf("Root mount failed: %d\n", error);
1508 	} else {
1509 
1510 		/* register with list of mounted filesystems */
1511 		mtx_lock(&mountlist_mtx);
1512 		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
1513 		mtx_unlock(&mountlist_mtx);
1514 
1515 		/* sanity check system clock against root fs timestamp */
1516 		inittodr(mp->mnt_time);
1517 		vfs_unbusy(mp, curthread);
1518 		error = VFS_START(mp, 0, curthread);
1519 	}
1520 	return(error);
1521 }
1522 
1523 /*
1524  * Spin prompting on the console for a suitable root filesystem
1525  */
1526 static int
1527 vfs_mountroot_ask(void)
1528 {
1529 	char name[128];
1530 	int i;
1531 	dev_t dev;
1532 
1533 	for(;;) {
1534 		printf("\nManual root filesystem specification:\n");
1535 		printf("  <fstype>:<device>  Mount <device> using filesystem <fstype>\n");
1536 #if defined(__i386__) || defined(__ia64__)
1537 		printf("                       eg. ufs:da0s1a\n");
1538 #else
1539 		printf("                       eg. ufs:da0a\n");
1540 #endif
1541 		printf("  ?                  List valid disk boot devices\n");
1542 		printf("  <empty line>       Abort manual input\n");
1543 		printf("\nmountroot> ");
1544 		gets(name);
1545 		if (name[0] == 0)
1546 			return(1);
1547 		if (name[0] == '?') {
1548 			printf("Possibly valid devices for 'ufs' root:\n");
1549 			for (i = 0; i < NUMCDEVSW; i++) {
1550 				dev = makedev(i, 0);
1551 				if (devsw(dev) != NULL)
1552 					printf(" \"%s\"", devsw(dev)->d_name);
1553 			}
1554 			printf("\n");
1555 			continue;
1556 		}
1557 		if (!vfs_mountroot_try(name))
1558 			return(0);
1559 	}
1560 }
1561 
1562 /*
1563  * Local helper function for vfs_mountroot_ask.
1564  */
1565 static void
1566 gets(char *cp)
1567 {
1568 	char *lp;
1569 	int c;
1570 
1571 	lp = cp;
1572 	for (;;) {
1573 		printf("%c", c = cngetc() & 0177);
1574 		switch (c) {
1575 		case -1:
1576 		case '\n':
1577 		case '\r':
1578 			*lp++ = '\0';
1579 			return;
1580 		case '\b':
1581 		case '\177':
1582 			if (lp > cp) {
1583 				printf(" \b");
1584 				lp--;
1585 			}
1586 			continue;
1587 		case '#':
1588 			lp--;
1589 			if (lp < cp)
1590 				lp = cp;
1591 			continue;
1592 		case '@':
1593 		case 'u' & 037:
1594 			lp = cp;
1595 			printf("%c", '\n');
1596 			continue;
1597 		default:
1598 			*lp++ = c;
1599 		}
1600 	}
1601 }
1602 
1603 /*
1604  * Convert a given name to the dev_t of the disk-like device
1605  * it refers to.
1606  */
1607 dev_t
1608 getdiskbyname(char *name) {
1609 	char *cp;
1610 	dev_t dev;
1611 
1612 	cp = name;
1613 	if (!bcmp(cp, "/dev/", 5))
1614 		cp += 5;
1615 
1616 	dev = NODEV;
1617 	EVENTHANDLER_INVOKE(dev_clone, cp, strlen(cp), &dev);
1618 	return (dev);
1619 }
1620 
1621 /*
1622  * Set rootdev to match (name), given that we expect it to
1623  * refer to a disk-like device.
1624  */
1625 static int
1626 setrootbyname(char *name)
1627 {
1628 	dev_t diskdev;
1629 
1630 	diskdev = getdiskbyname(name);
1631 	if (diskdev != NODEV) {
1632 		rootdev = diskdev;
1633 		return (0);
1634 	}
1635 
1636 	return (1);
1637 }
1638 
/*
 * DDB "show disk/<devicename>": print the dev_t that getdiskbyname()
 * resolves the given name to.
 */
#ifdef DDB
DB_SHOW_COMMAND(disk, db_getdiskbyname)
{
	dev_t found;

	/* The device name arrives as the command modifier. */
	if (modif[0] == '\0') {
		db_error("usage: show disk/devicename");
		return;
	}

	found = getdiskbyname(modif);
	if (found == NODEV) {
		db_printf("No disk device matched.\n");
		return;
	}
	db_printf("dev_t = %p\n", found);
}
#endif
1656 
1657 /*
1658  * Get a mount option by its name.
1659  *
1660  * Return 0 if the option was found, ENOENT otherwise.
1661  * If len is non-NULL it will be filled with the length
1662  * of the option. If buf is non-NULL, it will be filled
1663  * with the address of the option.
1664  */
1665 int
1666 vfs_getopt(opts, name, buf, len)
1667 	struct vfsoptlist *opts;
1668 	const char *name;
1669 	void **buf;
1670 	int *len;
1671 {
1672 	struct vfsopt *opt;
1673 
1674 	TAILQ_FOREACH(opt, opts, link) {
1675 		if (strcmp(name, opt->name) == 0) {
1676 			if (len != NULL)
1677 				*len = opt->len;
1678 			if (buf != NULL)
1679 				*buf = opt->value;
1680 			return (0);
1681 		}
1682 	}
1683 	return (ENOENT);
1684 }
1685 
1686 /*
1687  * Find and copy a mount option.
1688  *
1689  * The size of the buffer has to be specified
1690  * in len, if it is not the same length as the
1691  * mount option, EINVAL is returned.
1692  * Returns ENOENT if the option is not found.
1693  */
1694 int
1695 vfs_copyopt(opts, name, dest, len)
1696 	struct vfsoptlist *opts;
1697 	const char *name;
1698 	void *dest;
1699 	int len;
1700 {
1701 	struct vfsopt *opt;
1702 
1703 	TAILQ_FOREACH(opt, opts, link) {
1704 		if (strcmp(name, opt->name) == 0) {
1705 			if (len != opt->len)
1706 				return (EINVAL);
1707 			bcopy(opt->value, dest, opt->len);
1708 			return (0);
1709 		}
1710 	}
1711 	return (ENOENT);
1712 }
1713