xref: /freebsd/sys/kern/vfs_mountroot.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * Copyright (c) 1999-2004 Poul-Henning Kamp
4  * Copyright (c) 1999 Michael Smith
5  * Copyright (c) 1989, 1993
6  *      The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "opt_rootdevname.h"
39 
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/param.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/jail.h>
47 #include <sys/kernel.h>
48 #include <sys/libkern.h>
49 #include <sys/malloc.h>
50 #include <sys/mdioctl.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/namei.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/filedesc.h>
57 #include <sys/reboot.h>
58 #include <sys/stat.h>
59 #include <sys/syscallsubr.h>
60 #include <sys/sysproto.h>
61 #include <sys/sx.h>
62 #include <sys/sysctl.h>
63 #include <sys/sysent.h>
64 #include <sys/systm.h>
65 #include <sys/vnode.h>
66 
67 #include <geom/geom.h>
68 
69 /*
70  * The root filesystem is detailed in the kernel environment variable
71  * vfs.root.mountfrom, which is expected to be in the general format
72  *
73  * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
74  * vfsname   := the name of a VFS known to the kernel and capable
75  *              of being mounted as root
76  * path      := disk device name or other data used by the filesystem
77  *              to locate its physical store
78  *
79  * If the environment variable vfs.root.mountfrom is a space separated list,
80  * each list element is tried in turn and the root filesystem will be mounted
81  * from the first one that succeeds.
82  *
83  * The environment variable vfs.root.mountfrom.options is a comma delimited
84  * set of string mount options.  These mount options must be parseable
85  * by nmount() in the kernel.
86  */
87 
/* Forward declarations for helpers that are defined further down. */
static int		 parse_mount(char **conf);
static struct mntarg	*parse_mountroot_options(struct mntarg *ma,
			    const char *options);
90 
91 /*
92  * The vnode of the system's root (/ in the filesystem, without chroot
93  * active.)
94  */
95 struct vnode *rootvnode;
96 
97 char *rootdevnames[2] = {NULL, NULL};
98 
99 struct root_hold_token {
100 	const char			*who;
101 	LIST_ENTRY(root_hold_token)	list;
102 };
103 
104 static LIST_HEAD(, root_hold_token)	root_holds =
105     LIST_HEAD_INITIALIZER(root_holds);
106 
/* What to do when none of the configured roots could be mounted. */
enum action {
	A_CONTINUE,
	A_PANIC,
	A_REBOOT,
	A_RETRY
};

static enum action root_mount_onfail = A_CONTINUE;

/* Unit of the md device attached by ".md", or -1 when there is none. */
static int root_mount_mddev;
/* Set to 1 at the very end of vfs_mountroot(). */
static int root_mount_complete;

/* Seconds to wait for a root device to appear; 3 unless overridden. */
static int root_mount_timeout = 3;
121 
122 struct root_hold_token *
123 root_mount_hold(const char *identifier)
124 {
125 	struct root_hold_token *h;
126 
127 	if (root_mounted())
128 		return (NULL);
129 
130 	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
131 	h->who = identifier;
132 	mtx_lock(&mountlist_mtx);
133 	LIST_INSERT_HEAD(&root_holds, h, list);
134 	mtx_unlock(&mountlist_mtx);
135 	return (h);
136 }
137 
138 void
139 root_mount_rel(struct root_hold_token *h)
140 {
141 
142 	if (h == NULL)
143 		return;
144 	mtx_lock(&mountlist_mtx);
145 	LIST_REMOVE(h, list);
146 	wakeup(&root_holds);
147 	mtx_unlock(&mountlist_mtx);
148 	free(h, M_DEVBUF);
149 }
150 
151 int
152 root_mounted(void)
153 {
154 
155 	/* No mutex is acquired here because int stores are atomic. */
156 	return (root_mount_complete);
157 }
158 
159 void
160 root_mount_wait(void)
161 {
162 
163 	/*
164 	 * Panic on an obvious deadlock - the function can't be called from
165 	 * a thread which is doing the whole SYSINIT stuff.
166 	 */
167 	KASSERT(curthread->td_proc->p_pid != 0,
168 	    ("root_mount_wait: cannot be called from the swapper thread"));
169 	mtx_lock(&mountlist_mtx);
170 	while (!root_mount_complete) {
171 		msleep(&root_mount_complete, &mountlist_mtx, PZERO, "rootwait",
172 		    hz);
173 	}
174 	mtx_unlock(&mountlist_mtx);
175 }
176 
177 static void
178 set_rootvnode(void)
179 {
180 	struct proc *p;
181 
182 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
183 		panic("Cannot find root vnode");
184 
185 	VOP_UNLOCK(rootvnode, 0);
186 
187 	p = curthread->td_proc;
188 	FILEDESC_XLOCK(p->p_fd);
189 
190 	if (p->p_fd->fd_cdir != NULL)
191 		vrele(p->p_fd->fd_cdir);
192 	p->p_fd->fd_cdir = rootvnode;
193 	VREF(rootvnode);
194 
195 	if (p->p_fd->fd_rdir != NULL)
196 		vrele(p->p_fd->fd_rdir);
197 	p->p_fd->fd_rdir = rootvnode;
198 	VREF(rootvnode);
199 
200 	FILEDESC_XUNLOCK(p->p_fd);
201 
202 	EVENTHANDLER_INVOKE(mountroot);
203 }
204 
205 static int
206 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
207 {
208 	struct vfsoptlist *opts;
209 	struct vfsconf *vfsp;
210 	struct mount *mp;
211 	int error;
212 
213 	*mpp = NULL;
214 
215 	vfsp = vfs_byname("devfs");
216 	KASSERT(vfsp != NULL, ("Could not find devfs by name"));
217 	if (vfsp == NULL)
218 		return (ENOENT);
219 
220 	mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
221 
222 	error = VFS_MOUNT(mp);
223 	KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
224 	if (error)
225 		return (error);
226 
227 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
228 	TAILQ_INIT(opts);
229 	mp->mnt_opt = opts;
230 
231 	mtx_lock(&mountlist_mtx);
232 	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
233 	mtx_unlock(&mountlist_mtx);
234 
235 	*mpp = mp;
236 	set_rootvnode();
237 
238 	error = kern_symlink(td, "/", "dev", UIO_SYSSPACE);
239 	if (error)
240 		printf("kern_symlink /dev -> / returns %d\n", error);
241 
242 	return (error);
243 }
244 
245 static int
246 vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
247 {
248 	struct nameidata nd;
249 	struct mount *mporoot, *mpnroot;
250 	struct vnode *vp, *vporoot, *vpdevfs;
251 	char *fspath;
252 	int error;
253 
254 	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
255 
256 	/* Shuffle the mountlist. */
257 	mtx_lock(&mountlist_mtx);
258 	mporoot = TAILQ_FIRST(&mountlist);
259 	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
260 	if (mporoot != mpdevfs) {
261 		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
262 		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
263 	}
264 	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
265 	mtx_unlock(&mountlist_mtx);
266 
267 	cache_purgevfs(mporoot);
268 	if (mporoot != mpdevfs)
269 		cache_purgevfs(mpdevfs);
270 
271 	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);
272 
273 	VI_LOCK(vporoot);
274 	vporoot->v_iflag &= ~VI_MOUNT;
275 	VI_UNLOCK(vporoot);
276 	vporoot->v_mountedhere = NULL;
277 	mporoot->mnt_flag &= ~MNT_ROOTFS;
278 	mporoot->mnt_vnodecovered = NULL;
279 	vput(vporoot);
280 
281 	/* Set up the new rootvnode, and purge the cache */
282 	mpnroot->mnt_vnodecovered = NULL;
283 	set_rootvnode();
284 	cache_purgevfs(rootvnode->v_mount);
285 
286 	if (mporoot != mpdevfs) {
287 		/* Remount old root under /.mount or /mnt */
288 		fspath = "/.mount";
289 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
290 		    fspath, td);
291 		error = namei(&nd);
292 		if (error) {
293 			NDFREE(&nd, NDF_ONLY_PNBUF);
294 			fspath = "/mnt";
295 			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
296 			    fspath, td);
297 			error = namei(&nd);
298 		}
299 		if (!error) {
300 			vp = nd.ni_vp;
301 			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
302 			if (!error)
303 				error = vinvalbuf(vp, V_SAVE, 0, 0);
304 			if (!error) {
305 				cache_purge(vp);
306 				mporoot->mnt_vnodecovered = vp;
307 				vp->v_mountedhere = mporoot;
308 				strlcpy(mporoot->mnt_stat.f_mntonname,
309 				    fspath, MNAMELEN);
310 				VOP_UNLOCK(vp, 0);
311 			} else
312 				vput(vp);
313 		}
314 		NDFREE(&nd, NDF_ONLY_PNBUF);
315 
316 		if (error && bootverbose)
317 			printf("mountroot: unable to remount previous root "
318 			    "under /.mount or /mnt (error %d).\n", error);
319 	}
320 
321 	/* Remount devfs under /dev */
322 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
323 	error = namei(&nd);
324 	if (!error) {
325 		vp = nd.ni_vp;
326 		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
327 		if (!error)
328 			error = vinvalbuf(vp, V_SAVE, 0, 0);
329 		if (!error) {
330 			vpdevfs = mpdevfs->mnt_vnodecovered;
331 			if (vpdevfs != NULL) {
332 				cache_purge(vpdevfs);
333 				vpdevfs->v_mountedhere = NULL;
334 				vrele(vpdevfs);
335 			}
336 			mpdevfs->mnt_vnodecovered = vp;
337 			vp->v_mountedhere = mpdevfs;
338 			VOP_UNLOCK(vp, 0);
339 		} else
340 			vput(vp);
341 	}
342 	if (error && bootverbose)
343 		printf("mountroot: unable to remount devfs under /dev "
344 		    "(error %d).\n", error);
345 	NDFREE(&nd, NDF_ONLY_PNBUF);
346 
347 	if (mporoot == mpdevfs) {
348 		vfs_unbusy(mpdevfs);
349 		/* Unlink the no longer needed /dev/dev -> / symlink */
350 		error = kern_unlink(td, "/dev/dev", UIO_SYSSPACE);
351 		if (error && bootverbose)
352 			printf("mountroot: unable to unlink /dev/dev "
353 			    "(error %d)\n", error);
354 	}
355 
356 	return (0);
357 }
358 
359 /*
360  * Configuration parser.
361  */
362 
/* Character classes accepted by parse_skipto() in place of a character. */
#define	CC_WHITESPACE		(-1)
#define	CC_NONWHITESPACE	(-2)

/* Negative results with which the parser reports end of input or line. */
#define	PE_EOF			(-1)
#define	PE_EOL			(-2)
370 
/* Return the character at the parse position without consuming it. */
static __inline int
parse_peek(char **conf)
{
	const char *cur;

	cur = *conf;
	return (*cur);
}
377 
/* Overwrite the character at the parse position with `c'. */
static __inline void
parse_poke(char **conf, int c)
{
	char *cur;

	cur = *conf;
	cur[0] = c;
}
384 
/* Move the parse position forward by one character. */
static __inline void
parse_advance(char **conf)
{

	*conf = *conf + 1;
}
391 
/* Return 1 if `c' is a space, tab or newline, and 0 for anything else. */
static __inline int
parse_isspace(int c)
{

	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return (1);
	default:
		return (0);
	}
}
398 
399 static int
400 parse_skipto(char **conf, int mc)
401 {
402 	int c, match;
403 
404 	while (1) {
405 		c = parse_peek(conf);
406 		if (c == 0)
407 			return (PE_EOF);
408 		switch (mc) {
409 		case CC_WHITESPACE:
410 			match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
411 			break;
412 		case CC_NONWHITESPACE:
413 			if (c == '\n')
414 				return (PE_EOL);
415 			match = (c != ' ' && c != '\t') ? 1 : 0;
416 			break;
417 		default:
418 			match = (c == mc) ? 1 : 0;
419 			break;
420 		}
421 		if (match)
422 			break;
423 		parse_advance(conf);
424 	}
425 	return (0);
426 }
427 
428 static int
429 parse_token(char **conf, char **tok)
430 {
431 	char *p;
432 	size_t len;
433 	int error;
434 
435 	*tok = NULL;
436 	error = parse_skipto(conf, CC_NONWHITESPACE);
437 	if (error)
438 		return (error);
439 	p = *conf;
440 	error = parse_skipto(conf, CC_WHITESPACE);
441 	len = *conf - p;
442 	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
443 	bcopy(p, *tok, len);
444 	return (0);
445 }
446 
447 static void
448 parse_dir_ask_printenv(const char *var)
449 {
450 	char *val;
451 
452 	val = getenv(var);
453 	if (val != NULL) {
454 		printf("  %s=%s\n", var, val);
455 		freeenv(val);
456 	}
457 }
458 
459 static int
460 parse_dir_ask(char **conf)
461 {
462 	char name[80];
463 	char *mnt;
464 	int error;
465 
466 	printf("\nLoader variables:\n");
467 	parse_dir_ask_printenv("vfs.root.mountfrom");
468 	parse_dir_ask_printenv("vfs.root.mountfrom.options");
469 
470 	printf("\nManual root filesystem specification:\n");
471 	printf("  <fstype>:<device> [options]\n");
472 	printf("      Mount <device> using filesystem <fstype>\n");
473 	printf("      and with the specified (optional) option list.\n");
474 	printf("\n");
475 	printf("    eg. ufs:/dev/da0s1a\n");
476 	printf("        zfs:tank\n");
477 	printf("        cd9660:/dev/acd0 ro\n");
478 	printf("          (which is equivalent to: ");
479 	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
480 	printf("\n");
481 	printf("  ?               List valid disk boot devices\n");
482 	printf("  .               Yield 1 second (for background tasks)\n");
483 	printf("  <empty line>    Abort manual input\n");
484 
485  again:
486 	printf("\nmountroot> ");
487 	gets(name, sizeof(name), GETS_ECHO);
488 	if (name[0] == '\0')
489 		return (0);
490 	if (name[0] == '?') {
491 		printf("\nList of GEOM managed disk devices:\n  ");
492 		g_dev_print();
493 		goto again;
494 	}
495 	if (name[0] == '.') {
496 		pause("rmask", hz);
497 		goto again;
498 	}
499 	mnt = name;
500 	error = parse_mount(&mnt);
501 	if (error == -1) {
502 		printf("Invalid specification.\n");
503 		goto again;
504 	}
505 	return (error);
506 }
507 
508 static int
509 parse_dir_md(char **conf)
510 {
511 	struct stat sb;
512 	struct thread *td;
513 	struct md_ioctl *mdio;
514 	char *path, *tok;
515 	int error, fd, len;
516 
517 	td = curthread;
518 
519 	error = parse_token(conf, &tok);
520 	if (error)
521 		return (error);
522 
523 	len = strlen(tok);
524 	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
525 	path = (void *)(mdio + 1);
526 	bcopy(tok, path, len);
527 	free(tok, M_TEMP);
528 
529 	/* Get file status. */
530 	error = kern_stat(td, path, UIO_SYSSPACE, &sb);
531 	if (error)
532 		goto out;
533 
534 	/* Open /dev/mdctl so that we can attach/detach. */
535 	error = kern_open(td, "/dev/" MDCTL_NAME, UIO_SYSSPACE, O_RDWR, 0);
536 	if (error)
537 		goto out;
538 
539 	fd = td->td_retval[0];
540 	mdio->md_version = MDIOVERSION;
541 	mdio->md_type = MD_VNODE;
542 
543 	if (root_mount_mddev != -1) {
544 		mdio->md_unit = root_mount_mddev;
545 		DROP_GIANT();
546 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
547 		PICKUP_GIANT();
548 		/* Ignore errors. We don't care. */
549 		root_mount_mddev = -1;
550 	}
551 
552 	mdio->md_file = (void *)(mdio + 1);
553 	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
554 	mdio->md_mediasize = sb.st_size;
555 	mdio->md_unit = 0;
556 	DROP_GIANT();
557 	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
558 	PICKUP_GIANT();
559 	if (error)
560 		goto out;
561 
562 	if (mdio->md_unit > 9) {
563 		printf("rootmount: too many md units\n");
564 		mdio->md_file = NULL;
565 		mdio->md_options = 0;
566 		mdio->md_mediasize = 0;
567 		DROP_GIANT();
568 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
569 		PICKUP_GIANT();
570 		/* Ignore errors. We don't care. */
571 		error = ERANGE;
572 		goto out;
573 	}
574 
575 	root_mount_mddev = mdio->md_unit;
576 	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
577 
578 	error = kern_close(td, fd);
579 
580  out:
581 	free(mdio, M_TEMP);
582 	return (error);
583 }
584 
585 static int
586 parse_dir_onfail(char **conf)
587 {
588 	char *action;
589 	int error;
590 
591 	error = parse_token(conf, &action);
592 	if (error)
593 		return (error);
594 
595 	if (!strcmp(action, "continue"))
596 		root_mount_onfail = A_CONTINUE;
597 	else if (!strcmp(action, "panic"))
598 		root_mount_onfail = A_PANIC;
599 	else if (!strcmp(action, "reboot"))
600 		root_mount_onfail = A_REBOOT;
601 	else if (!strcmp(action, "retry"))
602 		root_mount_onfail = A_RETRY;
603 	else {
604 		printf("rootmount: %s: unknown action\n", action);
605 		error = EINVAL;
606 	}
607 
608 	free(action, M_TEMP);
609 	return (0);
610 }
611 
612 static int
613 parse_dir_timeout(char **conf)
614 {
615 	char *tok, *endtok;
616 	long secs;
617 	int error;
618 
619 	error = parse_token(conf, &tok);
620 	if (error)
621 		return (error);
622 
623 	secs = strtol(tok, &endtok, 0);
624 	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
625 	if (!error)
626 		root_mount_timeout = secs;
627 	free(tok, M_TEMP);
628 	return (error);
629 }
630 
631 static int
632 parse_directive(char **conf)
633 {
634 	char *dir;
635 	int error;
636 
637 	error = parse_token(conf, &dir);
638 	if (error)
639 		return (error);
640 
641 	if (strcmp(dir, ".ask") == 0)
642 		error = parse_dir_ask(conf);
643 	else if (strcmp(dir, ".md") == 0)
644 		error = parse_dir_md(conf);
645 	else if (strcmp(dir, ".onfail") == 0)
646 		error = parse_dir_onfail(conf);
647 	else if (strcmp(dir, ".timeout") == 0)
648 		error = parse_dir_timeout(conf);
649 	else {
650 		printf("mountroot: invalid directive `%s'\n", dir);
651 		/* Ignore the rest of the line. */
652 		(void)parse_skipto(conf, '\n');
653 		error = EINVAL;
654 	}
655 	free(dir, M_TEMP);
656 	return (error);
657 }
658 
659 static int
660 parse_mount_dev_present(const char *dev)
661 {
662 	struct nameidata nd;
663 	int error;
664 
665 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
666 	error = namei(&nd);
667 	if (!error)
668 		vput(nd.ni_vp);
669 	NDFREE(&nd, NDF_ONLY_PNBUF);
670 	return (error != 0) ? 0 : 1;
671 }
672 
673 static int
674 parse_mount(char **conf)
675 {
676 	char errmsg[255];
677 	struct mntarg *ma;
678 	char *dev, *fs, *opts, *tok;
679 	int delay, error, timeout;
680 
681 	error = parse_token(conf, &tok);
682 	if (error)
683 		return (error);
684 	fs = tok;
685 	error = parse_skipto(&tok, ':');
686 	if (error) {
687 		free(fs, M_TEMP);
688 		return (error);
689 	}
690 	parse_poke(&tok, '\0');
691 	parse_advance(&tok);
692 	dev = tok;
693 
694 	if (root_mount_mddev != -1) {
695 		/* Handle substitution for the md unit number. */
696 		tok = strstr(dev, "md#");
697 		if (tok != NULL)
698 			tok[2] = '0' + root_mount_mddev;
699 	}
700 
701 	/* Parse options. */
702 	error = parse_token(conf, &tok);
703 	opts = (error == 0) ? tok : NULL;
704 
705 	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
706 	    (opts != NULL) ? opts : "");
707 
708 	bzero(errmsg, sizeof(errmsg));
709 
710 	if (vfs_byname(fs) == NULL) {
711 		strlcpy(errmsg, "unknown file system", sizeof(errmsg));
712 		error = ENOENT;
713 		goto out;
714 	}
715 
716 	if (strcmp(fs, "zfs") != 0 && dev[0] != '\0' &&
717 	    !parse_mount_dev_present(dev)) {
718 		printf("mountroot: waiting for device %s ...\n", dev);
719 		delay = hz / 10;
720 		timeout = root_mount_timeout * hz;
721 		do {
722 			pause("rmdev", delay);
723 			timeout -= delay;
724 		} while (timeout > 0 && !parse_mount_dev_present(dev));
725 		if (timeout <= 0) {
726 			error = ENODEV;
727 			goto out;
728 		}
729 	}
730 
731 	ma = NULL;
732 	ma = mount_arg(ma, "fstype", fs, -1);
733 	ma = mount_arg(ma, "fspath", "/", -1);
734 	ma = mount_arg(ma, "from", dev, -1);
735 	ma = mount_arg(ma, "errmsg", errmsg, sizeof(errmsg));
736 	ma = mount_arg(ma, "ro", NULL, 0);
737 	ma = parse_mountroot_options(ma, opts);
738 	error = kernel_mount(ma, MNT_ROOTFS);
739 
740  out:
741 	if (error) {
742 		printf("Mounting from %s:%s failed with error %d",
743 		    fs, dev, error);
744 		if (errmsg[0] != '\0')
745 			printf(": %s", errmsg);
746 		printf(".\n");
747 	}
748 	free(fs, M_TEMP);
749 	if (opts != NULL)
750 		free(opts, M_TEMP);
751 	/* kernel_mount can return -1 on error. */
752 	return ((error < 0) ? EDOOFUS : error);
753 }
754 
755 static int
756 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
757 {
758 	struct mount *mp;
759 	char *conf;
760 	int error;
761 
762 	root_mount_mddev = -1;
763 
764 retry:
765 	conf = sbuf_data(sb);
766 	mp = TAILQ_NEXT(mpdevfs, mnt_list);
767 	error = (mp == NULL) ? 0 : EDOOFUS;
768 	root_mount_onfail = A_CONTINUE;
769 	while (mp == NULL) {
770 		error = parse_skipto(&conf, CC_NONWHITESPACE);
771 		if (error == PE_EOL) {
772 			parse_advance(&conf);
773 			continue;
774 		}
775 		if (error < 0)
776 			break;
777 		switch (parse_peek(&conf)) {
778 		case '#':
779 			error = parse_skipto(&conf, '\n');
780 			break;
781 		case '.':
782 			error = parse_directive(&conf);
783 			break;
784 		default:
785 			error = parse_mount(&conf);
786 			break;
787 		}
788 		if (error < 0)
789 			break;
790 		/* Ignore any trailing garbage on the line. */
791 		if (parse_peek(&conf) != '\n') {
792 			printf("mountroot: advancing to next directive...\n");
793 			(void)parse_skipto(&conf, '\n');
794 		}
795 		mp = TAILQ_NEXT(mpdevfs, mnt_list);
796 	}
797 	if (mp != NULL)
798 		return (0);
799 
800 	/*
801 	 * We failed to mount (a new) root.
802 	 */
803 	switch (root_mount_onfail) {
804 	case A_CONTINUE:
805 		break;
806 	case A_PANIC:
807 		panic("mountroot: unable to (re-)mount root.");
808 		/* NOTREACHED */
809 	case A_RETRY:
810 		goto retry;
811 	case A_REBOOT:
812 		kern_reboot(RB_NOSYNC);
813 		/* NOTREACHED */
814 	}
815 
816 	return (error);
817 }
818 
819 static void
820 vfs_mountroot_conf0(struct sbuf *sb)
821 {
822 	char *s, *tok, *mnt, *opt;
823 	int error;
824 
825 	sbuf_printf(sb, ".onfail panic\n");
826 	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
827 	if (boothowto & RB_ASKNAME)
828 		sbuf_printf(sb, ".ask\n");
829 #ifdef ROOTDEVNAME
830 	if (boothowto & RB_DFLTROOT)
831 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
832 #endif
833 	if (boothowto & RB_CDROM) {
834 		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
835 		sbuf_printf(sb, ".timeout 0\n");
836 		sbuf_printf(sb, "cd9660:/dev/acd0 ro\n");
837 		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
838 	}
839 	s = getenv("vfs.root.mountfrom");
840 	if (s != NULL) {
841 		opt = getenv("vfs.root.mountfrom.options");
842 		tok = s;
843 		error = parse_token(&tok, &mnt);
844 		while (!error) {
845 			sbuf_printf(sb, "%s %s\n", mnt,
846 			    (opt != NULL) ? opt : "");
847 			free(mnt, M_TEMP);
848 			error = parse_token(&tok, &mnt);
849 		}
850 		if (opt != NULL)
851 			freeenv(opt);
852 		freeenv(s);
853 	}
854 	if (rootdevnames[0] != NULL)
855 		sbuf_printf(sb, "%s\n", rootdevnames[0]);
856 	if (rootdevnames[1] != NULL)
857 		sbuf_printf(sb, "%s\n", rootdevnames[1]);
858 #ifdef ROOTDEVNAME
859 	if (!(boothowto & RB_DFLTROOT))
860 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
861 #endif
862 	if (!(boothowto & RB_ASKNAME))
863 		sbuf_printf(sb, ".ask\n");
864 }
865 
866 static int
867 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
868 {
869 	static char buf[128];
870 	struct nameidata nd;
871 	off_t ofs;
872 	int error, flags;
873 	int len, resid;
874 	int vfslocked;
875 
876 	NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE,
877 	    "/.mount.conf", td);
878 	flags = FREAD;
879 	error = vn_open(&nd, &flags, 0, NULL);
880 	if (error)
881 		return (error);
882 
883 	vfslocked = NDHASGIANT(&nd);
884 	NDFREE(&nd, NDF_ONLY_PNBUF);
885 	ofs = 0;
886 	len = sizeof(buf) - 1;
887 	while (1) {
888 		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
889 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
890 		    NOCRED, &resid, td);
891 		if (error)
892 			break;
893 		if (resid == len)
894 			break;
895 		buf[len - resid] = 0;
896 		sbuf_printf(sb, "%s", buf);
897 		ofs += len - resid;
898 	}
899 
900 	VOP_UNLOCK(nd.ni_vp, 0);
901 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
902 	VFS_UNLOCK_GIANT(vfslocked);
903 	return (error);
904 }
905 
906 static void
907 vfs_mountroot_wait(void)
908 {
909 	struct root_hold_token *h;
910 	struct timeval lastfail;
911 	int curfail;
912 
913 	curfail = 0;
914 	while (1) {
915 		DROP_GIANT();
916 		g_waitidle();
917 		PICKUP_GIANT();
918 		mtx_lock(&mountlist_mtx);
919 		if (LIST_EMPTY(&root_holds)) {
920 			mtx_unlock(&mountlist_mtx);
921 			break;
922 		}
923 		if (ppsratecheck(&lastfail, &curfail, 1)) {
924 			printf("Root mount waiting for:");
925 			LIST_FOREACH(h, &root_holds, list)
926 				printf(" %s", h->who);
927 			printf("\n");
928 		}
929 		msleep(&root_holds, &mountlist_mtx, PZERO | PDROP, "roothold",
930 		    hz);
931 	}
932 }
933 
934 void
935 vfs_mountroot(void)
936 {
937 	struct mount *mp;
938 	struct sbuf *sb;
939 	struct thread *td;
940 	time_t timebase;
941 	int error;
942 
943 	td = curthread;
944 
945 	vfs_mountroot_wait();
946 
947 	sb = sbuf_new_auto();
948 	vfs_mountroot_conf0(sb);
949 	sbuf_finish(sb);
950 
951 	error = vfs_mountroot_devfs(td, &mp);
952 	while (!error) {
953 		error = vfs_mountroot_parse(sb, mp);
954 		if (!error) {
955 			error = vfs_mountroot_shuffle(td, mp);
956 			if (!error) {
957 				sbuf_clear(sb);
958 				error = vfs_mountroot_readconf(td, sb);
959 				sbuf_finish(sb);
960 			}
961 		}
962 	}
963 
964 	sbuf_delete(sb);
965 
966 	/*
967 	 * Iterate over all currently mounted file systems and use
968 	 * the time stamp found to check and/or initialize the RTC.
969 	 * Call inittodr() only once and pass it the largest of the
970 	 * timestamps we encounter.
971 	 */
972 	timebase = 0;
973 	mtx_lock(&mountlist_mtx);
974 	mp = TAILQ_FIRST(&mountlist);
975 	while (mp != NULL) {
976 		if (mp->mnt_time > timebase)
977 			timebase = mp->mnt_time;
978 		mp = TAILQ_NEXT(mp, mnt_list);
979 	}
980 	mtx_unlock(&mountlist_mtx);
981 	inittodr(timebase);
982 
983 	/* Keep prison0's root in sync with the global rootvnode. */
984 	mtx_lock(&prison0.pr_mtx);
985 	prison0.pr_root = rootvnode;
986 	vref(prison0.pr_root);
987 	mtx_unlock(&prison0.pr_mtx);
988 
989 	mtx_lock(&mountlist_mtx);
990 	atomic_store_rel_int(&root_mount_complete, 1);
991 	wakeup(&root_mount_complete);
992 	mtx_unlock(&mountlist_mtx);
993 }
994 
995 static struct mntarg *
996 parse_mountroot_options(struct mntarg *ma, const char *options)
997 {
998 	char *p;
999 	char *name, *name_arg;
1000 	char *val, *val_arg;
1001 	char *opts;
1002 
1003 	if (options == NULL || options[0] == '\0')
1004 		return (ma);
1005 
1006 	p = opts = strdup(options, M_MOUNT);
1007 	if (opts == NULL) {
1008 		return (ma);
1009 	}
1010 
1011 	while((name = strsep(&p, ",")) != NULL) {
1012 		if (name[0] == '\0')
1013 			break;
1014 
1015 		val = strchr(name, '=');
1016 		if (val != NULL) {
1017 			*val = '\0';
1018 			++val;
1019 		}
1020 		if( strcmp(name, "rw") == 0 ||
1021 		    strcmp(name, "noro") == 0) {
1022 			/*
1023 			 * The first time we mount the root file system,
1024 			 * we need to mount 'ro', so We need to ignore
1025 			 * 'rw' and 'noro' mount options.
1026 			 */
1027 			continue;
1028 		}
1029 		name_arg = strdup(name, M_MOUNT);
1030 		val_arg = NULL;
1031 		if (val != NULL)
1032 			val_arg = strdup(val, M_MOUNT);
1033 
1034 		ma = mount_arg(ma, name_arg, val_arg,
1035 		    (val_arg != NULL ? -1 : 0));
1036 	}
1037 	free(opts, M_MOUNT);
1038 	return (ma);
1039 }
1040