xref: /freebsd/sys/kern/vfs_mountroot.c (revision 59e2ff550c448126b988150ce800cdf73bb5103e)
1 /*-
2  * Copyright (c) 2010 Marcel Moolenaar
3  * Copyright (c) 1999-2004 Poul-Henning Kamp
4  * Copyright (c) 1999 Michael Smith
5  * Copyright (c) 1989, 1993
6  *      The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "opt_rootdevname.h"
39 
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/param.h>
44 #include <sys/conf.h>
45 #include <sys/cons.h>
46 #include <sys/fcntl.h>
47 #include <sys/jail.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/mdioctl.h>
51 #include <sys/mount.h>
52 #include <sys/mutex.h>
53 #include <sys/namei.h>
54 #include <sys/priv.h>
55 #include <sys/proc.h>
56 #include <sys/filedesc.h>
57 #include <sys/reboot.h>
58 #include <sys/sbuf.h>
59 #include <sys/stat.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysproto.h>
62 #include <sys/sx.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/systm.h>
66 #include <sys/vnode.h>
67 
68 #include <geom/geom.h>
69 
70 /*
71  * The root filesystem is detailed in the kernel environment variable
72  * vfs.root.mountfrom, which is expected to be in the general format
73  *
74  * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
75  * vfsname   := the name of a VFS known to the kernel and capable
76  *              of being mounted as root
77  * path      := disk device name or other data used by the filesystem
78  *              to locate its physical store
79  *
80  * If the environment variable vfs.root.mountfrom is a space separated list,
81  * each list element is tried in turn and the root filesystem will be mounted
82  * from the first one that succeeds.
83  *
84  * The environment variable vfs.root.mountfrom.options is a comma delimited
85  * set of string mount options.  These mount options must be parseable
86  * by nmount() in the kernel.
87  */
88 
89 static int parse_mount(char **);
90 static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
91 
92 /*
93  * The vnode of the system's root (/ in the filesystem, without chroot
94  * active.)
95  */
96 struct vnode *rootvnode;
97 
98 /*
99  * Mount of the system's /dev.
100  */
101 struct mount *rootdevmp;
102 
103 char *rootdevnames[2] = {NULL, NULL};
104 
105 struct mtx root_holds_mtx;
106 MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF);
107 
108 struct root_hold_token {
109 	const char			*who;
110 	LIST_ENTRY(root_hold_token)	list;
111 };
112 
113 static LIST_HEAD(, root_hold_token)	root_holds =
114     LIST_HEAD_INITIALIZER(root_holds);
115 
116 enum action {
117 	A_CONTINUE,
118 	A_PANIC,
119 	A_REBOOT,
120 	A_RETRY
121 };
122 
123 static enum action root_mount_onfail = A_CONTINUE;
124 
125 static int root_mount_mddev;
126 static int root_mount_complete;
127 
128 /* By default wait up to 3 seconds for devices to appear. */
129 static int root_mount_timeout = 3;
130 TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);
131 
132 struct root_hold_token *
133 root_mount_hold(const char *identifier)
134 {
135 	struct root_hold_token *h;
136 
137 	if (root_mounted())
138 		return (NULL);
139 
140 	h = malloc(sizeof *h, M_DEVBUF, M_ZERO | M_WAITOK);
141 	h->who = identifier;
142 	mtx_lock(&root_holds_mtx);
143 	LIST_INSERT_HEAD(&root_holds, h, list);
144 	mtx_unlock(&root_holds_mtx);
145 	return (h);
146 }
147 
148 void
149 root_mount_rel(struct root_hold_token *h)
150 {
151 
152 	if (h == NULL)
153 		return;
154 	mtx_lock(&root_holds_mtx);
155 	LIST_REMOVE(h, list);
156 	wakeup(&root_holds);
157 	mtx_unlock(&root_holds_mtx);
158 	free(h, M_DEVBUF);
159 }
160 
161 int
162 root_mounted(void)
163 {
164 
165 	/* No mutex is acquired here because int stores are atomic. */
166 	return (root_mount_complete);
167 }
168 
169 static void
170 set_rootvnode(void)
171 {
172 	struct proc *p;
173 
174 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
175 		panic("Cannot find root vnode");
176 
177 	VOP_UNLOCK(rootvnode, 0);
178 
179 	p = curthread->td_proc;
180 	FILEDESC_XLOCK(p->p_fd);
181 
182 	if (p->p_fd->fd_cdir != NULL)
183 		vrele(p->p_fd->fd_cdir);
184 	p->p_fd->fd_cdir = rootvnode;
185 	VREF(rootvnode);
186 
187 	if (p->p_fd->fd_rdir != NULL)
188 		vrele(p->p_fd->fd_rdir);
189 	p->p_fd->fd_rdir = rootvnode;
190 	VREF(rootvnode);
191 
192 	FILEDESC_XUNLOCK(p->p_fd);
193 }
194 
195 static int
196 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
197 {
198 	struct vfsoptlist *opts;
199 	struct vfsconf *vfsp;
200 	struct mount *mp;
201 	int error;
202 
203 	*mpp = NULL;
204 
205 	if (rootdevmp != NULL) {
206 		/*
207 		 * Already have /dev; this happens during rerooting.
208 		 */
209 		error = vfs_busy(rootdevmp, 0);
210 		if (error != 0)
211 			return (error);
212 		*mpp = rootdevmp;
213 	} else {
214 		vfsp = vfs_byname("devfs");
215 		KASSERT(vfsp != NULL, ("Could not find devfs by name"));
216 		if (vfsp == NULL)
217 			return (ENOENT);
218 
219 		mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
220 
221 		error = VFS_MOUNT(mp);
222 		KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
223 		if (error)
224 			return (error);
225 
226 		opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
227 		TAILQ_INIT(opts);
228 		mp->mnt_opt = opts;
229 
230 		mtx_lock(&mountlist_mtx);
231 		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
232 		mtx_unlock(&mountlist_mtx);
233 
234 		*mpp = mp;
235 		rootdevmp = mp;
236 	}
237 
238 	set_rootvnode();
239 
240 	error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
241 	if (error)
242 		printf("kern_symlink /dev -> / returns %d\n", error);
243 
244 	return (error);
245 }
246 
247 static void
248 vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
249 {
250 	struct nameidata nd;
251 	struct mount *mporoot, *mpnroot;
252 	struct vnode *vp, *vporoot, *vpdevfs;
253 	char *fspath;
254 	int error;
255 
256 	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
257 
258 	/* Shuffle the mountlist. */
259 	mtx_lock(&mountlist_mtx);
260 	mporoot = TAILQ_FIRST(&mountlist);
261 	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
262 	if (mporoot != mpdevfs) {
263 		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
264 		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
265 	}
266 	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
267 	mtx_unlock(&mountlist_mtx);
268 
269 	cache_purgevfs(mporoot);
270 	if (mporoot != mpdevfs)
271 		cache_purgevfs(mpdevfs);
272 
273 	VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot);
274 
275 	VI_LOCK(vporoot);
276 	vporoot->v_iflag &= ~VI_MOUNT;
277 	VI_UNLOCK(vporoot);
278 	vporoot->v_mountedhere = NULL;
279 	mporoot->mnt_flag &= ~MNT_ROOTFS;
280 	mporoot->mnt_vnodecovered = NULL;
281 	vput(vporoot);
282 
283 	/* Set up the new rootvnode, and purge the cache */
284 	mpnroot->mnt_vnodecovered = NULL;
285 	set_rootvnode();
286 	cache_purgevfs(rootvnode->v_mount);
287 
288 	if (mporoot != mpdevfs) {
289 		/* Remount old root under /.mount or /mnt */
290 		fspath = "/.mount";
291 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
292 		    fspath, td);
293 		error = namei(&nd);
294 		if (error) {
295 			NDFREE(&nd, NDF_ONLY_PNBUF);
296 			fspath = "/mnt";
297 			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
298 			    fspath, td);
299 			error = namei(&nd);
300 		}
301 		if (!error) {
302 			vp = nd.ni_vp;
303 			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
304 			if (!error)
305 				error = vinvalbuf(vp, V_SAVE, 0, 0);
306 			if (!error) {
307 				cache_purge(vp);
308 				mporoot->mnt_vnodecovered = vp;
309 				vp->v_mountedhere = mporoot;
310 				strlcpy(mporoot->mnt_stat.f_mntonname,
311 				    fspath, MNAMELEN);
312 				VOP_UNLOCK(vp, 0);
313 			} else
314 				vput(vp);
315 		}
316 		NDFREE(&nd, NDF_ONLY_PNBUF);
317 
318 		if (error && bootverbose)
319 			printf("mountroot: unable to remount previous root "
320 			    "under /.mount or /mnt (error %d).\n", error);
321 	}
322 
323 	/* Remount devfs under /dev */
324 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
325 	error = namei(&nd);
326 	if (!error) {
327 		vp = nd.ni_vp;
328 		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
329 		if (!error)
330 			error = vinvalbuf(vp, V_SAVE, 0, 0);
331 		if (!error) {
332 			vpdevfs = mpdevfs->mnt_vnodecovered;
333 			if (vpdevfs != NULL) {
334 				cache_purge(vpdevfs);
335 				vpdevfs->v_mountedhere = NULL;
336 				vrele(vpdevfs);
337 			}
338 			mpdevfs->mnt_vnodecovered = vp;
339 			vp->v_mountedhere = mpdevfs;
340 			VOP_UNLOCK(vp, 0);
341 		} else
342 			vput(vp);
343 	}
344 	if (error && bootverbose)
345 		printf("mountroot: unable to remount devfs under /dev "
346 		    "(error %d).\n", error);
347 	NDFREE(&nd, NDF_ONLY_PNBUF);
348 
349 	if (mporoot == mpdevfs) {
350 		vfs_unbusy(mpdevfs);
351 		/* Unlink the no longer needed /dev/dev -> / symlink */
352 		error = kern_unlinkat(td, AT_FDCWD, "/dev/dev",
353 		    UIO_SYSSPACE, 0);
354 		if (error && bootverbose)
355 			printf("mountroot: unable to unlink /dev/dev "
356 			    "(error %d)\n", error);
357 	}
358 }
359 
/*
 * Configuration parser.
 */

/*
 * Character classes accepted by parse_skipto() in place of a literal
 * character.
 */
#define	CC_WHITESPACE		(-1)
#define	CC_NONWHITESPACE	(-2)

/* Negative results of the parser: end of input and end of line. */
#define	PE_EOF			(-1)
#define	PE_EOL			(-2)
371 
/*
 * Return the character at the current parse position without consuming it.
 */
static __inline int
parse_peek(char **conf)
{
	const char *cursor;

	cursor = *conf;
	return (*cursor);
}
378 
/*
 * Overwrite the character at the current parse position with c; the
 * position itself is left unchanged.
 */
static __inline void
parse_poke(char **conf, int c)
{
	char *cursor;

	cursor = *conf;
	*cursor = c;
}
385 
/*
 * Move the parse position forward by one character.
 */
static __inline void
parse_advance(char **conf)
{

	*conf = *conf + 1;
}
392 
393 static int
394 parse_skipto(char **conf, int mc)
395 {
396 	int c, match;
397 
398 	while (1) {
399 		c = parse_peek(conf);
400 		if (c == 0)
401 			return (PE_EOF);
402 		switch (mc) {
403 		case CC_WHITESPACE:
404 			match = (c == ' ' || c == '\t' || c == '\n') ? 1 : 0;
405 			break;
406 		case CC_NONWHITESPACE:
407 			if (c == '\n')
408 				return (PE_EOL);
409 			match = (c != ' ' && c != '\t') ? 1 : 0;
410 			break;
411 		default:
412 			match = (c == mc) ? 1 : 0;
413 			break;
414 		}
415 		if (match)
416 			break;
417 		parse_advance(conf);
418 	}
419 	return (0);
420 }
421 
422 static int
423 parse_token(char **conf, char **tok)
424 {
425 	char *p;
426 	size_t len;
427 	int error;
428 
429 	*tok = NULL;
430 	error = parse_skipto(conf, CC_NONWHITESPACE);
431 	if (error)
432 		return (error);
433 	p = *conf;
434 	error = parse_skipto(conf, CC_WHITESPACE);
435 	len = *conf - p;
436 	*tok = malloc(len + 1, M_TEMP, M_WAITOK | M_ZERO);
437 	bcopy(p, *tok, len);
438 	return (0);
439 }
440 
441 static void
442 parse_dir_ask_printenv(const char *var)
443 {
444 	char *val;
445 
446 	val = kern_getenv(var);
447 	if (val != NULL) {
448 		printf("  %s=%s\n", var, val);
449 		freeenv(val);
450 	}
451 }
452 
453 static int
454 parse_dir_ask(char **conf)
455 {
456 	char name[80];
457 	char *mnt;
458 	int error;
459 
460 	printf("\nLoader variables:\n");
461 	parse_dir_ask_printenv("vfs.root.mountfrom");
462 	parse_dir_ask_printenv("vfs.root.mountfrom.options");
463 
464 	printf("\nManual root filesystem specification:\n");
465 	printf("  <fstype>:<device> [options]\n");
466 	printf("      Mount <device> using filesystem <fstype>\n");
467 	printf("      and with the specified (optional) option list.\n");
468 	printf("\n");
469 	printf("    eg. ufs:/dev/da0s1a\n");
470 	printf("        zfs:tank\n");
471 	printf("        cd9660:/dev/acd0 ro\n");
472 	printf("          (which is equivalent to: ");
473 	printf("mount -t cd9660 -o ro /dev/acd0 /)\n");
474 	printf("\n");
475 	printf("  ?               List valid disk boot devices\n");
476 	printf("  .               Yield 1 second (for background tasks)\n");
477 	printf("  <empty line>    Abort manual input\n");
478 
479 	do {
480 		error = EINVAL;
481 		printf("\nmountroot> ");
482 		cngets(name, sizeof(name), GETS_ECHO);
483 		if (name[0] == '\0')
484 			break;
485 		if (name[0] == '?' && name[1] == '\0') {
486 			printf("\nList of GEOM managed disk devices:\n  ");
487 			g_dev_print();
488 			continue;
489 		}
490 		if (name[0] == '.' && name[1] == '\0') {
491 			pause("rmask", hz);
492 			continue;
493 		}
494 		mnt = name;
495 		error = parse_mount(&mnt);
496 		if (error == -1)
497 			printf("Invalid file system specification.\n");
498 	} while (error != 0);
499 
500 	return (error);
501 }
502 
503 static int
504 parse_dir_md(char **conf)
505 {
506 	struct stat sb;
507 	struct thread *td;
508 	struct md_ioctl *mdio;
509 	char *path, *tok;
510 	int error, fd, len;
511 
512 	td = curthread;
513 
514 	error = parse_token(conf, &tok);
515 	if (error)
516 		return (error);
517 
518 	len = strlen(tok);
519 	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
520 	path = (void *)(mdio + 1);
521 	bcopy(tok, path, len);
522 	free(tok, M_TEMP);
523 
524 	/* Get file status. */
525 	error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL);
526 	if (error)
527 		goto out;
528 
529 	/* Open /dev/mdctl so that we can attach/detach. */
530 	error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE,
531 	    O_RDWR, 0);
532 	if (error)
533 		goto out;
534 
535 	fd = td->td_retval[0];
536 	mdio->md_version = MDIOVERSION;
537 	mdio->md_type = MD_VNODE;
538 
539 	if (root_mount_mddev != -1) {
540 		mdio->md_unit = root_mount_mddev;
541 		DROP_GIANT();
542 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
543 		PICKUP_GIANT();
544 		/* Ignore errors. We don't care. */
545 		root_mount_mddev = -1;
546 	}
547 
548 	mdio->md_file = (void *)(mdio + 1);
549 	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
550 	mdio->md_mediasize = sb.st_size;
551 	mdio->md_unit = 0;
552 	DROP_GIANT();
553 	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
554 	PICKUP_GIANT();
555 	if (error)
556 		goto out;
557 
558 	if (mdio->md_unit > 9) {
559 		printf("rootmount: too many md units\n");
560 		mdio->md_file = NULL;
561 		mdio->md_options = 0;
562 		mdio->md_mediasize = 0;
563 		DROP_GIANT();
564 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
565 		PICKUP_GIANT();
566 		/* Ignore errors. We don't care. */
567 		error = ERANGE;
568 		goto out;
569 	}
570 
571 	root_mount_mddev = mdio->md_unit;
572 	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
573 
574 	error = kern_close(td, fd);
575 
576  out:
577 	free(mdio, M_TEMP);
578 	return (error);
579 }
580 
581 static int
582 parse_dir_onfail(char **conf)
583 {
584 	char *action;
585 	int error;
586 
587 	error = parse_token(conf, &action);
588 	if (error)
589 		return (error);
590 
591 	if (!strcmp(action, "continue"))
592 		root_mount_onfail = A_CONTINUE;
593 	else if (!strcmp(action, "panic"))
594 		root_mount_onfail = A_PANIC;
595 	else if (!strcmp(action, "reboot"))
596 		root_mount_onfail = A_REBOOT;
597 	else if (!strcmp(action, "retry"))
598 		root_mount_onfail = A_RETRY;
599 	else {
600 		printf("rootmount: %s: unknown action\n", action);
601 		error = EINVAL;
602 	}
603 
604 	free(action, M_TEMP);
605 	return (0);
606 }
607 
608 static int
609 parse_dir_timeout(char **conf)
610 {
611 	char *tok, *endtok;
612 	long secs;
613 	int error;
614 
615 	error = parse_token(conf, &tok);
616 	if (error)
617 		return (error);
618 
619 	secs = strtol(tok, &endtok, 0);
620 	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
621 	if (!error)
622 		root_mount_timeout = secs;
623 	free(tok, M_TEMP);
624 	return (error);
625 }
626 
627 static int
628 parse_directive(char **conf)
629 {
630 	char *dir;
631 	int error;
632 
633 	error = parse_token(conf, &dir);
634 	if (error)
635 		return (error);
636 
637 	if (strcmp(dir, ".ask") == 0)
638 		error = parse_dir_ask(conf);
639 	else if (strcmp(dir, ".md") == 0)
640 		error = parse_dir_md(conf);
641 	else if (strcmp(dir, ".onfail") == 0)
642 		error = parse_dir_onfail(conf);
643 	else if (strcmp(dir, ".timeout") == 0)
644 		error = parse_dir_timeout(conf);
645 	else {
646 		printf("mountroot: invalid directive `%s'\n", dir);
647 		/* Ignore the rest of the line. */
648 		(void)parse_skipto(conf, '\n');
649 		error = EINVAL;
650 	}
651 	free(dir, M_TEMP);
652 	return (error);
653 }
654 
655 static int
656 parse_mount_dev_present(const char *dev)
657 {
658 	struct nameidata nd;
659 	int error;
660 
661 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
662 	error = namei(&nd);
663 	if (!error)
664 		vput(nd.ni_vp);
665 	NDFREE(&nd, NDF_ONLY_PNBUF);
666 	return (error != 0) ? 0 : 1;
667 }
668 
669 #define	ERRMSGL	255
670 static int
671 parse_mount(char **conf)
672 {
673 	char *errmsg;
674 	struct mntarg *ma;
675 	char *dev, *fs, *opts, *tok;
676 	int delay, error, timeout;
677 
678 	error = parse_token(conf, &tok);
679 	if (error)
680 		return (error);
681 	fs = tok;
682 	error = parse_skipto(&tok, ':');
683 	if (error) {
684 		free(fs, M_TEMP);
685 		return (error);
686 	}
687 	parse_poke(&tok, '\0');
688 	parse_advance(&tok);
689 	dev = tok;
690 
691 	if (root_mount_mddev != -1) {
692 		/* Handle substitution for the md unit number. */
693 		tok = strstr(dev, "md#");
694 		if (tok != NULL)
695 			tok[2] = '0' + root_mount_mddev;
696 	}
697 
698 	/* Parse options. */
699 	error = parse_token(conf, &tok);
700 	opts = (error == 0) ? tok : NULL;
701 
702 	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
703 	    (opts != NULL) ? opts : "");
704 
705 	errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
706 
707 	if (vfs_byname(fs) == NULL) {
708 		strlcpy(errmsg, "unknown file system", ERRMSGL);
709 		error = ENOENT;
710 		goto out;
711 	}
712 
713 	if (strcmp(fs, "zfs") != 0 && strstr(fs, "nfs") == NULL &&
714 	    dev[0] != '\0' && !parse_mount_dev_present(dev)) {
715 		printf("mountroot: waiting for device %s ...\n", dev);
716 		delay = hz / 10;
717 		timeout = root_mount_timeout * hz;
718 		do {
719 			pause("rmdev", delay);
720 			timeout -= delay;
721 		} while (timeout > 0 && !parse_mount_dev_present(dev));
722 		if (timeout <= 0) {
723 			error = ENODEV;
724 			goto out;
725 		}
726 	}
727 
728 	ma = NULL;
729 	ma = mount_arg(ma, "fstype", fs, -1);
730 	ma = mount_arg(ma, "fspath", "/", -1);
731 	ma = mount_arg(ma, "from", dev, -1);
732 	ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
733 	ma = mount_arg(ma, "ro", NULL, 0);
734 	ma = parse_mountroot_options(ma, opts);
735 	error = kernel_mount(ma, MNT_ROOTFS);
736 
737  out:
738 	if (error) {
739 		printf("Mounting from %s:%s failed with error %d",
740 		    fs, dev, error);
741 		if (errmsg[0] != '\0')
742 			printf(": %s", errmsg);
743 		printf(".\n");
744 	}
745 	free(fs, M_TEMP);
746 	free(errmsg, M_TEMP);
747 	if (opts != NULL)
748 		free(opts, M_TEMP);
749 	/* kernel_mount can return -1 on error. */
750 	return ((error < 0) ? EDOOFUS : error);
751 }
752 #undef ERRMSGL
753 
754 static int
755 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
756 {
757 	struct mount *mp;
758 	char *conf;
759 	int error;
760 
761 	root_mount_mddev = -1;
762 
763 retry:
764 	conf = sbuf_data(sb);
765 	mp = TAILQ_NEXT(mpdevfs, mnt_list);
766 	error = (mp == NULL) ? 0 : EDOOFUS;
767 	root_mount_onfail = A_CONTINUE;
768 	while (mp == NULL) {
769 		error = parse_skipto(&conf, CC_NONWHITESPACE);
770 		if (error == PE_EOL) {
771 			parse_advance(&conf);
772 			continue;
773 		}
774 		if (error < 0)
775 			break;
776 		switch (parse_peek(&conf)) {
777 		case '#':
778 			error = parse_skipto(&conf, '\n');
779 			break;
780 		case '.':
781 			error = parse_directive(&conf);
782 			break;
783 		default:
784 			error = parse_mount(&conf);
785 			if (error == -1) {
786 				printf("mountroot: invalid file system "
787 				    "specification.\n");
788 				error = 0;
789 			}
790 			break;
791 		}
792 		if (error < 0)
793 			break;
794 		/* Ignore any trailing garbage on the line. */
795 		if (parse_peek(&conf) != '\n') {
796 			printf("mountroot: advancing to next directive...\n");
797 			(void)parse_skipto(&conf, '\n');
798 		}
799 		mp = TAILQ_NEXT(mpdevfs, mnt_list);
800 	}
801 	if (mp != NULL)
802 		return (0);
803 
804 	/*
805 	 * We failed to mount (a new) root.
806 	 */
807 	switch (root_mount_onfail) {
808 	case A_CONTINUE:
809 		break;
810 	case A_PANIC:
811 		panic("mountroot: unable to (re-)mount root.");
812 		/* NOTREACHED */
813 	case A_RETRY:
814 		goto retry;
815 	case A_REBOOT:
816 		kern_reboot(RB_NOSYNC);
817 		/* NOTREACHED */
818 	}
819 
820 	return (error);
821 }
822 
823 static void
824 vfs_mountroot_conf0(struct sbuf *sb)
825 {
826 	char *s, *tok, *mnt, *opt;
827 	int error;
828 
829 	sbuf_printf(sb, ".onfail panic\n");
830 	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
831 	if (boothowto & RB_ASKNAME)
832 		sbuf_printf(sb, ".ask\n");
833 #ifdef ROOTDEVNAME
834 	if (boothowto & RB_DFLTROOT)
835 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
836 #endif
837 	if (boothowto & RB_CDROM) {
838 		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
839 		sbuf_printf(sb, ".timeout 0\n");
840 		sbuf_printf(sb, "cd9660:/dev/acd0 ro\n");
841 		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
842 	}
843 	s = kern_getenv("vfs.root.mountfrom");
844 	if (s != NULL) {
845 		opt = kern_getenv("vfs.root.mountfrom.options");
846 		tok = s;
847 		error = parse_token(&tok, &mnt);
848 		while (!error) {
849 			sbuf_printf(sb, "%s %s\n", mnt,
850 			    (opt != NULL) ? opt : "");
851 			free(mnt, M_TEMP);
852 			error = parse_token(&tok, &mnt);
853 		}
854 		if (opt != NULL)
855 			freeenv(opt);
856 		freeenv(s);
857 	}
858 	if (rootdevnames[0] != NULL)
859 		sbuf_printf(sb, "%s\n", rootdevnames[0]);
860 	if (rootdevnames[1] != NULL)
861 		sbuf_printf(sb, "%s\n", rootdevnames[1]);
862 #ifdef ROOTDEVNAME
863 	if (!(boothowto & RB_DFLTROOT))
864 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
865 #endif
866 	if (!(boothowto & RB_ASKNAME))
867 		sbuf_printf(sb, ".ask\n");
868 }
869 
870 static int
871 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
872 {
873 	static char buf[128];
874 	struct nameidata nd;
875 	off_t ofs;
876 	ssize_t resid;
877 	int error, flags, len;
878 
879 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
880 	flags = FREAD;
881 	error = vn_open(&nd, &flags, 0, NULL);
882 	if (error)
883 		return (error);
884 
885 	NDFREE(&nd, NDF_ONLY_PNBUF);
886 	ofs = 0;
887 	len = sizeof(buf) - 1;
888 	while (1) {
889 		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
890 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
891 		    NOCRED, &resid, td);
892 		if (error)
893 			break;
894 		if (resid == len)
895 			break;
896 		buf[len - resid] = 0;
897 		sbuf_printf(sb, "%s", buf);
898 		ofs += len - resid;
899 	}
900 
901 	VOP_UNLOCK(nd.ni_vp, 0);
902 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
903 	return (error);
904 }
905 
906 static void
907 vfs_mountroot_wait(void)
908 {
909 	struct root_hold_token *h;
910 	struct timeval lastfail;
911 	int curfail;
912 
913 	curfail = 0;
914 	while (1) {
915 		DROP_GIANT();
916 		g_waitidle();
917 		PICKUP_GIANT();
918 		mtx_lock(&root_holds_mtx);
919 		if (LIST_EMPTY(&root_holds)) {
920 			mtx_unlock(&root_holds_mtx);
921 			break;
922 		}
923 		if (ppsratecheck(&lastfail, &curfail, 1)) {
924 			printf("Root mount waiting for:");
925 			LIST_FOREACH(h, &root_holds, list)
926 				printf(" %s", h->who);
927 			printf("\n");
928 		}
929 		msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold",
930 		    hz);
931 	}
932 }
933 
934 void
935 vfs_mountroot(void)
936 {
937 	struct mount *mp;
938 	struct sbuf *sb;
939 	struct thread *td;
940 	time_t timebase;
941 	int error;
942 
943 	td = curthread;
944 
945 	vfs_mountroot_wait();
946 
947 	sb = sbuf_new_auto();
948 	vfs_mountroot_conf0(sb);
949 	sbuf_finish(sb);
950 
951 	error = vfs_mountroot_devfs(td, &mp);
952 	while (!error) {
953 		error = vfs_mountroot_parse(sb, mp);
954 		if (!error) {
955 			vfs_mountroot_shuffle(td, mp);
956 			sbuf_clear(sb);
957 			error = vfs_mountroot_readconf(td, sb);
958 			sbuf_finish(sb);
959 		}
960 	}
961 
962 	sbuf_delete(sb);
963 
964 	/*
965 	 * Iterate over all currently mounted file systems and use
966 	 * the time stamp found to check and/or initialize the RTC.
967 	 * Call inittodr() only once and pass it the largest of the
968 	 * timestamps we encounter.
969 	 */
970 	timebase = 0;
971 	mtx_lock(&mountlist_mtx);
972 	mp = TAILQ_FIRST(&mountlist);
973 	while (mp != NULL) {
974 		if (mp->mnt_time > timebase)
975 			timebase = mp->mnt_time;
976 		mp = TAILQ_NEXT(mp, mnt_list);
977 	}
978 	mtx_unlock(&mountlist_mtx);
979 	inittodr(timebase);
980 
981 	/* Keep prison0's root in sync with the global rootvnode. */
982 	mtx_lock(&prison0.pr_mtx);
983 	prison0.pr_root = rootvnode;
984 	vref(prison0.pr_root);
985 	mtx_unlock(&prison0.pr_mtx);
986 
987 	mtx_lock(&root_holds_mtx);
988 	atomic_store_rel_int(&root_mount_complete, 1);
989 	wakeup(&root_mount_complete);
990 	mtx_unlock(&root_holds_mtx);
991 
992 	EVENTHANDLER_INVOKE(mountroot);
993 }
994 
995 static struct mntarg *
996 parse_mountroot_options(struct mntarg *ma, const char *options)
997 {
998 	char *p;
999 	char *name, *name_arg;
1000 	char *val, *val_arg;
1001 	char *opts;
1002 
1003 	if (options == NULL || options[0] == '\0')
1004 		return (ma);
1005 
1006 	p = opts = strdup(options, M_MOUNT);
1007 	if (opts == NULL) {
1008 		return (ma);
1009 	}
1010 
1011 	while((name = strsep(&p, ",")) != NULL) {
1012 		if (name[0] == '\0')
1013 			break;
1014 
1015 		val = strchr(name, '=');
1016 		if (val != NULL) {
1017 			*val = '\0';
1018 			++val;
1019 		}
1020 		if( strcmp(name, "rw") == 0 ||
1021 		    strcmp(name, "noro") == 0) {
1022 			/*
1023 			 * The first time we mount the root file system,
1024 			 * we need to mount 'ro', so We need to ignore
1025 			 * 'rw' and 'noro' mount options.
1026 			 */
1027 			continue;
1028 		}
1029 		name_arg = strdup(name, M_MOUNT);
1030 		val_arg = NULL;
1031 		if (val != NULL)
1032 			val_arg = strdup(val, M_MOUNT);
1033 
1034 		ma = mount_arg(ma, name_arg, val_arg,
1035 		    (val_arg != NULL ? -1 : 0));
1036 	}
1037 	free(opts, M_MOUNT);
1038 	return (ma);
1039 }
1040