xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision fc55c20355d889bf3d3f81d94b3614a0c4253fa0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  */
54 
55 #include <sys/cdefs.h>
56 #include <sys/ioctl.h>
57 #include <sys/stat.h>
58 #include <sys/disk.h>
59 #include <sys/queue.h>
60 
61 #include <machine/specialreg.h>
62 #include <machine/vmm.h>
63 
64 #include <assert.h>
65 #include <dirent.h>
66 #include <dlfcn.h>
67 #include <errno.h>
68 #include <err.h>
69 #include <fcntl.h>
70 #include <getopt.h>
71 #include <libgen.h>
72 #include <limits.h>
73 #include <setjmp.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <sysexits.h>
78 #include <termios.h>
79 #include <unistd.h>
80 
81 #include <capsicum_helpers.h>
82 #include <vmmapi.h>
83 
84 #include "userboot.h"
85 
/* Size units and the vcpu id of the boot processor. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
#define	BSP	0

/* Capacity of the disk descriptor table below. */
#define	NDISKS	32

/* Working terminal settings and the saved copy that cb_exit() restores. */
static struct termios term;
static struct termios oldterm;

/* Descriptors for the disks given with -d; the first 'ndisks' are valid. */
static int disk_fd[NDISKS];
static int ndisks;

/* Console input/output descriptors used by the console callbacks. */
static int consin_fd;
static int consout_fd;

/* Directory descriptor that host file lookups are relative to; -1 if unset. */
static int hostbase_fd = -1;

/* Loaded userboot object, its file name, and the -l descriptor (-1 if none). */
static void *loader_hdl;
static char *loader;
static int explicit_loader_fd = -1;

/* Jump target used by cb_swap_interpreter() to restart loading in main(). */
static jmp_buf jb;

static char *vmname;
static char *progname;
static struct vmctx *ctx;
static struct vcpu *vcpu;

/* Register values recorded by the set* callbacks and consumed by cb_exec(). */
static uint64_t gdtbase;
static uint64_t cr3;
static uint64_t rsp;

static void cb_exit(void *arg, int v);
110 
111 /*
112  * Console i/o callbacks
113  */
114 
115 static void
116 cb_putc(void *arg __unused, int ch)
117 {
118 	char c = ch;
119 
120 	(void) write(consout_fd, &c, 1);
121 }
122 
123 static int
124 cb_getc(void *arg __unused)
125 {
126 	char c;
127 
128 	if (read(consin_fd, &c, 1) == 1)
129 		return (c);
130 	return (-1);
131 }
132 
133 static int
134 cb_poll(void *arg __unused)
135 {
136 	int n;
137 
138 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
139 		return (n > 0);
140 	return (0);
141 }
142 
143 /*
144  * Host filesystem i/o callbacks
145  */
146 
/*
 * Handle returned by cb_open() for a host file or directory.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid, else cf_u.fd */
	size_t cf_size;		/* st_size captured when the file was opened */
	struct stat cf_stat;	/* stat data captured when the file was opened */
	union {
		int fd;		/* regular file descriptor */
		DIR *dir;	/* directory stream */
	} cf_u;
};
156 
157 static int
158 cb_open(void *arg __unused, const char *filename, void **hp)
159 {
160 	struct cb_file *cf;
161 	struct stat sb;
162 	int fd, flags;
163 
164 	cf = NULL;
165 	fd = -1;
166 	flags = O_RDONLY | O_RESOLVE_BENEATH;
167 	if (hostbase_fd == -1)
168 		return (ENOENT);
169 
170 	/* Absolute paths are relative to our hostbase, chop off leading /. */
171 	if (filename[0] == '/')
172 		filename++;
173 
174 	/* Lookup of /, use . instead. */
175 	if (filename[0] == '\0')
176 		filename = ".";
177 
178 	if (fstatat(hostbase_fd, filename, &sb, AT_RESOLVE_BENEATH) < 0)
179 		return (errno);
180 
181 	if (!S_ISDIR(sb.st_mode) && !S_ISREG(sb.st_mode))
182 		return (EINVAL);
183 
184 	if (S_ISDIR(sb.st_mode))
185 		flags |= O_DIRECTORY;
186 
187 	/* May be opening the root dir */
188 	fd = openat(hostbase_fd, filename, flags);
189 	if (fd < 0)
190 		return (errno);
191 
192 	cf = malloc(sizeof(struct cb_file));
193 	if (cf == NULL) {
194 		close(fd);
195 		return (ENOMEM);
196 	}
197 
198 	cf->cf_stat = sb;
199 	cf->cf_size = cf->cf_stat.st_size;
200 
201 	if (S_ISDIR(cf->cf_stat.st_mode)) {
202 		cf->cf_isdir = 1;
203 		cf->cf_u.dir = fdopendir(fd);
204 		if (cf->cf_u.dir == NULL) {
205 			close(fd);
206 			free(cf);
207 			return (ENOMEM);
208 		}
209 	} else {
210 		assert(S_ISREG(cf->cf_stat.st_mode));
211 		cf->cf_isdir = 0;
212 		cf->cf_u.fd = fd;
213 	}
214 	*hp = cf;
215 	return (0);
216 }
217 
218 static int
219 cb_close(void *arg __unused, void *h)
220 {
221 	struct cb_file *cf = h;
222 
223 	if (cf->cf_isdir)
224 		closedir(cf->cf_u.dir);
225 	else
226 		close(cf->cf_u.fd);
227 	free(cf);
228 
229 	return (0);
230 }
231 
232 static int
233 cb_isdir(void *arg __unused, void *h)
234 {
235 	struct cb_file *cf = h;
236 
237 	return (cf->cf_isdir);
238 }
239 
240 static int
241 cb_read(void *arg __unused, void *h, void *buf, size_t size, size_t *resid)
242 {
243 	struct cb_file *cf = h;
244 	ssize_t sz;
245 
246 	if (cf->cf_isdir)
247 		return (EINVAL);
248 	sz = read(cf->cf_u.fd, buf, size);
249 	if (sz < 0)
250 		return (EINVAL);
251 	*resid = size - sz;
252 	return (0);
253 }
254 
255 static int
256 cb_readdir(void *arg __unused, void *h, uint32_t *fileno_return,
257     uint8_t *type_return, size_t *namelen_return, char *name)
258 {
259 	struct cb_file *cf = h;
260 	struct dirent *dp;
261 
262 	if (!cf->cf_isdir)
263 		return (EINVAL);
264 
265 	dp = readdir(cf->cf_u.dir);
266 	if (!dp)
267 		return (ENOENT);
268 
269 	/*
270 	 * Note: d_namlen is in the range 0..255 and therefore less
271 	 * than PATH_MAX so we don't need to test before copying.
272 	 */
273 	*fileno_return = dp->d_fileno;
274 	*type_return = dp->d_type;
275 	*namelen_return = dp->d_namlen;
276 	memcpy(name, dp->d_name, dp->d_namlen);
277 	name[dp->d_namlen] = 0;
278 
279 	return (0);
280 }
281 
282 static int
283 cb_seek(void *arg __unused, void *h, uint64_t offset, int whence)
284 {
285 	struct cb_file *cf = h;
286 
287 	if (cf->cf_isdir)
288 		return (EINVAL);
289 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
290 		return (errno);
291 	return (0);
292 }
293 
294 static int
295 cb_stat(void *arg __unused, void *h, struct stat *sbp)
296 {
297 	struct cb_file *cf = h;
298 
299 	memset(sbp, 0, sizeof(struct stat));
300 	sbp->st_mode = cf->cf_stat.st_mode;
301 	sbp->st_uid = cf->cf_stat.st_uid;
302 	sbp->st_gid = cf->cf_stat.st_gid;
303 	sbp->st_size = cf->cf_stat.st_size;
304 	sbp->st_mtime = cf->cf_stat.st_mtime;
305 	sbp->st_dev = cf->cf_stat.st_dev;
306 	sbp->st_ino = cf->cf_stat.st_ino;
307 
308 	return (0);
309 }
310 
311 /*
312  * Disk image i/o callbacks
313  */
314 
315 static int
316 cb_diskread(void *arg __unused, int unit, uint64_t from, void *to, size_t size,
317     size_t *resid)
318 {
319 	ssize_t n;
320 
321 	if (unit < 0 || unit >= ndisks)
322 		return (EIO);
323 	n = pread(disk_fd[unit], to, size, from);
324 	if (n < 0)
325 		return (errno);
326 	*resid = size - n;
327 	return (0);
328 }
329 
330 static int
331 cb_diskwrite(void *arg __unused, int unit, uint64_t offset, void *src,
332     size_t size, size_t *resid)
333 {
334 	ssize_t n;
335 
336 	if (unit < 0 || unit >= ndisks)
337 		return (EIO);
338 	n = pwrite(disk_fd[unit], src, size, offset);
339 	if (n < 0)
340 		return (errno);
341 	*resid = size - n;
342 	return (0);
343 }
344 
345 static int
346 cb_diskioctl(void *arg __unused, int unit, u_long cmd, void *data)
347 {
348 	struct stat sb;
349 
350 	if (unit < 0 || unit >= ndisks)
351 		return (EBADF);
352 
353 	switch (cmd) {
354 	case DIOCGSECTORSIZE:
355 		*(u_int *)data = 512;
356 		break;
357 	case DIOCGMEDIASIZE:
358 		if (fstat(disk_fd[unit], &sb) != 0)
359 			return (ENOTTY);
360 		if (S_ISCHR(sb.st_mode) &&
361 		    ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
362 				return (ENOTTY);
363 		*(off_t *)data = sb.st_size;
364 		break;
365 	default:
366 		return (ENOTTY);
367 	}
368 
369 	return (0);
370 }
371 
372 /*
373  * Guest virtual machine i/o callbacks
374  */
375 static int
376 cb_copyin(void *arg __unused, const void *from, uint64_t to, size_t size)
377 {
378 	char *ptr;
379 
380 	to &= 0x7fffffff;
381 
382 	ptr = vm_map_gpa(ctx, to, size);
383 	if (ptr == NULL)
384 		return (EFAULT);
385 
386 	memcpy(ptr, from, size);
387 	return (0);
388 }
389 
390 static int
391 cb_copyout(void *arg __unused, uint64_t from, void *to, size_t size)
392 {
393 	char *ptr;
394 
395 	from &= 0x7fffffff;
396 
397 	ptr = vm_map_gpa(ctx, from, size);
398 	if (ptr == NULL)
399 		return (EFAULT);
400 
401 	memcpy(to, ptr, size);
402 	return (0);
403 }
404 
405 static void
406 cb_setreg(void *arg __unused, int r, uint64_t v)
407 {
408 	int error;
409 	enum vm_reg_name vmreg;
410 
411 	vmreg = VM_REG_LAST;
412 
413 	switch (r) {
414 	case 4:
415 		vmreg = VM_REG_GUEST_RSP;
416 		rsp = v;
417 		break;
418 	default:
419 		break;
420 	}
421 
422 	if (vmreg == VM_REG_LAST) {
423 		printf("test_setreg(%d): not implemented\n", r);
424 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
425 	}
426 
427 	error = vm_set_register(vcpu, vmreg, v);
428 	if (error) {
429 		perror("vm_set_register");
430 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
431 	}
432 }
433 
434 static void
435 cb_setmsr(void *arg __unused, int r, uint64_t v)
436 {
437 	int error;
438 	enum vm_reg_name vmreg;
439 
440 	vmreg = VM_REG_LAST;
441 
442 	switch (r) {
443 	case MSR_EFER:
444 		vmreg = VM_REG_GUEST_EFER;
445 		break;
446 	default:
447 		break;
448 	}
449 
450 	if (vmreg == VM_REG_LAST) {
451 		printf("test_setmsr(%d): not implemented\n", r);
452 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
453 	}
454 
455 	error = vm_set_register(vcpu, vmreg, v);
456 	if (error) {
457 		perror("vm_set_msr");
458 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
459 	}
460 }
461 
462 static void
463 cb_setcr(void *arg __unused, int r, uint64_t v)
464 {
465 	int error;
466 	enum vm_reg_name vmreg;
467 
468 	vmreg = VM_REG_LAST;
469 
470 	switch (r) {
471 	case 0:
472 		vmreg = VM_REG_GUEST_CR0;
473 		break;
474 	case 3:
475 		vmreg = VM_REG_GUEST_CR3;
476 		cr3 = v;
477 		break;
478 	case 4:
479 		vmreg = VM_REG_GUEST_CR4;
480 		break;
481 	default:
482 		break;
483 	}
484 
485 	if (vmreg == VM_REG_LAST) {
486 		printf("test_setcr(%d): not implemented\n", r);
487 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
488 	}
489 
490 	error = vm_set_register(vcpu, vmreg, v);
491 	if (error) {
492 		perror("vm_set_cr");
493 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
494 	}
495 }
496 
497 static void
498 cb_setgdt(void *arg __unused, uint64_t base, size_t size)
499 {
500 	int error;
501 
502 	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, base, size - 1, 0);
503 	if (error != 0) {
504 		perror("vm_set_desc(gdt)");
505 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
506 	}
507 
508 	gdtbase = base;
509 }
510 
511 static void
512 cb_exec(void *arg __unused, uint64_t rip)
513 {
514 	int error;
515 
516 	if (cr3 == 0)
517 		error = vm_setup_freebsd_registers_i386(vcpu, rip, gdtbase,
518 		    rsp);
519 	else
520 		error = vm_setup_freebsd_registers(vcpu, rip, cr3, gdtbase,
521 		    rsp);
522 	if (error) {
523 		perror("vm_setup_freebsd_registers");
524 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
525 	}
526 
527 	cb_exit(NULL, 0);
528 }
529 
530 /*
531  * Misc
532  */
533 
534 static void
535 cb_delay(void *arg __unused, int usec)
536 {
537 
538 	usleep(usec);
539 }
540 
541 static void
542 cb_exit(void *arg __unused, int v)
543 {
544 
545 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
546 	exit(v);
547 }
548 
549 static void
550 cb_getmem(void *arg __unused, uint64_t *ret_lowmem, uint64_t *ret_highmem)
551 {
552 
553 	*ret_lowmem = vm_get_lowmem_size(ctx);
554 	*ret_highmem = vm_get_highmem_size(ctx);
555 }
556 
/*
 * One loader environment entry; entries are kept on the singly-linked
 * list 'envhead'.
 */
struct env {
	char *str;		/* name=value */
	SLIST_ENTRY(env) next;	/* linkage on envhead */
};

/* List of environment entries, filled by addenv(), read by cb_getenv(). */
static SLIST_HEAD(envhead, env) envhead;
563 
564 static void
565 addenv(const char *str)
566 {
567 	struct env *env;
568 
569 	env = malloc(sizeof(struct env));
570 	if (env == NULL)
571 		err(EX_OSERR, "malloc");
572 	env->str = strdup(str);
573 	if (env->str == NULL)
574 		err(EX_OSERR, "strdup");
575 	SLIST_INSERT_HEAD(&envhead, env, next);
576 }
577 
578 static char *
579 cb_getenv(void *arg __unused, int num)
580 {
581 	int i;
582 	struct env *env;
583 
584 	i = 0;
585 	SLIST_FOREACH(env, &envhead, next) {
586 		if (i == num)
587 			return (env->str);
588 		i++;
589 	}
590 
591 	return (NULL);
592 }
593 
594 static int
595 cb_vm_set_register(void *arg __unused, int vcpuid, int reg, uint64_t val)
596 {
597 
598 	assert(vcpuid == BSP);
599 	return (vm_set_register(vcpu, reg, val));
600 }
601 
602 static int
603 cb_vm_set_desc(void *arg __unused, int vcpuid, int reg, uint64_t base,
604     u_int limit, u_int access)
605 {
606 
607 	assert(vcpuid == BSP);
608 	return (vm_set_desc(vcpu, reg, base, limit, access));
609 }
610 
611 static void
612 cb_swap_interpreter(void *arg __unused, const char *interp_req)
613 {
614 
615 	/*
616 	 * If the user specified a loader but we detected a mismatch, we should
617 	 * not try to pivot to a different loader on them.
618 	 */
619 	free(loader);
620 	if (explicit_loader_fd != -1) {
621 		perror("requested loader interpreter does not match guest userboot");
622 		cb_exit(NULL, 1);
623 	}
624 	if (interp_req == NULL || *interp_req == '\0') {
625 		perror("guest failed to request an interpreter");
626 		cb_exit(NULL, 1);
627 	}
628 
629 	if (asprintf(&loader, "userboot_%s.so", interp_req) == -1)
630 		err(EX_OSERR, "malloc");
631 	longjmp(jb, 1);
632 }
633 
634 static struct loader_callbacks cb = {
635 	.getc = cb_getc,
636 	.putc = cb_putc,
637 	.poll = cb_poll,
638 
639 	.open = cb_open,
640 	.close = cb_close,
641 	.isdir = cb_isdir,
642 	.read = cb_read,
643 	.readdir = cb_readdir,
644 	.seek = cb_seek,
645 	.stat = cb_stat,
646 
647 	.diskread = cb_diskread,
648 	.diskwrite = cb_diskwrite,
649 	.diskioctl = cb_diskioctl,
650 
651 	.copyin = cb_copyin,
652 	.copyout = cb_copyout,
653 	.setreg = cb_setreg,
654 	.setmsr = cb_setmsr,
655 	.setcr = cb_setcr,
656 	.setgdt = cb_setgdt,
657 	.exec = cb_exec,
658 
659 	.delay = cb_delay,
660 	.exit = cb_exit,
661 	.getmem = cb_getmem,
662 
663 	.getenv = cb_getenv,
664 
665 	/* Version 4 additions */
666 	.vm_set_register = cb_vm_set_register,
667 	.vm_set_desc = cb_vm_set_desc,
668 
669 	/* Version 5 additions */
670 	.swap_interpreter = cb_swap_interpreter,
671 };
672 
673 static int
674 altcons_open(char *path)
675 {
676 	struct stat sb;
677 	int err;
678 	int fd;
679 
680 	/*
681 	 * Allow stdio to be passed in so that the same string
682 	 * can be used for the bhyveload console and bhyve com-port
683 	 * parameters
684 	 */
685 	if (!strcmp(path, "stdio"))
686 		return (0);
687 
688 	err = stat(path, &sb);
689 	if (err == 0) {
690 		if (!S_ISCHR(sb.st_mode))
691 			err = ENOTSUP;
692 		else {
693 			fd = open(path, O_RDWR | O_NONBLOCK);
694 			if (fd < 0)
695 				err = errno;
696 			else
697 				consin_fd = consout_fd = fd;
698 		}
699 	}
700 
701 	return (err);
702 }
703 
704 static int
705 disk_open(char *path)
706 {
707 	int fd;
708 
709 	if (ndisks >= NDISKS)
710 		return (ERANGE);
711 
712 	fd = open(path, O_RDWR);
713 	if (fd < 0)
714 		return (errno);
715 
716 	disk_fd[ndisks] = fd;
717 	ndisks++;
718 
719 	return (0);
720 }
721 
722 static void
723 usage(void)
724 {
725 
726 	fprintf(stderr,
727 	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
728 	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
729 	    progname,
730 	    (int)strlen(progname), "");
731 	exit(1);
732 }
733 
734 static void
735 hostbase_open(const char *base)
736 {
737 
738 	if (hostbase_fd != -1)
739 		close(hostbase_fd);
740 	hostbase_fd = open(base, O_DIRECTORY | O_PATH);
741 	if (hostbase_fd == -1)
742 		err(EX_OSERR, "open");
743 }
744 
745 static void
746 loader_open(int bootfd)
747 {
748 	int fd;
749 
750 	if (loader == NULL) {
751 		loader = strdup("userboot.so");
752 		if (loader == NULL)
753 			err(EX_OSERR, "malloc");
754 	}
755 
756 	assert(bootfd >= 0 || explicit_loader_fd >= 0);
757 	if (explicit_loader_fd >= 0)
758 		fd = explicit_loader_fd;
759 	else
760 		fd = openat(bootfd, loader, O_RDONLY | O_RESOLVE_BENEATH);
761 	if (fd == -1)
762 		err(EX_OSERR, "openat");
763 
764 	loader_hdl = fdlopen(fd, RTLD_LOCAL);
765 	if (!loader_hdl)
766 		errx(EX_OSERR, "dlopen: %s", dlerror());
767 }
768 
769 int
770 main(int argc, char** argv)
771 {
772 	void (*func)(struct loader_callbacks *, void *, int, int);
773 	uint64_t mem_size;
774 	int bootfd, opt, error, memflags, need_reinit;
775 
776 	bootfd = -1;
777 	progname = basename(argv[0]);
778 
779 	memflags = 0;
780 	mem_size = 256 * MB;
781 
782 	consin_fd = STDIN_FILENO;
783 	consout_fd = STDOUT_FILENO;
784 
785 	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
786 		switch (opt) {
787 		case 'c':
788 			error = altcons_open(optarg);
789 			if (error != 0)
790 				errx(EX_USAGE, "Could not open '%s'", optarg);
791 			break;
792 
793 		case 'd':
794 			error = disk_open(optarg);
795 			if (error != 0)
796 				errx(EX_USAGE, "Could not open '%s'", optarg);
797 			break;
798 
799 		case 'e':
800 			addenv(optarg);
801 			break;
802 
803 		case 'h':
804 			hostbase_open(optarg);
805 			break;
806 
807 		case 'l':
808 			if (loader != NULL)
809 				errx(EX_USAGE, "-l can only be given once");
810 			loader = strdup(optarg);
811 			if (loader == NULL)
812 				err(EX_OSERR, "malloc");
813 			explicit_loader_fd = open(loader, O_RDONLY);
814 			if (explicit_loader_fd == -1)
815 				err(EX_OSERR, "%s", loader);
816 			break;
817 
818 		case 'm':
819 			error = vm_parse_memsize(optarg, &mem_size);
820 			if (error != 0)
821 				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
822 			break;
823 		case 'C':
824 			memflags |= VM_MEM_F_INCORE;
825 			break;
826 		case 'S':
827 			memflags |= VM_MEM_F_WIRED;
828 			break;
829 		case '?':
830 			usage();
831 		}
832 	}
833 
834 	argc -= optind;
835 	argv += optind;
836 
837 	if (argc != 1)
838 		usage();
839 
840 	vmname = argv[0];
841 
842 	need_reinit = 0;
843 	error = vm_create(vmname);
844 	if (error) {
845 		if (errno != EEXIST) {
846 			perror("vm_create");
847 			exit(1);
848 		}
849 		need_reinit = 1;
850 	}
851 
852 	ctx = vm_open(vmname);
853 	if (ctx == NULL) {
854 		perror("vm_open");
855 		exit(1);
856 	}
857 
858 	/*
859 	 * If we weren't given an explicit loader to use, we need to support the
860 	 * guest requesting a different one.
861 	 */
862 	if (explicit_loader_fd == -1) {
863 		bootfd = open("/boot", O_DIRECTORY | O_PATH);
864 		if (bootfd == -1) {
865 			perror("open");
866 			exit(1);
867 		}
868 	}
869 
870 	vcpu = vm_vcpu_open(ctx, BSP);
871 
872 	caph_cache_catpages();
873 	if (caph_enter() < 0) {
874 		perror("caph_enter");
875 		exit(1);
876 	}
877 
878 	/*
879 	 * setjmp in the case the guest wants to swap out interpreter,
880 	 * cb_swap_interpreter will swap out loader as appropriate and set
881 	 * need_reinit so that we end up in a clean state once again.
882 	 */
883 	if (setjmp(jb) != 0) {
884 		dlclose(loader_hdl);
885 		loader_hdl = NULL;
886 
887 		need_reinit = 1;
888 	}
889 
890 	if (need_reinit) {
891 		error = vm_reinit(ctx);
892 		if (error) {
893 			perror("vm_reinit");
894 			exit(1);
895 		}
896 	}
897 
898 	vm_set_memflags(ctx, memflags);
899 	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
900 	if (error) {
901 		perror("vm_setup_memory");
902 		exit(1);
903 	}
904 
905 	loader_open(bootfd);
906 	func = dlsym(loader_hdl, "loader_main");
907 	if (!func) {
908 		printf("%s\n", dlerror());
909 		free(loader);
910 		return (1);
911 	}
912 
913 	tcgetattr(consout_fd, &term);
914 	oldterm = term;
915 	cfmakeraw(&term);
916 	term.c_cflag |= CLOCAL;
917 
918 	tcsetattr(consout_fd, TCSAFLUSH, &term);
919 
920 	addenv("smbios.bios.vendor=BHYVE");
921 	addenv("boot_serial=1");
922 
923 	func(&cb, NULL, USERBOOT_VERSION_5, ndisks);
924 
925 	free(loader);
926 	return (0);
927 }
928