xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision f5e9c916afed4a948fe5c03bfaee038d165e12ab)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  *
54  * $FreeBSD$
55  */
56 
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
59 
60 #include <sys/ioctl.h>
61 #include <sys/stat.h>
62 #include <sys/disk.h>
63 #include <sys/queue.h>
64 
65 #include <machine/specialreg.h>
66 #include <machine/vmm.h>
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <err.h>
72 #include <fcntl.h>
73 #include <getopt.h>
74 #include <libgen.h>
75 #include <limits.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sysexits.h>
80 #include <termios.h>
81 #include <unistd.h>
82 
83 #include <vmmapi.h>
84 
85 #include "userboot.h"
86 
/* Size units, in bytes. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
/* Virtual CPU index passed to the vm_set_*() calls in this file. */
#define	BSP	0

/* Capacity of the disk_fd[] table. */
#define	NDISKS	32

/* Host directory prefix used by cb_open(); set from the -h option. */
static char *host_base;
/* term: raw console settings; oldterm: settings restored by cb_exit(). */
static struct termios term, oldterm;
/* Descriptors of the opened disk images; the first ndisks are valid. */
static int disk_fd[NDISKS];
static int ndisks;
/* Descriptors the console callbacks read from and write to. */
static int consin_fd, consout_fd;

static char *vmname, *progname;
/* VM handle returned by vm_open() in main(). */
static struct vmctx *ctx;

/* Recorded by cb_setgdt(), cb_setcr() and cb_setreg(); used by cb_exec(). */
static uint64_t gdtbase, cr3, rsp;

/* Forward declaration: the register callbacks call cb_exit() on failure. */
static void cb_exit(void *arg, int v);
105 
106 /*
107  * Console i/o callbacks
108  */
109 
110 static void
111 cb_putc(void *arg, int ch)
112 {
113 	char c = ch;
114 
115 	(void) write(consout_fd, &c, 1);
116 }
117 
118 static int
119 cb_getc(void *arg)
120 {
121 	char c;
122 
123 	if (read(consin_fd, &c, 1) == 1)
124 		return (c);
125 	return (-1);
126 }
127 
128 static int
129 cb_poll(void *arg)
130 {
131 	int n;
132 
133 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134 		return (n > 0);
135 	return (0);
136 }
137 
138 /*
139  * Host filesystem i/o callbacks
140  */
141 
/*
 * Handle handed out through *hp by cb_open() and consumed by the other
 * host filesystem callbacks; released by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is in use; zero: cf_u.fd */
	size_t cf_size;		/* size field assigned in cb_open() */
	struct stat cf_stat;	/* stat(2) result filled in by cb_open() */
	union {
		int fd;		/* regular file, from open(2) */
		DIR *dir;	/* directory, from opendir(3) */
	} cf_u;
};
151 
152 static int
153 cb_open(void *arg, const char *filename, void **hp)
154 {
155 	struct stat st;
156 	struct cb_file *cf;
157 	char path[PATH_MAX];
158 
159 	if (!host_base)
160 		return (ENOENT);
161 
162 	strlcpy(path, host_base, PATH_MAX);
163 	if (path[strlen(path) - 1] == '/')
164 		path[strlen(path) - 1] = 0;
165 	strlcat(path, filename, PATH_MAX);
166 	cf = malloc(sizeof(struct cb_file));
167 	if (stat(path, &cf->cf_stat) < 0) {
168 		free(cf);
169 		return (errno);
170 	}
171 
172 	cf->cf_size = st.st_size;
173 	if (S_ISDIR(cf->cf_stat.st_mode)) {
174 		cf->cf_isdir = 1;
175 		cf->cf_u.dir = opendir(path);
176 		if (!cf->cf_u.dir)
177 			goto out;
178 		*hp = cf;
179 		return (0);
180 	}
181 	if (S_ISREG(cf->cf_stat.st_mode)) {
182 		cf->cf_isdir = 0;
183 		cf->cf_u.fd = open(path, O_RDONLY);
184 		if (cf->cf_u.fd < 0)
185 			goto out;
186 		*hp = cf;
187 		return (0);
188 	}
189 
190 out:
191 	free(cf);
192 	return (EINVAL);
193 }
194 
195 static int
196 cb_close(void *arg, void *h)
197 {
198 	struct cb_file *cf = h;
199 
200 	if (cf->cf_isdir)
201 		closedir(cf->cf_u.dir);
202 	else
203 		close(cf->cf_u.fd);
204 	free(cf);
205 
206 	return (0);
207 }
208 
209 static int
210 cb_isdir(void *arg, void *h)
211 {
212 	struct cb_file *cf = h;
213 
214 	return (cf->cf_isdir);
215 }
216 
217 static int
218 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
219 {
220 	struct cb_file *cf = h;
221 	ssize_t sz;
222 
223 	if (cf->cf_isdir)
224 		return (EINVAL);
225 	sz = read(cf->cf_u.fd, buf, size);
226 	if (sz < 0)
227 		return (EINVAL);
228 	*resid = size - sz;
229 	return (0);
230 }
231 
232 static int
233 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
234 	   size_t *namelen_return, char *name)
235 {
236 	struct cb_file *cf = h;
237 	struct dirent *dp;
238 
239 	if (!cf->cf_isdir)
240 		return (EINVAL);
241 
242 	dp = readdir(cf->cf_u.dir);
243 	if (!dp)
244 		return (ENOENT);
245 
246 	/*
247 	 * Note: d_namlen is in the range 0..255 and therefore less
248 	 * than PATH_MAX so we don't need to test before copying.
249 	 */
250 	*fileno_return = dp->d_fileno;
251 	*type_return = dp->d_type;
252 	*namelen_return = dp->d_namlen;
253 	memcpy(name, dp->d_name, dp->d_namlen);
254 	name[dp->d_namlen] = 0;
255 
256 	return (0);
257 }
258 
259 static int
260 cb_seek(void *arg, void *h, uint64_t offset, int whence)
261 {
262 	struct cb_file *cf = h;
263 
264 	if (cf->cf_isdir)
265 		return (EINVAL);
266 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
267 		return (errno);
268 	return (0);
269 }
270 
271 static int
272 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
273 {
274 	struct cb_file *cf = h;
275 
276 	*mode = cf->cf_stat.st_mode;
277 	*uid = cf->cf_stat.st_uid;
278 	*gid = cf->cf_stat.st_gid;
279 	*size = cf->cf_stat.st_size;
280 	return (0);
281 }
282 
283 /*
284  * Disk image i/o callbacks
285  */
286 
287 static int
288 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
289 	    size_t *resid)
290 {
291 	ssize_t n;
292 
293 	if (unit < 0 || unit >= ndisks )
294 		return (EIO);
295 	n = pread(disk_fd[unit], to, size, from);
296 	if (n < 0)
297 		return (errno);
298 	*resid = size - n;
299 	return (0);
300 }
301 
302 static int
303 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
304 {
305 	struct stat sb;
306 
307 	if (unit < 0 || unit >= ndisks)
308 		return (EBADF);
309 
310 	switch (cmd) {
311 	case DIOCGSECTORSIZE:
312 		*(u_int *)data = 512;
313 		break;
314 	case DIOCGMEDIASIZE:
315 		if (fstat(disk_fd[unit], &sb) == 0)
316 			*(off_t *)data = sb.st_size;
317 		else
318 			return (ENOTTY);
319 		break;
320 	default:
321 		return (ENOTTY);
322 	}
323 
324 	return (0);
325 }
326 
327 /*
328  * Guest virtual machine i/o callbacks
329  */
330 static int
331 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
332 {
333 	char *ptr;
334 
335 	to &= 0x7fffffff;
336 
337 	ptr = vm_map_gpa(ctx, to, size);
338 	if (ptr == NULL)
339 		return (EFAULT);
340 
341 	memcpy(ptr, from, size);
342 	return (0);
343 }
344 
345 static int
346 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
347 {
348 	char *ptr;
349 
350 	from &= 0x7fffffff;
351 
352 	ptr = vm_map_gpa(ctx, from, size);
353 	if (ptr == NULL)
354 		return (EFAULT);
355 
356 	memcpy(to, ptr, size);
357 	return (0);
358 }
359 
360 static void
361 cb_setreg(void *arg, int r, uint64_t v)
362 {
363 	int error;
364 	enum vm_reg_name vmreg;
365 
366 	vmreg = VM_REG_LAST;
367 
368 	switch (r) {
369 	case 4:
370 		vmreg = VM_REG_GUEST_RSP;
371 		rsp = v;
372 		break;
373 	default:
374 		break;
375 	}
376 
377 	if (vmreg == VM_REG_LAST) {
378 		printf("test_setreg(%d): not implemented\n", r);
379 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
380 	}
381 
382 	error = vm_set_register(ctx, BSP, vmreg, v);
383 	if (error) {
384 		perror("vm_set_register");
385 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
386 	}
387 }
388 
389 static void
390 cb_setmsr(void *arg, int r, uint64_t v)
391 {
392 	int error;
393 	enum vm_reg_name vmreg;
394 
395 	vmreg = VM_REG_LAST;
396 
397 	switch (r) {
398 	case MSR_EFER:
399 		vmreg = VM_REG_GUEST_EFER;
400 		break;
401 	default:
402 		break;
403 	}
404 
405 	if (vmreg == VM_REG_LAST) {
406 		printf("test_setmsr(%d): not implemented\n", r);
407 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
408 	}
409 
410 	error = vm_set_register(ctx, BSP, vmreg, v);
411 	if (error) {
412 		perror("vm_set_msr");
413 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
414 	}
415 }
416 
417 static void
418 cb_setcr(void *arg, int r, uint64_t v)
419 {
420 	int error;
421 	enum vm_reg_name vmreg;
422 
423 	vmreg = VM_REG_LAST;
424 
425 	switch (r) {
426 	case 0:
427 		vmreg = VM_REG_GUEST_CR0;
428 		break;
429 	case 3:
430 		vmreg = VM_REG_GUEST_CR3;
431 		cr3 = v;
432 		break;
433 	case 4:
434 		vmreg = VM_REG_GUEST_CR4;
435 		break;
436 	default:
437 		break;
438 	}
439 
440 	if (vmreg == VM_REG_LAST) {
441 		printf("test_setcr(%d): not implemented\n", r);
442 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
443 	}
444 
445 	error = vm_set_register(ctx, BSP, vmreg, v);
446 	if (error) {
447 		perror("vm_set_cr");
448 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
449 	}
450 }
451 
452 static void
453 cb_setgdt(void *arg, uint64_t base, size_t size)
454 {
455 	int error;
456 
457 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
458 	if (error != 0) {
459 		perror("vm_set_desc(gdt)");
460 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
461 	}
462 
463 	gdtbase = base;
464 }
465 
466 static void
467 cb_exec(void *arg, uint64_t rip)
468 {
469 	int error;
470 
471 	if (cr3 == 0)
472 		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
473 		    rsp);
474 	else
475 		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
476 		    rsp);
477 	if (error) {
478 		perror("vm_setup_freebsd_registers");
479 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
480 	}
481 
482 	cb_exit(NULL, 0);
483 }
484 
485 /*
486  * Misc
487  */
488 
/*
 * Suspend execution for usec microseconds via usleep(3); its result
 * is ignored.
 */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
495 
496 static void
497 cb_exit(void *arg, int v)
498 {
499 
500 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
501 	exit(v);
502 }
503 
504 static void
505 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
506 {
507 
508 	*ret_lowmem = vm_get_lowmem_size(ctx);
509 	*ret_highmem = vm_get_highmem_size(ctx);
510 }
511 
/*
 * One loader environment entry.  The string is referenced, not copied
 * (see addenv()), so it must stay valid while the entry is in use.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* Head of the environment list filled by addenv(), read by cb_getenv(). */
static SLIST_HEAD(envhead, env) envhead;
518 
519 static void
520 addenv(const char *str)
521 {
522 	struct env *env;
523 
524 	env = malloc(sizeof(struct env));
525 	env->str = str;
526 	SLIST_INSERT_HEAD(&envhead, env, next);
527 }
528 
529 static const char *
530 cb_getenv(void *arg, int num)
531 {
532 	int i;
533 	struct env *env;
534 
535 	i = 0;
536 	SLIST_FOREACH(env, &envhead, next) {
537 		if (i == num)
538 			return (env->str);
539 		i++;
540 	}
541 
542 	return (NULL);
543 }
544 
545 static int
546 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val)
547 {
548 
549 	return (vm_set_register(ctx, vcpu, reg, val));
550 }
551 
552 static int
553 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit,
554     u_int access)
555 {
556 
557 	return (vm_set_desc(ctx, vcpu, reg, base, limit, access));
558 }
559 
/*
 * Callback table passed to the loader's loader_main() entry point by
 * main().
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register setup */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	/* Loader environment */
	.getenv = cb_getenv,

	/* Version 4 additions */
	.vm_set_register = cb_vm_set_register,
	.vm_set_desc = cb_vm_set_desc,
};
594 
595 static int
596 altcons_open(char *path)
597 {
598 	struct stat sb;
599 	int err;
600 	int fd;
601 
602 	/*
603 	 * Allow stdio to be passed in so that the same string
604 	 * can be used for the bhyveload console and bhyve com-port
605 	 * parameters
606 	 */
607 	if (!strcmp(path, "stdio"))
608 		return (0);
609 
610 	err = stat(path, &sb);
611 	if (err == 0) {
612 		if (!S_ISCHR(sb.st_mode))
613 			err = ENOTSUP;
614 		else {
615 			fd = open(path, O_RDWR | O_NONBLOCK);
616 			if (fd < 0)
617 				err = errno;
618 			else
619 				consin_fd = consout_fd = fd;
620 		}
621 	}
622 
623 	return (err);
624 }
625 
626 static int
627 disk_open(char *path)
628 {
629 	int err, fd;
630 
631 	if (ndisks >= NDISKS)
632 		return (ERANGE);
633 
634 	err = 0;
635 	fd = open(path, O_RDONLY);
636 
637 	if (fd > 0) {
638 		disk_fd[ndisks] = fd;
639 		ndisks++;
640 	} else
641 		err = errno;
642 
643 	return (err);
644 }
645 
646 static void
647 usage(void)
648 {
649 
650 	fprintf(stderr,
651 	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
652 	    "       %*s [-h <host-path>] [-m mem-size] <vmname>\n",
653 	    progname,
654 	    (int)strlen(progname), "");
655 	exit(1);
656 }
657 
/*
 * Entry point: parse the command line, create (or reinitialize) and
 * open the named VM, set up its memory, load the userboot shared
 * object and call its loader_main() with the callback table.
 */
int
main(int argc, char** argv)
{
	char *loader;
	void *h;
	void (*func)(struct loader_callbacks *, void *, int, int);
	uint64_t mem_size;
	int opt, error, need_reinit, memflags;

	progname = basename(argv[0]);

	loader = NULL;

	/* Defaults: 256 MB of guest memory, console on stdin/stdout. */
	memflags = 0;
	mem_size = 256 * MB;

	consin_fd = STDIN_FILENO;
	consout_fd = STDOUT_FILENO;

	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
		switch (opt) {
		case 'c':
			/* Alternate console device (or "stdio"). */
			error = altcons_open(optarg);
			if (error != 0)
				errx(EX_USAGE, "Could not open '%s'", optarg);
			break;

		case 'd':
			/* Disk image; may be given multiple times. */
			error = disk_open(optarg);
			if (error != 0)
				errx(EX_USAGE, "Could not open '%s'", optarg);
			break;

		case 'e':
			/* name=value entry for the loader environment. */
			addenv(optarg);
			break;

		case 'h':
			/* Host directory prefix used by cb_open(). */
			host_base = optarg;
			break;

		case 'l':
			/* Path of the loader shared object to dlopen(). */
			if (loader != NULL)
				errx(EX_USAGE, "-l can only be given once");
			loader = strdup(optarg);
			if (loader == NULL)
				err(EX_OSERR, "malloc");
			break;

		case 'm':
			error = vm_parse_memsize(optarg, &mem_size);
			if (error != 0)
				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
			break;
		case 'C':
			memflags |= VM_MEM_F_INCORE;
			break;
		case 'S':
			memflags |= VM_MEM_F_WIRED;
			break;
		case '?':
			/* usage() exits, so there is no fall-through. */
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	/* Exactly one positional argument is required: the VM name. */
	if (argc != 1)
		usage();

	vmname = argv[0];

	/*
	 * If vm_create() fails with EEXIST the VM is reused and
	 * reinitialized below; any other failure is fatal.
	 */
	need_reinit = 0;
	error = vm_create(vmname);
	if (error) {
		if (errno != EEXIST) {
			perror("vm_create");
			exit(1);
		}
		need_reinit = 1;
	}

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(1);
	}

	if (need_reinit) {
		error = vm_reinit(ctx);
		if (error) {
			perror("vm_reinit");
			exit(1);
		}
	}

	vm_set_memflags(ctx, memflags);
	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
	if (error) {
		perror("vm_setup_memory");
		exit(1);
	}

	/* Fall back to the default loader when -l was not given. */
	if (loader == NULL) {
		loader = strdup("/boot/userboot.so");
		if (loader == NULL)
			err(EX_OSERR, "malloc");
	}
	h = dlopen(loader, RTLD_LOCAL);
	if (!h) {
		printf("%s\n", dlerror());
		free(loader);
		return (1);
	}
	func = dlsym(h, "loader_main");
	if (!func) {
		printf("%s\n", dlerror());
		free(loader);
		return (1);
	}

	/*
	 * Save the current terminal settings in oldterm (restored by
	 * cb_exit()) and switch the console to raw mode with CLOCAL set.
	 */
	tcgetattr(consout_fd, &term);
	oldterm = term;
	cfmakeraw(&term);
	term.c_cflag |= CLOCAL;

	tcsetattr(consout_fd, TCSAFLUSH, &term);

	/* Environment entries always supplied in addition to any -e options. */
	addenv("smbios.bios.vendor=BHYVE");
	addenv("boot_serial=1");

	/* Hand control to the loader with the callback table and disk count. */
	func(&cb, NULL, USERBOOT_VERSION_4, ndisks);

	free(loader);
	return (0);
}
795