xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision aa24f48b361effe51163877d84f1b70d32b77e04)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  *
54  * $FreeBSD$
55  */
56 
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
59 
60 #include <sys/ioctl.h>
61 #include <sys/stat.h>
62 #include <sys/disk.h>
63 #include <sys/queue.h>
64 
65 #include <machine/specialreg.h>
66 #include <machine/vmm.h>
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <err.h>
72 #include <fcntl.h>
73 #include <getopt.h>
74 #include <libgen.h>
75 #include <limits.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sysexits.h>
80 #include <termios.h>
81 #include <unistd.h>
82 
83 #include <vmmapi.h>
84 
85 #include "userboot.h"
86 
/* Size units used when computing memory sizes. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
/* Index of the virtual CPU whose registers the callbacks program. */
#define	BSP	0

/* Capacity of the disk_fd[] table. */
#define	NDISKS	32

/* Host directory prefix for cb_open(); set by -h, NULL when not given. */
static char *host_base;
/* Raw-mode console settings (term) and the saved originals (oldterm). */
static struct termios term, oldterm;
/* Descriptors of the disk images opened by disk_open(), in -d order. */
static int disk_fd[NDISKS];
/* Number of valid entries in disk_fd[]. */
static int ndisks;
/* Console descriptors; stdin/stdout unless replaced by altcons_open(). */
static int consin_fd, consout_fd;

static char *vmname, *progname;
/* VM handle returned by vm_open() in main(). */
static struct vmctx *ctx;

/*
 * Values recorded by cb_setgdt(), cb_setcr() and cb_setreg() and later
 * consumed by cb_exec().
 */
static uint64_t gdtbase, cr3, rsp;

/* Declared early because the register callbacks call it on failure. */
static void cb_exit(void *arg, int v);
105 
106 /*
107  * Console i/o callbacks
108  */
109 
110 static void
111 cb_putc(void *arg, int ch)
112 {
113 	char c = ch;
114 
115 	(void) write(consout_fd, &c, 1);
116 }
117 
118 static int
119 cb_getc(void *arg)
120 {
121 	char c;
122 
123 	if (read(consin_fd, &c, 1) == 1)
124 		return (c);
125 	return (-1);
126 }
127 
128 static int
129 cb_poll(void *arg)
130 {
131 	int n;
132 
133 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134 		return (n > 0);
135 	return (0);
136 }
137 
138 /*
139  * Host filesystem i/o callbacks
140  */
141 
/*
 * Handle for a host file or directory opened by cb_open() and passed
 * back to the other host filesystem callbacks as their 'h' argument.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid, else cf_u.fd */
	size_t cf_size;		/* st_size captured at open time */
	struct stat cf_stat;	/* stat(2) result captured at open time */
	union {
		int fd;		/* regular file descriptor */
		DIR *dir;	/* directory stream */
	} cf_u;
};
151 
152 static int
153 cb_open(void *arg, const char *filename, void **hp)
154 {
155 	struct cb_file *cf;
156 	char path[PATH_MAX];
157 
158 	if (!host_base)
159 		return (ENOENT);
160 
161 	strlcpy(path, host_base, PATH_MAX);
162 	if (path[strlen(path) - 1] == '/')
163 		path[strlen(path) - 1] = 0;
164 	strlcat(path, filename, PATH_MAX);
165 	cf = malloc(sizeof(struct cb_file));
166 	if (stat(path, &cf->cf_stat) < 0) {
167 		free(cf);
168 		return (errno);
169 	}
170 
171 	cf->cf_size = cf->cf_stat.st_size;
172 	if (S_ISDIR(cf->cf_stat.st_mode)) {
173 		cf->cf_isdir = 1;
174 		cf->cf_u.dir = opendir(path);
175 		if (!cf->cf_u.dir)
176 			goto out;
177 		*hp = cf;
178 		return (0);
179 	}
180 	if (S_ISREG(cf->cf_stat.st_mode)) {
181 		cf->cf_isdir = 0;
182 		cf->cf_u.fd = open(path, O_RDONLY);
183 		if (cf->cf_u.fd < 0)
184 			goto out;
185 		*hp = cf;
186 		return (0);
187 	}
188 
189 out:
190 	free(cf);
191 	return (EINVAL);
192 }
193 
194 static int
195 cb_close(void *arg, void *h)
196 {
197 	struct cb_file *cf = h;
198 
199 	if (cf->cf_isdir)
200 		closedir(cf->cf_u.dir);
201 	else
202 		close(cf->cf_u.fd);
203 	free(cf);
204 
205 	return (0);
206 }
207 
208 static int
209 cb_isdir(void *arg, void *h)
210 {
211 	struct cb_file *cf = h;
212 
213 	return (cf->cf_isdir);
214 }
215 
216 static int
217 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
218 {
219 	struct cb_file *cf = h;
220 	ssize_t sz;
221 
222 	if (cf->cf_isdir)
223 		return (EINVAL);
224 	sz = read(cf->cf_u.fd, buf, size);
225 	if (sz < 0)
226 		return (EINVAL);
227 	*resid = size - sz;
228 	return (0);
229 }
230 
231 static int
232 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
233 	   size_t *namelen_return, char *name)
234 {
235 	struct cb_file *cf = h;
236 	struct dirent *dp;
237 
238 	if (!cf->cf_isdir)
239 		return (EINVAL);
240 
241 	dp = readdir(cf->cf_u.dir);
242 	if (!dp)
243 		return (ENOENT);
244 
245 	/*
246 	 * Note: d_namlen is in the range 0..255 and therefore less
247 	 * than PATH_MAX so we don't need to test before copying.
248 	 */
249 	*fileno_return = dp->d_fileno;
250 	*type_return = dp->d_type;
251 	*namelen_return = dp->d_namlen;
252 	memcpy(name, dp->d_name, dp->d_namlen);
253 	name[dp->d_namlen] = 0;
254 
255 	return (0);
256 }
257 
258 static int
259 cb_seek(void *arg, void *h, uint64_t offset, int whence)
260 {
261 	struct cb_file *cf = h;
262 
263 	if (cf->cf_isdir)
264 		return (EINVAL);
265 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
266 		return (errno);
267 	return (0);
268 }
269 
270 static int
271 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
272 {
273 	struct cb_file *cf = h;
274 
275 	*mode = cf->cf_stat.st_mode;
276 	*uid = cf->cf_stat.st_uid;
277 	*gid = cf->cf_stat.st_gid;
278 	*size = cf->cf_stat.st_size;
279 	return (0);
280 }
281 
282 /*
283  * Disk image i/o callbacks
284  */
285 
286 static int
287 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
288 	    size_t *resid)
289 {
290 	ssize_t n;
291 
292 	if (unit < 0 || unit >= ndisks )
293 		return (EIO);
294 	n = pread(disk_fd[unit], to, size, from);
295 	if (n < 0)
296 		return (errno);
297 	*resid = size - n;
298 	return (0);
299 }
300 
301 static int
302 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
303 {
304 	struct stat sb;
305 
306 	if (unit < 0 || unit >= ndisks)
307 		return (EBADF);
308 
309 	switch (cmd) {
310 	case DIOCGSECTORSIZE:
311 		*(u_int *)data = 512;
312 		break;
313 	case DIOCGMEDIASIZE:
314 		if (fstat(disk_fd[unit], &sb) != 0)
315 			return (ENOTTY);
316 		if (S_ISCHR(sb.st_mode) &&
317 		    ioctl(disk_fd[unit], DIOCGMEDIASIZE, &sb.st_size) != 0)
318 				return (ENOTTY);
319 		*(off_t *)data = sb.st_size;
320 		break;
321 	default:
322 		return (ENOTTY);
323 	}
324 
325 	return (0);
326 }
327 
328 /*
329  * Guest virtual machine i/o callbacks
330  */
331 static int
332 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
333 {
334 	char *ptr;
335 
336 	to &= 0x7fffffff;
337 
338 	ptr = vm_map_gpa(ctx, to, size);
339 	if (ptr == NULL)
340 		return (EFAULT);
341 
342 	memcpy(ptr, from, size);
343 	return (0);
344 }
345 
346 static int
347 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
348 {
349 	char *ptr;
350 
351 	from &= 0x7fffffff;
352 
353 	ptr = vm_map_gpa(ctx, from, size);
354 	if (ptr == NULL)
355 		return (EFAULT);
356 
357 	memcpy(to, ptr, size);
358 	return (0);
359 }
360 
361 static void
362 cb_setreg(void *arg, int r, uint64_t v)
363 {
364 	int error;
365 	enum vm_reg_name vmreg;
366 
367 	vmreg = VM_REG_LAST;
368 
369 	switch (r) {
370 	case 4:
371 		vmreg = VM_REG_GUEST_RSP;
372 		rsp = v;
373 		break;
374 	default:
375 		break;
376 	}
377 
378 	if (vmreg == VM_REG_LAST) {
379 		printf("test_setreg(%d): not implemented\n", r);
380 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
381 	}
382 
383 	error = vm_set_register(ctx, BSP, vmreg, v);
384 	if (error) {
385 		perror("vm_set_register");
386 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
387 	}
388 }
389 
390 static void
391 cb_setmsr(void *arg, int r, uint64_t v)
392 {
393 	int error;
394 	enum vm_reg_name vmreg;
395 
396 	vmreg = VM_REG_LAST;
397 
398 	switch (r) {
399 	case MSR_EFER:
400 		vmreg = VM_REG_GUEST_EFER;
401 		break;
402 	default:
403 		break;
404 	}
405 
406 	if (vmreg == VM_REG_LAST) {
407 		printf("test_setmsr(%d): not implemented\n", r);
408 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
409 	}
410 
411 	error = vm_set_register(ctx, BSP, vmreg, v);
412 	if (error) {
413 		perror("vm_set_msr");
414 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
415 	}
416 }
417 
418 static void
419 cb_setcr(void *arg, int r, uint64_t v)
420 {
421 	int error;
422 	enum vm_reg_name vmreg;
423 
424 	vmreg = VM_REG_LAST;
425 
426 	switch (r) {
427 	case 0:
428 		vmreg = VM_REG_GUEST_CR0;
429 		break;
430 	case 3:
431 		vmreg = VM_REG_GUEST_CR3;
432 		cr3 = v;
433 		break;
434 	case 4:
435 		vmreg = VM_REG_GUEST_CR4;
436 		break;
437 	default:
438 		break;
439 	}
440 
441 	if (vmreg == VM_REG_LAST) {
442 		printf("test_setcr(%d): not implemented\n", r);
443 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
444 	}
445 
446 	error = vm_set_register(ctx, BSP, vmreg, v);
447 	if (error) {
448 		perror("vm_set_cr");
449 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
450 	}
451 }
452 
453 static void
454 cb_setgdt(void *arg, uint64_t base, size_t size)
455 {
456 	int error;
457 
458 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
459 	if (error != 0) {
460 		perror("vm_set_desc(gdt)");
461 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
462 	}
463 
464 	gdtbase = base;
465 }
466 
467 static void
468 cb_exec(void *arg, uint64_t rip)
469 {
470 	int error;
471 
472 	if (cr3 == 0)
473 		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
474 		    rsp);
475 	else
476 		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
477 		    rsp);
478 	if (error) {
479 		perror("vm_setup_freebsd_registers");
480 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
481 	}
482 
483 	cb_exit(NULL, 0);
484 }
485 
486 /*
487  * Misc
488  */
489 
/*
 * Suspend execution for 'usec' microseconds via usleep(3).
 */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
496 
497 static void
498 cb_exit(void *arg, int v)
499 {
500 
501 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
502 	exit(v);
503 }
504 
505 static void
506 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
507 {
508 
509 	*ret_lowmem = vm_get_lowmem_size(ctx);
510 	*ret_highmem = vm_get_highmem_size(ctx);
511 }
512 
/*
 * One loader environment entry.  Entries are kept on a singly-linked
 * list; the string itself is referenced, not copied.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/* Head of the environment list walked by cb_getenv(). */
static SLIST_HEAD(envhead, env) envhead;
519 
520 static void
521 addenv(const char *str)
522 {
523 	struct env *env;
524 
525 	env = malloc(sizeof(struct env));
526 	env->str = str;
527 	SLIST_INSERT_HEAD(&envhead, env, next);
528 }
529 
530 static const char *
531 cb_getenv(void *arg, int num)
532 {
533 	int i;
534 	struct env *env;
535 
536 	i = 0;
537 	SLIST_FOREACH(env, &envhead, next) {
538 		if (i == num)
539 			return (env->str);
540 		i++;
541 	}
542 
543 	return (NULL);
544 }
545 
546 static int
547 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val)
548 {
549 
550 	return (vm_set_register(ctx, vcpu, reg, val));
551 }
552 
553 static int
554 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit,
555     u_int access)
556 {
557 
558 	return (vm_set_desc(ctx, vcpu, reg, base, limit, access));
559 }
560 
/*
 * Callback table handed to the loader's loader_main() entry point by
 * main().  Each member points at the matching cb_* function in this file.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register setup */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,

	/* Version 4 additions */
	.vm_set_register = cb_vm_set_register,
	.vm_set_desc = cb_vm_set_desc,
};
595 
596 static int
597 altcons_open(char *path)
598 {
599 	struct stat sb;
600 	int err;
601 	int fd;
602 
603 	/*
604 	 * Allow stdio to be passed in so that the same string
605 	 * can be used for the bhyveload console and bhyve com-port
606 	 * parameters
607 	 */
608 	if (!strcmp(path, "stdio"))
609 		return (0);
610 
611 	err = stat(path, &sb);
612 	if (err == 0) {
613 		if (!S_ISCHR(sb.st_mode))
614 			err = ENOTSUP;
615 		else {
616 			fd = open(path, O_RDWR | O_NONBLOCK);
617 			if (fd < 0)
618 				err = errno;
619 			else
620 				consin_fd = consout_fd = fd;
621 		}
622 	}
623 
624 	return (err);
625 }
626 
627 static int
628 disk_open(char *path)
629 {
630 	int err, fd;
631 
632 	if (ndisks >= NDISKS)
633 		return (ERANGE);
634 
635 	err = 0;
636 	fd = open(path, O_RDONLY);
637 
638 	if (fd > 0) {
639 		disk_fd[ndisks] = fd;
640 		ndisks++;
641 	} else
642 		err = errno;
643 
644 	return (err);
645 }
646 
647 static void
648 usage(void)
649 {
650 
651 	fprintf(stderr,
652 	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
653 	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
654 	    progname,
655 	    (int)strlen(progname), "");
656 	exit(1);
657 }
658 
/*
 * Program entry point: parse the command line, create (or reinitialize)
 * and open the named VM, set up its memory, load the userboot loader
 * shared object and hand control to its loader_main() function together
 * with the callback table defined in this file.
 */
int
main(int argc, char** argv)
{
	char *loader;
	void *h;
	void (*func)(struct loader_callbacks *, void *, int, int);
	uint64_t mem_size;
	int opt, error, need_reinit, memflags;

	progname = basename(argv[0]);

	loader = NULL;

	/* Defaults: 256MB of guest memory, console on stdin/stdout. */
	memflags = 0;
	mem_size = 256 * MB;

	consin_fd = STDIN_FILENO;
	consout_fd = STDOUT_FILENO;

	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
		switch (opt) {
		case 'c':
			/* Alternate console device. */
			error = altcons_open(optarg);
			if (error != 0)
				errx(EX_USAGE, "Could not open '%s'", optarg);
			break;

		case 'd':
			/* Disk image; may be repeated up to NDISKS times. */
			error = disk_open(optarg);
			if (error != 0)
				errx(EX_USAGE, "Could not open '%s'", optarg);
			break;

		case 'e':
			/* Loader environment entry, name=value. */
			addenv(optarg);
			break;

		case 'h':
			/* Host directory exposed through cb_open(). */
			host_base = optarg;
			break;

		case 'l':
			/* Path of the loader shared object. */
			if (loader != NULL)
				errx(EX_USAGE, "-l can only be given once");
			loader = strdup(optarg);
			if (loader == NULL)
				err(EX_OSERR, "malloc");
			break;

		case 'm':
			error = vm_parse_memsize(optarg, &mem_size);
			if (error != 0)
				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
			break;
		case 'C':
			memflags |= VM_MEM_F_INCORE;
			break;
		case 'S':
			memflags |= VM_MEM_F_WIRED;
			break;
		case '?':
			/* usage() does not return. */
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	/* Exactly one positional argument is required: the VM name. */
	if (argc != 1)
		usage();

	vmname = argv[0];

	/*
	 * Create the VM.  If it already exists (EEXIST) it is opened and
	 * reinitialized below instead.
	 */
	need_reinit = 0;
	error = vm_create(vmname);
	if (error) {
		if (errno != EEXIST) {
			perror("vm_create");
			exit(1);
		}
		need_reinit = 1;
	}

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(1);
	}

	if (need_reinit) {
		error = vm_reinit(ctx);
		if (error) {
			perror("vm_reinit");
			exit(1);
		}
	}

	vm_set_memflags(ctx, memflags);
	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
	if (error) {
		perror("vm_setup_memory");
		exit(1);
	}

	/* Fall back to the default loader when -l was not given. */
	if (loader == NULL) {
		loader = strdup("/boot/userboot.so");
		if (loader == NULL)
			err(EX_OSERR, "malloc");
	}
	h = dlopen(loader, RTLD_LOCAL);
	if (!h) {
		printf("%s\n", dlerror());
		free(loader);
		return (1);
	}
	func = dlsym(h, "loader_main");
	if (!func) {
		printf("%s\n", dlerror());
		free(loader);
		return (1);
	}

	/*
	 * Save the current console settings in oldterm (cb_exit() restores
	 * them) and switch the console to raw mode with CLOCAL set.
	 */
	tcgetattr(consout_fd, &term);
	oldterm = term;
	cfmakeraw(&term);
	term.c_cflag |= CLOCAL;

	tcsetattr(consout_fd, TCSAFLUSH, &term);

	/* Environment entries that are always passed to the loader. */
	addenv("smbios.bios.vendor=BHYVE");
	addenv("boot_serial=1");

	/* Hand control to the loader with the version 4 callback table. */
	func(&cb, NULL, USERBOOT_VERSION_4, ndisks);

	/* Reached only if loader_main() returns. */
	free(loader);
	return (0);
}
796