xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision 13de33a5dc2304b13d595d75d48c51793958474f)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  *
54  * $FreeBSD$
55  */
56 
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
59 
60 #include <sys/ioctl.h>
61 #include <sys/stat.h>
62 #include <sys/disk.h>
63 #include <sys/queue.h>
64 
65 #include <machine/specialreg.h>
66 #include <machine/vmm.h>
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <err.h>
72 #include <fcntl.h>
73 #include <getopt.h>
74 #include <libgen.h>
75 #include <limits.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sysexits.h>
80 #include <termios.h>
81 #include <unistd.h>
82 
83 #include <vmmapi.h>
84 
85 #include "userboot.h"
86 
/* Byte-count multipliers used for memory sizes and guest addresses. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
/* vcpu index passed to every vmmapi call in this file (presumably the
 * boot-strap processor -- confirm against vmmapi). */
#define	BSP	0

/* Host directory given with -h; prefix for paths opened by cb_open(). */
static char *host_base;
/* oldterm is the console state saved in main() and restored by cb_exit();
 * term is the raw-mode copy that main() installs. */
static struct termios term, oldterm;
/* Descriptor of the -d disk image, or -1 when no disk is available. */
static int disk_fd = -1;
/* Console descriptors; main() defaults them to stdin/stdout and
 * altcons_open() may redirect both to a character device. */
static int consin_fd, consout_fd;

static char *vmname, *progname;
/* Handle returned by vm_open() for the virtual machine being loaded. */
static struct vmctx *ctx;

/* Values recorded by cb_setgdt(), cb_setcr() and cb_setreg() and later
 * handed to vm_setup_freebsd_registers() from cb_exec(). */
static uint64_t gdtbase, cr3, rsp;

/* Forward declaration: the register callbacks call cb_exit() on failure. */
static void cb_exit(void *arg, int v);
102 
103 /*
104  * Console i/o callbacks
105  */
106 
107 static void
108 cb_putc(void *arg, int ch)
109 {
110 	char c = ch;
111 
112 	(void) write(consout_fd, &c, 1);
113 }
114 
115 static int
116 cb_getc(void *arg)
117 {
118 	char c;
119 
120 	if (read(consin_fd, &c, 1) == 1)
121 		return (c);
122 	return (-1);
123 }
124 
125 static int
126 cb_poll(void *arg)
127 {
128 	int n;
129 
130 	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
131 		return (n > 0);
132 	return (0);
133 }
134 
135 /*
136  * Host filesystem i/o callbacks
137  */
138 
/*
 * Handle for a host file or directory opened through cb_open() and
 * released by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* nonzero: cf_u.dir is valid; zero: cf_u.fd */
	size_t cf_size;		/* size recorded by cb_open() */
	struct stat cf_stat;	/* stat(2) result captured by cb_open() */
	union {
		int fd;		/* regular file, opened O_RDONLY */
		DIR *dir;	/* directory stream from opendir(3) */
	} cf_u;
};
148 
149 static int
150 cb_open(void *arg, const char *filename, void **hp)
151 {
152 	struct stat st;
153 	struct cb_file *cf;
154 	char path[PATH_MAX];
155 
156 	if (!host_base)
157 		return (ENOENT);
158 
159 	strlcpy(path, host_base, PATH_MAX);
160 	if (path[strlen(path) - 1] == '/')
161 		path[strlen(path) - 1] = 0;
162 	strlcat(path, filename, PATH_MAX);
163 	cf = malloc(sizeof(struct cb_file));
164 	if (stat(path, &cf->cf_stat) < 0) {
165 		free(cf);
166 		return (errno);
167 	}
168 
169 	cf->cf_size = st.st_size;
170 	if (S_ISDIR(cf->cf_stat.st_mode)) {
171 		cf->cf_isdir = 1;
172 		cf->cf_u.dir = opendir(path);
173 		if (!cf->cf_u.dir)
174 			goto out;
175 		*hp = cf;
176 		return (0);
177 	}
178 	if (S_ISREG(cf->cf_stat.st_mode)) {
179 		cf->cf_isdir = 0;
180 		cf->cf_u.fd = open(path, O_RDONLY);
181 		if (cf->cf_u.fd < 0)
182 			goto out;
183 		*hp = cf;
184 		return (0);
185 	}
186 
187 out:
188 	free(cf);
189 	return (EINVAL);
190 }
191 
192 static int
193 cb_close(void *arg, void *h)
194 {
195 	struct cb_file *cf = h;
196 
197 	if (cf->cf_isdir)
198 		closedir(cf->cf_u.dir);
199 	else
200 		close(cf->cf_u.fd);
201 	free(cf);
202 
203 	return (0);
204 }
205 
206 static int
207 cb_isdir(void *arg, void *h)
208 {
209 	struct cb_file *cf = h;
210 
211 	return (cf->cf_isdir);
212 }
213 
214 static int
215 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
216 {
217 	struct cb_file *cf = h;
218 	ssize_t sz;
219 
220 	if (cf->cf_isdir)
221 		return (EINVAL);
222 	sz = read(cf->cf_u.fd, buf, size);
223 	if (sz < 0)
224 		return (EINVAL);
225 	*resid = size - sz;
226 	return (0);
227 }
228 
229 static int
230 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
231 	   size_t *namelen_return, char *name)
232 {
233 	struct cb_file *cf = h;
234 	struct dirent *dp;
235 
236 	if (!cf->cf_isdir)
237 		return (EINVAL);
238 
239 	dp = readdir(cf->cf_u.dir);
240 	if (!dp)
241 		return (ENOENT);
242 
243 	/*
244 	 * Note: d_namlen is in the range 0..255 and therefore less
245 	 * than PATH_MAX so we don't need to test before copying.
246 	 */
247 	*fileno_return = dp->d_fileno;
248 	*type_return = dp->d_type;
249 	*namelen_return = dp->d_namlen;
250 	memcpy(name, dp->d_name, dp->d_namlen);
251 	name[dp->d_namlen] = 0;
252 
253 	return (0);
254 }
255 
256 static int
257 cb_seek(void *arg, void *h, uint64_t offset, int whence)
258 {
259 	struct cb_file *cf = h;
260 
261 	if (cf->cf_isdir)
262 		return (EINVAL);
263 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
264 		return (errno);
265 	return (0);
266 }
267 
268 static int
269 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
270 {
271 	struct cb_file *cf = h;
272 
273 	*mode = cf->cf_stat.st_mode;
274 	*uid = cf->cf_stat.st_uid;
275 	*gid = cf->cf_stat.st_gid;
276 	*size = cf->cf_stat.st_size;
277 	return (0);
278 }
279 
280 /*
281  * Disk image i/o callbacks
282  */
283 
284 static int
285 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
286 	    size_t *resid)
287 {
288 	ssize_t n;
289 
290 	if (unit != 0 || disk_fd == -1)
291 		return (EIO);
292 	n = pread(disk_fd, to, size, from);
293 	if (n < 0)
294 		return (errno);
295 	*resid = size - n;
296 	return (0);
297 }
298 
299 static int
300 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
301 {
302 	struct stat sb;
303 
304 	if (unit != 0 || disk_fd == -1)
305 		return (EBADF);
306 
307 	switch (cmd) {
308 	case DIOCGSECTORSIZE:
309 		*(u_int *)data = 512;
310 		break;
311 	case DIOCGMEDIASIZE:
312 		if (fstat(disk_fd, &sb) == 0)
313 			*(off_t *)data = sb.st_size;
314 		else
315 			return (ENOTTY);
316 		break;
317 	default:
318 		return (ENOTTY);
319 	}
320 
321 	return (0);
322 }
323 
324 /*
325  * Guest virtual machine i/o callbacks
326  */
327 static int
328 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
329 {
330 	char *ptr;
331 
332 	to &= 0x7fffffff;
333 
334 	ptr = vm_map_gpa(ctx, to, size);
335 	if (ptr == NULL)
336 		return (EFAULT);
337 
338 	memcpy(ptr, from, size);
339 	return (0);
340 }
341 
342 static int
343 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
344 {
345 	char *ptr;
346 
347 	from &= 0x7fffffff;
348 
349 	ptr = vm_map_gpa(ctx, from, size);
350 	if (ptr == NULL)
351 		return (EFAULT);
352 
353 	memcpy(to, ptr, size);
354 	return (0);
355 }
356 
357 static void
358 cb_setreg(void *arg, int r, uint64_t v)
359 {
360 	int error;
361 	enum vm_reg_name vmreg;
362 
363 	vmreg = VM_REG_LAST;
364 
365 	switch (r) {
366 	case 4:
367 		vmreg = VM_REG_GUEST_RSP;
368 		rsp = v;
369 		break;
370 	default:
371 		break;
372 	}
373 
374 	if (vmreg == VM_REG_LAST) {
375 		printf("test_setreg(%d): not implemented\n", r);
376 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
377 	}
378 
379 	error = vm_set_register(ctx, BSP, vmreg, v);
380 	if (error) {
381 		perror("vm_set_register");
382 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
383 	}
384 }
385 
386 static void
387 cb_setmsr(void *arg, int r, uint64_t v)
388 {
389 	int error;
390 	enum vm_reg_name vmreg;
391 
392 	vmreg = VM_REG_LAST;
393 
394 	switch (r) {
395 	case MSR_EFER:
396 		vmreg = VM_REG_GUEST_EFER;
397 		break;
398 	default:
399 		break;
400 	}
401 
402 	if (vmreg == VM_REG_LAST) {
403 		printf("test_setmsr(%d): not implemented\n", r);
404 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
405 	}
406 
407 	error = vm_set_register(ctx, BSP, vmreg, v);
408 	if (error) {
409 		perror("vm_set_msr");
410 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
411 	}
412 }
413 
414 static void
415 cb_setcr(void *arg, int r, uint64_t v)
416 {
417 	int error;
418 	enum vm_reg_name vmreg;
419 
420 	vmreg = VM_REG_LAST;
421 
422 	switch (r) {
423 	case 0:
424 		vmreg = VM_REG_GUEST_CR0;
425 		break;
426 	case 3:
427 		vmreg = VM_REG_GUEST_CR3;
428 		cr3 = v;
429 		break;
430 	case 4:
431 		vmreg = VM_REG_GUEST_CR4;
432 		break;
433 	default:
434 		break;
435 	}
436 
437 	if (vmreg == VM_REG_LAST) {
438 		printf("test_setcr(%d): not implemented\n", r);
439 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
440 	}
441 
442 	error = vm_set_register(ctx, BSP, vmreg, v);
443 	if (error) {
444 		perror("vm_set_cr");
445 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
446 	}
447 }
448 
449 static void
450 cb_setgdt(void *arg, uint64_t base, size_t size)
451 {
452 	int error;
453 
454 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
455 	if (error != 0) {
456 		perror("vm_set_desc(gdt)");
457 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
458 	}
459 
460 	gdtbase = base;
461 }
462 
463 static void
464 cb_exec(void *arg, uint64_t rip)
465 {
466 	int error;
467 
468 	error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp);
469 	if (error) {
470 		perror("vm_setup_freebsd_registers");
471 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
472 	}
473 
474 	cb_exit(NULL, 0);
475 }
476 
477 /*
478  * Misc
479  */
480 
/*
 * Pause for usec microseconds using usleep(3).  arg is unused and the
 * result of usleep() is not examined.
 */
static void
cb_delay(void *arg, int usec)
{

	(void)usleep(usec);
}
487 
488 static void
489 cb_exit(void *arg, int v)
490 {
491 
492 	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
493 	exit(v);
494 }
495 
496 static void
497 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
498 {
499 
500 	vm_get_memory_seg(ctx, 0, ret_lowmem, NULL);
501 	vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL);
502 }
503 
/*
 * One loader environment setting.  The string is referenced, not
 * copied, so it must stay valid for as long as the list is in use.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

/*
 * List of environment settings.  addenv() inserts at the head, so
 * cb_getenv() sees the most recently added entry at index 0.
 */
static SLIST_HEAD(envhead, env) envhead;
510 
511 static void
512 addenv(const char *str)
513 {
514 	struct env *env;
515 
516 	env = malloc(sizeof(struct env));
517 	env->str = str;
518 	SLIST_INSERT_HEAD(&envhead, env, next);
519 }
520 
521 static const char *
522 cb_getenv(void *arg, int num)
523 {
524 	int i;
525 	struct env *env;
526 
527 	i = 0;
528 	SLIST_FOREACH(env, &envhead, next) {
529 		if (i == num)
530 			return (env->str);
531 		i++;
532 	}
533 
534 	return (NULL);
535 }
536 
/*
 * Callback table passed by main() to the loader_main entry point of
 * the dynamically loaded userboot library.  Each member points at the
 * cb_* function of the same name defined above.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest virtual machine i/o */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,
};
567 
568 static int
569 altcons_open(char *path)
570 {
571 	struct stat sb;
572 	int err;
573 	int fd;
574 
575 	/*
576 	 * Allow stdio to be passed in so that the same string
577 	 * can be used for the bhyveload console and bhyve com-port
578 	 * parameters
579 	 */
580 	if (!strcmp(path, "stdio"))
581 		return (0);
582 
583 	err = stat(path, &sb);
584 	if (err == 0) {
585 		if (!S_ISCHR(sb.st_mode))
586 			err = ENOTSUP;
587 		else {
588 			fd = open(path, O_RDWR | O_NONBLOCK);
589 			if (fd < 0)
590 				err = errno;
591 			else
592 				consin_fd = consout_fd = fd;
593 		}
594 	}
595 
596 	return (err);
597 }
598 
599 static void
600 usage(void)
601 {
602 
603 	fprintf(stderr,
604 	    "usage: %s [-m mem-size] [-d <disk-path>] [-h <host-path>]\n"
605 	    "       %*s [-e <name=value>] [-c <console-device>] <vmname>\n",
606 	    progname,
607 	    (int)strlen(progname), "");
608 	exit(1);
609 }
610 
611 int
612 main(int argc, char** argv)
613 {
614 	void *h;
615 	void (*func)(struct loader_callbacks *, void *, int, int);
616 	uint64_t mem_size;
617 	int opt, error;
618 	char *disk_image;
619 
620 	progname = basename(argv[0]);
621 
622 	mem_size = 256 * MB;
623 	disk_image = NULL;
624 
625 	consin_fd = STDIN_FILENO;
626 	consout_fd = STDOUT_FILENO;
627 
628 	while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
629 		switch (opt) {
630 		case 'c':
631 			error = altcons_open(optarg);
632 			if (error != 0)
633 				errx(EX_USAGE, "Could not open '%s'", optarg);
634 			break;
635 		case 'd':
636 			disk_image = optarg;
637 			break;
638 
639 		case 'e':
640 			addenv(optarg);
641 			break;
642 
643 		case 'h':
644 			host_base = optarg;
645 			break;
646 
647 		case 'm':
648 			error = vm_parse_memsize(optarg, &mem_size);
649 			if (error != 0)
650 				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
651 			break;
652 		case '?':
653 			usage();
654 		}
655 	}
656 
657 	argc -= optind;
658 	argv += optind;
659 
660 	if (argc != 1)
661 		usage();
662 
663 	vmname = argv[0];
664 
665 	error = vm_create(vmname);
666 	if (error != 0 && errno != EEXIST) {
667 		perror("vm_create");
668 		exit(1);
669 
670 	}
671 
672 	ctx = vm_open(vmname);
673 	if (ctx == NULL) {
674 		perror("vm_open");
675 		exit(1);
676 	}
677 
678 	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
679 	if (error) {
680 		perror("vm_setup_memory");
681 		exit(1);
682 	}
683 
684 	tcgetattr(consout_fd, &term);
685 	oldterm = term;
686 	cfmakeraw(&term);
687 	term.c_cflag |= CLOCAL;
688 
689 	tcsetattr(consout_fd, TCSAFLUSH, &term);
690 
691 	h = dlopen("/boot/userboot.so", RTLD_LOCAL);
692 	if (!h) {
693 		printf("%s\n", dlerror());
694 		return (1);
695 	}
696 	func = dlsym(h, "loader_main");
697 	if (!func) {
698 		printf("%s\n", dlerror());
699 		return (1);
700 	}
701 
702 	if (disk_image) {
703 		disk_fd = open(disk_image, O_RDONLY);
704 	}
705 
706 	addenv("smbios.bios.vendor=BHYVE");
707 	addenv("boot_serial=1");
708 
709 	func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0);
710 }
711