xref: /freebsd/usr.sbin/bhyveload/bhyveload.c (revision a18eacbefdfa1085ca3db829e86ece78cd416493)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*-
30  * Copyright (c) 2011 Google, Inc.
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52  * SUCH DAMAGE.
53  *
54  * $FreeBSD$
55  */
56 
57 #include <sys/cdefs.h>
58 __FBSDID("$FreeBSD$");
59 
60 #include <sys/ioctl.h>
61 #include <sys/stat.h>
62 #include <sys/disk.h>
63 #include <sys/queue.h>
64 
65 #include <machine/specialreg.h>
66 #include <machine/vmm.h>
67 
68 #include <dirent.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <err.h>
72 #include <fcntl.h>
73 #include <getopt.h>
74 #include <libgen.h>
75 #include <limits.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <sysexits.h>
80 #include <termios.h>
81 #include <unistd.h>
82 
83 #include <vmmapi.h>
84 
85 #include "userboot.h"
86 
/* Byte-count units (unsigned long) and the vcpu id used for all register setup. */
#define	MB	(1024UL * 1024UL)
#define	GB	(1024UL * MB)
#define	BSP	0
90 
/* Host directory that cb_open() prefixes to every filename; set by -h. */
static char *host_base = "/";

/*
 * Terminal state for descriptor 0: "term" is the modified copy installed by
 * main(), "oldterm" is the saved original that cb_exit() puts back.
 */
static struct termios term;
static struct termios oldterm;

/* Descriptor of the disk image opened by main(), or -1 when there is none. */
static int disk_fd = -1;

static char *vmname;
static char *progname;
static struct vmctx *ctx;

/* Values remembered by cb_setgdt(), cb_setcr() and cb_setreg() for cb_exec(). */
static uint64_t gdtbase;
static uint64_t cr3;
static uint64_t rsp;

static void cb_exit(void *arg, int v);
101 
102 /*
103  * Console i/o callbacks
104  */
105 
/*
 * Console output callback: emit ch (converted to a char) on descriptor 1.
 * The result of write(2) is not examined; arg is unused.
 */
static void
cb_putc(void *arg, int ch)
{
	char byte;

	byte = ch;
	write(1, &byte, sizeof(byte));
}
113 
/*
 * Console input callback: read one byte from descriptor 0.
 * Returns the byte as a char value, or -1 when read(2) did not deliver
 * exactly one byte.  arg is unused.
 */
static int
cb_getc(void *arg)
{
	char byte;
	ssize_t got;

	got = read(0, &byte, 1);
	if (got != 1)
		return (-1);
	return (byte);
}
123 
/*
 * Console poll callback: report whether input is waiting on descriptor 0.
 * Returns 1 when FIONREAD reports at least one pending byte, otherwise 0
 * (including when the ioctl itself fails).  arg is unused.
 */
static int
cb_poll(void *arg)
{
	int pending;

	if (ioctl(0, FIONREAD, &pending) < 0)
		return (0);
	return (pending > 0);
}
133 
134 /*
135  * Host filesystem i/o callbacks
136  */
137 
/*
 * Handle produced by cb_open() and consumed by the other host filesystem
 * callbacks.  cf_isdir selects which member of cf_u is in use.
 */
struct cb_file {
	int		cf_isdir;	/* nonzero: cf_u.dir, zero: cf_u.fd */
	size_t		cf_size;	/* size field filled in by cb_open() */
	struct stat	cf_stat;	/* stat(2) result taken by cb_open() */
	union {
		int	fd;		/* open(2) descriptor of a regular file */
		DIR	*dir;		/* opendir(3) stream of a directory */
	} cf_u;
};
147 
148 static int
149 cb_open(void *arg, const char *filename, void **hp)
150 {
151 	struct stat st;
152 	struct cb_file *cf;
153 	char path[PATH_MAX];
154 
155 	if (!host_base)
156 		return (ENOENT);
157 
158 	strlcpy(path, host_base, PATH_MAX);
159 	if (path[strlen(path) - 1] == '/')
160 		path[strlen(path) - 1] = 0;
161 	strlcat(path, filename, PATH_MAX);
162 	cf = malloc(sizeof(struct cb_file));
163 	if (stat(path, &cf->cf_stat) < 0) {
164 		free(cf);
165 		return (errno);
166 	}
167 
168 	cf->cf_size = st.st_size;
169 	if (S_ISDIR(cf->cf_stat.st_mode)) {
170 		cf->cf_isdir = 1;
171 		cf->cf_u.dir = opendir(path);
172 		if (!cf->cf_u.dir)
173 			goto out;
174 		*hp = cf;
175 		return (0);
176 	}
177 	if (S_ISREG(cf->cf_stat.st_mode)) {
178 		cf->cf_isdir = 0;
179 		cf->cf_u.fd = open(path, O_RDONLY);
180 		if (cf->cf_u.fd < 0)
181 			goto out;
182 		*hp = cf;
183 		return (0);
184 	}
185 
186 out:
187 	free(cf);
188 	return (EINVAL);
189 }
190 
191 static int
192 cb_close(void *arg, void *h)
193 {
194 	struct cb_file *cf = h;
195 
196 	if (cf->cf_isdir)
197 		closedir(cf->cf_u.dir);
198 	else
199 		close(cf->cf_u.fd);
200 	free(cf);
201 
202 	return (0);
203 }
204 
205 static int
206 cb_isdir(void *arg, void *h)
207 {
208 	struct cb_file *cf = h;
209 
210 	return (cf->cf_isdir);
211 }
212 
213 static int
214 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
215 {
216 	struct cb_file *cf = h;
217 	ssize_t sz;
218 
219 	if (cf->cf_isdir)
220 		return (EINVAL);
221 	sz = read(cf->cf_u.fd, buf, size);
222 	if (sz < 0)
223 		return (EINVAL);
224 	*resid = size - sz;
225 	return (0);
226 }
227 
228 static int
229 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
230 	   size_t *namelen_return, char *name)
231 {
232 	struct cb_file *cf = h;
233 	struct dirent *dp;
234 
235 	if (!cf->cf_isdir)
236 		return (EINVAL);
237 
238 	dp = readdir(cf->cf_u.dir);
239 	if (!dp)
240 		return (ENOENT);
241 
242 	/*
243 	 * Note: d_namlen is in the range 0..255 and therefore less
244 	 * than PATH_MAX so we don't need to test before copying.
245 	 */
246 	*fileno_return = dp->d_fileno;
247 	*type_return = dp->d_type;
248 	*namelen_return = dp->d_namlen;
249 	memcpy(name, dp->d_name, dp->d_namlen);
250 	name[dp->d_namlen] = 0;
251 
252 	return (0);
253 }
254 
255 static int
256 cb_seek(void *arg, void *h, uint64_t offset, int whence)
257 {
258 	struct cb_file *cf = h;
259 
260 	if (cf->cf_isdir)
261 		return (EINVAL);
262 	if (lseek(cf->cf_u.fd, offset, whence) < 0)
263 		return (errno);
264 	return (0);
265 }
266 
267 static int
268 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
269 {
270 	struct cb_file *cf = h;
271 
272 	*mode = cf->cf_stat.st_mode;
273 	*uid = cf->cf_stat.st_uid;
274 	*gid = cf->cf_stat.st_gid;
275 	*size = cf->cf_stat.st_size;
276 	return (0);
277 }
278 
279 /*
280  * Disk image i/o callbacks
281  */
282 
283 static int
284 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
285 	    size_t *resid)
286 {
287 	ssize_t n;
288 
289 	if (unit != 0 || disk_fd == -1)
290 		return (EIO);
291 	n = pread(disk_fd, to, size, from);
292 	if (n < 0)
293 		return (errno);
294 	*resid = size - n;
295 	return (0);
296 }
297 
298 static int
299 cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
300 {
301 	struct stat sb;
302 
303 	if (unit != 0 || disk_fd == -1)
304 		return (EBADF);
305 
306 	switch (cmd) {
307 	case DIOCGSECTORSIZE:
308 		*(u_int *)data = 512;
309 		break;
310 	case DIOCGMEDIASIZE:
311 		if (fstat(disk_fd, &sb) == 0)
312 			*(off_t *)data = sb.st_size;
313 		else
314 			return (ENOTTY);
315 		break;
316 	default:
317 		return (ENOTTY);
318 	}
319 
320 	return (0);
321 }
322 
323 /*
324  * Guest virtual machine i/o callbacks
325  */
326 static int
327 cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
328 {
329 	char *ptr;
330 
331 	to &= 0x7fffffff;
332 
333 	ptr = vm_map_gpa(ctx, to, size);
334 	if (ptr == NULL)
335 		return (EFAULT);
336 
337 	memcpy(ptr, from, size);
338 	return (0);
339 }
340 
341 static int
342 cb_copyout(void *arg, uint64_t from, void *to, size_t size)
343 {
344 	char *ptr;
345 
346 	from &= 0x7fffffff;
347 
348 	ptr = vm_map_gpa(ctx, from, size);
349 	if (ptr == NULL)
350 		return (EFAULT);
351 
352 	memcpy(to, ptr, size);
353 	return (0);
354 }
355 
356 static void
357 cb_setreg(void *arg, int r, uint64_t v)
358 {
359 	int error;
360 	enum vm_reg_name vmreg;
361 
362 	vmreg = VM_REG_LAST;
363 
364 	switch (r) {
365 	case 4:
366 		vmreg = VM_REG_GUEST_RSP;
367 		rsp = v;
368 		break;
369 	default:
370 		break;
371 	}
372 
373 	if (vmreg == VM_REG_LAST) {
374 		printf("test_setreg(%d): not implemented\n", r);
375 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
376 	}
377 
378 	error = vm_set_register(ctx, BSP, vmreg, v);
379 	if (error) {
380 		perror("vm_set_register");
381 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
382 	}
383 }
384 
385 static void
386 cb_setmsr(void *arg, int r, uint64_t v)
387 {
388 	int error;
389 	enum vm_reg_name vmreg;
390 
391 	vmreg = VM_REG_LAST;
392 
393 	switch (r) {
394 	case MSR_EFER:
395 		vmreg = VM_REG_GUEST_EFER;
396 		break;
397 	default:
398 		break;
399 	}
400 
401 	if (vmreg == VM_REG_LAST) {
402 		printf("test_setmsr(%d): not implemented\n", r);
403 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
404 	}
405 
406 	error = vm_set_register(ctx, BSP, vmreg, v);
407 	if (error) {
408 		perror("vm_set_msr");
409 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
410 	}
411 }
412 
413 static void
414 cb_setcr(void *arg, int r, uint64_t v)
415 {
416 	int error;
417 	enum vm_reg_name vmreg;
418 
419 	vmreg = VM_REG_LAST;
420 
421 	switch (r) {
422 	case 0:
423 		vmreg = VM_REG_GUEST_CR0;
424 		break;
425 	case 3:
426 		vmreg = VM_REG_GUEST_CR3;
427 		cr3 = v;
428 		break;
429 	case 4:
430 		vmreg = VM_REG_GUEST_CR4;
431 		break;
432 	default:
433 		break;
434 	}
435 
436 	if (vmreg == VM_REG_LAST) {
437 		printf("test_setcr(%d): not implemented\n", r);
438 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
439 	}
440 
441 	error = vm_set_register(ctx, BSP, vmreg, v);
442 	if (error) {
443 		perror("vm_set_cr");
444 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
445 	}
446 }
447 
448 static void
449 cb_setgdt(void *arg, uint64_t base, size_t size)
450 {
451 	int error;
452 
453 	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
454 	if (error != 0) {
455 		perror("vm_set_desc(gdt)");
456 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
457 	}
458 
459 	gdtbase = base;
460 }
461 
462 static void
463 cb_exec(void *arg, uint64_t rip)
464 {
465 	int error;
466 
467 	error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp);
468 	if (error) {
469 		perror("vm_setup_freebsd_registers");
470 		cb_exit(NULL, USERBOOT_EXIT_QUIT);
471 	}
472 
473 	cb_exit(NULL, 0);
474 }
475 
476 /*
477  * Misc
478  */
479 
/*
 * Delay callback: sleep for "usec" microseconds via usleep(3).
 * arg is unused.
 */
static void
cb_delay(void *arg, int usec)
{

	(void)usleep(usec);
}
486 
487 static void
488 cb_exit(void *arg, int v)
489 {
490 
491 	tcsetattr(0, TCSAFLUSH, &oldterm);
492 	exit(v);
493 }
494 
495 static void
496 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
497 {
498 
499 	vm_get_memory_seg(ctx, 0, ret_lowmem, NULL);
500 	vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL);
501 }
502 
/*
 * Loader environment: a singly-linked list of "name=value" strings that
 * addenv() prepends to and cb_getenv() walks.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;
};

static SLIST_HEAD(envhead, env) envhead = SLIST_HEAD_INITIALIZER(envhead);
509 
510 static void
511 addenv(const char *str)
512 {
513 	struct env *env;
514 
515 	env = malloc(sizeof(struct env));
516 	env->str = str;
517 	SLIST_INSERT_HEAD(&envhead, env, next);
518 }
519 
520 static const char *
521 cb_getenv(void *arg, int num)
522 {
523 	int i;
524 	struct env *env;
525 
526 	i = 0;
527 	SLIST_FOREACH(env, &envhead, next) {
528 		if (i == num)
529 			return (env->str);
530 		i++;
531 	}
532 
533 	return (NULL);
534 }
535 
536 static struct loader_callbacks cb = {
537 	.getc = cb_getc,
538 	.putc = cb_putc,
539 	.poll = cb_poll,
540 
541 	.open = cb_open,
542 	.close = cb_close,
543 	.isdir = cb_isdir,
544 	.read = cb_read,
545 	.readdir = cb_readdir,
546 	.seek = cb_seek,
547 	.stat = cb_stat,
548 
549 	.diskread = cb_diskread,
550 	.diskioctl = cb_diskioctl,
551 
552 	.copyin = cb_copyin,
553 	.copyout = cb_copyout,
554 	.setreg = cb_setreg,
555 	.setmsr = cb_setmsr,
556 	.setcr = cb_setcr,
557 	.setgdt = cb_setgdt,
558 	.exec = cb_exec,
559 
560 	.delay = cb_delay,
561 	.exit = cb_exit,
562 	.getmem = cb_getmem,
563 
564 	.getenv = cb_getenv,
565 };
566 
567 static void
568 usage(void)
569 {
570 
571 	fprintf(stderr,
572 	    "usage: %s [-m mem-size] [-d <disk-path>] [-h <host-path>]\n"
573 	    "       %*s [-e <name=value>] <vmname>\n", progname,
574 	    (int)strlen(progname), "");
575 	exit(1);
576 }
577 
578 int
579 main(int argc, char** argv)
580 {
581 	void *h;
582 	void (*func)(struct loader_callbacks *, void *, int, int);
583 	uint64_t mem_size;
584 	int opt, error;
585 	char *disk_image;
586 
587 	progname = basename(argv[0]);
588 
589 	mem_size = 256 * MB;
590 	disk_image = NULL;
591 
592 	while ((opt = getopt(argc, argv, "d:e:h:m:")) != -1) {
593 		switch (opt) {
594 		case 'd':
595 			disk_image = optarg;
596 			break;
597 
598 		case 'e':
599 			addenv(optarg);
600 			break;
601 
602 		case 'h':
603 			host_base = optarg;
604 			break;
605 
606 		case 'm':
607 			error = vm_parse_memsize(optarg, &mem_size);
608 			if (error != 0)
609 				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
610 			break;
611 		case '?':
612 			usage();
613 		}
614 	}
615 
616 	argc -= optind;
617 	argv += optind;
618 
619 	if (argc != 1)
620 		usage();
621 
622 	vmname = argv[0];
623 
624 	error = vm_create(vmname);
625 	if (error != 0 && errno != EEXIST) {
626 		perror("vm_create");
627 		exit(1);
628 
629 	}
630 
631 	ctx = vm_open(vmname);
632 	if (ctx == NULL) {
633 		perror("vm_open");
634 		exit(1);
635 	}
636 
637 	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
638 	if (error) {
639 		perror("vm_setup_memory");
640 		exit(1);
641 	}
642 
643 	tcgetattr(0, &term);
644 	oldterm = term;
645 	term.c_lflag &= ~(ICANON|ECHO);
646 	term.c_iflag &= ~ICRNL;
647 	tcsetattr(0, TCSAFLUSH, &term);
648 	h = dlopen("/boot/userboot.so", RTLD_LOCAL);
649 	if (!h) {
650 		printf("%s\n", dlerror());
651 		return (1);
652 	}
653 	func = dlsym(h, "loader_main");
654 	if (!func) {
655 		printf("%s\n", dlerror());
656 		return (1);
657 	}
658 
659 	if (disk_image) {
660 		disk_fd = open(disk_image, O_RDONLY);
661 	}
662 
663 	addenv("smbios.bios.vendor=BHYVE");
664 	addenv("boot_serial=1");
665 
666 	func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0);
667 }
668