1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 /*- 30 * Copyright (c) 2011 Google, Inc. 31 * All rights reserved. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * 54 * $FreeBSD$ 55 */ 56 57 #include <sys/cdefs.h> 58 __FBSDID("$FreeBSD$"); 59 60 #include <sys/ioctl.h> 61 #include <sys/stat.h> 62 #include <sys/disk.h> 63 #include <sys/queue.h> 64 65 #include <machine/specialreg.h> 66 #include <machine/vmm.h> 67 68 #include <dirent.h> 69 #include <dlfcn.h> 70 #include <errno.h> 71 #include <err.h> 72 #include <fcntl.h> 73 #include <getopt.h> 74 #include <libgen.h> 75 #include <limits.h> 76 #include <stdio.h> 77 #include <stdlib.h> 78 #include <string.h> 79 #include <sysexits.h> 80 #include <termios.h> 81 #include <unistd.h> 82 83 #include <vmmapi.h> 84 85 #include "userboot.h" 86 87 #define MB (1024 * 1024UL) 88 #define GB (1024 * 1024 * 1024UL) 89 #define BSP 0 90 91 #define NDISKS 32 92 93 static char *host_base; 94 static struct termios term, oldterm; 95 static int disk_fd[NDISKS]; 96 static int ndisks; 97 static int consin_fd, consout_fd; 98 99 static char *vmname, *progname; 100 static struct vmctx *ctx; 101 102 static uint64_t gdtbase, cr3, rsp; 103 104 static void cb_exit(void *arg, int v); 105 106 /* 107 * Console i/o callbacks 108 */ 109 110 static void 111 cb_putc(void *arg, int ch) 112 { 113 char c = ch; 114 115 (void) write(consout_fd, &c, 1); 116 } 117 118 static int 119 cb_getc(void *arg) 120 { 121 char c; 122 123 if (read(consin_fd, &c, 1) == 1) 124 return (c); 125 return (-1); 126 } 127 128 static int 129 cb_poll(void *arg) 130 { 131 int n; 132 133 if (ioctl(consin_fd, FIONREAD, &n) >= 0) 134 return (n > 0); 135 return (0); 136 } 137 138 /* 139 * Host filesystem i/o callbacks 140 */ 141 142 struct cb_file { 143 int cf_isdir; 144 size_t cf_size; 145 struct stat cf_stat; 146 union { 147 int fd; 148 DIR *dir; 149 } cf_u; 150 }; 151 152 static int 153 cb_open(void *arg, const char *filename, void **hp) 154 { 155 struct stat st; 156 struct cb_file *cf; 157 char path[PATH_MAX]; 158 159 if (!host_base) 160 return (ENOENT); 161 162 strlcpy(path, host_base, PATH_MAX); 163 if (path[strlen(path) - 1] == '/') 164 path[strlen(path) - 1] = 0; 165 strlcat(path, filename, PATH_MAX); 166 cf = malloc(sizeof(struct cb_file)); 167 if (stat(path, &cf->cf_stat) < 0) { 168 free(cf); 169 return (errno); 170 } 171 172 cf->cf_size = st.st_size; 173 if (S_ISDIR(cf->cf_stat.st_mode)) { 174 cf->cf_isdir = 1; 175 cf->cf_u.dir = opendir(path); 176 if (!cf->cf_u.dir) 177 goto out; 178 *hp = cf; 179 return (0); 180 } 181 if (S_ISREG(cf->cf_stat.st_mode)) { 182 cf->cf_isdir = 0; 183 cf->cf_u.fd = open(path, O_RDONLY); 184 if (cf->cf_u.fd < 0) 185 goto out; 186 *hp = cf; 187 return (0); 188 } 189 190 out: 191 free(cf); 192 return (EINVAL); 193 } 194 195 static int 196 cb_close(void *arg, void *h) 197 { 198 struct cb_file *cf = h; 199 200 if (cf->cf_isdir) 201 closedir(cf->cf_u.dir); 202 else 203 close(cf->cf_u.fd); 204 free(cf); 205 206 return (0); 207 } 208 209 static int 210 cb_isdir(void *arg, void *h) 211 { 212 struct cb_file *cf = h; 213 214 return (cf->cf_isdir); 215 } 216 217 static int 218 cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) 219 { 220 struct cb_file *cf = h; 221 ssize_t sz; 222 223 if (cf->cf_isdir) 224 return (EINVAL); 225 sz = read(cf->cf_u.fd, buf, size); 226 if (sz < 0) 227 return (EINVAL); 228 *resid = size - sz; 229 return (0); 230 } 231 232 static int 233 cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, 234 size_t *namelen_return, char *name) 235 { 236 struct cb_file *cf = h; 237 struct dirent *dp; 238 239 if (!cf->cf_isdir) 240 return (EINVAL); 241 242 dp = readdir(cf->cf_u.dir); 243 if (!dp) 244 return (ENOENT); 245 246 /* 247 * Note: d_namlen is in the range 0..255 and therefore less 248 * than PATH_MAX so we don't need to test before copying. 249 */ 250 *fileno_return = dp->d_fileno; 251 *type_return = dp->d_type; 252 *namelen_return = dp->d_namlen; 253 memcpy(name, dp->d_name, dp->d_namlen); 254 name[dp->d_namlen] = 0; 255 256 return (0); 257 } 258 259 static int 260 cb_seek(void *arg, void *h, uint64_t offset, int whence) 261 { 262 struct cb_file *cf = h; 263 264 if (cf->cf_isdir) 265 return (EINVAL); 266 if (lseek(cf->cf_u.fd, offset, whence) < 0) 267 return (errno); 268 return (0); 269 } 270 271 static int 272 cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) 273 { 274 struct cb_file *cf = h; 275 276 *mode = cf->cf_stat.st_mode; 277 *uid = cf->cf_stat.st_uid; 278 *gid = cf->cf_stat.st_gid; 279 *size = cf->cf_stat.st_size; 280 return (0); 281 } 282 283 /* 284 * Disk image i/o callbacks 285 */ 286 287 static int 288 cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, 289 size_t *resid) 290 { 291 ssize_t n; 292 293 if (unit < 0 || unit >= ndisks ) 294 return (EIO); 295 n = pread(disk_fd[unit], to, size, from); 296 if (n < 0) 297 return (errno); 298 *resid = size - n; 299 return (0); 300 } 301 302 static int 303 cb_diskioctl(void *arg, int unit, u_long cmd, void *data) 304 { 305 struct stat sb; 306 307 if (unit < 0 || unit >= ndisks) 308 return (EBADF); 309 310 switch (cmd) { 311 case DIOCGSECTORSIZE: 312 *(u_int *)data = 512; 313 break; 314 case DIOCGMEDIASIZE: 315 if (fstat(disk_fd[unit], &sb) == 0) 316 *(off_t *)data = sb.st_size; 317 else 318 return (ENOTTY); 319 break; 320 default: 321 return (ENOTTY); 322 } 323 324 return (0); 325 } 326 327 /* 328 * Guest virtual machine i/o callbacks 329 */ 330 static int 331 cb_copyin(void *arg, const void *from, uint64_t to, size_t size) 332 { 333 char *ptr; 334 335 to &= 0x7fffffff; 336 337 ptr = vm_map_gpa(ctx, to, size); 338 if (ptr == NULL) 339 return (EFAULT); 340 341 memcpy(ptr, from, size); 342 return (0); 343 } 344 345 static int 346 cb_copyout(void *arg, uint64_t from, void *to, size_t size) 347 { 348 char *ptr; 349 350 from &= 0x7fffffff; 351 352 ptr = vm_map_gpa(ctx, from, size); 353 if (ptr == NULL) 354 return (EFAULT); 355 356 memcpy(to, ptr, size); 357 return (0); 358 } 359 360 static void 361 cb_setreg(void *arg, int r, uint64_t v) 362 { 363 int error; 364 enum vm_reg_name vmreg; 365 366 vmreg = VM_REG_LAST; 367 368 switch (r) { 369 case 4: 370 vmreg = VM_REG_GUEST_RSP; 371 rsp = v; 372 break; 373 default: 374 break; 375 } 376 377 if (vmreg == VM_REG_LAST) { 378 printf("test_setreg(%d): not implemented\n", r); 379 cb_exit(NULL, USERBOOT_EXIT_QUIT); 380 } 381 382 error = vm_set_register(ctx, BSP, vmreg, v); 383 if (error) { 384 perror("vm_set_register"); 385 cb_exit(NULL, USERBOOT_EXIT_QUIT); 386 } 387 } 388 389 static void 390 cb_setmsr(void *arg, int r, uint64_t v) 391 { 392 int error; 393 enum vm_reg_name vmreg; 394 395 vmreg = VM_REG_LAST; 396 397 switch (r) { 398 case MSR_EFER: 399 vmreg = VM_REG_GUEST_EFER; 400 break; 401 default: 402 break; 403 } 404 405 if (vmreg == VM_REG_LAST) { 406 printf("test_setmsr(%d): not implemented\n", r); 407 cb_exit(NULL, USERBOOT_EXIT_QUIT); 408 } 409 410 error = vm_set_register(ctx, BSP, vmreg, v); 411 if (error) { 412 perror("vm_set_msr"); 413 cb_exit(NULL, USERBOOT_EXIT_QUIT); 414 } 415 } 416 417 static void 418 cb_setcr(void *arg, int r, uint64_t v) 419 { 420 int error; 421 enum vm_reg_name vmreg; 422 423 vmreg = VM_REG_LAST; 424 425 switch (r) { 426 case 0: 427 vmreg = VM_REG_GUEST_CR0; 428 break; 429 case 3: 430 vmreg = VM_REG_GUEST_CR3; 431 cr3 = v; 432 break; 433 case 4: 434 vmreg = VM_REG_GUEST_CR4; 435 break; 436 default: 437 break; 438 } 439 440 if (vmreg == VM_REG_LAST) { 441 printf("test_setcr(%d): not implemented\n", r); 442 cb_exit(NULL, USERBOOT_EXIT_QUIT); 443 } 444 445 error = vm_set_register(ctx, BSP, vmreg, v); 446 if (error) { 447 perror("vm_set_cr"); 448 cb_exit(NULL, USERBOOT_EXIT_QUIT); 449 } 450 } 451 452 static void 453 cb_setgdt(void *arg, uint64_t base, size_t size) 454 { 455 int error; 456 457 error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); 458 if (error != 0) { 459 perror("vm_set_desc(gdt)"); 460 cb_exit(NULL, USERBOOT_EXIT_QUIT); 461 } 462 463 gdtbase = base; 464 } 465 466 static void 467 cb_exec(void *arg, uint64_t rip) 468 { 469 int error; 470 471 if (cr3 == 0) 472 error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase, 473 rsp); 474 else 475 error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, 476 rsp); 477 if (error) { 478 perror("vm_setup_freebsd_registers"); 479 cb_exit(NULL, USERBOOT_EXIT_QUIT); 480 } 481 482 cb_exit(NULL, 0); 483 } 484 485 /* 486 * Misc 487 */ 488 489 static void 490 cb_delay(void *arg, int usec) 491 { 492 493 usleep(usec); 494 } 495 496 static void 497 cb_exit(void *arg, int v) 498 { 499 500 tcsetattr(consout_fd, TCSAFLUSH, &oldterm); 501 exit(v); 502 } 503 504 static void 505 cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) 506 { 507 508 *ret_lowmem = vm_get_lowmem_size(ctx); 509 *ret_highmem = vm_get_highmem_size(ctx); 510 } 511 512 struct env { 513 const char *str; /* name=value */ 514 SLIST_ENTRY(env) next; 515 }; 516 517 static SLIST_HEAD(envhead, env) envhead; 518 519 static void 520 addenv(const char *str) 521 { 522 struct env *env; 523 524 env = malloc(sizeof(struct env)); 525 env->str = str; 526 SLIST_INSERT_HEAD(&envhead, env, next); 527 } 528 529 static const char * 530 cb_getenv(void *arg, int num) 531 { 532 int i; 533 struct env *env; 534 535 i = 0; 536 SLIST_FOREACH(env, &envhead, next) { 537 if (i == num) 538 return (env->str); 539 i++; 540 } 541 542 return (NULL); 543 } 544 545 static int 546 cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val) 547 { 548 549 return (vm_set_register(ctx, vcpu, reg, val)); 550 } 551 552 static int 553 cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit, 554 u_int access) 555 { 556 557 return (vm_set_desc(ctx, vcpu, reg, base, limit, access)); 558 } 559 560 static struct loader_callbacks cb = { 561 .getc = cb_getc, 562 .putc = cb_putc, 563 .poll = cb_poll, 564 565 .open = cb_open, 566 .close = cb_close, 567 .isdir = cb_isdir, 568 .read = cb_read, 569 .readdir = cb_readdir, 570 .seek = cb_seek, 571 .stat = cb_stat, 572 573 .diskread = cb_diskread, 574 .diskioctl = cb_diskioctl, 575 576 .copyin = cb_copyin, 577 .copyout = cb_copyout, 578 .setreg = cb_setreg, 579 .setmsr = cb_setmsr, 580 .setcr = cb_setcr, 581 .setgdt = cb_setgdt, 582 .exec = cb_exec, 583 584 .delay = cb_delay, 585 .exit = cb_exit, 586 .getmem = cb_getmem, 587 588 .getenv = cb_getenv, 589 590 /* Version 4 additions */ 591 .vm_set_register = cb_vm_set_register, 592 .vm_set_desc = cb_vm_set_desc, 593 }; 594 595 static int 596 altcons_open(char *path) 597 { 598 struct stat sb; 599 int err; 600 int fd; 601 602 /* 603 * Allow stdio to be passed in so that the same string 604 * can be used for the bhyveload console and bhyve com-port 605 * parameters 606 */ 607 if (!strcmp(path, "stdio")) 608 return (0); 609 610 err = stat(path, &sb); 611 if (err == 0) { 612 if (!S_ISCHR(sb.st_mode)) 613 err = ENOTSUP; 614 else { 615 fd = open(path, O_RDWR | O_NONBLOCK); 616 if (fd < 0) 617 err = errno; 618 else 619 consin_fd = consout_fd = fd; 620 } 621 } 622 623 return (err); 624 } 625 626 static int 627 disk_open(char *path) 628 { 629 int err, fd; 630 631 if (ndisks >= NDISKS) 632 return (ERANGE); 633 634 err = 0; 635 fd = open(path, O_RDONLY); 636 637 if (fd > 0) { 638 disk_fd[ndisks] = fd; 639 ndisks++; 640 } else 641 err = errno; 642 643 return (err); 644 } 645 646 static void 647 usage(void) 648 { 649 650 fprintf(stderr, 651 "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n" 652 " %*s [-h <host-path>] [-m mem-size] <vmname>\n", 653 progname, 654 (int)strlen(progname), ""); 655 exit(1); 656 } 657 658 int 659 main(int argc, char** argv) 660 { 661 char *loader; 662 void *h; 663 void (*func)(struct loader_callbacks *, void *, int, int); 664 uint64_t mem_size; 665 int opt, error, need_reinit, memflags; 666 667 progname = basename(argv[0]); 668 669 loader = NULL; 670 671 memflags = 0; 672 mem_size = 256 * MB; 673 674 consin_fd = STDIN_FILENO; 675 consout_fd = STDOUT_FILENO; 676 677 while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) { 678 switch (opt) { 679 case 'c': 680 error = altcons_open(optarg); 681 if (error != 0) 682 errx(EX_USAGE, "Could not open '%s'", optarg); 683 break; 684 685 case 'd': 686 error = disk_open(optarg); 687 if (error != 0) 688 errx(EX_USAGE, "Could not open '%s'", optarg); 689 break; 690 691 case 'e': 692 addenv(optarg); 693 break; 694 695 case 'h': 696 host_base = optarg; 697 break; 698 699 case 'l': 700 if (loader != NULL) 701 errx(EX_USAGE, "-l can only be given once"); 702 loader = strdup(optarg); 703 if (loader == NULL) 704 err(EX_OSERR, "malloc"); 705 break; 706 707 case 'm': 708 error = vm_parse_memsize(optarg, &mem_size); 709 if (error != 0) 710 errx(EX_USAGE, "Invalid memsize '%s'", optarg); 711 break; 712 case 'C': 713 memflags |= VM_MEM_F_INCORE; 714 break; 715 case 'S': 716 memflags |= VM_MEM_F_WIRED; 717 break; 718 case '?': 719 usage(); 720 } 721 } 722 723 argc -= optind; 724 argv += optind; 725 726 if (argc != 1) 727 usage(); 728 729 vmname = argv[0]; 730 731 need_reinit = 0; 732 error = vm_create(vmname); 733 if (error) { 734 if (errno != EEXIST) { 735 perror("vm_create"); 736 exit(1); 737 } 738 need_reinit = 1; 739 } 740 741 ctx = vm_open(vmname); 742 if (ctx == NULL) { 743 perror("vm_open"); 744 exit(1); 745 } 746 747 if (need_reinit) { 748 error = vm_reinit(ctx); 749 if (error) { 750 perror("vm_reinit"); 751 exit(1); 752 } 753 } 754 755 vm_set_memflags(ctx, memflags); 756 error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); 757 if (error) { 758 perror("vm_setup_memory"); 759 exit(1); 760 } 761 762 if (loader == NULL) { 763 loader = strdup("/boot/userboot.so"); 764 if (loader == NULL) 765 err(EX_OSERR, "malloc"); 766 } 767 h = dlopen(loader, RTLD_LOCAL); 768 if (!h) { 769 printf("%s\n", dlerror()); 770 free(loader); 771 return (1); 772 } 773 func = dlsym(h, "loader_main"); 774 if (!func) { 775 printf("%s\n", dlerror()); 776 free(loader); 777 return (1); 778 } 779 780 tcgetattr(consout_fd, &term); 781 oldterm = term; 782 cfmakeraw(&term); 783 term.c_cflag |= CLOCAL; 784 785 tcsetattr(consout_fd, TCSAFLUSH, &term); 786 787 addenv("smbios.bios.vendor=BHYVE"); 788 addenv("boot_serial=1"); 789 790 func(&cb, NULL, USERBOOT_VERSION_4, ndisks); 791 792 free(loader); 793 return (0); 794 } 795