/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>

#include <machine/atomic.h>
#include <machine/segments.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "acpi.h"
#include "inout.h"
#include "dbgport.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
#include "mptbl.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"

#define	GUEST_NIO_PORT	0x488	/* guest upcalls via i/o port */

#define	MB		(1024UL * 1024)
#define	GB		(1024UL * MB)

typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);

char *vmname;

int guest_ncpus;
char *guest_uuid_str;

static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
static int virtio_msix = 1;
static int x2apic_mode = 0;	/* default is xAPIC */

static int strictio;
static int strictmsr = 1;

static int acpi;

static char *progname;
static const int BSP = 0;

static cpuset_t cpumask;

static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);

static struct vm_exit vmexit[VM_MAXCPU];

struct bhyvestats {
	uint64_t	vmexit_bogus;
	uint64_t	vmexit_reqidle;
	uint64_t	vmexit_hlt;
	uint64_t	vmexit_pause;
	uint64_t	vmexit_mtrap;
	uint64_t	vmexit_inst_emul;
	uint64_t	cpu_switch_rotate;
	uint64_t	cpu_switch_direct;
} stats;

struct mt_vmm_info {
	pthread_t	mt_thr;
	struct vmctx	*mt_ctx;
	int		mt_vcpu;
} mt_vmm_info[VM_MAXCPU];

static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };

static void
usage(int code)
{

	fprintf(stderr,
		"Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
		"       -a: local apic is in xAPIC mode (deprecated)\n"
		"       -A: create ACPI tables\n"
		"       -c: # cpus (default 1)\n"
		"       -C: include guest memory in core file\n"
		"       -e: exit on unhandled I/O access\n"
		"       -g: gdb port\n"
		"       -h: help\n"
		"       -H: vmexit from the guest on hlt\n"
		"       -l: LPC device configuration\n"
		"       -m: memory size in MB\n"
		"       -p: pin 'vcpu' to 'hostcpu'\n"
		"       -P: vmexit from the guest on pause\n"
		"       -s: <slot,driver,configinfo> PCI slot config\n"
		"       -S: guest memory cannot be swapped\n"
		"       -u: RTC keeps UTC time\n"
		"       -U: uuid\n"
		"       -w: ignore unimplemented MSRs\n"
		"       -W: force virtio to use single-vector MSI\n"
		"       -x: local apic is in x2APIC mode\n"
		"       -Y: disable MPtable generation\n",
		progname, (int)strlen(progname), "");

	exit(code);
}

/*
 * Parse a "-p vcpu:hostcpu" option and add 'hostcpu' to the set of host
 * cpus that 'vcpu' is allowed to run on.
 */
static int
pincpu_parse(const char *opt)
{
	int vcpu, pcpu;

	if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
		fprintf(stderr, "invalid format: %s\n", opt);
		return (-1);
	}

	if (vcpu < 0 || vcpu >= VM_MAXCPU) {
		fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
		    vcpu, VM_MAXCPU - 1);
		return (-1);
	}

	if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
		fprintf(stderr, "hostcpu '%d' outside valid range from "
		    "0 to %d\n", pcpu, CPU_SETSIZE - 1);
		return (-1);
	}

	if (vcpumap[vcpu] == NULL) {
		if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
			perror("malloc");
			return (-1);
		}
		CPU_ZERO(vcpumap[vcpu]);
	}
	CPU_SET(pcpu, vcpumap[vcpu]);
	return (0);
}

void
vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
    int errcode)
{
	struct vmctx *ctx;
	int error, restart_instruction;

	ctx = arg;
	restart_instruction = 1;

	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}

void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{

	return (vm_map_gpa(ctx, gaddr, len));
}

int
fbsdrun_vmexit_on_pause(void)
{

	return (guest_vmexit_on_pause);
}

int
fbsdrun_vmexit_on_hlt(void)
{

	return (guest_vmexit_on_hlt);
}

int
fbsdrun_virtio_msix(void)
{

	return (virtio_msix);
}

static void *
fbsdrun_start_thread(void *param)
{
	char tname[MAXCOMLEN + 1];
	struct mt_vmm_info *mtp;
	int vcpu;

	mtp = param;
	vcpu = mtp->mt_vcpu;

	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
	pthread_set_name_np(mtp->mt_thr, tname);

	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);

	/* not reached */
	exit(1);
	return (NULL);
}

void
fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
{
	int error;

	assert(fromcpu == BSP);

	/*
	 * The 'newcpu' must be activated in the context of 'fromcpu'. If
	 * vm_activate_cpu() is delayed until newcpu's pthread starts running
	 * then vmm.ko is out-of-sync with bhyve and this can create a race
	 * with vm_suspend().
	 */
	error = vm_activate_cpu(ctx, newcpu);
	assert(error == 0);

	CPU_SET_ATOMIC(newcpu, &cpumask);

	/*
	 * Set up the vmexit struct to allow execution to start
	 * at the given RIP
	 */
	vmexit[newcpu].rip = rip;
	vmexit[newcpu].inst_length = 0;

	mt_vmm_info[newcpu].mt_ctx = ctx;
	mt_vmm_info[newcpu].mt_vcpu = newcpu;

	error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
	    fbsdrun_start_thread, &mt_vmm_info[newcpu]);
	assert(error == 0);
}

static int
fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
{

	if (!CPU_ISSET(vcpu, &cpumask)) {
		fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
		exit(1);
	}

	CPU_CLR_ATOMIC(vcpu, &cpumask);
	return (CPU_EMPTY(&cpumask));
}

static int
vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
    uint32_t eax)
{
#if BHYVE_DEBUG
	/*
	 * put guest-driven debug here
	 */
#endif
	return (VMEXIT_CONTINUE);
}

static int
vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int error;
	int bytes, port, in, out, string;
	int vcpu;

	vcpu = *pvcpu;

	port = vme->u.inout.port;
	bytes = vme->u.inout.bytes;
	string = vme->u.inout.string;
	in = vme->u.inout.in;
	out = !in;

	/* Extra-special case of host notifications */
	if (out && port == GUEST_NIO_PORT) {
		error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
		return (error);
	}

	error = emulate_inout(ctx, vcpu, vme, strictio);
	if (error) {
		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
		    in ? "in" : "out",
		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
		    port, vme->rip);
		return (VMEXIT_ABORT);
	} else {
		return (VMEXIT_CONTINUE);
	}
}

static int
vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	uint64_t val;
	uint32_t eax, edx;
	int error;

	val = 0;
	error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
	if (error != 0) {
		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
		    vme->u.msr.code, *pvcpu);
		if (strictmsr) {
			vm_inject_gp(ctx, *pvcpu);
			return (VMEXIT_CONTINUE);
		}
	}

	eax = val;
	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
	assert(error == 0);

	edx = val >> 32;
	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
	assert(error == 0);

	return (VMEXIT_CONTINUE);
}

static int
vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int error;

	error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
	if (error != 0) {
		fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
		    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
		if (strictmsr) {
			vm_inject_gp(ctx, *pvcpu);
			return (VMEXIT_CONTINUE);
		}
	}
	return (VMEXIT_CONTINUE);
}

static int
vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
{
	int newcpu;
	int retval = VMEXIT_CONTINUE;

	newcpu = spinup_ap(ctx, *pvcpu,
	    vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);

	return (retval);
}

#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
#define	EXIT_REASON_EPT_MISCONFIG	49
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
#define	VMCS_IDENT(x)			((x) | 0x80000000)

static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif

static int
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
	fprintf(stderr, "\treason\t\tVMX\n");
	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
	fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
	fprintf(stderr, "\tqualification\t0x%016lx\n",
	    vmexit->u.vmx.exit_qualification);
	fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
	fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
	if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
		vm_get_register(ctx, *pvcpu,
		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
		    &ept_misconfig_gpa);
		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
		    &ept_misconfig_ptenum);
		fprintf(stderr, "\tEPT misconfiguration:\n");
		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
		    ept_misconfig_ptenum, ept_misconfig_pte[0],
		    ept_misconfig_pte[1], ept_misconfig_pte[2],
		    ept_misconfig_pte[3]);
	}
#endif	/* DEBUG_EPT_MISCONFIG */
	return (VMEXIT_ABORT);
}

static int
vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
	fprintf(stderr, "\treason\t\tSVM\n");
	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
	return (VMEXIT_ABORT);
}

static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	assert(vmexit->inst_length == 0);

	stats.vmexit_bogus++;

	return (VMEXIT_CONTINUE);
}

static int
vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	assert(vmexit->inst_length == 0);

	stats.vmexit_reqidle++;

	return (VMEXIT_CONTINUE);
}

static int
vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	stats.vmexit_hlt++;

	/*
	 * Just continue execution with the next instruction. We use
	 * the HLT VM exit as a way to be friendly with the host
	 * scheduler.
	 */
	return (VMEXIT_CONTINUE);
}

static int
vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	stats.vmexit_pause++;

	return (VMEXIT_CONTINUE);
}

static int
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{

	assert(vmexit->inst_length == 0);

	stats.vmexit_mtrap++;

	return (VMEXIT_CONTINUE);
}

static int
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	int err, i;
	struct vie *vie;

	stats.vmexit_inst_emul++;

	vie = &vmexit->u.inst_emul.vie;
	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
	    vie, &vmexit->u.inst_emul.paging);

	if (err) {
		if (err == ESRCH) {
			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
			    vmexit->u.inst_emul.gpa);
		}

		fprintf(stderr, "Failed to emulate instruction [");
		for (i = 0; i < vie->num_valid; i++) {
			fprintf(stderr, "0x%02x%s", vie->inst[i],
			    i != (vie->num_valid - 1) ? " " : "");
		}
		fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
		return (VMEXIT_ABORT);
	}

	return (VMEXIT_CONTINUE);
}

static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;

static int
vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
	enum vm_suspend_how how;

	how = vmexit->u.suspended.how;

	fbsdrun_deletecpu(ctx, *pvcpu);

	if (*pvcpu != BSP) {
		pthread_mutex_lock(&resetcpu_mtx);
		pthread_cond_signal(&resetcpu_cond);
		pthread_mutex_unlock(&resetcpu_mtx);
		pthread_exit(NULL);
	}

	pthread_mutex_lock(&resetcpu_mtx);
	while (!CPU_EMPTY(&cpumask)) {
		pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
	}
	pthread_mutex_unlock(&resetcpu_mtx);

	switch (how) {
	case VM_SUSPEND_RESET:
		exit(0);
	case VM_SUSPEND_POWEROFF:
		exit(1);
	case VM_SUSPEND_HALT:
		exit(2);
	case VM_SUSPEND_TRIPLEFAULT:
		exit(3);
	default:
		fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
		exit(100);
	}
	return (0);	/* NOTREACHED */
}

static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT] = vmexit_inout,
	[VM_EXITCODE_INOUT_STR] = vmexit_inout,
	[VM_EXITCODE_VMX] = vmexit_vmx,
	[VM_EXITCODE_SVM] = vmexit_svm,
	[VM_EXITCODE_BOGUS] = vmexit_bogus,
	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
	[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP] = vmexit_mtrap,
	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
};

/*
 * Run a virtual cpu: execute it via vm_run() and dispatch each VM exit to
 * the matching handler until vm_run() fails or a handler aborts.
 */
static void
vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
{
	int error, rc, prevcpu;
	enum vm_exitcode exitcode;
	cpuset_t active_cpus;

	if (vcpumap[vcpu] != NULL) {
		error = pthread_setaffinity_np(pthread_self(),
		    sizeof(cpuset_t), vcpumap[vcpu]);
		assert(error == 0);
	}

	error = vm_active_cpus(ctx, &active_cpus);
	assert(CPU_ISSET(vcpu, &active_cpus));

	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
	assert(error == 0);

	while (1) {
		error = vm_run(ctx, vcpu, &vmexit[vcpu]);
		if (error != 0)
			break;

		prevcpu = vcpu;

		exitcode = vmexit[vcpu].exitcode;
		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
			    exitcode);
			exit(1);
		}

		rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);

		switch (rc) {
		case VMEXIT_CONTINUE:
			break;
		case VMEXIT_ABORT:
			abort();
		default:
			exit(1);
		}
	}
	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
}

static int
num_vcpus_allowed(struct vmctx *ctx)
{
	int tmp, error;

	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);

	/*
	 * The guest is allowed to spin up more than one processor only if the
	 * UNRESTRICTED_GUEST capability is available.
	 */
	if (error == 0)
		return (VM_MAXCPU);
	else
		return (1);
}

void
fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
{
	int err, tmp;

	if (fbsdrun_vmexit_on_hlt()) {
		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
		if (err < 0) {
			fprintf(stderr, "VM exit on HLT not supported\n");
			exit(1);
		}
		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
		if (cpu == BSP)
			handler[VM_EXITCODE_HLT] = vmexit_hlt;
	}

	if (fbsdrun_vmexit_on_pause()) {
		/*
		 * pause exit support required for this mode
		 */
		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
		if (err < 0) {
			fprintf(stderr,
			    "SMP mux requested, no pause support\n");
			exit(1);
		}
		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
		if (cpu == BSP)
			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
	}

	if (x2apic_mode)
		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
	else
		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);

	if (err) {
		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
		exit(1);
	}

	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
}

static struct vmctx *
do_open(const char *vmname)
{
	struct vmctx *ctx;
	int error;
	bool reinit, romboot;

	reinit = romboot = false;

	if (lpc_bootrom())
		romboot = true;

	error = vm_create(vmname);
	if (error) {
		if (errno == EEXIST) {
			if (romboot) {
				reinit = true;
			} else {
				/*
				 * The virtual machine has been set up by the
				 * userspace bootloader.
				 */
			}
		} else {
			perror("vm_create");
			exit(1);
		}
	} else {
		if (!romboot) {
			/*
			 * If the virtual machine was just created then a
			 * bootrom must be configured to boot it.
			 */
			fprintf(stderr, "virtual machine cannot be booted\n");
			exit(1);
		}
	}

	ctx = vm_open(vmname);
	if (ctx == NULL) {
		perror("vm_open");
		exit(1);
	}

	if (reinit) {
		error = vm_reinit(ctx);
		if (error) {
			perror("vm_reinit");
			exit(1);
		}
	}
	return (ctx);
}

int
main(int argc, char *argv[])
{
	int c, error, gdb_port, err, bvmcons;
	int max_vcpus, mptgen, memflags;
	int rtc_localtime;
	struct vmctx *ctx;
	uint64_t rip;
	size_t memsize;
	char *optstr;

	bvmcons = 0;
	progname = basename(argv[0]);
	gdb_port = 0;
	guest_ncpus = 1;
	memsize = 256 * MB;
	mptgen = 1;
	rtc_localtime = 1;
	memflags = 0;

	optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
	while ((c = getopt(argc, argv, optstr)) != -1) {
		switch (c) {
		case 'a':
			x2apic_mode = 0;
			break;
		case 'A':
			acpi = 1;
			break;
		case 'b':
			bvmcons = 1;
			break;
		case 'p':
			if (pincpu_parse(optarg) != 0) {
				errx(EX_USAGE, "invalid vcpu pinning "
				    "configuration '%s'", optarg);
			}
			break;
		case 'c':
			guest_ncpus = atoi(optarg);
			break;
		case 'C':
			memflags |= VM_MEM_F_INCORE;
			break;
		case 'g':
			gdb_port = atoi(optarg);
			break;
		case 'l':
			if (lpc_device_parse(optarg) != 0) {
				errx(EX_USAGE, "invalid lpc device "
				    "configuration '%s'", optarg);
			}
			break;
		case 's':
			if (pci_parse_slot(optarg) != 0)
				exit(1);
			else
				break;
		case 'S':
			memflags |= VM_MEM_F_WIRED;
			break;
		case 'm':
			error = vm_parse_memsize(optarg, &memsize);
			if (error)
				errx(EX_USAGE, "invalid memsize '%s'", optarg);
			break;
		case 'H':
			guest_vmexit_on_hlt = 1;
			break;
		case 'I':
			/*
			 * The "-I" option was used to add an ioapic to the
			 * virtual machine.
			 *
			 * An ioapic is now provided unconditionally for each
			 * virtual machine and this option is now deprecated.
			 */
			break;
		case 'P':
			guest_vmexit_on_pause = 1;
			break;
		case 'e':
			strictio = 1;
			break;
		case 'u':
			rtc_localtime = 0;
			break;
		case 'U':
			guest_uuid_str = optarg;
			break;
		case 'w':
			strictmsr = 0;
			break;
		case 'W':
			virtio_msix = 0;
			break;
		case 'x':
			x2apic_mode = 1;
			break;
		case 'Y':
			mptgen = 0;
			break;
		case 'h':
			usage(0);
		default:
			usage(1);
		}
	}
	argc -= optind;
	argv += optind;

	if (argc != 1)
		usage(1);

	vmname = argv[0];
	ctx = do_open(vmname);

	if (guest_ncpus < 1) {
		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
		exit(1);
	}

	max_vcpus = num_vcpus_allowed(ctx);
	if (guest_ncpus > max_vcpus) {
		fprintf(stderr, "%d vCPUs requested but only %d available\n",
		    guest_ncpus, max_vcpus);
		exit(1);
	}

	fbsdrun_set_capabilities(ctx, BSP);

	vm_set_memflags(ctx, memflags);
	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
	if (err) {
		fprintf(stderr, "Unable to setup memory (%d)\n", errno);
		exit(1);
	}

	error = init_msr();
	if (error) {
		fprintf(stderr, "init_msr error %d", error);
		exit(1);
	}

	init_mem();
	init_inout();
	pci_irq_init(ctx);
	ioapic_init(ctx);

	rtc_init(ctx, rtc_localtime);
	sci_init(ctx);

	/*
	 * Exit if a device emulation finds an error in its initialization
	 */
	if (init_pci(ctx) != 0)
		exit(1);

	if (gdb_port != 0)
		init_dbgport(gdb_port);

	if (bvmcons)
		init_bvmcons();

	if (lpc_bootrom()) {
		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
			fprintf(stderr, "ROM boot failed: unrestricted guest "
			    "capability not available\n");
			exit(1);
		}
		error = vcpu_reset(ctx, BSP);
		assert(error == 0);
	}

	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
	assert(error == 0);

	/*
	 * build the guest tables, MP etc.
	 */
	if (mptgen) {
		error = mptable_build(ctx, guest_ncpus);
		if (error)
			exit(1);
	}

	error = smbios_build(ctx);
	assert(error == 0);

	if (acpi) {
		error = acpi_build(ctx, guest_ncpus);
		assert(error == 0);
	}

	/*
	 * Change the proc title to include the VM name.
	 */
	setproctitle("%s", vmname);

	/*
	 * Add CPU 0
	 */
	fbsdrun_addcpu(ctx, BSP, BSP, rip);

	/*
	 * Head off to the main event dispatch loop
	 */
	mevent_dispatch();

	exit(1);
}