1e285ef8dSPeter Grehan /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 31de7b4b8SPedro F. Giffuni * 4e285ef8dSPeter Grehan * Copyright (c) 2011 NetApp, Inc. 5e285ef8dSPeter Grehan * All rights reserved. 6e285ef8dSPeter Grehan * 7e285ef8dSPeter Grehan * Redistribution and use in source and binary forms, with or without 8e285ef8dSPeter Grehan * modification, are permitted provided that the following conditions 9e285ef8dSPeter Grehan * are met: 10e285ef8dSPeter Grehan * 1. Redistributions of source code must retain the above copyright 11e285ef8dSPeter Grehan * notice, this list of conditions and the following disclaimer. 12e285ef8dSPeter Grehan * 2. Redistributions in binary form must reproduce the above copyright 13e285ef8dSPeter Grehan * notice, this list of conditions and the following disclaimer in the 14e285ef8dSPeter Grehan * documentation and/or other materials provided with the distribution. 15e285ef8dSPeter Grehan * 16e285ef8dSPeter Grehan * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17e285ef8dSPeter Grehan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18e285ef8dSPeter Grehan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19e285ef8dSPeter Grehan * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20e285ef8dSPeter Grehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21e285ef8dSPeter Grehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22e285ef8dSPeter Grehan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23e285ef8dSPeter Grehan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24e285ef8dSPeter Grehan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25e285ef8dSPeter Grehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26e285ef8dSPeter Grehan * SUCH DAMAGE. 27e285ef8dSPeter Grehan * 28e285ef8dSPeter Grehan * $FreeBSD$ 29e285ef8dSPeter Grehan */ 30e285ef8dSPeter Grehan 31e285ef8dSPeter Grehan #include <sys/cdefs.h> 32e285ef8dSPeter Grehan __FBSDID("$FreeBSD$"); 33e285ef8dSPeter Grehan 34e285ef8dSPeter Grehan #include <sys/types.h> 3500ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 3600ef17beSBartek Rutkowski #include <sys/capsicum.h> 3700ef17beSBartek Rutkowski #endif 38e285ef8dSPeter Grehan #include <sys/mman.h> 39483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 40483d953aSJohn Baldwin #include <sys/socket.h> 41483d953aSJohn Baldwin #include <sys/stat.h> 42483d953aSJohn Baldwin #endif 43e285ef8dSPeter Grehan #include <sys/time.h> 44483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 45483d953aSJohn Baldwin #include <sys/un.h> 46483d953aSJohn Baldwin #endif 47e285ef8dSPeter Grehan 488d56c805SYuri Pankov #include <amd64/vmm/intel/vmcs.h> 490bda8d3eSCorvin Köhne #include <x86/apicreg.h> 508d56c805SYuri Pankov 511c052192SNeel Natu #include <machine/atomic.h> 52e285ef8dSPeter Grehan #include <machine/segments.h> 53e285ef8dSPeter Grehan 5400ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 5500ef17beSBartek Rutkowski #include <capsicum_helpers.h> 5600ef17beSBartek Rutkowski #endif 57e285ef8dSPeter Grehan #include <stdio.h> 58e285ef8dSPeter Grehan #include <stdlib.h> 59b5331f4dSNeel Natu #include <string.h> 60200758f1SNeel Natu #include <err.h> 6100ef17beSBartek Rutkowski #include <errno.h> 62483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 63483d953aSJohn Baldwin #include <fcntl.h> 64483d953aSJohn Baldwin #endif 65e285ef8dSPeter Grehan #include <libgen.h> 66e285ef8dSPeter Grehan #include <unistd.h> 67e285ef8dSPeter Grehan #include <assert.h> 68e285ef8dSPeter Grehan #include <pthread.h> 69e285ef8dSPeter Grehan #include <pthread_np.h> 70200758f1SNeel Natu #include <sysexits.h> 719b1aa8d6SNeel Natu #include <stdbool.h> 7201d822d3SRodney W. Grimes #include <stdint.h> 73483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 74483d953aSJohn Baldwin #include <ucl.h> 75483d953aSJohn Baldwin #include <unistd.h> 76483d953aSJohn Baldwin 77483d953aSJohn Baldwin #include <libxo/xo.h> 78483d953aSJohn Baldwin #endif 79e285ef8dSPeter Grehan 80e285ef8dSPeter Grehan #include <machine/vmm.h> 8100ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 8200ef17beSBartek Rutkowski #include <machine/vmm_dev.h> 8300ef17beSBartek Rutkowski #endif 844daa95f8SConrad Meyer #include <machine/vmm_instruction_emul.h> 85e285ef8dSPeter Grehan #include <vmmapi.h> 86e285ef8dSPeter Grehan 87e285ef8dSPeter Grehan #include "bhyverun.h" 88e285ef8dSPeter Grehan #include "acpi.h" 892cf9911fSPeter Grehan #include "atkbdc.h" 90bb30b08eSConrad Meyer #include "bootrom.h" 91621b5090SJohn Baldwin #include "config.h" 92e285ef8dSPeter Grehan #include "inout.h" 9352c39ee6SConrad Meyer #include "debug.h" 9416f23f75SCorvin Köhne #include "e820.h" 9588ac6958SPeter Grehan #include "fwctl.h" 96cd377eb3SJohn Baldwin #include "gdb.h" 973cbf3585SJohn Baldwin #include "ioapic.h" 988a68ae80SConrad Meyer #include "kernemu_dev.h" 99e285ef8dSPeter Grehan #include "mem.h" 100e285ef8dSPeter Grehan #include "mevent.h" 101e285ef8dSPeter Grehan #include "mptbl.h" 102e285ef8dSPeter Grehan #include "pci_emul.h" 103b3e9732aSJohn Baldwin #include "pci_irq.h" 104ea7f1c8cSNeel Natu #include "pci_lpc.h" 105d85147f3SCorvin Köhne #include "qemu_fwcfg.h" 106af5bfc53STycho Nightingale #include "smbiostbl.h" 107483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 108483d953aSJohn Baldwin #include "snapshot.h" 109483d953aSJohn Baldwin #endif 110e285ef8dSPeter Grehan #include "xmsr.h" 111e285ef8dSPeter Grehan #include "spinup_ap.h" 1129d6be09fSPeter Grehan #include "rtc.h" 1139cb339ccSConrad Meyer #include "vmgenc.h" 114e285ef8dSPeter Grehan 115e285ef8dSPeter Grehan #define MB (1024UL * 1024) 116e285ef8dSPeter Grehan #define GB (1024UL * MB) 117e285ef8dSPeter Grehan 1188d56c805SYuri Pankov static const char * const vmx_exit_reason_desc[] = { 1198d56c805SYuri Pankov [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", 1208d56c805SYuri Pankov [EXIT_REASON_EXT_INTR] = "External interrupt", 1218d56c805SYuri Pankov [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", 1228d56c805SYuri Pankov [EXIT_REASON_INIT] = "INIT signal", 1238d56c805SYuri Pankov [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", 1248d56c805SYuri Pankov [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", 1258d56c805SYuri Pankov [EXIT_REASON_SMI] = "Other SMI", 1268d56c805SYuri Pankov [EXIT_REASON_INTR_WINDOW] = "Interrupt window", 1278d56c805SYuri Pankov [EXIT_REASON_NMI_WINDOW] = "NMI window", 1288d56c805SYuri Pankov [EXIT_REASON_TASK_SWITCH] = "Task switch", 1298d56c805SYuri Pankov [EXIT_REASON_CPUID] = "CPUID", 1308d56c805SYuri Pankov [EXIT_REASON_GETSEC] = "GETSEC", 1318d56c805SYuri Pankov [EXIT_REASON_HLT] = "HLT", 1328d56c805SYuri Pankov [EXIT_REASON_INVD] = "INVD", 1338d56c805SYuri Pankov [EXIT_REASON_INVLPG] = "INVLPG", 1348d56c805SYuri Pankov [EXIT_REASON_RDPMC] = "RDPMC", 1358d56c805SYuri Pankov [EXIT_REASON_RDTSC] = "RDTSC", 1368d56c805SYuri Pankov [EXIT_REASON_RSM] = "RSM", 1378d56c805SYuri Pankov [EXIT_REASON_VMCALL] = "VMCALL", 1388d56c805SYuri Pankov [EXIT_REASON_VMCLEAR] = "VMCLEAR", 1398d56c805SYuri Pankov [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", 1408d56c805SYuri Pankov [EXIT_REASON_VMPTRLD] = "VMPTRLD", 1418d56c805SYuri Pankov [EXIT_REASON_VMPTRST] = "VMPTRST", 1428d56c805SYuri Pankov [EXIT_REASON_VMREAD] = "VMREAD", 1438d56c805SYuri Pankov [EXIT_REASON_VMRESUME] = "VMRESUME", 1448d56c805SYuri Pankov [EXIT_REASON_VMWRITE] = "VMWRITE", 1458d56c805SYuri Pankov [EXIT_REASON_VMXOFF] = "VMXOFF", 1468d56c805SYuri Pankov [EXIT_REASON_VMXON] = "VMXON", 1478d56c805SYuri Pankov [EXIT_REASON_CR_ACCESS] = "Control-register accesses", 1488d56c805SYuri Pankov [EXIT_REASON_DR_ACCESS] = "MOV DR", 1498d56c805SYuri Pankov [EXIT_REASON_INOUT] = "I/O instruction", 1508d56c805SYuri Pankov [EXIT_REASON_RDMSR] = "RDMSR", 1518d56c805SYuri Pankov [EXIT_REASON_WRMSR] = "WRMSR", 1528d56c805SYuri Pankov [EXIT_REASON_INVAL_VMCS] = 1538d56c805SYuri Pankov "VM-entry failure due to invalid guest state", 1548d56c805SYuri Pankov [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", 1558d56c805SYuri Pankov [EXIT_REASON_MWAIT] = "MWAIT", 1568d56c805SYuri Pankov [EXIT_REASON_MTF] = "Monitor trap flag", 1578d56c805SYuri Pankov [EXIT_REASON_MONITOR] = "MONITOR", 1588d56c805SYuri Pankov [EXIT_REASON_PAUSE] = "PAUSE", 1598d56c805SYuri Pankov [EXIT_REASON_MCE_DURING_ENTRY] = 1608d56c805SYuri Pankov "VM-entry failure due to machine-check event", 1618d56c805SYuri Pankov [EXIT_REASON_TPR] = "TPR below threshold", 1628d56c805SYuri Pankov [EXIT_REASON_APIC_ACCESS] = "APIC access", 1638d56c805SYuri Pankov [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", 1648d56c805SYuri Pankov [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", 1658d56c805SYuri Pankov [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", 1668d56c805SYuri Pankov [EXIT_REASON_EPT_FAULT] = "EPT violation", 1678d56c805SYuri Pankov [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", 1688d56c805SYuri Pankov [EXIT_REASON_INVEPT] = "INVEPT", 1698d56c805SYuri Pankov [EXIT_REASON_RDTSCP] = "RDTSCP", 1708d56c805SYuri Pankov [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", 1718d56c805SYuri Pankov [EXIT_REASON_INVVPID] = "INVVPID", 1728d56c805SYuri Pankov [EXIT_REASON_WBINVD] = "WBINVD", 1738d56c805SYuri Pankov [EXIT_REASON_XSETBV] = "XSETBV", 1748d56c805SYuri Pankov [EXIT_REASON_APIC_WRITE] = "APIC write", 1758d56c805SYuri Pankov [EXIT_REASON_RDRAND] = "RDRAND", 1768d56c805SYuri Pankov [EXIT_REASON_INVPCID] = "INVPCID", 1778d56c805SYuri Pankov [EXIT_REASON_VMFUNC] = "VMFUNC", 1788d56c805SYuri Pankov [EXIT_REASON_ENCLS] = "ENCLS", 1798d56c805SYuri Pankov [EXIT_REASON_RDSEED] = "RDSEED", 1808d56c805SYuri Pankov [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", 1818d56c805SYuri Pankov [EXIT_REASON_XSAVES] = "XSAVES", 1828d56c805SYuri Pankov [EXIT_REASON_XRSTORS] = "XRSTORS" 1838d56c805SYuri Pankov }; 1848d56c805SYuri Pankov 185e17eca32SMark Johnston typedef int (*vmexit_handler_t)(struct vmctx *, struct vcpu *, struct vm_run *); 186e285ef8dSPeter Grehan 187e285ef8dSPeter Grehan int guest_ncpus; 1883b6cb9b4SMark Johnston uint16_t cpu_cores, cpu_sockets, cpu_threads; 18901d822d3SRodney W. Grimes 190332eff95SVincenzo Maffione int raw_stdio = 0; 191332eff95SVincenzo Maffione 192e285ef8dSPeter Grehan static char *progname; 193e285ef8dSPeter Grehan static const int BSP = 0; 194e285ef8dSPeter Grehan 1950826d045SNeel Natu static cpuset_t cpumask; 196e285ef8dSPeter Grehan 1977d9ef309SJohn Baldwin static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu); 198e285ef8dSPeter Grehan 1997d9ef309SJohn Baldwin static struct vcpu_info { 2007d9ef309SJohn Baldwin struct vmctx *ctx; 2017d9ef309SJohn Baldwin struct vcpu *vcpu; 2027d9ef309SJohn Baldwin int vcpuid; 2037d9ef309SJohn Baldwin } *vcpu_info; 204e285ef8dSPeter Grehan 2057261f821SJohn Baldwin static cpuset_t **vcpumap; 2069b6155a2SNeel Natu 207e285ef8dSPeter Grehan static void 208e285ef8dSPeter Grehan usage(int code) 209e285ef8dSPeter Grehan { 210e285ef8dSPeter Grehan 211e285ef8dSPeter Grehan fprintf(stderr, 21203c3e5e4SMateusz Piotrowski "Usage: %s [-AaCDeHhPSuWwxY]\n" 21301d822d3SRodney W. Grimes " %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n" 214f656df58SMateusz Piotrowski " %*s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n" 21503c3e5e4SMateusz Piotrowski " %*s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n" 2165749449dSJohn Baldwin " -A: create ACPI tables\n" 217b6a572d0SMateusz Piotrowski " -a: local apic is in xAPIC mode (deprecated)\n" 2180dd10c00SNeel Natu " -C: include guest memory in core file\n" 219b6a572d0SMateusz Piotrowski " -c: number of CPUs and/or topology specification\n" 2200a1016f9SPawel Biernacki " -D: destroy on power-off\n" 221b5331f4dSNeel Natu " -e: exit on unhandled I/O access\n" 222b6a572d0SMateusz Piotrowski " -G: start a debug server\n" 223b6a572d0SMateusz Piotrowski " -H: vmexit from the guest on HLT\n" 224e285ef8dSPeter Grehan " -h: help\n" 225621b5090SJohn Baldwin " -k: key=value flat config file\n" 22619eaa01bSMichael Reifenberger " -K: PS2 keyboard layout\n" 227ea7f1c8cSNeel Natu " -l: LPC device configuration\n" 228afd4f7faSCorvin Köhne " -m: memory size\n" 229621b5090SJohn Baldwin " -o: set config 'var' to 'value'\n" 230621b5090SJohn Baldwin " -P: vmexit from the guest on pause\n" 231b6a572d0SMateusz Piotrowski " -p: pin 'vcpu' to 'hostcpu'\n" 232483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 233483d953aSJohn Baldwin " -r: path to checkpoint file\n" 234483d953aSJohn Baldwin #endif 2359b1aa8d6SNeel Natu " -S: guest memory cannot be swapped\n" 236b6a572d0SMateusz Piotrowski " -s: <slot,driver,configinfo> PCI slot config\n" 237b6a572d0SMateusz Piotrowski " -U: UUID\n" 238c9747678SNeel Natu " -u: RTC keeps UTC time\n" 239cde1f5b8SJohn Baldwin " -W: force virtio to use single-vector MSI\n" 240b6a572d0SMateusz Piotrowski " -w: ignore unimplemented MSRs\n" 241b6a572d0SMateusz Piotrowski " -x: local APIC is in x2APIC mode\n" 242cde1f5b8SJohn Baldwin " -Y: disable MPtable generation\n", 2434edc7f41SMarcelo Araujo progname, (int)strlen(progname), "", (int)strlen(progname), "", 2444edc7f41SMarcelo Araujo (int)strlen(progname), ""); 24594c3b3bfSPeter Grehan 246e285ef8dSPeter Grehan exit(code); 247e285ef8dSPeter Grehan } 248e285ef8dSPeter Grehan 24901d822d3SRodney W. Grimes /* 25001d822d3SRodney W. Grimes * XXX This parser is known to have the following issues: 251621b5090SJohn Baldwin * 1. It accepts null key=value tokens ",," as setting "cpus" to an 252621b5090SJohn Baldwin * empty string. 25301d822d3SRodney W. Grimes * 25401d822d3SRodney W. Grimes * The acceptance of a null specification ('-c ""') is by design to match the 25501d822d3SRodney W. Grimes * manual page syntax specification, this results in a topology of 1 vCPU. 25601d822d3SRodney W. Grimes */ 25701d822d3SRodney W. Grimes static int 25801d822d3SRodney W. Grimes topology_parse(const char *opt) 25901d822d3SRodney W. Grimes { 260ad3da829SAndy Fiddaman char *cp, *str, *tofree; 26101d822d3SRodney W. Grimes 262621b5090SJohn Baldwin if (*opt == '\0') { 263621b5090SJohn Baldwin set_config_value("sockets", "1"); 264621b5090SJohn Baldwin set_config_value("cores", "1"); 265621b5090SJohn Baldwin set_config_value("threads", "1"); 266621b5090SJohn Baldwin set_config_value("cpus", "1"); 267621b5090SJohn Baldwin return (0); 268621b5090SJohn Baldwin } 269621b5090SJohn Baldwin 270ad3da829SAndy Fiddaman tofree = str = strdup(opt); 271635a2c89SMarcelo Araujo if (str == NULL) 272621b5090SJohn Baldwin errx(4, "Failed to allocate memory"); 27301d822d3SRodney W. Grimes 27401d822d3SRodney W. Grimes while ((cp = strsep(&str, ",")) != NULL) { 275621b5090SJohn Baldwin if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) 276621b5090SJohn Baldwin set_config_value("cpus", cp + strlen("cpus=")); 277621b5090SJohn Baldwin else if (strncmp(cp, "sockets=", strlen("sockets=")) == 0) 278621b5090SJohn Baldwin set_config_value("sockets", cp + strlen("sockets=")); 279621b5090SJohn Baldwin else if (strncmp(cp, "cores=", strlen("cores=")) == 0) 280621b5090SJohn Baldwin set_config_value("cores", cp + strlen("cores=")); 281621b5090SJohn Baldwin else if (strncmp(cp, "threads=", strlen("threads=")) == 0) 282621b5090SJohn Baldwin set_config_value("threads", cp + strlen("threads=")); 283621b5090SJohn Baldwin else if (strchr(cp, '=') != NULL) 284621b5090SJohn Baldwin goto out; 28501d822d3SRodney W. Grimes else 286621b5090SJohn Baldwin set_config_value("cpus", cp); 28701d822d3SRodney W. Grimes } 288ad3da829SAndy Fiddaman free(tofree); 28901d822d3SRodney W. Grimes return (0); 290ea089f8cSMarcelo Araujo 291ea089f8cSMarcelo Araujo out: 292ad3da829SAndy Fiddaman free(tofree); 293ea089f8cSMarcelo Araujo return (-1); 29401d822d3SRodney W. Grimes } 29501d822d3SRodney W. Grimes 2969b6155a2SNeel Natu static int 297621b5090SJohn Baldwin parse_int_value(const char *key, const char *value, int minval, int maxval) 298621b5090SJohn Baldwin { 299621b5090SJohn Baldwin char *cp; 300621b5090SJohn Baldwin long lval; 301621b5090SJohn Baldwin 302621b5090SJohn Baldwin errno = 0; 303621b5090SJohn Baldwin lval = strtol(value, &cp, 0); 304621b5090SJohn Baldwin if (errno != 0 || *cp != '\0' || cp == value || lval < minval || 305621b5090SJohn Baldwin lval > maxval) 306621b5090SJohn Baldwin errx(4, "Invalid value for %s: '%s'", key, value); 307621b5090SJohn Baldwin return (lval); 308621b5090SJohn Baldwin } 309621b5090SJohn Baldwin 310621b5090SJohn Baldwin /* 311621b5090SJohn Baldwin * Set the sockets, cores, threads, and guest_cpus variables based on 312621b5090SJohn Baldwin * the configured topology. 313621b5090SJohn Baldwin * 314621b5090SJohn Baldwin * The limits of UINT16_MAX are due to the types passed to 315621b5090SJohn Baldwin * vm_set_topology(). vmm.ko may enforce tighter limits. 316621b5090SJohn Baldwin */ 317621b5090SJohn Baldwin static void 318e008f5beSMark Johnston calc_topology(void) 319621b5090SJohn Baldwin { 320621b5090SJohn Baldwin const char *value; 321621b5090SJohn Baldwin bool explicit_cpus; 322621b5090SJohn Baldwin uint64_t ncpus; 323621b5090SJohn Baldwin 324621b5090SJohn Baldwin value = get_config_value("cpus"); 325621b5090SJohn Baldwin if (value != NULL) { 326621b5090SJohn Baldwin guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX); 327621b5090SJohn Baldwin explicit_cpus = true; 328621b5090SJohn Baldwin } else { 329621b5090SJohn Baldwin guest_ncpus = 1; 330621b5090SJohn Baldwin explicit_cpus = false; 331621b5090SJohn Baldwin } 332621b5090SJohn Baldwin value = get_config_value("cores"); 333621b5090SJohn Baldwin if (value != NULL) 3343b6cb9b4SMark Johnston cpu_cores = parse_int_value("cores", value, 1, UINT16_MAX); 335621b5090SJohn Baldwin else 3363b6cb9b4SMark Johnston cpu_cores = 1; 337621b5090SJohn Baldwin value = get_config_value("threads"); 338621b5090SJohn Baldwin if (value != NULL) 3393b6cb9b4SMark Johnston cpu_threads = parse_int_value("threads", value, 1, UINT16_MAX); 340621b5090SJohn Baldwin else 3413b6cb9b4SMark Johnston cpu_threads = 1; 342621b5090SJohn Baldwin value = get_config_value("sockets"); 343621b5090SJohn Baldwin if (value != NULL) 3443b6cb9b4SMark Johnston cpu_sockets = parse_int_value("sockets", value, 1, UINT16_MAX); 345621b5090SJohn Baldwin else 3463b6cb9b4SMark Johnston cpu_sockets = guest_ncpus; 347621b5090SJohn Baldwin 348621b5090SJohn Baldwin /* 349621b5090SJohn Baldwin * Compute sockets * cores * threads avoiding overflow. The 350621b5090SJohn Baldwin * range check above insures these are 16 bit values. 351621b5090SJohn Baldwin */ 3523b6cb9b4SMark Johnston ncpus = (uint64_t)cpu_sockets * cpu_cores * cpu_threads; 353621b5090SJohn Baldwin if (ncpus > UINT16_MAX) 354621b5090SJohn Baldwin errx(4, "Computed number of vCPUs too high: %ju", 355621b5090SJohn Baldwin (uintmax_t)ncpus); 356621b5090SJohn Baldwin 357621b5090SJohn Baldwin if (explicit_cpus) { 358ed721684SMark Johnston if (guest_ncpus != (int)ncpus) 359621b5090SJohn Baldwin errx(4, "Topology (%d sockets, %d cores, %d threads) " 3603b6cb9b4SMark Johnston "does not match %d vCPUs", 3613b6cb9b4SMark Johnston cpu_sockets, cpu_cores, cpu_threads, 362621b5090SJohn Baldwin guest_ncpus); 363621b5090SJohn Baldwin } else 364621b5090SJohn Baldwin guest_ncpus = ncpus; 365621b5090SJohn Baldwin } 366621b5090SJohn Baldwin 367621b5090SJohn Baldwin static int 3689b6155a2SNeel Natu pincpu_parse(const char *opt) 3699b6155a2SNeel Natu { 370621b5090SJohn Baldwin const char *value; 371621b5090SJohn Baldwin char *newval; 372621b5090SJohn Baldwin char key[16]; 3739b6155a2SNeel Natu int vcpu, pcpu; 3749b6155a2SNeel Natu 3759b6155a2SNeel Natu if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { 3769b6155a2SNeel Natu fprintf(stderr, "invalid format: %s\n", opt); 3779b6155a2SNeel Natu return (-1); 3789b6155a2SNeel Natu } 3799b6155a2SNeel Natu 380fd6f9294SJohn Baldwin if (vcpu < 0) { 381fd6f9294SJohn Baldwin fprintf(stderr, "invalid vcpu '%d'\n", vcpu); 3829b6155a2SNeel Natu return (-1); 3839b6155a2SNeel Natu } 3849b6155a2SNeel Natu 3859b6155a2SNeel Natu if (pcpu < 0 || pcpu >= CPU_SETSIZE) { 3869b6155a2SNeel Natu fprintf(stderr, "hostcpu '%d' outside valid range from " 3879b6155a2SNeel Natu "0 to %d\n", pcpu, CPU_SETSIZE - 1); 3889b6155a2SNeel Natu return (-1); 3899b6155a2SNeel Natu } 3909b6155a2SNeel Natu 391621b5090SJohn Baldwin snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 392621b5090SJohn Baldwin value = get_config_value(key); 393621b5090SJohn Baldwin 394621b5090SJohn Baldwin if (asprintf(&newval, "%s%s%d", value != NULL ? value : "", 395621b5090SJohn Baldwin value != NULL ? "," : "", pcpu) == -1) { 396621b5090SJohn Baldwin perror("failed to build new cpuset string"); 3979b6155a2SNeel Natu return (-1); 3989b6155a2SNeel Natu } 399621b5090SJohn Baldwin 400621b5090SJohn Baldwin set_config_value(key, newval); 401621b5090SJohn Baldwin free(newval); 402621b5090SJohn Baldwin return (0); 403621b5090SJohn Baldwin } 404621b5090SJohn Baldwin 405621b5090SJohn Baldwin static void 406621b5090SJohn Baldwin parse_cpuset(int vcpu, const char *list, cpuset_t *set) 407621b5090SJohn Baldwin { 408621b5090SJohn Baldwin char *cp, *token; 409621b5090SJohn Baldwin int pcpu, start; 410621b5090SJohn Baldwin 411621b5090SJohn Baldwin CPU_ZERO(set); 412621b5090SJohn Baldwin start = -1; 413621b5090SJohn Baldwin token = __DECONST(char *, list); 414621b5090SJohn Baldwin for (;;) { 415621b5090SJohn Baldwin pcpu = strtoul(token, &cp, 0); 416621b5090SJohn Baldwin if (cp == token) 417621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 418621b5090SJohn Baldwin if (pcpu < 0 || pcpu >= CPU_SETSIZE) 419621b5090SJohn Baldwin errx(4, "hostcpu '%d' outside valid range from 0 to %d", 420621b5090SJohn Baldwin pcpu, CPU_SETSIZE - 1); 421621b5090SJohn Baldwin switch (*cp) { 422621b5090SJohn Baldwin case ',': 423621b5090SJohn Baldwin case '\0': 424621b5090SJohn Baldwin if (start >= 0) { 425621b5090SJohn Baldwin if (start > pcpu) 426621b5090SJohn Baldwin errx(4, "Invalid hostcpu range %d-%d", 427621b5090SJohn Baldwin start, pcpu); 428621b5090SJohn Baldwin while (start < pcpu) { 42984874437SJohn Baldwin CPU_SET(start, set); 430621b5090SJohn Baldwin start++; 431621b5090SJohn Baldwin } 432621b5090SJohn Baldwin start = -1; 4339b6155a2SNeel Natu } 43484874437SJohn Baldwin CPU_SET(pcpu, set); 435621b5090SJohn Baldwin break; 436621b5090SJohn Baldwin case '-': 437621b5090SJohn Baldwin if (start >= 0) 438621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", 439621b5090SJohn Baldwin vcpu, list); 440621b5090SJohn Baldwin start = pcpu; 441621b5090SJohn Baldwin break; 442621b5090SJohn Baldwin default: 443621b5090SJohn Baldwin errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list); 444621b5090SJohn Baldwin } 445621b5090SJohn Baldwin if (*cp == '\0') 446621b5090SJohn Baldwin break; 447621b5090SJohn Baldwin token = cp + 1; 448621b5090SJohn Baldwin } 449621b5090SJohn Baldwin } 450621b5090SJohn Baldwin 451621b5090SJohn Baldwin static void 452621b5090SJohn Baldwin build_vcpumaps(void) 453621b5090SJohn Baldwin { 454621b5090SJohn Baldwin char key[16]; 455621b5090SJohn Baldwin const char *value; 456621b5090SJohn Baldwin int vcpu; 457621b5090SJohn Baldwin 4587261f821SJohn Baldwin vcpumap = calloc(guest_ncpus, sizeof(*vcpumap)); 459621b5090SJohn Baldwin for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { 460621b5090SJohn Baldwin snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu); 461621b5090SJohn Baldwin value = get_config_value(key); 462621b5090SJohn Baldwin if (value == NULL) 463621b5090SJohn Baldwin continue; 464621b5090SJohn Baldwin vcpumap[vcpu] = malloc(sizeof(cpuset_t)); 465621b5090SJohn Baldwin if (vcpumap[vcpu] == NULL) 466621b5090SJohn Baldwin err(4, "Failed to allocate cpuset for vcpu %d", vcpu); 467621b5090SJohn Baldwin parse_cpuset(vcpu, value, vcpumap[vcpu]); 468621b5090SJohn Baldwin } 4699b6155a2SNeel Natu } 4709b6155a2SNeel Natu 471d37f2adbSNeel Natu void 4727d9ef309SJohn Baldwin vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid, 473d37f2adbSNeel Natu int errcode) 474d37f2adbSNeel Natu { 475d087a399SNeel Natu int error, restart_instruction; 476d37f2adbSNeel Natu 477d087a399SNeel Natu restart_instruction = 1; 478d37f2adbSNeel Natu 4797d9ef309SJohn Baldwin error = vm_inject_exception(vcpu, vector, errcode_valid, errcode, 480d087a399SNeel Natu restart_instruction); 481d087a399SNeel Natu assert(error == 0); 482d37f2adbSNeel Natu } 483d37f2adbSNeel Natu 484e285ef8dSPeter Grehan void * 485b060ba50SNeel Natu paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) 486e285ef8dSPeter Grehan { 487e285ef8dSPeter Grehan 488b060ba50SNeel Natu return (vm_map_gpa(ctx, gaddr, len)); 489e285ef8dSPeter Grehan } 490e285ef8dSPeter Grehan 491483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 492483d953aSJohn Baldwin uintptr_t 493483d953aSJohn Baldwin paddr_host2guest(struct vmctx *ctx, void *addr) 494483d953aSJohn Baldwin { 495483d953aSJohn Baldwin return (vm_rev_map_gpa(ctx, addr)); 496483d953aSJohn Baldwin } 497483d953aSJohn Baldwin #endif 498483d953aSJohn Baldwin 499e285ef8dSPeter Grehan int 500062b878fSPeter Grehan fbsdrun_virtio_msix(void) 501062b878fSPeter Grehan { 502062b878fSPeter Grehan 503621b5090SJohn Baldwin return (get_config_bool_default("virtio_msix", true)); 504062b878fSPeter Grehan } 505062b878fSPeter Grehan 506e285ef8dSPeter Grehan static void * 507e285ef8dSPeter Grehan fbsdrun_start_thread(void *param) 508e285ef8dSPeter Grehan { 509e285ef8dSPeter Grehan char tname[MAXCOMLEN + 1]; 5107d9ef309SJohn Baldwin struct vcpu_info *vi = param; 5117d9ef309SJohn Baldwin int error; 512e285ef8dSPeter Grehan 5137d9ef309SJohn Baldwin snprintf(tname, sizeof(tname), "vcpu %d", vi->vcpuid); 5147d9ef309SJohn Baldwin pthread_set_name_np(pthread_self(), tname); 515e285ef8dSPeter Grehan 5167d9ef309SJohn Baldwin if (vcpumap[vi->vcpuid] != NULL) { 5177d9ef309SJohn Baldwin error = pthread_setaffinity_np(pthread_self(), 5187d9ef309SJohn Baldwin sizeof(cpuset_t), vcpumap[vi->vcpuid]); 5197224a96aSJohn Baldwin assert(error == 0); 5207224a96aSJohn Baldwin } 5217224a96aSJohn Baldwin 522483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 5237d9ef309SJohn Baldwin checkpoint_cpu_add(vi->vcpuid); 524483d953aSJohn Baldwin #endif 5257d9ef309SJohn Baldwin gdb_cpu_add(vi->vcpu); 526cd377eb3SJohn Baldwin 5277d9ef309SJohn Baldwin vm_loop(vi->ctx, vi->vcpu); 528e285ef8dSPeter Grehan 529e285ef8dSPeter Grehan /* not reached */ 530e285ef8dSPeter Grehan exit(1); 531e285ef8dSPeter Grehan return (NULL); 532e285ef8dSPeter Grehan } 533e285ef8dSPeter Grehan 5349cc9abf4SCorvin Köhne static void 5357d9ef309SJohn Baldwin fbsdrun_addcpu(struct vcpu_info *vi) 536e285ef8dSPeter Grehan { 5377d9ef309SJohn Baldwin pthread_t thr; 538e285ef8dSPeter Grehan int error; 539e285ef8dSPeter Grehan 5407d9ef309SJohn Baldwin error = vm_activate_cpu(vi->vcpu); 54168dd37f7SEnji Cooper if (error != 0) 5427d9ef309SJohn Baldwin err(EX_OSERR, "could not activate CPU %d", vi->vcpuid); 54395ebc360SNeel Natu 5447d9ef309SJohn Baldwin CPU_SET_ATOMIC(vi->vcpuid, &cpumask); 545e285ef8dSPeter Grehan 5467d9ef309SJohn Baldwin vm_suspend_cpu(vi->vcpu); 5479cc9abf4SCorvin Köhne 5487d9ef309SJohn Baldwin error = pthread_create(&thr, NULL, fbsdrun_start_thread, vi); 549e285ef8dSPeter Grehan assert(error == 0); 550e285ef8dSPeter Grehan } 551e285ef8dSPeter Grehan 552e285ef8dSPeter Grehan static int 55365b8109bSMark Johnston fbsdrun_deletecpu(int vcpu) 5541c052192SNeel Natu { 5551c052192SNeel Natu 5560826d045SNeel Natu if (!CPU_ISSET(vcpu, &cpumask)) { 55706782425SNeel Natu fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); 558989e062bSMarcelo Araujo exit(4); 5591c052192SNeel Natu } 5601c052192SNeel Natu 5610826d045SNeel Natu CPU_CLR_ATOMIC(vcpu, &cpumask); 5620826d045SNeel Natu return (CPU_EMPTY(&cpumask)); 5631c052192SNeel Natu } 5641c052192SNeel Natu 5651c052192SNeel Natu static int 566e17eca32SMark Johnston vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) 567e285ef8dSPeter Grehan { 568e17eca32SMark Johnston struct vm_exit *vme; 569e285ef8dSPeter Grehan int error; 570e1390215SMark Johnston int bytes, port, in; 571e285ef8dSPeter Grehan 572e17eca32SMark Johnston vme = vmrun->vm_exit; 573e285ef8dSPeter Grehan port = vme->u.inout.port; 574e285ef8dSPeter Grehan bytes = vme->u.inout.bytes; 575e285ef8dSPeter Grehan in = vme->u.inout.in; 576e285ef8dSPeter Grehan 577621b5090SJohn Baldwin error = emulate_inout(ctx, vcpu, vme); 578afd5e8baSNeel Natu if (error) { 5793b65fbe4STycho Nightingale fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", 5803b65fbe4STycho Nightingale in ? "in" : "out", 5813b65fbe4STycho Nightingale bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), 582a20c00c6SJohn Baldwin port, vme->rip); 583ee2dbd02SNeel Natu return (VMEXIT_ABORT); 584afd5e8baSNeel Natu } else { 585afd5e8baSNeel Natu return (VMEXIT_CONTINUE); 586e285ef8dSPeter Grehan } 587e285ef8dSPeter Grehan } 588e285ef8dSPeter Grehan 589e285ef8dSPeter Grehan static int 590e17eca32SMark Johnston vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, 591e17eca32SMark Johnston struct vm_run *vmrun) 592e285ef8dSPeter Grehan { 593e17eca32SMark Johnston struct vm_exit *vme; 594851d84f1SNeel Natu uint64_t val; 595851d84f1SNeel Natu uint32_t eax, edx; 596851d84f1SNeel Natu int error; 597851d84f1SNeel Natu 598e17eca32SMark Johnston vme = vmrun->vm_exit; 599e17eca32SMark Johnston 600851d84f1SNeel Natu val = 0; 6017d9ef309SJohn Baldwin error = emulate_rdmsr(vcpu, vme->u.msr.code, &val); 602851d84f1SNeel Natu if (error != 0) { 603851d84f1SNeel Natu fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", 6047d9ef309SJohn Baldwin vme->u.msr.code, vcpu_id(vcpu)); 605621b5090SJohn Baldwin if (get_config_bool("x86.strictmsr")) { 6067d9ef309SJohn Baldwin vm_inject_gp(vcpu); 607d087a399SNeel Natu return (VMEXIT_CONTINUE); 608dc506506SNeel Natu } 609e285ef8dSPeter Grehan } 610e285ef8dSPeter Grehan 611851d84f1SNeel Natu eax = val; 6127d9ef309SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax); 613851d84f1SNeel Natu assert(error == 0); 614851d84f1SNeel Natu 615851d84f1SNeel Natu edx = val >> 32; 6167d9ef309SJohn Baldwin error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx); 617851d84f1SNeel Natu assert(error == 0); 618851d84f1SNeel Natu 619851d84f1SNeel Natu return (VMEXIT_CONTINUE); 620851d84f1SNeel Natu } 621851d84f1SNeel Natu 622e285ef8dSPeter Grehan static int 623e17eca32SMark Johnston vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu, 624e17eca32SMark Johnston struct vm_run *vmrun) 625e285ef8dSPeter Grehan { 626e17eca32SMark Johnston struct vm_exit *vme; 627851d84f1SNeel Natu int error; 628e285ef8dSPeter Grehan 629e17eca32SMark Johnston vme = vmrun->vm_exit; 630e17eca32SMark Johnston 6317d9ef309SJohn Baldwin error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval); 632851d84f1SNeel Natu if (error != 0) { 633851d84f1SNeel Natu fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", 6347d9ef309SJohn Baldwin vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu)); 635621b5090SJohn Baldwin if (get_config_bool("x86.strictmsr")) { 6367d9ef309SJohn Baldwin vm_inject_gp(vcpu); 637d087a399SNeel Natu return (VMEXIT_CONTINUE); 638dc506506SNeel Natu } 639851d84f1SNeel Natu } 640851d84f1SNeel Natu return (VMEXIT_CONTINUE); 641e285ef8dSPeter Grehan } 642e285ef8dSPeter Grehan 64364fe7235SNeel Natu #define DEBUG_EPT_MISCONFIG 64464fe7235SNeel Natu #ifdef DEBUG_EPT_MISCONFIG 64564fe7235SNeel Natu #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 64664fe7235SNeel Natu 64764fe7235SNeel Natu static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; 64864fe7235SNeel Natu static int ept_misconfig_ptenum; 64964fe7235SNeel Natu #endif 65064fe7235SNeel Natu 6518d56c805SYuri Pankov static const char * 6528d56c805SYuri Pankov vmexit_vmx_desc(uint32_t exit_reason) 6538d56c805SYuri Pankov { 6548d56c805SYuri Pankov 6558d56c805SYuri Pankov if (exit_reason >= nitems(vmx_exit_reason_desc) || 6568d56c805SYuri Pankov vmx_exit_reason_desc[exit_reason] == NULL) 6578d56c805SYuri Pankov return ("Unknown"); 6588d56c805SYuri Pankov return (vmx_exit_reason_desc[exit_reason]); 6598d56c805SYuri Pankov } 6608d56c805SYuri Pankov 661e285ef8dSPeter Grehan static int 662e17eca32SMark Johnston vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) 663e285ef8dSPeter Grehan { 664e17eca32SMark Johnston struct vm_exit *vme; 665e17eca32SMark Johnston 666e17eca32SMark Johnston vme = vmrun->vm_exit; 667e285ef8dSPeter Grehan 6687d9ef309SJohn Baldwin fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); 669e285ef8dSPeter Grehan fprintf(stderr, "\treason\t\tVMX\n"); 67065b8109bSMark Johnston fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); 67165b8109bSMark Johnston fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); 67265b8109bSMark Johnston fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status); 67365b8109bSMark Johnston fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason, 67465b8109bSMark Johnston vmexit_vmx_desc(vme->u.vmx.exit_reason)); 675e285ef8dSPeter Grehan fprintf(stderr, "\tqualification\t0x%016lx\n", 67665b8109bSMark Johnston vme->u.vmx.exit_qualification); 67765b8109bSMark Johnston fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type); 67865b8109bSMark Johnston fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error); 67964fe7235SNeel Natu #ifdef DEBUG_EPT_MISCONFIG 68065b8109bSMark Johnston if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { 6817d9ef309SJohn Baldwin vm_get_register(vcpu, 68264fe7235SNeel Natu VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), 68364fe7235SNeel Natu &ept_misconfig_gpa); 68464fe7235SNeel Natu vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, 68564fe7235SNeel Natu &ept_misconfig_ptenum); 68664fe7235SNeel Natu fprintf(stderr, "\tEPT misconfiguration:\n"); 68764fe7235SNeel Natu fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); 68864fe7235SNeel Natu fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", 68964fe7235SNeel Natu ept_misconfig_ptenum, ept_misconfig_pte[0], 69064fe7235SNeel Natu ept_misconfig_pte[1], ept_misconfig_pte[2], 69164fe7235SNeel Natu ept_misconfig_pte[3]); 69264fe7235SNeel Natu } 69364fe7235SNeel Natu #endif /* DEBUG_EPT_MISCONFIG */ 694e285ef8dSPeter Grehan return (VMEXIT_ABORT); 695e285ef8dSPeter Grehan } 696e285ef8dSPeter Grehan 697e285ef8dSPeter Grehan static int 698e17eca32SMark Johnston vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun) 699bbadcde4SNeel Natu { 700e17eca32SMark Johnston struct vm_exit *vme; 701e17eca32SMark Johnston 702e17eca32SMark Johnston vme = vmrun->vm_exit; 703bbadcde4SNeel Natu 7047d9ef309SJohn Baldwin fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu)); 705bbadcde4SNeel Natu fprintf(stderr, "\treason\t\tSVM\n"); 70665b8109bSMark Johnston fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip); 70765b8109bSMark Johnston fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length); 70865b8109bSMark Johnston fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode); 70965b8109bSMark Johnston fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1); 71065b8109bSMark Johnston fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2); 711bbadcde4SNeel Natu return (VMEXIT_ABORT); 712bbadcde4SNeel Natu } 713bbadcde4SNeel Natu 714bbadcde4SNeel Natu static int 7157d9ef309SJohn Baldwin vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, 716e17eca32SMark Johnston struct vm_run *vmrun) 717e285ef8dSPeter Grehan { 718e17eca32SMark Johnston assert(vmrun->vm_exit->inst_length == 0); 719d087a399SNeel Natu 720d087a399SNeel Natu return (VMEXIT_CONTINUE); 721e285ef8dSPeter Grehan } 722e285ef8dSPeter Grehan 723e285ef8dSPeter Grehan static int 7247d9ef309SJohn Baldwin vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, 725e17eca32SMark Johnston struct vm_run *vmrun) 726248e6799SNeel Natu { 727e17eca32SMark Johnston assert(vmrun->vm_exit->inst_length == 0); 728248e6799SNeel Natu 729248e6799SNeel Natu return (VMEXIT_CONTINUE); 730248e6799SNeel Natu } 731248e6799SNeel Natu 732248e6799SNeel Natu static int 7337d9ef309SJohn Baldwin vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, 734e17eca32SMark Johnston struct vm_run *vmrun __unused) 735e285ef8dSPeter Grehan { 736e285ef8dSPeter Grehan /* 737e285ef8dSPeter Grehan * Just continue execution with the next instruction. We use 738e285ef8dSPeter Grehan * the HLT VM exit as a way to be friendly with the host 739e285ef8dSPeter Grehan * scheduler. 740e285ef8dSPeter Grehan */ 741e285ef8dSPeter Grehan return (VMEXIT_CONTINUE); 742e285ef8dSPeter Grehan } 743e285ef8dSPeter Grehan 744e285ef8dSPeter Grehan static int 7457d9ef309SJohn Baldwin vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, 746e17eca32SMark Johnston struct vm_run *vmrun __unused) 747e285ef8dSPeter Grehan { 748e285ef8dSPeter Grehan return (VMEXIT_CONTINUE); 749e285ef8dSPeter Grehan } 750e285ef8dSPeter Grehan 751e285ef8dSPeter Grehan static int 7527d9ef309SJohn Baldwin vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu, 753e17eca32SMark Johnston struct vm_run *vmrun) 754e285ef8dSPeter Grehan { 755e17eca32SMark Johnston assert(vmrun->vm_exit->inst_length == 0); 756d087a399SNeel Natu 757483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 7587d9ef309SJohn Baldwin checkpoint_cpu_suspend(vcpu_id(vcpu)); 759483d953aSJohn Baldwin #endif 7607d9ef309SJohn Baldwin gdb_cpu_mtrap(vcpu); 761483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 7627d9ef309SJohn Baldwin checkpoint_cpu_resume(vcpu_id(vcpu)); 763483d953aSJohn Baldwin #endif 764483d953aSJohn Baldwin 765d087a399SNeel Natu return (VMEXIT_CONTINUE); 766e285ef8dSPeter Grehan } 767e285ef8dSPeter Grehan 768e285ef8dSPeter Grehan static int 7697d9ef309SJohn Baldwin vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu, 770e17eca32SMark Johnston struct vm_run *vmrun) 771e285ef8dSPeter Grehan { 772e17eca32SMark Johnston struct vm_exit *vme; 773703e4974STycho Nightingale struct vie *vie; 774e17eca32SMark Johnston int err, i, cs_d; 7754daa95f8SConrad Meyer enum vm_cpu_mode mode; 776703e4974STycho Nightingale 777e17eca32SMark Johnston vme = vmrun->vm_exit; 778e17eca32SMark Johnston 77965b8109bSMark Johnston vie = &vme->u.inst_emul.vie; 7804daa95f8SConrad Meyer if (!vie->decoded) { 7814daa95f8SConrad Meyer /* 7824daa95f8SConrad Meyer * Attempt to decode in userspace as a fallback. This allows 7834daa95f8SConrad Meyer * updating instruction decode in bhyve without rebooting the 7844daa95f8SConrad Meyer * kernel (rapid prototyping), albeit with much slower 7854daa95f8SConrad Meyer * emulation. 7864daa95f8SConrad Meyer */ 7874daa95f8SConrad Meyer vie_restart(vie); 78865b8109bSMark Johnston mode = vme->u.inst_emul.paging.cpu_mode; 78965b8109bSMark Johnston cs_d = vme->u.inst_emul.cs_d; 790887d46efSPeter Grehan if (vmm_decode_instruction(mode, cs_d, vie) != 0) 791887d46efSPeter Grehan goto fail; 7927d9ef309SJohn Baldwin if (vm_set_register(vcpu, VM_REG_GUEST_RIP, 79365b8109bSMark Johnston vme->rip + vie->num_processed) != 0) 794887d46efSPeter Grehan goto fail; 7954daa95f8SConrad Meyer } 7964daa95f8SConrad Meyer 7977d9ef309SJohn Baldwin err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie, 7987d9ef309SJohn Baldwin &vme->u.inst_emul.paging); 799e285ef8dSPeter Grehan if (err) { 800703e4974STycho Nightingale if (err == ESRCH) { 80152c39ee6SConrad Meyer EPRINTLN("Unhandled memory access to 0x%lx\n", 80265b8109bSMark Johnston vme->u.inst_emul.gpa); 803e285ef8dSPeter Grehan } 804887d46efSPeter Grehan goto fail; 805887d46efSPeter Grehan } 806e285ef8dSPeter Grehan 807887d46efSPeter Grehan return (VMEXIT_CONTINUE); 808887d46efSPeter Grehan 809887d46efSPeter Grehan fail: 81052c39ee6SConrad Meyer fprintf(stderr, "Failed to emulate instruction sequence [ "); 81152c39ee6SConrad Meyer for (i = 0; i < vie->num_valid; i++) 81252c39ee6SConrad Meyer fprintf(stderr, "%02x", vie->inst[i]); 81365b8109bSMark Johnston FPRINTLN(stderr, " ] at 0x%lx", vme->rip); 814e285ef8dSPeter Grehan return (VMEXIT_ABORT); 815e285ef8dSPeter Grehan } 816e285ef8dSPeter Grehan 817b15a09c0SNeel Natu static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; 818b15a09c0SNeel Natu static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; 819b15a09c0SNeel Natu 820b15a09c0SNeel Natu static int 821e17eca32SMark Johnston vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun) 822b15a09c0SNeel Natu { 823e17eca32SMark Johnston struct vm_exit *vme; 824f0fdcfe2SNeel Natu enum vm_suspend_how how; 8257d9ef309SJohn Baldwin int vcpuid = vcpu_id(vcpu); 826b15a09c0SNeel Natu 827e17eca32SMark Johnston vme = vmrun->vm_exit; 828e17eca32SMark Johnston 82965b8109bSMark Johnston how = vme->u.suspended.how; 830b15a09c0SNeel Natu 8317d9ef309SJohn Baldwin fbsdrun_deletecpu(vcpuid); 832b15a09c0SNeel Natu 8337d9ef309SJohn Baldwin if (vcpuid != BSP) { 834b15a09c0SNeel Natu pthread_mutex_lock(&resetcpu_mtx); 835b15a09c0SNeel Natu pthread_cond_signal(&resetcpu_cond); 836b15a09c0SNeel Natu pthread_mutex_unlock(&resetcpu_mtx); 837b15a09c0SNeel Natu pthread_exit(NULL); 838b15a09c0SNeel Natu } 839b15a09c0SNeel Natu 840b15a09c0SNeel Natu pthread_mutex_lock(&resetcpu_mtx); 841b15a09c0SNeel Natu while (!CPU_EMPTY(&cpumask)) { 842b15a09c0SNeel Natu pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); 843b15a09c0SNeel Natu } 844b15a09c0SNeel Natu pthread_mutex_unlock(&resetcpu_mtx); 845f0fdcfe2SNeel Natu 846e50ce2aaSNeel Natu switch (how) { 847e50ce2aaSNeel Natu case VM_SUSPEND_RESET: 848b15a09c0SNeel Natu exit(0); 849e50ce2aaSNeel Natu case VM_SUSPEND_POWEROFF: 850621b5090SJohn Baldwin if (get_config_bool_default("destroy_on_poweroff", false)) 8510a1016f9SPawel Biernacki vm_destroy(ctx); 852f0fdcfe2SNeel Natu exit(1); 853e50ce2aaSNeel Natu case VM_SUSPEND_HALT: 854e50ce2aaSNeel Natu exit(2); 855091d4532SNeel Natu case VM_SUSPEND_TRIPLEFAULT: 856091d4532SNeel Natu exit(3); 857e50ce2aaSNeel Natu default: 858e50ce2aaSNeel Natu fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); 859e50ce2aaSNeel Natu exit(100); 860e50ce2aaSNeel Natu } 861f0fdcfe2SNeel Natu return (0); /* NOTREACHED */ 862b15a09c0SNeel Natu } 863b15a09c0SNeel Natu 864cd377eb3SJohn Baldwin static int 8657d9ef309SJohn Baldwin vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu, 866e17eca32SMark Johnston struct vm_run *vmrun __unused) 867cd377eb3SJohn Baldwin { 868cd377eb3SJohn Baldwin 869483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 8707d9ef309SJohn Baldwin checkpoint_cpu_suspend(vcpu_id(vcpu)); 871483d953aSJohn Baldwin #endif 8727d9ef309SJohn Baldwin gdb_cpu_suspend(vcpu); 873483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 8747d9ef309SJohn Baldwin checkpoint_cpu_resume(vcpu_id(vcpu)); 875483d953aSJohn Baldwin #endif 876ef0ac973SMark Johnston /* 877ef0ac973SMark Johnston * XXX-MJ sleep for a short period to avoid chewing up the CPU in the 878ef0ac973SMark Johnston * window between activation of the vCPU thread and the STARTUP IPI. 879ef0ac973SMark Johnston */ 880ef0ac973SMark Johnston usleep(1000); 881cd377eb3SJohn Baldwin return (VMEXIT_CONTINUE); 882cd377eb3SJohn Baldwin } 883cd377eb3SJohn Baldwin 884cbd03a9dSJohn Baldwin static int 8857d9ef309SJohn Baldwin vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu, 886e17eca32SMark Johnston struct vm_run *vmrun) 887cbd03a9dSJohn Baldwin { 888e17eca32SMark Johnston gdb_cpu_breakpoint(vcpu, vmrun->vm_exit); 889cbd03a9dSJohn Baldwin return (VMEXIT_CONTINUE); 890cbd03a9dSJohn Baldwin } 891cbd03a9dSJohn Baldwin 8920bda8d3eSCorvin Köhne static int 8937d9ef309SJohn Baldwin vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused, 894e17eca32SMark Johnston struct vm_run *vmrun) 8950bda8d3eSCorvin Köhne { 896e17eca32SMark Johnston struct vm_exit *vme; 897e17eca32SMark Johnston cpuset_t *dmask; 8980bda8d3eSCorvin Köhne int error = -1; 8990bda8d3eSCorvin Köhne int i; 900e17eca32SMark Johnston 901e17eca32SMark Johnston dmask = vmrun->cpuset; 902e17eca32SMark Johnston vme = vmrun->vm_exit; 903e17eca32SMark Johnston 9044a1c23a7SMark Johnston switch (vme->u.ipi.mode) { 9050bda8d3eSCorvin Köhne case APIC_DELMODE_INIT: 906e17eca32SMark Johnston CPU_FOREACH_ISSET(i, dmask) { 9077d9ef309SJohn Baldwin error = vm_suspend_cpu(vcpu_info[i].vcpu); 9080bda8d3eSCorvin Köhne if (error) { 9090bda8d3eSCorvin Köhne warnx("%s: failed to suspend cpu %d\n", 9100bda8d3eSCorvin Köhne __func__, i); 9110bda8d3eSCorvin Köhne break; 9120bda8d3eSCorvin Köhne } 9130bda8d3eSCorvin Köhne } 9140bda8d3eSCorvin Köhne break; 9150bda8d3eSCorvin Köhne case APIC_DELMODE_STARTUP: 916e17eca32SMark Johnston CPU_FOREACH_ISSET(i, dmask) { 9177d9ef309SJohn Baldwin spinup_ap(vcpu_info[i].vcpu, 9187d9ef309SJohn Baldwin vme->u.ipi.vector << PAGE_SHIFT); 9190bda8d3eSCorvin Köhne } 9200bda8d3eSCorvin Köhne error = 0; 9210bda8d3eSCorvin Köhne break; 9220bda8d3eSCorvin Köhne default: 9230bda8d3eSCorvin Köhne break; 9240bda8d3eSCorvin Köhne } 9250bda8d3eSCorvin Köhne 9260bda8d3eSCorvin Köhne return (error); 9270bda8d3eSCorvin Köhne } 9280bda8d3eSCorvin Köhne 929e285ef8dSPeter Grehan static vmexit_handler_t handler[VM_EXITCODE_MAX] = { 930e285ef8dSPeter Grehan [VM_EXITCODE_INOUT] = vmexit_inout, 931d17b5104SNeel Natu [VM_EXITCODE_INOUT_STR] = vmexit_inout, 932e285ef8dSPeter Grehan [VM_EXITCODE_VMX] = vmexit_vmx, 933bbadcde4SNeel Natu [VM_EXITCODE_SVM] = vmexit_svm, 934e285ef8dSPeter Grehan [VM_EXITCODE_BOGUS] = vmexit_bogus, 935248e6799SNeel Natu [VM_EXITCODE_REQIDLE] = vmexit_reqidle, 936e285ef8dSPeter Grehan [VM_EXITCODE_RDMSR] = vmexit_rdmsr, 937e285ef8dSPeter Grehan [VM_EXITCODE_WRMSR] = vmexit_wrmsr, 938e285ef8dSPeter Grehan [VM_EXITCODE_MTRAP] = vmexit_mtrap, 939318224bbSNeel Natu [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, 9403d5444c8SNeel Natu [VM_EXITCODE_SUSPENDED] = vmexit_suspend, 9413d5444c8SNeel Natu [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, 942cd377eb3SJohn Baldwin [VM_EXITCODE_DEBUG] = vmexit_debug, 943cbd03a9dSJohn Baldwin [VM_EXITCODE_BPT] = vmexit_breakpoint, 9440bda8d3eSCorvin Köhne [VM_EXITCODE_IPI] = vmexit_ipi, 945e285ef8dSPeter Grehan }; 946e285ef8dSPeter Grehan 947e285ef8dSPeter Grehan static void 9487d9ef309SJohn Baldwin vm_loop(struct vmctx *ctx, struct vcpu *vcpu) 949e285ef8dSPeter Grehan { 950a20c00c6SJohn Baldwin struct vm_exit vme; 951e17eca32SMark Johnston struct vm_run vmrun; 952cc398e21SBjoern A. Zeeb int error, rc; 9538b271170SPeter Grehan enum vm_exitcode exitcode; 954e17eca32SMark Johnston cpuset_t active_cpus, dmask; 955e285ef8dSPeter Grehan 95695ebc360SNeel Natu error = vm_active_cpus(ctx, &active_cpus); 9577d9ef309SJohn Baldwin assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus)); 95895ebc360SNeel Natu 959e17eca32SMark Johnston vmrun.vm_exit = &vme; 960e17eca32SMark Johnston vmrun.cpuset = &dmask; 961e17eca32SMark Johnston vmrun.cpusetsize = sizeof(dmask); 962e17eca32SMark Johnston 963e285ef8dSPeter Grehan while (1) { 964e17eca32SMark Johnston error = vm_run(vcpu, &vmrun); 965f80330a8SNeel Natu if (error != 0) 966e285ef8dSPeter Grehan break; 967e285ef8dSPeter Grehan 968a20c00c6SJohn Baldwin exitcode = vme.exitcode; 9698b271170SPeter Grehan if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { 9708b271170SPeter Grehan fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", 9718b271170SPeter Grehan exitcode); 972989e062bSMarcelo Araujo exit(4); 9738b271170SPeter Grehan } 9748b271170SPeter Grehan 975e17eca32SMark Johnston rc = (*handler[exitcode])(ctx, vcpu, &vmrun); 9768b271170SPeter Grehan 977e285ef8dSPeter Grehan switch (rc) { 978e285ef8dSPeter Grehan case VMEXIT_CONTINUE: 979e285ef8dSPeter Grehan break; 980ee2dbd02SNeel Natu case VMEXIT_ABORT: 981ee2dbd02SNeel Natu abort(); 982e285ef8dSPeter Grehan default: 983989e062bSMarcelo Araujo exit(4); 984e285ef8dSPeter Grehan } 985e285ef8dSPeter Grehan } 986e285ef8dSPeter Grehan fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); 987e285ef8dSPeter Grehan } 988e285ef8dSPeter Grehan 9895f0677d3SNeel Natu static int 9907d9ef309SJohn Baldwin num_vcpus_allowed(struct vmctx *ctx, struct vcpu *vcpu) 9915f0677d3SNeel Natu { 992c76e4b89SJohn Baldwin uint16_t sockets, cores, threads, maxcpus; 9935f0677d3SNeel Natu int tmp, error; 9945f0677d3SNeel Natu 9955f0677d3SNeel Natu /* 9965f0677d3SNeel Natu * The guest is allowed to spinup more than one processor only if the 9975f0677d3SNeel Natu * UNRESTRICTED_GUEST capability is available. 9985f0677d3SNeel Natu */ 9997d9ef309SJohn Baldwin error = vm_get_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp); 1000c76e4b89SJohn Baldwin if (error != 0) 1001c76e4b89SJohn Baldwin return (1); 1002c76e4b89SJohn Baldwin 1003c76e4b89SJohn Baldwin error = vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus); 10045f0677d3SNeel Natu if (error == 0) 1005c76e4b89SJohn Baldwin return (maxcpus); 10065f0677d3SNeel Natu else 10075f0677d3SNeel Natu return (1); 10085f0677d3SNeel Natu } 1009e285ef8dSPeter Grehan 1010461663ddSJohn Baldwin static void 10117d9ef309SJohn Baldwin fbsdrun_set_capabilities(struct vcpu *vcpu, bool bsp) 101249cc03daSNeel Natu { 101349cc03daSNeel Natu int err, tmp; 101449cc03daSNeel Natu 1015621b5090SJohn Baldwin if (get_config_bool_default("x86.vmexit_on_hlt", false)) { 10167d9ef309SJohn Baldwin err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp); 101749cc03daSNeel Natu if (err < 0) { 101849cc03daSNeel Natu fprintf(stderr, "VM exit on HLT not supported\n"); 1019989e062bSMarcelo Araujo exit(4); 102049cc03daSNeel Natu } 10217d9ef309SJohn Baldwin vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1); 10227d9ef309SJohn Baldwin if (bsp) 102349cc03daSNeel Natu handler[VM_EXITCODE_HLT] = vmexit_hlt; 102449cc03daSNeel Natu } 102549cc03daSNeel Natu 1026621b5090SJohn Baldwin if (get_config_bool_default("x86.vmexit_on_pause", false)) { 102749cc03daSNeel Natu /* 102849cc03daSNeel Natu * pause exit support required for this mode 102949cc03daSNeel Natu */ 10307d9ef309SJohn Baldwin err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp); 103149cc03daSNeel Natu if (err < 0) { 103249cc03daSNeel Natu fprintf(stderr, 103349cc03daSNeel Natu "SMP mux requested, no pause support\n"); 1034989e062bSMarcelo Araujo exit(4); 103549cc03daSNeel Natu } 10367d9ef309SJohn Baldwin vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1); 10377d9ef309SJohn Baldwin if (bsp) 103849cc03daSNeel Natu handler[VM_EXITCODE_PAUSE] = vmexit_pause; 103949cc03daSNeel Natu } 104049cc03daSNeel Natu 1041621b5090SJohn Baldwin if (get_config_bool_default("x86.x2apic", false)) 10427d9ef309SJohn Baldwin err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED); 104352e5c8a2SNeel Natu else 10447d9ef309SJohn Baldwin err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED); 104549cc03daSNeel Natu 104649cc03daSNeel Natu if (err) { 104749cc03daSNeel Natu fprintf(stderr, "Unable to set x2apic state (%d)\n", err); 1048989e062bSMarcelo Araujo exit(4); 104949cc03daSNeel Natu } 105049cc03daSNeel Natu 10517d9ef309SJohn Baldwin vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1); 1052461663ddSJohn Baldwin 10537d9ef309SJohn Baldwin err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1); 1054461663ddSJohn Baldwin assert(err == 0); 105549cc03daSNeel Natu } 105649cc03daSNeel Natu 10579b1aa8d6SNeel Natu static struct vmctx * 10589b1aa8d6SNeel Natu do_open(const char *vmname) 10599b1aa8d6SNeel Natu { 10609b1aa8d6SNeel Natu struct vmctx *ctx; 10619b1aa8d6SNeel Natu int error; 10629b1aa8d6SNeel Natu bool reinit, romboot; 10639b1aa8d6SNeel Natu 10649b1aa8d6SNeel Natu reinit = romboot = false; 10659b1aa8d6SNeel Natu 10669b1aa8d6SNeel Natu if (lpc_bootrom()) 10679b1aa8d6SNeel Natu romboot = true; 10689b1aa8d6SNeel Natu 10699b1aa8d6SNeel Natu error = vm_create(vmname); 10709b1aa8d6SNeel Natu if (error) { 10719b1aa8d6SNeel Natu if (errno == EEXIST) { 10729b1aa8d6SNeel Natu if (romboot) { 10739b1aa8d6SNeel Natu reinit = true; 10749b1aa8d6SNeel Natu } else { 10759b1aa8d6SNeel Natu /* 10769b1aa8d6SNeel Natu * The virtual machine has been setup by the 10779b1aa8d6SNeel Natu * userspace bootloader. 10789b1aa8d6SNeel Natu */ 10799b1aa8d6SNeel Natu } 10809b1aa8d6SNeel Natu } else { 10819b1aa8d6SNeel Natu perror("vm_create"); 1082989e062bSMarcelo Araujo exit(4); 10839b1aa8d6SNeel Natu } 10849b1aa8d6SNeel Natu } else { 10859b1aa8d6SNeel Natu if (!romboot) { 10869b1aa8d6SNeel Natu /* 10879b1aa8d6SNeel Natu * If the virtual machine was just created then a 10889b1aa8d6SNeel Natu * bootrom must be configured to boot it. 10899b1aa8d6SNeel Natu */ 10909b1aa8d6SNeel Natu fprintf(stderr, "virtual machine cannot be booted\n"); 1091989e062bSMarcelo Araujo exit(4); 10929b1aa8d6SNeel Natu } 10939b1aa8d6SNeel Natu } 10949b1aa8d6SNeel Natu 10959b1aa8d6SNeel Natu ctx = vm_open(vmname); 10969b1aa8d6SNeel Natu if (ctx == NULL) { 10979b1aa8d6SNeel Natu perror("vm_open"); 1098989e062bSMarcelo Araujo exit(4); 10999b1aa8d6SNeel Natu } 11009b1aa8d6SNeel Natu 110100ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 1102fb7ce0a9SMark Johnston if (vm_limit_rights(ctx) != 0) 1103fb7ce0a9SMark Johnston err(EX_OSERR, "vm_limit_rights"); 110400ef17beSBartek Rutkowski #endif 110500ef17beSBartek Rutkowski 11069b1aa8d6SNeel Natu if (reinit) { 11079b1aa8d6SNeel Natu error = vm_reinit(ctx); 11089b1aa8d6SNeel Natu if (error) { 11099b1aa8d6SNeel Natu perror("vm_reinit"); 1110989e062bSMarcelo Araujo exit(4); 11119b1aa8d6SNeel Natu } 11129b1aa8d6SNeel Natu } 1113bb177010SJohn Baldwin error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0); 111401d822d3SRodney W. Grimes if (error) 111501d822d3SRodney W. Grimes errx(EX_OSERR, "vm_set_topology"); 11169b1aa8d6SNeel Natu return (ctx); 11179b1aa8d6SNeel Natu } 11189b1aa8d6SNeel Natu 11199cc9abf4SCorvin Köhne static void 11207d9ef309SJohn Baldwin spinup_vcpu(struct vcpu_info *vi, bool bsp) 1121483d953aSJohn Baldwin { 1122483d953aSJohn Baldwin int error; 1123483d953aSJohn Baldwin 11247d9ef309SJohn Baldwin if (!bsp) { 11257d9ef309SJohn Baldwin fbsdrun_set_capabilities(vi->vcpu, false); 1126461663ddSJohn Baldwin 1127461663ddSJohn Baldwin /* 1128461663ddSJohn Baldwin * Enable the 'unrestricted guest' mode for APs. 1129461663ddSJohn Baldwin * 1130461663ddSJohn Baldwin * APs startup in power-on 16-bit mode. 1131461663ddSJohn Baldwin */ 11327d9ef309SJohn Baldwin error = vm_set_capability(vi->vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); 1133483d953aSJohn Baldwin assert(error == 0); 1134461663ddSJohn Baldwin } 1135483d953aSJohn Baldwin 11367d9ef309SJohn Baldwin fbsdrun_addcpu(vi); 1137483d953aSJohn Baldwin } 1138483d953aSJohn Baldwin 1139621b5090SJohn Baldwin static bool 1140621b5090SJohn Baldwin parse_config_option(const char *option) 1141621b5090SJohn Baldwin { 1142621b5090SJohn Baldwin const char *value; 1143621b5090SJohn Baldwin char *path; 1144621b5090SJohn Baldwin 1145621b5090SJohn Baldwin value = strchr(option, '='); 1146621b5090SJohn Baldwin if (value == NULL || value[1] == '\0') 1147621b5090SJohn Baldwin return (false); 1148621b5090SJohn Baldwin path = strndup(option, value - option); 1149621b5090SJohn Baldwin if (path == NULL) 1150621b5090SJohn Baldwin err(4, "Failed to allocate memory"); 1151621b5090SJohn Baldwin set_config_value(path, value + 1); 1152621b5090SJohn Baldwin return (true); 1153621b5090SJohn Baldwin } 1154621b5090SJohn Baldwin 1155621b5090SJohn Baldwin static void 1156621b5090SJohn Baldwin parse_simple_config_file(const char *path) 1157621b5090SJohn Baldwin { 1158621b5090SJohn Baldwin FILE *fp; 1159621b5090SJohn Baldwin char *line, *cp; 1160621b5090SJohn Baldwin size_t linecap; 1161621b5090SJohn Baldwin unsigned int lineno; 1162621b5090SJohn Baldwin 1163621b5090SJohn Baldwin fp = fopen(path, "r"); 1164621b5090SJohn Baldwin if (fp == NULL) 1165621b5090SJohn Baldwin err(4, "Failed to open configuration file %s", path); 1166621b5090SJohn Baldwin line = NULL; 1167621b5090SJohn Baldwin linecap = 0; 1168621b5090SJohn Baldwin lineno = 1; 1169621b5090SJohn Baldwin for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) { 1170621b5090SJohn Baldwin if (*line == '#' || *line == '\n') 1171621b5090SJohn Baldwin continue; 1172621b5090SJohn Baldwin cp = strchr(line, '\n'); 1173621b5090SJohn Baldwin if (cp != NULL) 1174621b5090SJohn Baldwin *cp = '\0'; 1175621b5090SJohn Baldwin if (!parse_config_option(line)) 1176621b5090SJohn Baldwin errx(4, "%s line %u: invalid config option '%s'", path, 1177621b5090SJohn Baldwin lineno, line); 1178621b5090SJohn Baldwin } 1179621b5090SJohn Baldwin free(line); 1180621b5090SJohn Baldwin fclose(fp); 1181621b5090SJohn Baldwin } 1182621b5090SJohn Baldwin 1183621b5090SJohn Baldwin static void 118465b8109bSMark Johnston parse_gdb_options(const char *opt) 11852cdff991SMariusz Zaborski { 11862cdff991SMariusz Zaborski const char *sport; 11872cdff991SMariusz Zaborski char *colon; 11882cdff991SMariusz Zaborski 118965b8109bSMark Johnston if (opt[0] == 'w') { 11902cdff991SMariusz Zaborski set_config_bool("gdb.wait", true); 119165b8109bSMark Johnston opt++; 11922cdff991SMariusz Zaborski } 11932cdff991SMariusz Zaborski 119465b8109bSMark Johnston colon = strrchr(opt, ':'); 11952cdff991SMariusz Zaborski if (colon == NULL) { 119665b8109bSMark Johnston sport = opt; 11972cdff991SMariusz Zaborski } else { 11982cdff991SMariusz Zaborski *colon = '\0'; 11992cdff991SMariusz Zaborski colon++; 12002cdff991SMariusz Zaborski sport = colon; 120165b8109bSMark Johnston set_config_value("gdb.address", opt); 12022cdff991SMariusz Zaborski } 12032cdff991SMariusz Zaborski 12042cdff991SMariusz Zaborski set_config_value("gdb.port", sport); 12052cdff991SMariusz Zaborski } 12062cdff991SMariusz Zaborski 12072cdff991SMariusz Zaborski static void 1208621b5090SJohn Baldwin set_defaults(void) 1209621b5090SJohn Baldwin { 1210621b5090SJohn Baldwin 1211621b5090SJohn Baldwin set_config_bool("acpi_tables", false); 1212621b5090SJohn Baldwin set_config_value("memory.size", "256M"); 1213621b5090SJohn Baldwin set_config_bool("x86.strictmsr", true); 1214d85147f3SCorvin Köhne set_config_value("lpc.fwcfg", "bhyve"); 1215621b5090SJohn Baldwin } 1216621b5090SJohn Baldwin 1217e285ef8dSPeter Grehan int 1218e285ef8dSPeter Grehan main(int argc, char *argv[]) 1219e285ef8dSPeter Grehan { 1220956171d5SVitaliy Gusev int c, error; 1221621b5090SJohn Baldwin int max_vcpus, memflags; 12227d9ef309SJohn Baldwin struct vcpu *bsp; 1223e285ef8dSPeter Grehan struct vmctx *ctx; 122416f23f75SCorvin Köhne struct qemu_fwcfg_item *e820_fwcfg_item; 1225e285ef8dSPeter Grehan uint64_t rip; 1226b060ba50SNeel Natu size_t memsize; 122765b8109bSMark Johnston const char *optstr, *value, *vmname; 1228483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1229483d953aSJohn Baldwin char *restore_file; 1230483d953aSJohn Baldwin struct restore_state rstate; 1231483d953aSJohn Baldwin 1232483d953aSJohn Baldwin restore_file = NULL; 1233483d953aSJohn Baldwin #endif 1234e285ef8dSPeter Grehan 1235621b5090SJohn Baldwin init_config(); 1236621b5090SJohn Baldwin set_defaults(); 1237e285ef8dSPeter Grehan progname = basename(argv[0]); 1238e285ef8dSPeter Grehan 1239483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1240ca14781cSCorvin Köhne optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:"; 1241483d953aSJohn Baldwin #else 1242ca14781cSCorvin Köhne optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:"; 1243483d953aSJohn Baldwin #endif 12449b1aa8d6SNeel Natu while ((c = getopt(argc, argv, optstr)) != -1) { 1245e285ef8dSPeter Grehan switch (c) { 1246e285ef8dSPeter Grehan case 'a': 1247621b5090SJohn Baldwin set_config_bool("x86.x2apic", false); 1248e285ef8dSPeter Grehan break; 1249e285ef8dSPeter Grehan case 'A': 1250621b5090SJohn Baldwin set_config_bool("acpi_tables", true); 1251e285ef8dSPeter Grehan break; 12520a1016f9SPawel Biernacki case 'D': 1253621b5090SJohn Baldwin set_config_bool("destroy_on_poweroff", true); 12540a1016f9SPawel Biernacki break; 1255e285ef8dSPeter Grehan case 'p': 12569b6155a2SNeel Natu if (pincpu_parse(optarg) != 0) { 12579b6155a2SNeel Natu errx(EX_USAGE, "invalid vcpu pinning " 12589b6155a2SNeel Natu "configuration '%s'", optarg); 12599b6155a2SNeel Natu } 1260e285ef8dSPeter Grehan break; 1261e285ef8dSPeter Grehan case 'c': 126201d822d3SRodney W. Grimes if (topology_parse(optarg) != 0) { 126301d822d3SRodney W. Grimes errx(EX_USAGE, "invalid cpu topology " 126401d822d3SRodney W. Grimes "'%s'", optarg); 126501d822d3SRodney W. Grimes } 1266e285ef8dSPeter Grehan break; 12670dd10c00SNeel Natu case 'C': 1268621b5090SJohn Baldwin set_config_bool("memory.guest_in_core", true); 12690dd10c00SNeel Natu break; 1270ca14781cSCorvin Köhne case 'f': 1271ca14781cSCorvin Köhne if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) { 1272ca14781cSCorvin Köhne errx(EX_USAGE, "invalid fwcfg item '%s'", optarg); 1273ca14781cSCorvin Köhne } 1274ca14781cSCorvin Köhne break; 1275cd377eb3SJohn Baldwin case 'G': 12762cdff991SMariusz Zaborski parse_gdb_options(optarg); 1277621b5090SJohn Baldwin break; 1278621b5090SJohn Baldwin case 'k': 1279621b5090SJohn Baldwin parse_simple_config_file(optarg); 1280e285ef8dSPeter Grehan break; 128119eaa01bSMichael Reifenberger case 'K': 128219eaa01bSMichael Reifenberger set_config_value("keyboard.layout", optarg); 128319eaa01bSMichael Reifenberger break; 1284ea7f1c8cSNeel Natu case 'l': 1285657d2158SMarcelo Araujo if (strncmp(optarg, "help", strlen(optarg)) == 0) { 1286657d2158SMarcelo Araujo lpc_print_supported_devices(); 1287657d2158SMarcelo Araujo exit(0); 1288657d2158SMarcelo Araujo } else if (lpc_device_parse(optarg) != 0) { 1289ea7f1c8cSNeel Natu errx(EX_USAGE, "invalid lpc device " 1290ea7f1c8cSNeel Natu "configuration '%s'", optarg); 1291ea7f1c8cSNeel Natu } 1292ea7f1c8cSNeel Natu break; 1293483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1294483d953aSJohn Baldwin case 'r': 1295483d953aSJohn Baldwin restore_file = optarg; 1296483d953aSJohn Baldwin break; 1297483d953aSJohn Baldwin #endif 1298e285ef8dSPeter Grehan case 's': 1299657d2158SMarcelo Araujo if (strncmp(optarg, "help", strlen(optarg)) == 0) { 1300657d2158SMarcelo Araujo pci_print_supported_devices(); 1301657d2158SMarcelo Araujo exit(0); 1302657d2158SMarcelo Araujo } else if (pci_parse_slot(optarg) != 0) 1303989e062bSMarcelo Araujo exit(4); 1304b05c77ffSNeel Natu else 1305e285ef8dSPeter Grehan break; 13069b1aa8d6SNeel Natu case 'S': 1307621b5090SJohn Baldwin set_config_bool("memory.wired", true); 13089b1aa8d6SNeel Natu break; 1309e285ef8dSPeter Grehan case 'm': 1310621b5090SJohn Baldwin set_config_value("memory.size", optarg); 1311621b5090SJohn Baldwin break; 1312621b5090SJohn Baldwin case 'o': 1313621b5090SJohn Baldwin if (!parse_config_option(optarg)) 1314621b5090SJohn Baldwin errx(EX_USAGE, "invalid configuration option '%s'", optarg); 1315e285ef8dSPeter Grehan break; 1316e285ef8dSPeter Grehan case 'H': 1317621b5090SJohn Baldwin set_config_bool("x86.vmexit_on_hlt", true); 1318e285ef8dSPeter Grehan break; 1319e285ef8dSPeter Grehan case 'I': 1320a1a4cbeaSNeel Natu /* 1321a1a4cbeaSNeel Natu * The "-I" option was used to add an ioapic to the 1322a1a4cbeaSNeel Natu * virtual machine. 1323a1a4cbeaSNeel Natu * 1324a1a4cbeaSNeel Natu * An ioapic is now provided unconditionally for each 1325a1a4cbeaSNeel Natu * virtual machine and this option is now deprecated. 1326a1a4cbeaSNeel Natu */ 1327e285ef8dSPeter Grehan break; 1328e285ef8dSPeter Grehan case 'P': 1329621b5090SJohn Baldwin set_config_bool("x86.vmexit_on_pause", true); 1330e285ef8dSPeter Grehan break; 1331e285ef8dSPeter Grehan case 'e': 1332621b5090SJohn Baldwin set_config_bool("x86.strictio", true); 1333e285ef8dSPeter Grehan break; 1334c9747678SNeel Natu case 'u': 1335621b5090SJohn Baldwin set_config_bool("rtc.use_localtime", false); 1336c9747678SNeel Natu break; 1337af5bfc53STycho Nightingale case 'U': 1338621b5090SJohn Baldwin set_config_value("uuid", optarg); 1339af5bfc53STycho Nightingale break; 1340851d84f1SNeel Natu case 'w': 1341621b5090SJohn Baldwin set_config_bool("x86.strictmsr", false); 1342851d84f1SNeel Natu break; 1343062b878fSPeter Grehan case 'W': 1344621b5090SJohn Baldwin set_config_bool("virtio_msix", false); 1345062b878fSPeter Grehan break; 134652e5c8a2SNeel Natu case 'x': 1347621b5090SJohn Baldwin set_config_bool("x86.x2apic", true); 134852e5c8a2SNeel Natu break; 1349b100acf2SNeel Natu case 'Y': 1350621b5090SJohn Baldwin set_config_bool("x86.mptable", false); 1351b100acf2SNeel Natu break; 1352e285ef8dSPeter Grehan case 'h': 1353e285ef8dSPeter Grehan usage(0); 1354e285ef8dSPeter Grehan default: 1355e285ef8dSPeter Grehan usage(1); 1356e285ef8dSPeter Grehan } 1357e285ef8dSPeter Grehan } 1358e285ef8dSPeter Grehan argc -= optind; 1359e285ef8dSPeter Grehan argv += optind; 1360e285ef8dSPeter Grehan 1361621b5090SJohn Baldwin if (argc > 1) 1362483d953aSJohn Baldwin usage(1); 1363483d953aSJohn Baldwin 1364621b5090SJohn Baldwin #ifdef BHYVE_SNAPSHOT 1365483d953aSJohn Baldwin if (restore_file != NULL) { 1366483d953aSJohn Baldwin error = load_restore_file(restore_file, &rstate); 1367483d953aSJohn Baldwin if (error) { 1368483d953aSJohn Baldwin fprintf(stderr, "Failed to read checkpoint info from " 1369483d953aSJohn Baldwin "file: '%s'.\n", restore_file); 1370483d953aSJohn Baldwin exit(1); 1371483d953aSJohn Baldwin } 1372483d953aSJohn Baldwin vmname = lookup_vmname(&rstate); 1373621b5090SJohn Baldwin if (vmname != NULL) 1374621b5090SJohn Baldwin set_config_value("name", vmname); 1375483d953aSJohn Baldwin } 1376621b5090SJohn Baldwin #endif 1377621b5090SJohn Baldwin 1378621b5090SJohn Baldwin if (argc == 1) 1379621b5090SJohn Baldwin set_config_value("name", argv[0]); 1380621b5090SJohn Baldwin 1381621b5090SJohn Baldwin vmname = get_config_value("name"); 1382621b5090SJohn Baldwin if (vmname == NULL) 1383e285ef8dSPeter Grehan usage(1); 1384e285ef8dSPeter Grehan 1385621b5090SJohn Baldwin if (get_config_bool_default("config.dump", false)) { 1386621b5090SJohn Baldwin dump_config(); 1387621b5090SJohn Baldwin exit(1); 1388621b5090SJohn Baldwin } 1389621b5090SJohn Baldwin 1390e008f5beSMark Johnston calc_topology(); 1391621b5090SJohn Baldwin build_vcpumaps(); 1392621b5090SJohn Baldwin 1393621b5090SJohn Baldwin value = get_config_value("memory.size"); 1394621b5090SJohn Baldwin error = vm_parse_memsize(value, &memsize); 1395621b5090SJohn Baldwin if (error) 1396621b5090SJohn Baldwin errx(EX_USAGE, "invalid memsize '%s'", value); 1397621b5090SJohn Baldwin 13989b1aa8d6SNeel Natu ctx = do_open(vmname); 1399e285ef8dSPeter Grehan 1400483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1401483d953aSJohn Baldwin if (restore_file != NULL) { 1402483d953aSJohn Baldwin guest_ncpus = lookup_guest_ncpus(&rstate); 1403483d953aSJohn Baldwin memflags = lookup_memflags(&rstate); 1404483d953aSJohn Baldwin memsize = lookup_memsize(&rstate); 1405483d953aSJohn Baldwin } 1406483d953aSJohn Baldwin 1407483d953aSJohn Baldwin if (guest_ncpus < 1) { 1408483d953aSJohn Baldwin fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); 1409483d953aSJohn Baldwin exit(1); 1410483d953aSJohn Baldwin } 1411483d953aSJohn Baldwin #endif 1412483d953aSJohn Baldwin 14137d9ef309SJohn Baldwin bsp = vm_vcpu_open(ctx, BSP); 14147d9ef309SJohn Baldwin max_vcpus = num_vcpus_allowed(ctx, bsp); 14155f0677d3SNeel Natu if (guest_ncpus > max_vcpus) { 14165f0677d3SNeel Natu fprintf(stderr, "%d vCPUs requested but only %d available\n", 14175f0677d3SNeel Natu guest_ncpus, max_vcpus); 1418989e062bSMarcelo Araujo exit(4); 14195f0677d3SNeel Natu } 14205f0677d3SNeel Natu 14217d9ef309SJohn Baldwin fbsdrun_set_capabilities(bsp, true); 14227d9ef309SJohn Baldwin 14237d9ef309SJohn Baldwin /* Allocate per-VCPU resources. */ 14247d9ef309SJohn Baldwin vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info)); 14257d9ef309SJohn Baldwin for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) { 14267d9ef309SJohn Baldwin vcpu_info[vcpuid].ctx = ctx; 14277d9ef309SJohn Baldwin vcpu_info[vcpuid].vcpuid = vcpuid; 14287d9ef309SJohn Baldwin if (vcpuid == BSP) 14297d9ef309SJohn Baldwin vcpu_info[vcpuid].vcpu = bsp; 14307d9ef309SJohn Baldwin else 14317d9ef309SJohn Baldwin vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid); 14327d9ef309SJohn Baldwin } 1433e285ef8dSPeter Grehan 1434621b5090SJohn Baldwin memflags = 0; 1435621b5090SJohn Baldwin if (get_config_bool_default("memory.wired", false)) 1436621b5090SJohn Baldwin memflags |= VM_MEM_F_WIRED; 1437621b5090SJohn Baldwin if (get_config_bool_default("memory.guest_in_core", false)) 1438621b5090SJohn Baldwin memflags |= VM_MEM_F_INCORE; 14399b1aa8d6SNeel Natu vm_set_memflags(ctx, memflags); 1440956171d5SVitaliy Gusev error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); 1441956171d5SVitaliy Gusev if (error) { 14429b1aa8d6SNeel Natu fprintf(stderr, "Unable to setup memory (%d)\n", errno); 1443989e062bSMarcelo Araujo exit(4); 1444e285ef8dSPeter Grehan } 1445e285ef8dSPeter Grehan 1446c3498942SNeel Natu error = init_msr(); 1447c3498942SNeel Natu if (error) { 1448c3498942SNeel Natu fprintf(stderr, "init_msr error %d", error); 1449989e062bSMarcelo Araujo exit(4); 1450c3498942SNeel Natu } 1451c3498942SNeel Natu 1452730510dcSJohn Baldwin init_mem(guest_ncpus); 1453e285ef8dSPeter Grehan init_inout(); 14548a68ae80SConrad Meyer kernemu_dev_init(); 1455bb30b08eSConrad Meyer init_bootrom(ctx); 14562cf9911fSPeter Grehan atkbdc_init(ctx); 1457b3e9732aSJohn Baldwin pci_irq_init(ctx); 14583cbf3585SJohn Baldwin ioapic_init(ctx); 1459a38e2a64SPeter Grehan 1460621b5090SJohn Baldwin rtc_init(ctx); 1461b3e9732aSJohn Baldwin sci_init(ctx); 14629d6be09fSPeter Grehan 1463d85147f3SCorvin Köhne if (qemu_fwcfg_init(ctx) != 0) { 1464d85147f3SCorvin Köhne fprintf(stderr, "qemu fwcfg initialization error"); 1465d85147f3SCorvin Köhne exit(4); 1466d85147f3SCorvin Köhne } 1467d85147f3SCorvin Köhne 1468d85147f3SCorvin Köhne if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus), 1469d85147f3SCorvin Köhne &guest_ncpus) != 0) { 1470d85147f3SCorvin Köhne fprintf(stderr, "Could not add qemu fwcfg opt/bhyve/hw.ncpu"); 1471d85147f3SCorvin Köhne exit(4); 1472d85147f3SCorvin Köhne } 1473d85147f3SCorvin Köhne 147416f23f75SCorvin Köhne if (e820_init(ctx) != 0) { 147516f23f75SCorvin Köhne fprintf(stderr, "Unable to setup E820"); 147616f23f75SCorvin Köhne exit(4); 147716f23f75SCorvin Köhne } 147816f23f75SCorvin Köhne 1479a38e2a64SPeter Grehan /* 14800dc159ceSElyes Haouas * Exit if a device emulation finds an error in its initialization 1481a38e2a64SPeter Grehan */ 1482989e062bSMarcelo Araujo if (init_pci(ctx) != 0) { 1483989e062bSMarcelo Araujo perror("device emulation initialization error"); 1484989e062bSMarcelo Araujo exit(4); 1485989e062bSMarcelo Araujo } 1486a38e2a64SPeter Grehan 14879cb339ccSConrad Meyer /* 14889cb339ccSConrad Meyer * Initialize after PCI, to allow a bootrom file to reserve the high 14899cb339ccSConrad Meyer * region. 14909cb339ccSConrad Meyer */ 1491621b5090SJohn Baldwin if (get_config_bool("acpi_tables")) 14929cb339ccSConrad Meyer vmgenc_init(ctx); 14939cb339ccSConrad Meyer 14942cdff991SMariusz Zaborski init_gdb(ctx); 1495e285ef8dSPeter Grehan 14969b1aa8d6SNeel Natu if (lpc_bootrom()) { 14977d9ef309SJohn Baldwin if (vm_set_capability(bsp, VM_CAP_UNRESTRICTED_GUEST, 1)) { 14989b1aa8d6SNeel Natu fprintf(stderr, "ROM boot failed: unrestricted guest " 14999b1aa8d6SNeel Natu "capability not available\n"); 1500989e062bSMarcelo Araujo exit(4); 15019b1aa8d6SNeel Natu } 15027d9ef309SJohn Baldwin error = vcpu_reset(bsp); 15039b1aa8d6SNeel Natu assert(error == 0); 15049b1aa8d6SNeel Natu } 15059b1aa8d6SNeel Natu 15069ff3e8b7SVitaliy Gusev /* 15079ff3e8b7SVitaliy Gusev * Add all vCPUs. 15089ff3e8b7SVitaliy Gusev */ 15097d9ef309SJohn Baldwin for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) 15107d9ef309SJohn Baldwin spinup_vcpu(&vcpu_info[vcpuid], vcpuid == BSP); 15119ff3e8b7SVitaliy Gusev 1512483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 1513483d953aSJohn Baldwin if (restore_file != NULL) { 1514483d953aSJohn Baldwin fprintf(stdout, "Pausing pci devs...\r\n"); 1515*b10d65a4SVitaliy Gusev if (vm_pause_devices() != 0) { 1516483d953aSJohn Baldwin fprintf(stderr, "Failed to pause PCI device state.\n"); 1517483d953aSJohn Baldwin exit(1); 1518483d953aSJohn Baldwin } 1519483d953aSJohn Baldwin 1520483d953aSJohn Baldwin fprintf(stdout, "Restoring vm mem...\r\n"); 1521483d953aSJohn Baldwin if (restore_vm_mem(ctx, &rstate) != 0) { 1522483d953aSJohn Baldwin fprintf(stderr, "Failed to restore VM memory.\n"); 1523483d953aSJohn Baldwin exit(1); 1524483d953aSJohn Baldwin } 1525483d953aSJohn Baldwin 1526483d953aSJohn Baldwin fprintf(stdout, "Restoring pci devs...\r\n"); 1527*b10d65a4SVitaliy Gusev if (vm_restore_devices(&rstate) != 0) { 1528483d953aSJohn Baldwin fprintf(stderr, "Failed to restore PCI device state.\n"); 1529483d953aSJohn Baldwin exit(1); 1530483d953aSJohn Baldwin } 1531483d953aSJohn Baldwin 1532483d953aSJohn Baldwin fprintf(stdout, "Restoring kernel structs...\r\n"); 1533483d953aSJohn Baldwin if (vm_restore_kern_structs(ctx, &rstate) != 0) { 1534483d953aSJohn Baldwin fprintf(stderr, "Failed to restore kernel structs.\n"); 1535483d953aSJohn Baldwin exit(1); 1536483d953aSJohn Baldwin } 1537483d953aSJohn Baldwin 1538483d953aSJohn Baldwin fprintf(stdout, "Resuming pci devs...\r\n"); 1539*b10d65a4SVitaliy Gusev if (vm_resume_devices() != 0) { 1540483d953aSJohn Baldwin fprintf(stderr, "Failed to resume PCI device state.\n"); 1541483d953aSJohn Baldwin exit(1); 1542483d953aSJohn Baldwin } 1543483d953aSJohn Baldwin } 1544483d953aSJohn Baldwin #endif 1545483d953aSJohn Baldwin 15467d9ef309SJohn Baldwin error = vm_get_register(bsp, VM_REG_GUEST_RIP, &rip); 1547e285ef8dSPeter Grehan assert(error == 0); 1548e285ef8dSPeter Grehan 1549e285ef8dSPeter Grehan /* 1550e285ef8dSPeter Grehan * build the guest tables, MP etc. 1551e285ef8dSPeter Grehan */ 1552621b5090SJohn Baldwin if (get_config_bool_default("x86.mptable", true)) { 1553b100acf2SNeel Natu error = mptable_build(ctx, guest_ncpus); 1554989e062bSMarcelo Araujo if (error) { 1555989e062bSMarcelo Araujo perror("error to build the guest tables"); 1556989e062bSMarcelo Araujo exit(4); 1557989e062bSMarcelo Araujo } 1558b100acf2SNeel Natu } 1559e285ef8dSPeter Grehan 1560af5bfc53STycho Nightingale error = smbios_build(ctx); 1561e16b709eSJames Mintram if (error != 0) 1562e16b709eSJames Mintram exit(4); 1563af5bfc53STycho Nightingale 1564621b5090SJohn Baldwin if (get_config_bool("acpi_tables")) { 1565a1a4cbeaSNeel Natu error = acpi_build(ctx, guest_ncpus); 1566e285ef8dSPeter Grehan assert(error == 0); 1567e285ef8dSPeter Grehan } 1568e285ef8dSPeter Grehan 156916f23f75SCorvin Köhne e820_fwcfg_item = e820_get_fwcfg_item(); 157016f23f75SCorvin Köhne if (e820_fwcfg_item == NULL) { 157116f23f75SCorvin Köhne fprintf(stderr, "invalid e820 table"); 157216f23f75SCorvin Köhne exit(4); 157316f23f75SCorvin Köhne } 157416f23f75SCorvin Köhne if (qemu_fwcfg_add_file("etc/e820", e820_fwcfg_item->size, 157516f23f75SCorvin Köhne e820_fwcfg_item->data) != 0) { 157616f23f75SCorvin Köhne fprintf(stderr, "could not add qemu fwcfg etc/e820"); 157716f23f75SCorvin Köhne exit(4); 157816f23f75SCorvin Köhne } 157916f23f75SCorvin Köhne free(e820_fwcfg_item); 158016f23f75SCorvin Köhne 1581d85147f3SCorvin Köhne if (lpc_bootrom() && strcmp(lpc_fwcfg(), "bhyve") == 0) { 158288ac6958SPeter Grehan fwctl_init(); 1583d85147f3SCorvin Köhne } 158488ac6958SPeter Grehan 1585dcbebe85SMariusz Zaborski /* 1586dcbebe85SMariusz Zaborski * Change the proc title to include the VM name. 1587dcbebe85SMariusz Zaborski */ 1588dcbebe85SMariusz Zaborski setproctitle("%s", vmname); 1589dcbebe85SMariusz Zaborski 15909a9a2489SVitaliy Gusev #ifdef BHYVE_SNAPSHOT 15919a9a2489SVitaliy Gusev /* initialize mutex/cond variables */ 15929a9a2489SVitaliy Gusev init_snapshot(); 15939a9a2489SVitaliy Gusev 15949a9a2489SVitaliy Gusev /* 15959a9a2489SVitaliy Gusev * checkpointing thread for communication with bhyvectl 15969a9a2489SVitaliy Gusev */ 15979a9a2489SVitaliy Gusev if (init_checkpoint_thread(ctx) != 0) 15989a9a2489SVitaliy Gusev errx(EX_OSERR, "Failed to start checkpoint thread"); 15999a9a2489SVitaliy Gusev #endif 16009a9a2489SVitaliy Gusev 160100ef17beSBartek Rutkowski #ifndef WITHOUT_CAPSICUM 160200ef17beSBartek Rutkowski caph_cache_catpages(); 160300ef17beSBartek Rutkowski 160400ef17beSBartek Rutkowski if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) 160500ef17beSBartek Rutkowski errx(EX_OSERR, "Unable to apply rights for sandbox"); 160600ef17beSBartek Rutkowski 16077672a014SMariusz Zaborski if (caph_enter() == -1) 160800ef17beSBartek Rutkowski errx(EX_OSERR, "cap_enter() failed"); 160900ef17beSBartek Rutkowski #endif 161000ef17beSBartek Rutkowski 1611483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT 16129ff3e8b7SVitaliy Gusev if (restore_file != NULL) { 16139ff3e8b7SVitaliy Gusev destroy_restore_state(&rstate); 1614d213429eSVitaliy Gusev if (vm_restore_time(ctx) < 0) 1615d213429eSVitaliy Gusev err(EX_OSERR, "Unable to restore time"); 1616483d953aSJohn Baldwin 16177d9ef309SJohn Baldwin for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) 16187d9ef309SJohn Baldwin vm_resume_cpu(vcpu_info[vcpuid].vcpu); 16197d9ef309SJohn Baldwin } else 16209ff3e8b7SVitaliy Gusev #endif 16217d9ef309SJohn Baldwin vm_resume_cpu(bsp); 1622483d953aSJohn Baldwin 1623e285ef8dSPeter Grehan /* 1624e285ef8dSPeter Grehan * Head off to the main event dispatch loop 1625e285ef8dSPeter Grehan */ 1626e285ef8dSPeter Grehan mevent_dispatch(); 1627e285ef8dSPeter Grehan 1628989e062bSMarcelo Araujo exit(4); 1629e285ef8dSPeter Grehan } 1630