xref: /illumos-gate/usr/src/cmd/bhyve/common/bhyverun.c (revision 5c4a5fe16715fb423db76577a6883b5bbecdbe45)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */
43*5c4a5fe1SAndy Fiddaman 
44*5c4a5fe1SAndy Fiddaman 
45*5c4a5fe1SAndy Fiddaman #include <sys/types.h>
46*5c4a5fe1SAndy Fiddaman #ifndef WITHOUT_CAPSICUM
47*5c4a5fe1SAndy Fiddaman #include <sys/capsicum.h>
48*5c4a5fe1SAndy Fiddaman #endif
49*5c4a5fe1SAndy Fiddaman #include <sys/mman.h>
50*5c4a5fe1SAndy Fiddaman #include <sys/time.h>
51*5c4a5fe1SAndy Fiddaman 
52*5c4a5fe1SAndy Fiddaman #ifdef __FreeBSD__
53*5c4a5fe1SAndy Fiddaman #include <amd64/vmm/intel/vmcs.h>
54*5c4a5fe1SAndy Fiddaman #else
55*5c4a5fe1SAndy Fiddaman #include <sys/cpuset.h>
56*5c4a5fe1SAndy Fiddaman #include <intel/vmcs.h>
57*5c4a5fe1SAndy Fiddaman #endif
58*5c4a5fe1SAndy Fiddaman 
59*5c4a5fe1SAndy Fiddaman #include <machine/atomic.h>
60*5c4a5fe1SAndy Fiddaman 
61*5c4a5fe1SAndy Fiddaman #ifndef WITHOUT_CAPSICUM
62*5c4a5fe1SAndy Fiddaman #include <capsicum_helpers.h>
63*5c4a5fe1SAndy Fiddaman #endif
64*5c4a5fe1SAndy Fiddaman #include <stdio.h>
65*5c4a5fe1SAndy Fiddaman #include <stdlib.h>
66*5c4a5fe1SAndy Fiddaman #include <string.h>
67*5c4a5fe1SAndy Fiddaman #include <err.h>
68*5c4a5fe1SAndy Fiddaman #include <errno.h>
69*5c4a5fe1SAndy Fiddaman #include <libgen.h>
70*5c4a5fe1SAndy Fiddaman #include <unistd.h>
71*5c4a5fe1SAndy Fiddaman #include <assert.h>
72*5c4a5fe1SAndy Fiddaman #include <pthread.h>
73*5c4a5fe1SAndy Fiddaman #include <pthread_np.h>
74*5c4a5fe1SAndy Fiddaman #include <sysexits.h>
75*5c4a5fe1SAndy Fiddaman #include <stdbool.h>
76*5c4a5fe1SAndy Fiddaman #include <stdint.h>
77*5c4a5fe1SAndy Fiddaman 
78*5c4a5fe1SAndy Fiddaman #include <machine/vmm.h>
79*5c4a5fe1SAndy Fiddaman #ifndef WITHOUT_CAPSICUM
80*5c4a5fe1SAndy Fiddaman #include <machine/vmm_dev.h>
81*5c4a5fe1SAndy Fiddaman #endif
82*5c4a5fe1SAndy Fiddaman #ifdef	__FreeBSD__
83*5c4a5fe1SAndy Fiddaman #include <machine/vmm_instruction_emul.h>
84*5c4a5fe1SAndy Fiddaman #endif
85*5c4a5fe1SAndy Fiddaman #include <vmmapi.h>
86*5c4a5fe1SAndy Fiddaman 
87*5c4a5fe1SAndy Fiddaman #include "acpi.h"
88*5c4a5fe1SAndy Fiddaman #include "bhyverun.h"
89*5c4a5fe1SAndy Fiddaman #include "bootrom.h"
90*5c4a5fe1SAndy Fiddaman #include "config.h"
91*5c4a5fe1SAndy Fiddaman #include "debug.h"
92*5c4a5fe1SAndy Fiddaman #include "gdb.h"
93*5c4a5fe1SAndy Fiddaman #include "ioapic.h"
94*5c4a5fe1SAndy Fiddaman #include "mem.h"
95*5c4a5fe1SAndy Fiddaman #include "mevent.h"
96*5c4a5fe1SAndy Fiddaman #include "pci_emul.h"
97*5c4a5fe1SAndy Fiddaman #include "pci_lpc.h"
98*5c4a5fe1SAndy Fiddaman #include "qemu_fwcfg.h"
99*5c4a5fe1SAndy Fiddaman #include "tpm_device.h"
100*5c4a5fe1SAndy Fiddaman #include "spinup_ap.h"
101*5c4a5fe1SAndy Fiddaman #include "vmgenc.h"
102*5c4a5fe1SAndy Fiddaman #include "vmexit.h"
103*5c4a5fe1SAndy Fiddaman #ifndef __FreeBSD__
104*5c4a5fe1SAndy Fiddaman #include "smbiostbl.h"
105*5c4a5fe1SAndy Fiddaman #include "privileges.h"
106*5c4a5fe1SAndy Fiddaman #endif
107*5c4a5fe1SAndy Fiddaman 
108*5c4a5fe1SAndy Fiddaman #define MB		(1024UL * 1024)
109*5c4a5fe1SAndy Fiddaman #define GB		(1024UL * MB)
110*5c4a5fe1SAndy Fiddaman 
111*5c4a5fe1SAndy Fiddaman int guest_ncpus;
112*5c4a5fe1SAndy Fiddaman uint16_t cpu_cores, cpu_sockets, cpu_threads;
113*5c4a5fe1SAndy Fiddaman 
114*5c4a5fe1SAndy Fiddaman int raw_stdio = 0;
115*5c4a5fe1SAndy Fiddaman 
116*5c4a5fe1SAndy Fiddaman static const int BSP = 0;
117*5c4a5fe1SAndy Fiddaman 
118*5c4a5fe1SAndy Fiddaman static cpuset_t cpumask;
119*5c4a5fe1SAndy Fiddaman 
120*5c4a5fe1SAndy Fiddaman static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu);
121*5c4a5fe1SAndy Fiddaman 
/*
 * Per-vCPU bookkeeping, indexed by vCPU id.  A pointer to the entry is
 * passed to the vCPU's thread as its start argument.
 */
static struct vcpu_info {
	struct vmctx	*ctx;
	struct vcpu	*vcpu;
	int		vcpuid;
} *vcpu_info;

#ifdef	__FreeBSD__
/* Host CPU affinity set for each vCPU; a NULL entry means "not pinned". */
static cpuset_t **vcpumap;
#endif
131*5c4a5fe1SAndy Fiddaman 
132*5c4a5fe1SAndy Fiddaman 
/*
 * Parse a CPU topology specification made of comma-separated tokens of the
 * form "cpus=N", "sockets=N", "cores=N", "threads=N", or a bare "N" (which
 * is treated as "cpus=N"), and store each value in the configuration under
 * the matching key.  Values are stored as strings; they are validated later
 * by calc_topology().
 *
 * XXX This parser is known to have the following issues:
 * 1.  It accepts null key=value tokens ",," as setting "cpus" to an
 *     empty string.
 *
 * The acceptance of a null specification ('-c ""') is by design to match the
 * manual page syntax specification, this results in a topology of 1 vCPU.
 *
 * Returns 0 on success, or -1 if a token contains '=' but does not name one
 * of the recognised keys.
 */
int
bhyve_topology_parse(const char *opt)
{
	static const char *const keys[] = {
		"cpus", "sockets", "cores", "threads"
	};
	char *cp, *str, *tofree;
	size_t i, keylen;
	int rv;

	if (*opt == '\0') {
		set_config_value("sockets", "1");
		set_config_value("cores", "1");
		set_config_value("threads", "1");
		set_config_value("cpus", "1");
		return (0);
	}

	/* strsep() modifies its input, so work on a private copy. */
	tofree = str = strdup(opt);
	if (str == NULL)
		errx(4, "Failed to allocate memory");

	rv = 0;
	while ((cp = strsep(&str, ",")) != NULL) {
		/* Look for a "<key>=" prefix naming a known key. */
		for (i = 0; i < sizeof (keys) / sizeof (keys[0]); i++) {
			keylen = strlen(keys[i]);
			if (strncmp(cp, keys[i], keylen) == 0 &&
			    cp[keylen] == '=')
				break;
		}

		if (i < sizeof (keys) / sizeof (keys[0])) {
			set_config_value(keys[i], cp + keylen + 1);
		} else if (strchr(cp, '=') != NULL) {
			/* "unknown=value" is a syntax error. */
			rv = -1;
			break;
		} else {
			/* A bare token is shorthand for the vCPU count. */
			set_config_value("cpus", cp);
		}
	}

	free(tofree);
	return (rv);
}
179*5c4a5fe1SAndy Fiddaman 
/*
 * Convert the string 'value' to an integer in the range [minval, maxval].
 * The base is auto-detected by strtol() (decimal, 0x hex, or leading-0
 * octal).  'key' is used only to label the error message.
 *
 * Exits via errx(4, ...) if the string is empty, has trailing characters,
 * overflows a long, or falls outside the requested range.
 */
static int
parse_int_value(const char *key, const char *value, int minval, int maxval)
{
	char *cp;
	long lval;

	errno = 0;
	lval = strtol(value, &cp, 0);
	if (errno != 0 || *cp != '\0' || cp == value || lval < minval ||
	    lval > maxval)
		errx(4, "Invalid value for %s: '%s'", key, value);

	/* The range check above guarantees the value fits in an int. */
	return ((int)lval);
}
193*5c4a5fe1SAndy Fiddaman 
194*5c4a5fe1SAndy Fiddaman /*
195*5c4a5fe1SAndy Fiddaman  * Set the sockets, cores, threads, and guest_cpus variables based on
196*5c4a5fe1SAndy Fiddaman  * the configured topology.
197*5c4a5fe1SAndy Fiddaman  *
198*5c4a5fe1SAndy Fiddaman  * The limits of UINT16_MAX are due to the types passed to
199*5c4a5fe1SAndy Fiddaman  * vm_set_topology().  vmm.ko may enforce tighter limits.
200*5c4a5fe1SAndy Fiddaman  */
201*5c4a5fe1SAndy Fiddaman static void
calc_topology(void)202*5c4a5fe1SAndy Fiddaman calc_topology(void)
203*5c4a5fe1SAndy Fiddaman {
204*5c4a5fe1SAndy Fiddaman 	const char *value;
205*5c4a5fe1SAndy Fiddaman 	bool explicit_cpus;
206*5c4a5fe1SAndy Fiddaman 	uint64_t ncpus;
207*5c4a5fe1SAndy Fiddaman 
208*5c4a5fe1SAndy Fiddaman 	value = get_config_value("cpus");
209*5c4a5fe1SAndy Fiddaman 	if (value != NULL) {
210*5c4a5fe1SAndy Fiddaman 		guest_ncpus = parse_int_value("cpus", value, 1, UINT16_MAX);
211*5c4a5fe1SAndy Fiddaman 		explicit_cpus = true;
212*5c4a5fe1SAndy Fiddaman 	} else {
213*5c4a5fe1SAndy Fiddaman 		guest_ncpus = 1;
214*5c4a5fe1SAndy Fiddaman 		explicit_cpus = false;
215*5c4a5fe1SAndy Fiddaman 	}
216*5c4a5fe1SAndy Fiddaman 	value = get_config_value("cores");
217*5c4a5fe1SAndy Fiddaman 	if (value != NULL)
218*5c4a5fe1SAndy Fiddaman 		cpu_cores = parse_int_value("cores", value, 1, UINT16_MAX);
219*5c4a5fe1SAndy Fiddaman 	else
220*5c4a5fe1SAndy Fiddaman 		cpu_cores = 1;
221*5c4a5fe1SAndy Fiddaman 	value = get_config_value("threads");
222*5c4a5fe1SAndy Fiddaman 	if (value != NULL)
223*5c4a5fe1SAndy Fiddaman 		cpu_threads = parse_int_value("threads", value, 1, UINT16_MAX);
224*5c4a5fe1SAndy Fiddaman 	else
225*5c4a5fe1SAndy Fiddaman 		cpu_threads = 1;
226*5c4a5fe1SAndy Fiddaman 	value = get_config_value("sockets");
227*5c4a5fe1SAndy Fiddaman 	if (value != NULL)
228*5c4a5fe1SAndy Fiddaman 		cpu_sockets = parse_int_value("sockets", value, 1, UINT16_MAX);
229*5c4a5fe1SAndy Fiddaman 	else
230*5c4a5fe1SAndy Fiddaman 		cpu_sockets = guest_ncpus;
231*5c4a5fe1SAndy Fiddaman 
232*5c4a5fe1SAndy Fiddaman 	/*
233*5c4a5fe1SAndy Fiddaman 	 * Compute sockets * cores * threads avoiding overflow.  The
234*5c4a5fe1SAndy Fiddaman 	 * range check above insures these are 16 bit values.
235*5c4a5fe1SAndy Fiddaman 	 */
236*5c4a5fe1SAndy Fiddaman 	ncpus = (uint64_t)cpu_sockets * cpu_cores * cpu_threads;
237*5c4a5fe1SAndy Fiddaman 	if (ncpus > UINT16_MAX)
238*5c4a5fe1SAndy Fiddaman 		errx(4, "Computed number of vCPUs too high: %ju",
239*5c4a5fe1SAndy Fiddaman 		    (uintmax_t)ncpus);
240*5c4a5fe1SAndy Fiddaman 
241*5c4a5fe1SAndy Fiddaman 	if (explicit_cpus) {
242*5c4a5fe1SAndy Fiddaman 		if (guest_ncpus != (int)ncpus)
243*5c4a5fe1SAndy Fiddaman 			errx(4, "Topology (%d sockets, %d cores, %d threads) "
244*5c4a5fe1SAndy Fiddaman 			    "does not match %d vCPUs",
245*5c4a5fe1SAndy Fiddaman 			    cpu_sockets, cpu_cores, cpu_threads,
246*5c4a5fe1SAndy Fiddaman 			    guest_ncpus);
247*5c4a5fe1SAndy Fiddaman 	} else
248*5c4a5fe1SAndy Fiddaman 		guest_ncpus = ncpus;
249*5c4a5fe1SAndy Fiddaman }
250*5c4a5fe1SAndy Fiddaman 
#ifdef	__FreeBSD__
/*
 * Size of the buffer used to build a "vcpu.N.cpuset" configuration key.
 * Large enough for any int vCPU id: 5 ("vcpu.") + 11 (sign and digits) +
 * 7 (".cpuset") characters plus the terminating NUL.
 */
#define	VCPU_CPUSET_KEYLEN	32

/*
 * Parse a "vcpu:hostcpu" pinning option and append the host CPU to the
 * comma-separated list stored in the "vcpu.<vcpu>.cpuset" config value.
 *
 * Returns 0 on success, or -1 (after printing a message to stderr) if the
 * option is malformed, the vCPU is negative, the host CPU is outside
 * [0, CPU_SETSIZE), or memory cannot be allocated.
 */
int
bhyve_pincpu_parse(const char *opt)
{
	int vcpu, pcpu;
	const char *value;
	char *newval;
	char key[VCPU_CPUSET_KEYLEN];

	if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
		fprintf(stderr, "invalid format: %s\n", opt);
		return (-1);
	}

	if (vcpu < 0) {
		fprintf(stderr, "invalid vcpu '%d'\n", vcpu);
		return (-1);
	}

	if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
		fprintf(stderr, "hostcpu '%d' outside valid range from "
		    "0 to %d\n", pcpu, CPU_SETSIZE - 1);
		return (-1);
	}

	snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu);
	value = get_config_value(key);

	/* Append to any existing list, inserting a ',' separator if needed. */
	if (asprintf(&newval, "%s%s%d", value != NULL ? value : "",
	    value != NULL ? "," : "", pcpu) == -1) {
		perror("failed to build new cpuset string");
		return (-1);
	}

	set_config_value(key, newval);
	free(newval);
	return (0);
}

/*
 * Parse 'list', a comma-separated list of host CPU numbers and inclusive
 * ranges ("a-b"), into 'set'.  'vcpu' is used only in error messages.
 *
 * Exits via errx(4, ...) on a syntax error, a reversed range, or a CPU
 * number outside [0, CPU_SETSIZE).
 */
static void
parse_cpuset(int vcpu, const char *list, cpuset_t *set)
{
	char *cp, *token;
	long val;
	int pcpu, start;

	CPU_ZERO(set);
	start = -1;	/* >= 0 while the low end of a range is pending */
	token = __DECONST(char *, list);
	for (;;) {
		/*
		 * Range-check the full-width result before narrowing it to
		 * an int so that oversized values cannot wrap around to a
		 * valid CPU number.
		 */
		val = strtol(token, &cp, 0);
		if (cp == token)
			errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list);
		if (val < 0 || val >= CPU_SETSIZE)
			errx(4, "hostcpu '%ld' outside valid range from 0 to %d",
			    val, CPU_SETSIZE - 1);
		pcpu = (int)val;
		switch (*cp) {
		case ',':
		case '\0':
			if (start >= 0) {
				/* Close a pending range: add start..pcpu-1. */
				if (start > pcpu)
					errx(4, "Invalid hostcpu range %d-%d",
					    start, pcpu);
				while (start < pcpu) {
					CPU_SET(start, set);
					start++;
				}
				start = -1;
			}
			CPU_SET(pcpu, set);
			break;
		case '-':
			/* "a-b-c" is not a valid range. */
			if (start >= 0)
				errx(4, "invalid cpuset for vcpu %d: '%s'",
				    vcpu, list);
			start = pcpu;
			break;
		default:
			errx(4, "invalid cpuset for vcpu %d: '%s'", vcpu, list);
		}
		if (*cp == '\0')
			break;
		token = cp + 1;
	}
}

/*
 * Allocate vcpumap and, for every vCPU that has a "vcpu.N.cpuset" config
 * value, allocate and fill in its host CPU affinity set.  vCPUs without a
 * configured cpuset keep a NULL entry.
 *
 * Exits via err(4, ...) if memory cannot be allocated.
 */
static void
build_vcpumaps(void)
{
	char key[VCPU_CPUSET_KEYLEN];
	const char *value;
	int vcpu;

	vcpumap = calloc(guest_ncpus, sizeof(*vcpumap));
	if (vcpumap == NULL)
		err(4, "Failed to allocate vcpu cpuset map");
	for (vcpu = 0; vcpu < guest_ncpus; vcpu++) {
		snprintf(key, sizeof(key), "vcpu.%d.cpuset", vcpu);
		value = get_config_value(key);
		if (value == NULL)
			continue;
		vcpumap[vcpu] = malloc(sizeof(cpuset_t));
		if (vcpumap[vcpu] == NULL)
			err(4, "Failed to allocate cpuset for vcpu %d", vcpu);
		parse_cpuset(vcpu, value, vcpumap[vcpu]);
	}
}
#endif /* __FreeBSD__ */
356*5c4a5fe1SAndy Fiddaman 
/*
 * Return the result of vm_map_gpa() for the guest-physical range
 * [gaddr, gaddr + len) in the VM described by 'ctx'.
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{

	return (vm_map_gpa(ctx, gaddr, len));
}
363*5c4a5fe1SAndy Fiddaman 
/*
 * Return the boolean "virtio_msix" configuration value, which defaults to
 * true when it is not set.
 */
int
fbsdrun_virtio_msix(void)
{

	return (get_config_bool_default("virtio_msix", true));
}
370*5c4a5fe1SAndy Fiddaman 
371*5c4a5fe1SAndy Fiddaman struct vcpu *
fbsdrun_vcpu(int vcpuid)372*5c4a5fe1SAndy Fiddaman fbsdrun_vcpu(int vcpuid)
373*5c4a5fe1SAndy Fiddaman {
374*5c4a5fe1SAndy Fiddaman 	return (vcpu_info[vcpuid].vcpu);
375*5c4a5fe1SAndy Fiddaman }
376*5c4a5fe1SAndy Fiddaman 
377*5c4a5fe1SAndy Fiddaman static void *
fbsdrun_start_thread(void * param)378*5c4a5fe1SAndy Fiddaman fbsdrun_start_thread(void *param)
379*5c4a5fe1SAndy Fiddaman {
380*5c4a5fe1SAndy Fiddaman 	char tname[MAXCOMLEN + 1];
381*5c4a5fe1SAndy Fiddaman 	struct vcpu_info *vi = param;
382*5c4a5fe1SAndy Fiddaman #ifdef	__FreeBSD__
383*5c4a5fe1SAndy Fiddaman 	int error;
384*5c4a5fe1SAndy Fiddaman #endif
385*5c4a5fe1SAndy Fiddaman 
386*5c4a5fe1SAndy Fiddaman 	snprintf(tname, sizeof(tname), "vcpu %d", vi->vcpuid);
387*5c4a5fe1SAndy Fiddaman 	pthread_set_name_np(pthread_self(), tname);
388*5c4a5fe1SAndy Fiddaman 
389*5c4a5fe1SAndy Fiddaman #ifdef	__FreeBSD__
390*5c4a5fe1SAndy Fiddaman 	if (vcpumap[vi->vcpuid] != NULL) {
391*5c4a5fe1SAndy Fiddaman 		error = pthread_setaffinity_np(pthread_self(),
392*5c4a5fe1SAndy Fiddaman 		    sizeof(cpuset_t), vcpumap[vi->vcpuid]);
393*5c4a5fe1SAndy Fiddaman 		assert(error == 0);
394*5c4a5fe1SAndy Fiddaman 	}
395*5c4a5fe1SAndy Fiddaman #endif
396*5c4a5fe1SAndy Fiddaman 
397*5c4a5fe1SAndy Fiddaman 	gdb_cpu_add(vi->vcpu);
398*5c4a5fe1SAndy Fiddaman 
399*5c4a5fe1SAndy Fiddaman 	vm_loop(vi->ctx, vi->vcpu);
400*5c4a5fe1SAndy Fiddaman 
401*5c4a5fe1SAndy Fiddaman 	/* not reached */
402*5c4a5fe1SAndy Fiddaman 	exit(1);
403*5c4a5fe1SAndy Fiddaman 	return (NULL);
404*5c4a5fe1SAndy Fiddaman }
405*5c4a5fe1SAndy Fiddaman 
406*5c4a5fe1SAndy Fiddaman void
fbsdrun_addcpu(int vcpuid,bool suspend)407*5c4a5fe1SAndy Fiddaman fbsdrun_addcpu(int vcpuid, bool suspend)
408*5c4a5fe1SAndy Fiddaman {
409*5c4a5fe1SAndy Fiddaman 	struct vcpu_info *vi;
410*5c4a5fe1SAndy Fiddaman 	pthread_t thr;
411*5c4a5fe1SAndy Fiddaman 	int error;
412*5c4a5fe1SAndy Fiddaman 
413*5c4a5fe1SAndy Fiddaman 	vi = &vcpu_info[vcpuid];
414*5c4a5fe1SAndy Fiddaman 
415*5c4a5fe1SAndy Fiddaman 	error = vm_activate_cpu(vi->vcpu);
416*5c4a5fe1SAndy Fiddaman 	if (error != 0)
417*5c4a5fe1SAndy Fiddaman 		err(EX_OSERR, "could not activate CPU %d", vi->vcpuid);
418*5c4a5fe1SAndy Fiddaman 
419*5c4a5fe1SAndy Fiddaman 	CPU_SET_ATOMIC(vcpuid, &cpumask);
420*5c4a5fe1SAndy Fiddaman 
421*5c4a5fe1SAndy Fiddaman 	if (suspend) {
422*5c4a5fe1SAndy Fiddaman 		error = vm_suspend_cpu(vi->vcpu);
423*5c4a5fe1SAndy Fiddaman 		assert(error == 0);
424*5c4a5fe1SAndy Fiddaman 	}
425*5c4a5fe1SAndy Fiddaman 
426*5c4a5fe1SAndy Fiddaman 	error = pthread_create(&thr, NULL, fbsdrun_start_thread, vi);
427*5c4a5fe1SAndy Fiddaman 	assert(error == 0);
428*5c4a5fe1SAndy Fiddaman }
429*5c4a5fe1SAndy Fiddaman 
430*5c4a5fe1SAndy Fiddaman void
fbsdrun_deletecpu(int vcpu)431*5c4a5fe1SAndy Fiddaman fbsdrun_deletecpu(int vcpu)
432*5c4a5fe1SAndy Fiddaman {
433*5c4a5fe1SAndy Fiddaman 	static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
434*5c4a5fe1SAndy Fiddaman 	static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
435*5c4a5fe1SAndy Fiddaman 
436*5c4a5fe1SAndy Fiddaman 	pthread_mutex_lock(&resetcpu_mtx);
437*5c4a5fe1SAndy Fiddaman 	if (!CPU_ISSET(vcpu, &cpumask)) {
438*5c4a5fe1SAndy Fiddaman 		EPRINTLN("Attempting to delete unknown cpu %d", vcpu);
439*5c4a5fe1SAndy Fiddaman 		exit(4);
440*5c4a5fe1SAndy Fiddaman 	}
441*5c4a5fe1SAndy Fiddaman 
442*5c4a5fe1SAndy Fiddaman 	CPU_CLR(vcpu, &cpumask);
443*5c4a5fe1SAndy Fiddaman 
444*5c4a5fe1SAndy Fiddaman 	if (vcpu != BSP) {
445*5c4a5fe1SAndy Fiddaman 		pthread_cond_signal(&resetcpu_cond);
446*5c4a5fe1SAndy Fiddaman 		pthread_mutex_unlock(&resetcpu_mtx);
447*5c4a5fe1SAndy Fiddaman 		pthread_exit(NULL);
448*5c4a5fe1SAndy Fiddaman 		/* NOTREACHED */
449*5c4a5fe1SAndy Fiddaman 	}
450*5c4a5fe1SAndy Fiddaman 
451*5c4a5fe1SAndy Fiddaman 	while (!CPU_EMPTY(&cpumask)) {
452*5c4a5fe1SAndy Fiddaman 		pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
453*5c4a5fe1SAndy Fiddaman 	}
454*5c4a5fe1SAndy Fiddaman 	pthread_mutex_unlock(&resetcpu_mtx);
455*5c4a5fe1SAndy Fiddaman }
456*5c4a5fe1SAndy Fiddaman 
457*5c4a5fe1SAndy Fiddaman static void
vm_loop(struct vmctx * ctx,struct vcpu * vcpu)458*5c4a5fe1SAndy Fiddaman vm_loop(struct vmctx *ctx, struct vcpu *vcpu)
459*5c4a5fe1SAndy Fiddaman {
460*5c4a5fe1SAndy Fiddaman 	struct vm_exit vme;
461*5c4a5fe1SAndy Fiddaman 	int error, rc;
462*5c4a5fe1SAndy Fiddaman 	enum vm_exitcode exitcode;
463*5c4a5fe1SAndy Fiddaman 	cpuset_t active_cpus;
464*5c4a5fe1SAndy Fiddaman 	struct vm_entry *ventry;
465*5c4a5fe1SAndy Fiddaman 
466*5c4a5fe1SAndy Fiddaman 	error = vm_active_cpus(ctx, &active_cpus);
467*5c4a5fe1SAndy Fiddaman 	assert(CPU_ISSET(vcpu_id(vcpu), &active_cpus));
468*5c4a5fe1SAndy Fiddaman 
469*5c4a5fe1SAndy Fiddaman 	ventry = vmentry_vcpu(vcpu_id(vcpu));
470*5c4a5fe1SAndy Fiddaman 
471*5c4a5fe1SAndy Fiddaman 	while (1) {
472*5c4a5fe1SAndy Fiddaman 		error = vm_run(vcpu, ventry, &vme);
473*5c4a5fe1SAndy Fiddaman 		if (error != 0)
474*5c4a5fe1SAndy Fiddaman 			break;
475*5c4a5fe1SAndy Fiddaman 
476*5c4a5fe1SAndy Fiddaman 		if (ventry->cmd != VEC_DEFAULT) {
477*5c4a5fe1SAndy Fiddaman 			/*
478*5c4a5fe1SAndy Fiddaman 			 * Discard any lingering entry state after it has been
479*5c4a5fe1SAndy Fiddaman 			 * submitted via vm_run().
480*5c4a5fe1SAndy Fiddaman 			 */
481*5c4a5fe1SAndy Fiddaman 			bzero(ventry, sizeof (*ventry));
482*5c4a5fe1SAndy Fiddaman 		}
483*5c4a5fe1SAndy Fiddaman 
484*5c4a5fe1SAndy Fiddaman 		exitcode = vme.exitcode;
485*5c4a5fe1SAndy Fiddaman 		if (exitcode >= VM_EXITCODE_MAX ||
486*5c4a5fe1SAndy Fiddaman 		    vmexit_handlers[exitcode] == NULL) {
487*5c4a5fe1SAndy Fiddaman 			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
488*5c4a5fe1SAndy Fiddaman 			    exitcode);
489*5c4a5fe1SAndy Fiddaman 			exit(4);
490*5c4a5fe1SAndy Fiddaman 		}
491*5c4a5fe1SAndy Fiddaman 
492*5c4a5fe1SAndy Fiddaman 		rc = (*vmexit_handlers[exitcode])(ctx, vcpu, &vme);
493*5c4a5fe1SAndy Fiddaman 
494*5c4a5fe1SAndy Fiddaman 		switch (rc) {
495*5c4a5fe1SAndy Fiddaman 		case VMEXIT_CONTINUE:
496*5c4a5fe1SAndy Fiddaman 			break;
497*5c4a5fe1SAndy Fiddaman 		case VMEXIT_ABORT:
498*5c4a5fe1SAndy Fiddaman 			abort();
499*5c4a5fe1SAndy Fiddaman 		default:
500*5c4a5fe1SAndy Fiddaman 			exit(4);
501*5c4a5fe1SAndy Fiddaman 		}
502*5c4a5fe1SAndy Fiddaman 	}
503*5c4a5fe1SAndy Fiddaman 	EPRINTLN("vm_run error %d, errno %d", error, errno);
504*5c4a5fe1SAndy Fiddaman }
505*5c4a5fe1SAndy Fiddaman 
/*
 * Return the maximum number of vCPUs reported by vm_get_topology() for the
 * VM, or 1 if that query fails.  On FreeBSD, 1 is also returned when the
 * VM_CAP_UNRESTRICTED_GUEST capability cannot be queried on 'vcpu'.
 */
static int
num_vcpus_allowed(struct vmctx *ctx, struct vcpu *vcpu)
{
	uint16_t sockets, cores, threads, maxcpus;
#ifdef __FreeBSD__
	int tmp, error;

	/*
	 * The guest is allowed to spinup more than one processor only if the
	 * UNRESTRICTED_GUEST capability is available.
	 */
	error = vm_get_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, &tmp);
	if (error != 0)
		return (1);
#else
	int error;
	/* Unrestricted Guest is always enabled on illumos */

#endif /* __FreeBSD__ */

	error = vm_get_topology(ctx, &sockets, &cores, &threads, &maxcpus);
	if (error == 0)
		return (maxcpus);
	else
		return (1);
}
532*5c4a5fe1SAndy Fiddaman 
533*5c4a5fe1SAndy Fiddaman static struct vmctx *
do_open(const char * vmname)534*5c4a5fe1SAndy Fiddaman do_open(const char *vmname)
535*5c4a5fe1SAndy Fiddaman {
536*5c4a5fe1SAndy Fiddaman 	struct vmctx *ctx;
537*5c4a5fe1SAndy Fiddaman 	int error;
538*5c4a5fe1SAndy Fiddaman 	bool reinit, romboot;
539*5c4a5fe1SAndy Fiddaman 
540*5c4a5fe1SAndy Fiddaman 	reinit = romboot = false;
541*5c4a5fe1SAndy Fiddaman 
542*5c4a5fe1SAndy Fiddaman 	romboot = bootrom_boot();
543*5c4a5fe1SAndy Fiddaman 
544*5c4a5fe1SAndy Fiddaman #ifndef __FreeBSD__
545*5c4a5fe1SAndy Fiddaman 	uint64_t create_flags = 0;
546*5c4a5fe1SAndy Fiddaman 	if (get_config_bool_default("memory.use_reservoir", false)) {
547*5c4a5fe1SAndy Fiddaman 		create_flags |= VCF_RESERVOIR_MEM;
548*5c4a5fe1SAndy Fiddaman 	}
549*5c4a5fe1SAndy Fiddaman 	error = vm_create(vmname, create_flags);
550*5c4a5fe1SAndy Fiddaman #else
551*5c4a5fe1SAndy Fiddaman 	error = vm_create(vmname);
552*5c4a5fe1SAndy Fiddaman #endif /* __FreeBSD__ */
553*5c4a5fe1SAndy Fiddaman 	if (error) {
554*5c4a5fe1SAndy Fiddaman 		if (errno == EEXIST) {
555*5c4a5fe1SAndy Fiddaman 			if (romboot) {
556*5c4a5fe1SAndy Fiddaman 				reinit = true;
557*5c4a5fe1SAndy Fiddaman 			} else {
558*5c4a5fe1SAndy Fiddaman 				/*
559*5c4a5fe1SAndy Fiddaman 				 * The virtual machine has been setup by the
560*5c4a5fe1SAndy Fiddaman 				 * userspace bootloader.
561*5c4a5fe1SAndy Fiddaman 				 */
562*5c4a5fe1SAndy Fiddaman 			}
563*5c4a5fe1SAndy Fiddaman 		} else {
564*5c4a5fe1SAndy Fiddaman 			perror("vm_create");
565*5c4a5fe1SAndy Fiddaman 			exit(4);
566*5c4a5fe1SAndy Fiddaman 		}
567*5c4a5fe1SAndy Fiddaman 	} else {
568*5c4a5fe1SAndy Fiddaman 		if (!romboot) {
569*5c4a5fe1SAndy Fiddaman 			/*
570*5c4a5fe1SAndy Fiddaman 			 * If the virtual machine was just created then a
571*5c4a5fe1SAndy Fiddaman 			 * bootrom must be configured to boot it.
572*5c4a5fe1SAndy Fiddaman 			 */
573*5c4a5fe1SAndy Fiddaman 			fprintf(stderr, "virtual machine cannot be booted\n");
574*5c4a5fe1SAndy Fiddaman 			exit(4);
575*5c4a5fe1SAndy Fiddaman 		}
576*5c4a5fe1SAndy Fiddaman 	}
577*5c4a5fe1SAndy Fiddaman 
578*5c4a5fe1SAndy Fiddaman 	ctx = vm_open(vmname);
579*5c4a5fe1SAndy Fiddaman 	if (ctx == NULL) {
580*5c4a5fe1SAndy Fiddaman 		perror("vm_open");
581*5c4a5fe1SAndy Fiddaman 		exit(4);
582*5c4a5fe1SAndy Fiddaman 	}
583*5c4a5fe1SAndy Fiddaman 
584*5c4a5fe1SAndy Fiddaman #ifndef WITHOUT_CAPSICUM
585*5c4a5fe1SAndy Fiddaman 	if (vm_limit_rights(ctx) != 0)
586*5c4a5fe1SAndy Fiddaman 		err(EX_OSERR, "vm_limit_rights");
587*5c4a5fe1SAndy Fiddaman #endif
588*5c4a5fe1SAndy Fiddaman 
589*5c4a5fe1SAndy Fiddaman 	if (reinit) {
590*5c4a5fe1SAndy Fiddaman #ifndef __FreeBSD__
591*5c4a5fe1SAndy Fiddaman 		error = vm_reinit(ctx, 0);
592*5c4a5fe1SAndy Fiddaman #else
593*5c4a5fe1SAndy Fiddaman 		error = vm_reinit(ctx);
594*5c4a5fe1SAndy Fiddaman #endif
595*5c4a5fe1SAndy Fiddaman 		if (error) {
596*5c4a5fe1SAndy Fiddaman 			perror("vm_reinit");
597*5c4a5fe1SAndy Fiddaman 			exit(4);
598*5c4a5fe1SAndy Fiddaman 		}
599*5c4a5fe1SAndy Fiddaman 	}
600*5c4a5fe1SAndy Fiddaman 	error = vm_set_topology(ctx, cpu_sockets, cpu_cores, cpu_threads, 0);
601*5c4a5fe1SAndy Fiddaman 	if (error)
602*5c4a5fe1SAndy Fiddaman 		errx(EX_OSERR, "vm_set_topology");
603*5c4a5fe1SAndy Fiddaman 	return (ctx);
604*5c4a5fe1SAndy Fiddaman }
605*5c4a5fe1SAndy Fiddaman 
/*
 * Parse a single "path=value" option and store it in the configuration.
 * The split happens at the first '=' in the string.
 *
 * Returns true on success, or false if 'option' contains no '=' or has
 * nothing after it.  Exits via err(4, ...) if memory cannot be allocated.
 */
bool
bhyve_parse_config_option(const char *option)
{
	const char *value;
	char *path;

	value = strchr(option, '=');
	if (value == NULL || value[1] == '\0')
		return (false);

	/* Copy out everything before the '=' to use as the config path. */
	path = strndup(option, value - option);
	if (path == NULL)
		err(4, "Failed to allocate memory");
	set_config_value(path, value + 1);
	free(path);
	return (true);
}
622*5c4a5fe1SAndy Fiddaman 
/*
 * Read the configuration file at 'path' and apply each line as a
 * "path=value" option via bhyve_parse_config_option().  Lines that begin
 * with '#' and empty lines are skipped.
 *
 * Exits via err()/errx() with status 4 if the file cannot be opened or a
 * line is not a valid option; the message names the file and line number.
 */
void
bhyve_parse_simple_config_file(const char *path)
{
	FILE *fp;
	char *line, *cp;
	size_t linecap;
	unsigned int lineno;

	fp = fopen(path, "r");
	if (fp == NULL)
		err(4, "Failed to open configuration file %s", path);

	/* getline() allocates and grows the buffer as needed. */
	line = NULL;
	linecap = 0;
	for (lineno = 1; getline(&line, &linecap, fp) > 0; lineno++) {
		if (*line == '#' || *line == '\n')
			continue;
		/* Strip the trailing newline, if there is one. */
		cp = strchr(line, '\n');
		if (cp != NULL)
			*cp = '\0';
		if (!bhyve_parse_config_option(line))
			errx(4, "%s line %u: invalid config option '%s'", path,
			    lineno, line);
	}
	free(line);
	fclose(fp);
}
650*5c4a5fe1SAndy Fiddaman 
/*
 * Parse a debug-server option of the form "[w][address:]port".
 *
 * A leading 'w' sets "gdb.wait" to true.  If the remainder contains a ':',
 * the text before the last ':' is stored as "gdb.address" and the text after
 * it as "gdb.port"; otherwise the whole remainder is stored as "gdb.port".
 *
 * The caller's string is never modified: the address part is copied into a
 * temporary buffer rather than being terminated in place through the const
 * pointer.  Exits with status 4 if that copy cannot be allocated.
 */
void
bhyve_parse_gdb_options(const char *opt)
{
	const char *sport;
	const char *colon;
	char *addr;

	if (opt[0] == 'w') {
		set_config_bool("gdb.wait", true);
		opt++;
	}

	/* Split at the last ':' so the address part may itself contain ':'. */
	colon = strrchr(opt, ':');
	if (colon == NULL) {
		sport = opt;
	} else {
		addr = strndup(opt, (size_t)(colon - opt));
		if (addr == NULL)
			err(4, "Failed to allocate memory");
		set_config_value("gdb.address", addr);
		free(addr);
		sport = colon + 1;
	}

	set_config_value("gdb.port", sport);
}
674*5c4a5fe1SAndy Fiddaman 
675*5c4a5fe1SAndy Fiddaman int
main(int argc,char * argv[])676*5c4a5fe1SAndy Fiddaman main(int argc, char *argv[])
677*5c4a5fe1SAndy Fiddaman {
678*5c4a5fe1SAndy Fiddaman 	int error;
679*5c4a5fe1SAndy Fiddaman 	int max_vcpus, memflags;
680*5c4a5fe1SAndy Fiddaman 	struct vcpu *bsp;
681*5c4a5fe1SAndy Fiddaman 	struct vmctx *ctx;
682*5c4a5fe1SAndy Fiddaman 	size_t memsize;
683*5c4a5fe1SAndy Fiddaman 	const char *value, *vmname;
684*5c4a5fe1SAndy Fiddaman 
685*5c4a5fe1SAndy Fiddaman 	bhyve_init_config();
686*5c4a5fe1SAndy Fiddaman 
687*5c4a5fe1SAndy Fiddaman 	bhyve_optparse(argc, argv);
688*5c4a5fe1SAndy Fiddaman 	argc -= optind;
689*5c4a5fe1SAndy Fiddaman 	argv += optind;
690*5c4a5fe1SAndy Fiddaman 
691*5c4a5fe1SAndy Fiddaman 	if (argc > 1)
692*5c4a5fe1SAndy Fiddaman 		bhyve_usage(1);
693*5c4a5fe1SAndy Fiddaman 
694*5c4a5fe1SAndy Fiddaman 	if (argc == 1)
695*5c4a5fe1SAndy Fiddaman 		set_config_value("name", argv[0]);
696*5c4a5fe1SAndy Fiddaman 
697*5c4a5fe1SAndy Fiddaman 	vmname = get_config_value("name");
698*5c4a5fe1SAndy Fiddaman 	if (vmname == NULL)
699*5c4a5fe1SAndy Fiddaman 		bhyve_usage(1);
700*5c4a5fe1SAndy Fiddaman 
701*5c4a5fe1SAndy Fiddaman 	if (get_config_bool_default("config.dump", false)) {
702*5c4a5fe1SAndy Fiddaman 		dump_config();
703*5c4a5fe1SAndy Fiddaman 		exit(1);
704*5c4a5fe1SAndy Fiddaman 	}
705*5c4a5fe1SAndy Fiddaman 
706*5c4a5fe1SAndy Fiddaman #ifndef __FreeBSD__
707*5c4a5fe1SAndy Fiddaman 	illumos_priv_init();
708*5c4a5fe1SAndy Fiddaman #endif
709*5c4a5fe1SAndy Fiddaman 
710*5c4a5fe1SAndy Fiddaman 	calc_topology();
711*5c4a5fe1SAndy Fiddaman 
712*5c4a5fe1SAndy Fiddaman #ifdef __FreeBSD__
713*5c4a5fe1SAndy Fiddaman 	build_vcpumaps();
714*5c4a5fe1SAndy Fiddaman #endif
715*5c4a5fe1SAndy Fiddaman 
716*5c4a5fe1SAndy Fiddaman 	value = get_config_value("memory.size");
717*5c4a5fe1SAndy Fiddaman 	error = vm_parse_memsize(value, &memsize);
718*5c4a5fe1SAndy Fiddaman 	if (error)
719*5c4a5fe1SAndy Fiddaman 		errx(EX_USAGE, "invalid memsize '%s'", value);
720*5c4a5fe1SAndy Fiddaman 
721*5c4a5fe1SAndy Fiddaman 	ctx = do_open(vmname);
722*5c4a5fe1SAndy Fiddaman 
723*5c4a5fe1SAndy Fiddaman 	bsp = vm_vcpu_open(ctx, BSP);
724*5c4a5fe1SAndy Fiddaman 	max_vcpus = num_vcpus_allowed(ctx, bsp);
725*5c4a5fe1SAndy Fiddaman 	if (guest_ncpus > max_vcpus) {
726*5c4a5fe1SAndy Fiddaman 		fprintf(stderr, "%d vCPUs requested but only %d available\n",
727*5c4a5fe1SAndy Fiddaman 			guest_ncpus, max_vcpus);
728*5c4a5fe1SAndy Fiddaman 		exit(4);
729*5c4a5fe1SAndy Fiddaman 	}
730*5c4a5fe1SAndy Fiddaman 
731*5c4a5fe1SAndy Fiddaman 	bhyve_init_vcpu(bsp);
732*5c4a5fe1SAndy Fiddaman 
733*5c4a5fe1SAndy Fiddaman        /* Allocate per-VCPU resources. */
734*5c4a5fe1SAndy Fiddaman 	vcpu_info = calloc(guest_ncpus, sizeof(*vcpu_info));
735*5c4a5fe1SAndy Fiddaman 	for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) {
736*5c4a5fe1SAndy Fiddaman 		vcpu_info[vcpuid].ctx = ctx;
737*5c4a5fe1SAndy Fiddaman 		vcpu_info[vcpuid].vcpuid = vcpuid;
738*5c4a5fe1SAndy Fiddaman 		if (vcpuid == BSP)
739*5c4a5fe1SAndy Fiddaman 			vcpu_info[vcpuid].vcpu = bsp;
740*5c4a5fe1SAndy Fiddaman 		else
741*5c4a5fe1SAndy Fiddaman 			vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid);
742*5c4a5fe1SAndy Fiddaman 	}
743*5c4a5fe1SAndy Fiddaman 
744*5c4a5fe1SAndy Fiddaman 	memflags = 0;
745*5c4a5fe1SAndy Fiddaman 	if (get_config_bool_default("memory.wired", false))
746*5c4a5fe1SAndy Fiddaman 		memflags |= VM_MEM_F_WIRED;
747*5c4a5fe1SAndy Fiddaman 	if (get_config_bool_default("memory.guest_in_core", false))
748*5c4a5fe1SAndy Fiddaman 		memflags |= VM_MEM_F_INCORE;
749*5c4a5fe1SAndy Fiddaman 	vm_set_memflags(ctx, memflags);
750*5c4a5fe1SAndy Fiddaman #ifdef	__FreeBSD__
751*5c4a5fe1SAndy Fiddaman 	error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
752*5c4a5fe1SAndy Fiddaman #else
753*5c4a5fe1SAndy Fiddaman 	int _errno;
754*5c4a5fe1SAndy Fiddaman 	do {
755*5c4a5fe1SAndy Fiddaman 		errno = 0;
756*5c4a5fe1SAndy Fiddaman 		error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
757*5c4a5fe1SAndy Fiddaman 		_errno = errno;
758*5c4a5fe1SAndy Fiddaman 		if (error != 0 && _errno == ENOMEM) {
759*5c4a5fe1SAndy Fiddaman 			(void) fprintf(stderr, "Unable to allocate memory "
760*5c4a5fe1SAndy Fiddaman 			    "(%llu), retrying in 1 second\n", memsize);
761*5c4a5fe1SAndy Fiddaman 			sleep(1);
762*5c4a5fe1SAndy Fiddaman 		}
763*5c4a5fe1SAndy Fiddaman 	} while (_errno == ENOMEM);
764*5c4a5fe1SAndy Fiddaman #endif
765*5c4a5fe1SAndy Fiddaman 	if (error) {
766*5c4a5fe1SAndy Fiddaman 		fprintf(stderr, "Unable to set up memory (%d)\n", errno);
767*5c4a5fe1SAndy Fiddaman 		exit(4);
768*5c4a5fe1SAndy Fiddaman 	}
769*5c4a5fe1SAndy Fiddaman 
770*5c4a5fe1SAndy Fiddaman 	init_mem(guest_ncpus);
771*5c4a5fe1SAndy Fiddaman 	init_bootrom(ctx);
772*5c4a5fe1SAndy Fiddaman 
773*5c4a5fe1SAndy Fiddaman 	if (bhyve_init_platform(ctx, bsp) != 0)
774*5c4a5fe1SAndy Fiddaman 		exit(4);
775*5c4a5fe1SAndy Fiddaman 
776*5c4a5fe1SAndy Fiddaman 	if (qemu_fwcfg_init(ctx) != 0) {
777*5c4a5fe1SAndy Fiddaman 		fprintf(stderr, "qemu fwcfg initialization error\n");
778*5c4a5fe1SAndy Fiddaman 		exit(4);
779*5c4a5fe1SAndy Fiddaman 	}
780*5c4a5fe1SAndy Fiddaman 
781*5c4a5fe1SAndy Fiddaman 	if (qemu_fwcfg_add_file("opt/bhyve/hw.ncpu", sizeof(guest_ncpus),
782*5c4a5fe1SAndy Fiddaman 	    &guest_ncpus) != 0) {
783*5c4a5fe1SAndy Fiddaman 		fprintf(stderr, "Could not add qemu fwcfg opt/bhyve/hw.ncpu\n");
784*5c4a5fe1SAndy Fiddaman 		exit(4);
785*5c4a5fe1SAndy Fiddaman 	}
786*5c4a5fe1SAndy Fiddaman 
787*5c4a5fe1SAndy Fiddaman 	/*
788*5c4a5fe1SAndy Fiddaman 	 * Exit if a device emulation finds an error in its initialization
789*5c4a5fe1SAndy Fiddaman 	 */
790*5c4a5fe1SAndy Fiddaman 	if (init_pci(ctx) != 0) {
791*5c4a5fe1SAndy Fiddaman 		EPRINTLN("Device emulation initialization error: %s",
792*5c4a5fe1SAndy Fiddaman 		    strerror(errno));
793*5c4a5fe1SAndy Fiddaman 		exit(4);
794*5c4a5fe1SAndy Fiddaman 	}
795*5c4a5fe1SAndy Fiddaman 	if (init_tpm(ctx) != 0) {
796*5c4a5fe1SAndy Fiddaman 		EPRINTLN("Failed to init TPM device");
797*5c4a5fe1SAndy Fiddaman 		exit(4);
798*5c4a5fe1SAndy Fiddaman 	}
799*5c4a5fe1SAndy Fiddaman 
800*5c4a5fe1SAndy Fiddaman 	/*
801*5c4a5fe1SAndy Fiddaman 	 * Initialize after PCI, to allow a bootrom file to reserve the high
802*5c4a5fe1SAndy Fiddaman 	 * region.
803*5c4a5fe1SAndy Fiddaman 	 */
804*5c4a5fe1SAndy Fiddaman 	if (get_config_bool("acpi_tables"))
805*5c4a5fe1SAndy Fiddaman 		vmgenc_init(ctx);
806*5c4a5fe1SAndy Fiddaman 
807*5c4a5fe1SAndy Fiddaman #ifdef __FreeBSD__
808*5c4a5fe1SAndy Fiddaman 	init_gdb(ctx);
809*5c4a5fe1SAndy Fiddaman #else
810*5c4a5fe1SAndy Fiddaman 	if (value != NULL) {
811*5c4a5fe1SAndy Fiddaman 		int port = atoi(value);
812*5c4a5fe1SAndy Fiddaman 
813*5c4a5fe1SAndy Fiddaman 		if (port < 0)
814*5c4a5fe1SAndy Fiddaman 			init_mdb(ctx);
815*5c4a5fe1SAndy Fiddaman 		else
816*5c4a5fe1SAndy Fiddaman 			init_gdb(ctx);
817*5c4a5fe1SAndy Fiddaman 	}
818*5c4a5fe1SAndy Fiddaman #endif
819*5c4a5fe1SAndy Fiddaman 
820*5c4a5fe1SAndy Fiddaman 	if (bootrom_boot()) {
821*5c4a5fe1SAndy Fiddaman #ifdef __FreeBSD__
822*5c4a5fe1SAndy Fiddaman 		if (vm_set_capability(bsp, VM_CAP_UNRESTRICTED_GUEST, 1)) {
823*5c4a5fe1SAndy Fiddaman 			fprintf(stderr, "ROM boot failed: unrestricted guest "
824*5c4a5fe1SAndy Fiddaman 			    "capability not available\n");
825*5c4a5fe1SAndy Fiddaman 			exit(4);
826*5c4a5fe1SAndy Fiddaman 		}
827*5c4a5fe1SAndy Fiddaman #else
828*5c4a5fe1SAndy Fiddaman 		/* Unrestricted Guest is always enabled on illumos */
829*5c4a5fe1SAndy Fiddaman #endif
830*5c4a5fe1SAndy Fiddaman 		error = vcpu_reset(bsp);
831*5c4a5fe1SAndy Fiddaman 		assert(error == 0);
832*5c4a5fe1SAndy Fiddaman 	}
833*5c4a5fe1SAndy Fiddaman 
834*5c4a5fe1SAndy Fiddaman 	if (bhyve_init_platform_late(ctx, bsp) != 0)
835*5c4a5fe1SAndy Fiddaman 		exit(4);
836*5c4a5fe1SAndy Fiddaman 
837*5c4a5fe1SAndy Fiddaman 	/*
838*5c4a5fe1SAndy Fiddaman 	 * Change the proc title to include the VM name.
839*5c4a5fe1SAndy Fiddaman 	 */
840*5c4a5fe1SAndy Fiddaman 	setproctitle("%s", vmname);
841*5c4a5fe1SAndy Fiddaman 
842*5c4a5fe1SAndy Fiddaman #ifndef WITHOUT_CAPSICUM
843*5c4a5fe1SAndy Fiddaman 	caph_cache_catpages();
844*5c4a5fe1SAndy Fiddaman 
845*5c4a5fe1SAndy Fiddaman 	if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
846*5c4a5fe1SAndy Fiddaman 		errx(EX_OSERR, "Unable to apply rights for sandbox");
847*5c4a5fe1SAndy Fiddaman 
848*5c4a5fe1SAndy Fiddaman 	if (caph_enter() == -1)
849*5c4a5fe1SAndy Fiddaman 		errx(EX_OSERR, "cap_enter() failed");
850*5c4a5fe1SAndy Fiddaman #endif
851*5c4a5fe1SAndy Fiddaman 
852*5c4a5fe1SAndy Fiddaman #ifndef __FreeBSD__
853*5c4a5fe1SAndy Fiddaman 	illumos_priv_lock();
854*5c4a5fe1SAndy Fiddaman #endif
855*5c4a5fe1SAndy Fiddaman 
856*5c4a5fe1SAndy Fiddaman #ifndef	__FreeBSD__
857*5c4a5fe1SAndy Fiddaman 	if (vmentry_init(guest_ncpus) != 0)
858*5c4a5fe1SAndy Fiddaman 		err(EX_OSERR, "vmentry_init() failed");
859*5c4a5fe1SAndy Fiddaman #endif
860*5c4a5fe1SAndy Fiddaman 
861*5c4a5fe1SAndy Fiddaman 	/*
862*5c4a5fe1SAndy Fiddaman 	 * Add all vCPUs.
863*5c4a5fe1SAndy Fiddaman 	 */
864*5c4a5fe1SAndy Fiddaman 	for (int vcpuid = 0; vcpuid < guest_ncpus; vcpuid++) {
865*5c4a5fe1SAndy Fiddaman #ifdef	__FreeBSD__
866*5c4a5fe1SAndy Fiddaman 		bool suspend = (vcpuid != BSP);
867*5c4a5fe1SAndy Fiddaman #else
868*5c4a5fe1SAndy Fiddaman 		bool suspend = vcpuid == BSP &&
869*5c4a5fe1SAndy Fiddaman 		    get_config_bool_default("suspend_at_boot", false);
870*5c4a5fe1SAndy Fiddaman #endif
871*5c4a5fe1SAndy Fiddaman 		bhyve_start_vcpu(vcpu_info[vcpuid].vcpu, vcpuid == BSP,
872*5c4a5fe1SAndy Fiddaman 		    suspend);
873*5c4a5fe1SAndy Fiddaman 	}
874*5c4a5fe1SAndy Fiddaman 
875*5c4a5fe1SAndy Fiddaman 	/*
876*5c4a5fe1SAndy Fiddaman 	 * Head off to the main event dispatch loop
877*5c4a5fe1SAndy Fiddaman 	 */
878*5c4a5fe1SAndy Fiddaman 	mevent_dispatch();
879*5c4a5fe1SAndy Fiddaman 
880*5c4a5fe1SAndy Fiddaman 	exit(4);
881*5c4a5fe1SAndy Fiddaman }
882