xref: /illumos-gate/usr/src/cmd/bhyve/amd64/bhyverun_machdep.c (revision 3fe455549728ac525df3be56130ad8e075d645d7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * This file and its contents are supplied under the terms of the
30  * Common Development and Distribution License ("CDDL"), version 1.0.
31  * You may only use this file in accordance with the terms of version
32  * 1.0 of the CDDL.
33  *
34  * A full copy of the text of the CDDL should have accompanied this
35  * source.  A copy of the CDDL is also available via the Internet at
36  * http://www.illumos.org/license/CDDL.
37  *
38  * Copyright 2015 Pluribus Networks Inc.
39  * Copyright 2018 Joyent, Inc.
40  * Copyright 2022 Oxide Computer Company
41  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
42  */
43 
#include <sys/types.h>
#include <machine/vmm.h>

#include <assert.h>
#include <err.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "bootrom.h"
#include "acpi.h"
#include "atkbdc.h"
#include "config.h"
#include "debug.h"
#include "e820.h"
#include "fwctl.h"
#include "ioapic.h"
#include "inout.h"
#ifdef	__FreeBSD__
#include "kernemu_dev.h"
#endif
#include "mptbl.h"
#include "pci_emul.h"
#include "pci_irq.h"
#include "spinup_ap.h"
#include "pci_lpc.h"
#include "rtc.h"
#include "smbiostbl.h"
#include "xmsr.h"
77 
/*
 * Print the command-line synopsis and the per-option help text to stderr,
 * then terminate the process with exit status 'code'.  This function does
 * not return.
 *
 * The format string uses numbered argument specifications throughout
 * (numbered and unnumbered specifications must not be mixed in one format):
 *   argument 1 - the program name
 *   argument 2 - an empty string
 *   argument 3 - the length of the program name
 * "%2$*3$s" prints the empty string in a field as wide as the program name,
 * i.e. that many blanks, so the continuation lines of the synopsis line up
 * beneath the option list of the first line.  (A precision, ".*", would
 * print nothing at all for an empty string.)
 */
void
bhyve_usage(int code)
{
	const char *progname = getprogname();

	fprintf(stderr,
#ifdef	__FreeBSD__
		"Usage: %1$s [-AaCDeHhPSuWwxY]\n"
#else
		"Usage: %1$s [-aCDdeHhPSuWwxY]\n"
#endif
		"       %2$*3$s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
#ifdef	__FreeBSD__
		"       %2$*3$s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n"
		"       %2$*3$s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n"

		"       -A: create ACPI tables\n"
#else
		"       %2$*3$s [-k <config_file>] [-l <lpc>] [-m mem] [-o <var>=<value>]\n"
		"       %2$*3$s [-s <pci>] [-U uuid] vmname\n"
#endif
		"       -a: local apic is in xAPIC mode (deprecated)\n"
#ifndef __FreeBSD__
		"       -B type,key=value,...: set SMBIOS information\n"
#endif
		"       -C: include guest memory in core file\n"
		"       -c: number of CPUs and/or topology specification\n"
		"       -D: destroy on power-off\n"
#ifndef __FreeBSD__
		"       -d: suspend cpu at boot\n"
#endif
		"       -e: exit on unhandled I/O access\n"
#ifdef	__FreeBSD__
		"       -G: start a debug server\n"
#endif
		"       -H: vmexit from the guest on HLT\n"
		"       -h: help\n"
		"       -k: key=value flat config file\n"
		"       -K: PS2 keyboard layout\n"
		"       -l: LPC device configuration\n"
		"       -m: memory size\n"
		"       -o: set config 'var' to 'value'\n"
		"       -P: vmexit from the guest on pause\n"
#ifdef	__FreeBSD__
		"       -p: pin 'vcpu' to 'hostcpu'\n"
#endif
		"       -S: guest memory cannot be swapped\n"
		"       -s: <slot,driver,configinfo> PCI slot config\n"
		"       -U: UUID\n"
		"       -u: RTC keeps UTC time\n"
		"       -W: force virtio to use single-vector MSI\n"
		"       -w: ignore unimplemented MSRs\n"
		"       -x: local APIC is in x2APIC mode\n"
		"       -Y: disable MPtable generation\n",
		progname, "", (int)strlen(progname));

	exit(code);
}
136 
137 void
138 bhyve_optparse(int argc, char **argv)
139 {
140 	const char *optstr;
141 	int c;
142 
143 #ifdef	__FreeBSD__
144 	optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:";
145 #else
146 	/* +d, +B, -p */
147 	optstr = "adehuwxACDHIPSWYk:f:o:G:c:s:m:l:B:K:U:";
148 #endif
149 	while ((c = getopt(argc, argv, optstr)) != -1) {
150 		switch (c) {
151 		case 'a':
152 			set_config_bool("x86.x2apic", false);
153 			break;
154 		case 'A':
155 			set_config_bool("acpi_tables", true);
156 			break;
157 		case 'D':
158 			set_config_bool("destroy_on_poweroff", true);
159 			break;
160 #ifndef	__FreeBSD__
161 		case 'B':
162 			if (smbios_parse(optarg) != 0) {
163 				errx(EX_USAGE, "invalid SMBIOS "
164 				    "configuration '%s'", optarg);
165 			}
166 			break;
167 		case 'd':
168 			set_config_bool("suspend_at_boot", true);
169 			break;
170 #endif
171 #ifdef	__FreeBSD__
172 		case 'p':
173 			if (pincpu_parse(optarg) != 0) {
174 				errx(EX_USAGE, "invalid vcpu pinning "
175 				    "configuration '%s'", optarg);
176 			}
177 			break;
178 #endif
179 		case 'c':
180 			if (bhyve_topology_parse(optarg) != 0) {
181 			    errx(EX_USAGE, "invalid cpu topology "
182 				"'%s'", optarg);
183 			}
184 			break;
185 		case 'C':
186 			set_config_bool("memory.guest_in_core", true);
187 			break;
188 		case 'f':
189 			if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {
190 			    errx(EX_USAGE, "invalid fwcfg item '%s'", optarg);
191 			}
192 			break;
193 		case 'G':
194 			bhyve_parse_gdb_options(optarg);
195 			break;
196 		case 'k':
197 			bhyve_parse_simple_config_file(optarg);
198 			break;
199 		case 'K':
200 			set_config_value("keyboard.layout", optarg);
201 			break;
202 		case 'l':
203 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
204 				lpc_print_supported_devices();
205 				exit(0);
206 			} else if (lpc_device_parse(optarg) != 0) {
207 				errx(EX_USAGE, "invalid lpc device "
208 				    "configuration '%s'", optarg);
209 			}
210 			break;
211 		case 's':
212 			if (strncmp(optarg, "help", strlen(optarg)) == 0) {
213 				pci_print_supported_devices();
214 				exit(0);
215 			} else if (pci_parse_slot(optarg) != 0)
216 				exit(4);
217 			else
218 				break;
219 		case 'S':
220 			set_config_bool("memory.wired", true);
221 			break;
222 		case 'm':
223 			set_config_value("memory.size", optarg);
224 			break;
225 		case 'o':
226 			if (!bhyve_parse_config_option(optarg))
227 				errx(EX_USAGE, "invalid configuration option '%s'", optarg);
228 			break;
229 		case 'H':
230 			set_config_bool("x86.vmexit_on_hlt", true);
231 			break;
232 		case 'I':
233 			/*
234 			 * The "-I" option was used to add an ioapic to the
235 			 * virtual machine.
236 			 *
237 			 * An ioapic is now provided unconditionally for each
238 			 * virtual machine and this option is now deprecated.
239 			 */
240 			break;
241 		case 'P':
242 			set_config_bool("x86.vmexit_on_pause", true);
243 			break;
244 		case 'e':
245 			set_config_bool("x86.strictio", true);
246 			break;
247 		case 'u':
248 			set_config_bool("rtc.use_localtime", false);
249 			break;
250 		case 'U':
251 			set_config_value("uuid", optarg);
252 			break;
253 		case 'w':
254 			set_config_bool("x86.strictmsr", false);
255 			break;
256 		case 'W':
257 			set_config_bool("virtio_msix", false);
258 			break;
259 		case 'x':
260 			set_config_bool("x86.x2apic", true);
261 			break;
262 		case 'Y':
263 			set_config_bool("x86.mptable", false);
264 			break;
265 		case 'h':
266 			bhyve_usage(0);
267 		default:
268 			bhyve_usage(1);
269 		}
270 	}
271 
272 	/* Handle backwards compatibility aliases in config options. */
273 	if (get_config_value("lpc.bootrom") != NULL &&
274 	    get_config_value("bootrom") == NULL) {
275 		warnx("lpc.bootrom is deprecated, use '-o bootrom' instead");
276 		set_config_value("bootrom", get_config_value("lpc.bootrom"));
277 	}
278 	if (get_config_value("lpc.bootvars") != NULL &&
279 	    get_config_value("bootvars") == NULL) {
280 		warnx("lpc.bootvars is deprecated, use '-o bootvars' instead");
281 		set_config_value("bootvars", get_config_value("lpc.bootvars"));
282 	}
283 }
284 
/*
 * Create the configuration store and seed it with the defaults that must be
 * in place before the command line is parsed.
 */
void
bhyve_init_config(void)
{
	init_config();

	/* ACPI: tables are not generated unless asked for. */
	set_config_bool("acpi_tables", false);
	set_config_bool("acpi_tables_in_memory", true);

	/* Guest memory size used when none is specified. */
	set_config_value("memory.size", "256M");

	/* Strict MSR handling is on by default. */
	set_config_bool("x86.strictmsr", true);

	/* Default firmware configuration interface. */
	set_config_value("lpc.fwcfg", "bhyve");
}
297 
298 void
299 bhyve_init_vcpu(struct vcpu *vcpu)
300 {
301 	int err, tmp;
302 
303 #ifdef	__FreeBSD__
304 	if (get_config_bool_default("x86.vmexit_on_hlt", false)) {
305 		err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp);
306 		if (err < 0) {
307 			EPRINTLN("VM exit on HLT not supported");
308 			exit(4);
309 		}
310 		vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1);
311 	}
312 #else
313 	/*
314 	 * We insist that vmexit-on-hlt is available on the host CPU, and enable
315 	 * it by default.  Configuration of that feature is done with both of
316 	 * those facts in mind.
317 	 */
318 	tmp = (int)get_config_bool_default("x86.vmexit_on_hlt", true);
319 	err = vm_set_capability(vcpu, VM_CAP_HALT_EXIT, tmp);
320 	if (err < 0) {
321 		fprintf(stderr, "VM exit on HLT not supported\n");
322 		exit(4);
323 	}
324 #endif /* __FreeBSD__ */
325 
326 	if (get_config_bool_default("x86.vmexit_on_pause", false)) {
327 		/*
328 		 * pause exit support required for this mode
329 		 */
330 		err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp);
331 		if (err < 0) {
332 			EPRINTLN("SMP mux requested, no pause support");
333 			exit(4);
334 		}
335 		vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1);
336 	}
337 
338 	if (get_config_bool_default("x86.x2apic", false))
339 		err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED);
340 	else
341 		err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED);
342 
343 	if (err) {
344 		EPRINTLN("Unable to set x2apic state (%d)", err);
345 		exit(4);
346 	}
347 
348 #ifdef	__FreeBSD__
349 	vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1);
350 
351 	err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1);
352 	assert(err == 0);
353 #endif
354 }
355 
356 void
357 bhyve_start_vcpu(struct vcpu *vcpu, bool bsp, bool suspend)
358 {
359 	int error;
360 
361 	if (!bsp) {
362 #ifndef	__FreeBSD__
363 		/*
364 		 * On illumos, all APs are spun up halted and run-state
365 		 * transitions (INIT, SIPI, etc) are handled in-kernel.
366 		 */
367 		spinup_ap(vcpu, 0);
368 #endif
369 
370 		bhyve_init_vcpu(vcpu);
371 
372 #ifdef	__FreeBSD__
373 		/*
374 		 * Enable the 'unrestricted guest' mode for APs.
375 		 *
376 		 * APs startup in power-on 16-bit mode.
377 		 */
378 		error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
379 		assert(error == 0);
380 #endif
381 	}
382 
383 #ifndef	__FreeBSD__
384 	/*
385 	 * The value of 'suspend' for the BSP depends on whether the -d
386 	 * (suspend_at_boot) flag was given to bhyve. Regardless of that
387 	 * value we always want to set the BSP to VRS_RUN and all others to
388 	 * VRS_HALT.
389 	 */
390 	error = vm_set_run_state(vcpu, bsp ? VRS_RUN : VRS_HALT, 0);
391 	assert(error == 0);
392 #endif
393 
394 	fbsdrun_addcpu(vcpu_id(vcpu), suspend);
395 }
396 
397 int
398 bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp __unused)
399 {
400 	int error;
401 
402 	error = init_msr();
403 	if (error != 0)
404 		return (error);
405 	init_inout();
406 #ifdef	__FreeBSD__
407 	kernemu_dev_init();
408 #endif
409 	atkbdc_init(ctx);
410 	pci_irq_init(ctx);
411 	ioapic_init(ctx);
412 	rtc_init(ctx);
413 	sci_init(ctx);
414 #ifndef	__FreeBSD__
415 	pmtmr_init(ctx);
416 #endif
417 	error = e820_init(ctx);
418 	if (error != 0)
419 		return (error);
420 	error = bootrom_loadrom(ctx);
421 	if (error != 0)
422 		return (error);
423 
424 #ifndef	__FreeBSD__
425 	if (get_config_bool_default("e820.debug", false))
426 		e820_dump_table();
427 #endif
428 
429 	return (0);
430 }
431 
432 int
433 bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp __unused)
434 {
435 	int error;
436 
437 	if (get_config_bool_default("x86.mptable", true)) {
438 		error = mptable_build(ctx, guest_ncpus);
439 		if (error != 0)
440 			return (error);
441 	}
442 	error = smbios_build(ctx);
443 	if (error != 0)
444 		return (error);
445 	error = e820_finalize();
446 	if (error != 0)
447 		return (error);
448 
449 	if (bootrom_boot() && strcmp(lpc_fwcfg(), "bhyve") == 0)
450 		fwctl_init();
451 
452 	if (get_config_bool("acpi_tables")) {
453 		error = acpi_build(ctx, guest_ncpus);
454 		assert(error == 0);
455 	}
456 
457 	return (0);
458 }
459