1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 /*
29 * This file and its contents are supplied under the terms of the
30 * Common Development and Distribution License ("CDDL"), version 1.0.
31 * You may only use this file in accordance with the terms of version
32 * 1.0 of the CDDL.
33 *
34 * A full copy of the text of the CDDL should have accompanied this
35 * source. A copy of the CDDL is also available via the Internet at
36 * http://www.illumos.org/license/CDDL.
37 *
38 * Copyright 2015 Pluribus Networks Inc.
39 * Copyright 2018 Joyent, Inc.
40 * Copyright 2022 Oxide Computer Company
41 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
42 */
43
44 #include <sys/types.h>
45 #include <machine/vmm.h>
46
47 #include <assert.h>
48 #include <err.h>
49 #include <stdbool.h>
50 #include <stdlib.h>
51 #include <sysexits.h>
52 #include <sys/types.h>
53 #include <sys/vmm.h>
54 #include <vmmapi.h>
55
56 #include "bhyverun.h"
57 #include "bootrom.h"
58 #include "acpi.h"
59 #include "atkbdc.h"
60 #include "config.h"
61 #include "debug.h"
62 #include "e820.h"
63 #include "fwctl.h"
64 #include "ioapic.h"
65 #include "inout.h"
66 #ifndef __FreeBSD__
67 #include "kernemu_dev.h"
68 #endif
69 #include "mptbl.h"
70 #include "pci_emul.h"
71 #include "pci_irq.h"
72 #include "spinup_ap.h"
73 #include "pci_lpc.h"
74 #include "rtc.h"
75 #include "smbiostbl.h"
76 #include "xmsr.h"
77
/*
 * Print the command-line synopsis and the per-option help text to stderr,
 * then terminate the process.
 *
 * code: exit status handed to exit().
 *
 * This function does not return.
 */
void
bhyve_usage(int code)
{
	const char *progname = getprogname();

	/*
	 * Every conversion below uses a numbered argument; POSIX leaves the
	 * result of mixing numbered ("%n$") and unnumbered conversions in a
	 * single format string undefined.
	 *
	 *   argument 1: the program name
	 *   argument 2: an empty string
	 *   argument 3: the length of the program name
	 *
	 * "%2$*3$s" prints the empty string padded to a field width of
	 * argument 3, so each continuation line of the synopsis is indented
	 * by the width of the program name.  (A precision, ".*", would only
	 * cap the length of the empty string and print nothing.)
	 */
	fprintf(stderr,
#ifdef __FreeBSD__
	    "Usage: %1$s [-AaCDeHhPSuWwxY]\n"
#else
	    "Usage: %1$s [-aCDdeHhPSuWwxY]\n"
#endif
	    " %2$*3$s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
#ifdef __FreeBSD__
	    " %2$*3$s [-G port] [-k config_file] [-l lpc] [-m mem] [-o var=value]\n"
	    " %2$*3$s [-p vcpu:hostcpu] [-r file] [-s pci] [-U uuid] vmname\n"

	    " -A: create ACPI tables\n"
#else
	    " %2$*3$s [-k <config_file>] [-l <lpc>] [-m mem] [-o <var>=<value>]\n"
	    " %2$*3$s [-s <pci>] [-U uuid] vmname\n"
#endif
	    " -a: local apic is in xAPIC mode (deprecated)\n"
#ifndef __FreeBSD__
	    " -B type,key=value,...: set SMBIOS information\n"
#endif
	    " -C: include guest memory in core file\n"
	    " -c: number of CPUs and/or topology specification\n"
	    " -D: destroy on power-off\n"
#ifndef __FreeBSD__
	    " -d: suspend cpu at boot\n"
#endif
	    " -e: exit on unhandled I/O access\n"
#ifdef __FreeBSD__
	    " -G: start a debug server\n"
#endif
	    " -H: vmexit from the guest on HLT\n"
	    " -h: help\n"
	    " -k: key=value flat config file\n"
	    " -K: PS2 keyboard layout\n"
	    " -l: LPC device configuration\n"
	    " -m: memory size\n"
	    " -o: set config 'var' to 'value'\n"
	    " -P: vmexit from the guest on pause\n"
#ifdef __FreeBSD__
	    " -p: pin 'vcpu' to 'hostcpu'\n"
#endif
	    " -S: guest memory cannot be swapped\n"
	    " -s: <slot,driver,configinfo> PCI slot config\n"
	    " -U: UUID\n"
	    " -u: RTC keeps UTC time\n"
	    " -W: force virtio to use single-vector MSI\n"
	    " -w: ignore unimplemented MSRs\n"
	    " -x: local APIC is in x2APIC mode\n"
	    " -Y: disable MPtable generation\n",
	    progname, "", (int)strlen(progname));

	exit(code);
}
136
137 void
bhyve_optparse(int argc,char ** argv)138 bhyve_optparse(int argc, char **argv)
139 {
140 const char *optstr;
141 int c;
142
143 #ifdef __FreeBSD__
144 optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:";
145 #else
146 /* +d, +B, -p */
147 optstr = "adehuwxACDHIPSWYk:f:o:G:c:s:m:l:B:K:U:";
148 #endif
149 while ((c = getopt(argc, argv, optstr)) != -1) {
150 switch (c) {
151 case 'a':
152 set_config_bool("x86.x2apic", false);
153 break;
154 case 'A':
155 set_config_bool("acpi_tables", true);
156 break;
157 case 'D':
158 set_config_bool("destroy_on_poweroff", true);
159 break;
160 #ifndef __FreeBSD__
161 case 'B':
162 if (smbios_parse(optarg) != 0) {
163 errx(EX_USAGE, "invalid SMBIOS "
164 "configuration '%s'", optarg);
165 }
166 break;
167 case 'd':
168 set_config_bool("suspend_at_boot", true);
169 break;
170 #endif
171 #ifdef __FreeBSD__
172 case 'p':
173 if (pincpu_parse(optarg) != 0) {
174 errx(EX_USAGE, "invalid vcpu pinning "
175 "configuration '%s'", optarg);
176 }
177 break;
178 #endif
179 case 'c':
180 if (bhyve_topology_parse(optarg) != 0) {
181 errx(EX_USAGE, "invalid cpu topology "
182 "'%s'", optarg);
183 }
184 break;
185 case 'C':
186 set_config_bool("memory.guest_in_core", true);
187 break;
188 case 'f':
189 if (qemu_fwcfg_parse_cmdline_arg(optarg) != 0) {
190 errx(EX_USAGE, "invalid fwcfg item '%s'", optarg);
191 }
192 break;
193 case 'G':
194 bhyve_parse_gdb_options(optarg);
195 break;
196 case 'k':
197 bhyve_parse_simple_config_file(optarg);
198 break;
199 case 'K':
200 set_config_value("keyboard.layout", optarg);
201 break;
202 case 'l':
203 if (strncmp(optarg, "help", strlen(optarg)) == 0) {
204 lpc_print_supported_devices();
205 exit(0);
206 } else if (lpc_device_parse(optarg) != 0) {
207 errx(EX_USAGE, "invalid lpc device "
208 "configuration '%s'", optarg);
209 }
210 break;
211 case 's':
212 if (strncmp(optarg, "help", strlen(optarg)) == 0) {
213 pci_print_supported_devices();
214 exit(0);
215 } else if (pci_parse_slot(optarg) != 0)
216 exit(4);
217 else
218 break;
219 case 'S':
220 set_config_bool("memory.wired", true);
221 break;
222 case 'm':
223 set_config_value("memory.size", optarg);
224 break;
225 case 'o':
226 if (!bhyve_parse_config_option(optarg))
227 errx(EX_USAGE, "invalid configuration option '%s'", optarg);
228 break;
229 case 'H':
230 set_config_bool("x86.vmexit_on_hlt", true);
231 break;
232 case 'I':
233 /*
234 * The "-I" option was used to add an ioapic to the
235 * virtual machine.
236 *
237 * An ioapic is now provided unconditionally for each
238 * virtual machine and this option is now deprecated.
239 */
240 break;
241 case 'P':
242 set_config_bool("x86.vmexit_on_pause", true);
243 break;
244 case 'e':
245 set_config_bool("x86.strictio", true);
246 break;
247 case 'u':
248 set_config_bool("rtc.use_localtime", false);
249 break;
250 case 'U':
251 set_config_value("uuid", optarg);
252 break;
253 case 'w':
254 set_config_bool("x86.strictmsr", false);
255 break;
256 case 'W':
257 set_config_bool("virtio_msix", false);
258 break;
259 case 'x':
260 set_config_bool("x86.x2apic", true);
261 break;
262 case 'Y':
263 set_config_bool("x86.mptable", false);
264 break;
265 case 'h':
266 bhyve_usage(0);
267 default:
268 bhyve_usage(1);
269 }
270 }
271
272 /* Handle backwards compatibility aliases in config options. */
273 if (get_config_value("lpc.bootrom") != NULL &&
274 get_config_value("bootrom") == NULL) {
275 warnx("lpc.bootrom is deprecated, use '-o bootrom' instead");
276 set_config_value("bootrom", get_config_value("lpc.bootrom"));
277 }
278 if (get_config_value("lpc.bootvars") != NULL &&
279 get_config_value("bootvars") == NULL) {
280 warnx("lpc.bootvars is deprecated, use '-o bootvars' instead");
281 set_config_value("bootvars", get_config_value("lpc.bootvars"));
282 }
283 }
284
/*
 * Initialise the configuration store and load the built-in defaults.
 * The comment in the original code states these are set prior to option
 * parsing.
 */
void
bhyve_init_config(void)
{
	/*
	 * Built-in defaults, applied in table order.  An entry with a NULL
	 * 'str' is a boolean whose value is 'flag'; otherwise 'str' is
	 * stored as the string value and 'flag' is ignored.
	 */
	static const struct {
		const char *path;
		const char *str;
		bool flag;
	} defaults[] = {
		{ "acpi_tables",		NULL,		false },
		{ "acpi_tables_in_memory",	NULL,		true },
		{ "memory.size",		"256M",		false },
		{ "x86.strictmsr",		NULL,		true },
		{ "lpc.fwcfg",			"bhyve",	false },
	};
	size_t i;

	init_config();

	for (i = 0; i < sizeof (defaults) / sizeof (defaults[0]); i++) {
		if (defaults[i].str != NULL)
			set_config_value(defaults[i].path, defaults[i].str);
		else
			set_config_bool(defaults[i].path, defaults[i].flag);
	}
}
297
298 void
bhyve_init_vcpu(struct vcpu * vcpu)299 bhyve_init_vcpu(struct vcpu *vcpu)
300 {
301 int err, tmp;
302
303 #ifdef __FreeBSD__
304 if (get_config_bool_default("x86.vmexit_on_hlt", false)) {
305 err = vm_get_capability(vcpu, VM_CAP_HALT_EXIT, &tmp);
306 if (err < 0) {
307 EPRINTLN("VM exit on HLT not supported");
308 exit(4);
309 }
310 vm_set_capability(vcpu, VM_CAP_HALT_EXIT, 1);
311 }
312 #else
313 /*
314 * We insist that vmexit-on-hlt is available on the host CPU, and enable
315 * it by default. Configuration of that feature is done with both of
316 * those facts in mind.
317 */
318 tmp = (int)get_config_bool_default("x86.vmexit_on_hlt", true);
319 err = vm_set_capability(vcpu, VM_CAP_HALT_EXIT, tmp);
320 if (err < 0) {
321 fprintf(stderr, "VM exit on HLT not supported\n");
322 exit(4);
323 }
324 #endif /* __FreeBSD__ */
325
326 if (get_config_bool_default("x86.vmexit_on_pause", false)) {
327 /*
328 * pause exit support required for this mode
329 */
330 err = vm_get_capability(vcpu, VM_CAP_PAUSE_EXIT, &tmp);
331 if (err < 0) {
332 EPRINTLN("SMP mux requested, no pause support");
333 exit(4);
334 }
335 vm_set_capability(vcpu, VM_CAP_PAUSE_EXIT, 1);
336 }
337
338 if (get_config_bool_default("x86.x2apic", false))
339 err = vm_set_x2apic_state(vcpu, X2APIC_ENABLED);
340 else
341 err = vm_set_x2apic_state(vcpu, X2APIC_DISABLED);
342
343 if (err) {
344 EPRINTLN("Unable to set x2apic state (%d)", err);
345 exit(4);
346 }
347
348 #ifdef __FreeBSD__
349 vm_set_capability(vcpu, VM_CAP_ENABLE_INVPCID, 1);
350
351 err = vm_set_capability(vcpu, VM_CAP_IPI_EXIT, 1);
352 assert(err == 0);
353 #endif
354 }
355
356 void
bhyve_start_vcpu(struct vcpu * vcpu,bool bsp,bool suspend)357 bhyve_start_vcpu(struct vcpu *vcpu, bool bsp, bool suspend)
358 {
359 int error;
360
361 if (!bsp) {
362 #ifndef __FreeBSD__
363 /*
364 * On illumos, all APs are spun up halted and run-state
365 * transitions (INIT, SIPI, etc) are handled in-kernel.
366 */
367 spinup_ap(vcpu, 0);
368 #endif
369
370 bhyve_init_vcpu(vcpu);
371
372 #ifdef __FreeBSD__
373 /*
374 * Enable the 'unrestricted guest' mode for APs.
375 *
376 * APs startup in power-on 16-bit mode.
377 */
378 error = vm_set_capability(vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
379 assert(error == 0);
380 #endif
381 }
382
383 #ifndef __FreeBSD__
384 /*
385 * The value of 'suspend' for the BSP depends on whether the -d
386 * (suspend_at_boot) flag was given to bhyve. Regardless of that
387 * value we always want to set the BSP to VRS_RUN and all others to
388 * VRS_HALT.
389 */
390 error = vm_set_run_state(vcpu, bsp ? VRS_RUN : VRS_HALT, 0);
391 assert(error == 0);
392 #endif
393
394 fbsdrun_addcpu(vcpu_id(vcpu), suspend);
395 }
396
397 int
bhyve_init_platform(struct vmctx * ctx,struct vcpu * bsp __unused)398 bhyve_init_platform(struct vmctx *ctx, struct vcpu *bsp __unused)
399 {
400 int error;
401
402 error = init_msr();
403 if (error != 0)
404 return (error);
405 init_inout();
406 #ifdef __FreeBSD__
407 kernemu_dev_init();
408 #endif
409 atkbdc_init(ctx);
410 pci_irq_init(ctx);
411 ioapic_init(ctx);
412 rtc_init(ctx);
413 sci_init(ctx);
414 #ifndef __FreeBSD__
415 pmtmr_init(ctx);
416 #endif
417 error = e820_init(ctx);
418 if (error != 0)
419 return (error);
420 error = bootrom_loadrom(ctx);
421 if (error != 0)
422 return (error);
423
424 #ifndef __FreeBSD__
425 if (get_config_bool_default("e820.debug", false))
426 e820_dump_table();
427 #endif
428
429 return (0);
430 }
431
432 int
bhyve_init_platform_late(struct vmctx * ctx,struct vcpu * bsp __unused)433 bhyve_init_platform_late(struct vmctx *ctx, struct vcpu *bsp __unused)
434 {
435 int error;
436
437 if (get_config_bool_default("x86.mptable", true)) {
438 error = mptable_build(ctx, guest_ncpus);
439 if (error != 0)
440 return (error);
441 }
442 error = smbios_build(ctx);
443 if (error != 0)
444 return (error);
445 error = e820_finalize();
446 if (error != 0)
447 return (error);
448
449 if (bootrom_boot() && strcmp(lpc_fwcfg(), "bhyve") == 0)
450 fwctl_init();
451
452 if (get_config_bool("acpi_tables")) {
453 error = acpi_build(ctx, guest_ncpus);
454 assert(error == 0);
455 }
456
457 return (0);
458 }
459