1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/types.h>
30
31 #include <machine/vmm.h>
32 #include <machine/vmm_dev.h>
33 #include <machine/vmm_instruction_emul.h>
34 #include <amd64/vmm/intel/vmcs.h>
35 #include <x86/apicreg.h>
36
37 #include <assert.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <stdlib.h>
41 #include <strings.h>
42 #include <unistd.h>
43
44 #include <vmmapi.h>
45
46 #include "bhyverun.h"
47 #include "config.h"
48 #include "debug.h"
49 #include "gdb.h"
50 #include "inout.h"
51 #include "mem.h"
52 #ifdef BHYVE_SNAPSHOT
53 #include "snapshot.h"
54 #endif
55 #include "spinup_ap.h"
56 #include "vmexit.h"
57 #include "xmsr.h"
58
/*
 * Inject exception 'vector' into 'vcpu'.  'errcode' is passed along with
 * 'errcode_valid' to vm_inject_exception(); the restart_instruction
 * argument is always 1.  The injection is asserted to succeed.
 */
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	const int restart_instruction = 1;
	int rc;

	rc = vm_inject_exception(vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(rc == 0);
}
71
72 static int
vmexit_inout(struct vmctx * ctx,struct vcpu * vcpu,struct vm_run * vmrun)73 vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
74 {
75 struct vm_exit *vme;
76 int error;
77 int bytes, port, in;
78
79 vme = vmrun->vm_exit;
80 port = vme->u.inout.port;
81 bytes = vme->u.inout.bytes;
82 in = vme->u.inout.in;
83
84 error = emulate_inout(ctx, vcpu, vme);
85 if (error) {
86 EPRINTLN("Unhandled %s%c 0x%04x at 0x%lx",
87 in ? "in" : "out",
88 bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
89 port, vme->rip);
90 return (VMEXIT_ABORT);
91 } else {
92 return (VMEXIT_CONTINUE);
93 }
94 }
95
96 static int
vmexit_rdmsr(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)97 vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
98 struct vm_run *vmrun)
99 {
100 struct vm_exit *vme;
101 uint64_t val;
102 uint32_t eax, edx;
103 int error;
104
105 vme = vmrun->vm_exit;
106
107 val = 0;
108 error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
109 if (error != 0) {
110 EPRINTLN("rdmsr to register %#x on vcpu %d",
111 vme->u.msr.code, vcpu_id(vcpu));
112 if (get_config_bool("x86.strictmsr")) {
113 vm_inject_gp(vcpu);
114 return (VMEXIT_CONTINUE);
115 }
116 }
117
118 eax = val;
119 error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
120 assert(error == 0);
121
122 edx = val >> 32;
123 error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
124 assert(error == 0);
125
126 return (VMEXIT_CONTINUE);
127 }
128
129 static int
vmexit_wrmsr(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)130 vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
131 struct vm_run *vmrun)
132 {
133 struct vm_exit *vme;
134 int error;
135
136 vme = vmrun->vm_exit;
137
138 error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
139 if (error != 0) {
140 EPRINTLN("wrmsr to register %#x(%#lx) on vcpu %d",
141 vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
142 if (get_config_bool("x86.strictmsr")) {
143 vm_inject_gp(vcpu);
144 return (VMEXIT_CONTINUE);
145 }
146 }
147 return (VMEXIT_CONTINUE);
148 }
149
/*
 * Human-readable descriptions of VMX exit reasons, indexed by the
 * EXIT_REASON_* constants.  Indices without an initializer are NULL;
 * vmexit_vmx_desc() reports those (and out-of-range values) as "Unknown".
 */
static const char * const vmx_exit_reason_desc[] = {
	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
	[EXIT_REASON_EXT_INTR] = "External interrupt",
	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
	[EXIT_REASON_INIT] = "INIT signal",
	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
	[EXIT_REASON_SMI] = "Other SMI",
	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
	[EXIT_REASON_NMI_WINDOW] = "NMI window",
	[EXIT_REASON_TASK_SWITCH] = "Task switch",
	[EXIT_REASON_CPUID] = "CPUID",
	[EXIT_REASON_GETSEC] = "GETSEC",
	[EXIT_REASON_HLT] = "HLT",
	[EXIT_REASON_INVD] = "INVD",
	[EXIT_REASON_INVLPG] = "INVLPG",
	[EXIT_REASON_RDPMC] = "RDPMC",
	[EXIT_REASON_RDTSC] = "RDTSC",
	[EXIT_REASON_RSM] = "RSM",
	[EXIT_REASON_VMCALL] = "VMCALL",
	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
	[EXIT_REASON_VMPTRST] = "VMPTRST",
	[EXIT_REASON_VMREAD] = "VMREAD",
	[EXIT_REASON_VMRESUME] = "VMRESUME",
	[EXIT_REASON_VMWRITE] = "VMWRITE",
	[EXIT_REASON_VMXOFF] = "VMXOFF",
	[EXIT_REASON_VMXON] = "VMXON",
	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
	[EXIT_REASON_DR_ACCESS] = "MOV DR",
	[EXIT_REASON_INOUT] = "I/O instruction",
	[EXIT_REASON_RDMSR] = "RDMSR",
	[EXIT_REASON_WRMSR] = "WRMSR",
	[EXIT_REASON_INVAL_VMCS] =
	    "VM-entry failure due to invalid guest state",
	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
	[EXIT_REASON_MWAIT] = "MWAIT",
	[EXIT_REASON_MTF] = "Monitor trap flag",
	[EXIT_REASON_MONITOR] = "MONITOR",
	[EXIT_REASON_PAUSE] = "PAUSE",
	[EXIT_REASON_MCE_DURING_ENTRY] =
	    "VM-entry failure due to machine-check event",
	[EXIT_REASON_TPR] = "TPR below threshold",
	[EXIT_REASON_APIC_ACCESS] = "APIC access",
	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
	[EXIT_REASON_EPT_FAULT] = "EPT violation",
	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
	[EXIT_REASON_INVEPT] = "INVEPT",
	[EXIT_REASON_RDTSCP] = "RDTSCP",
	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
	[EXIT_REASON_INVVPID] = "INVVPID",
	[EXIT_REASON_WBINVD] = "WBINVD",
	[EXIT_REASON_XSETBV] = "XSETBV",
	[EXIT_REASON_APIC_WRITE] = "APIC write",
	[EXIT_REASON_RDRAND] = "RDRAND",
	[EXIT_REASON_INVPCID] = "INVPCID",
	[EXIT_REASON_VMFUNC] = "VMFUNC",
	[EXIT_REASON_ENCLS] = "ENCLS",
	[EXIT_REASON_RDSEED] = "RDSEED",
	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
	[EXIT_REASON_XSAVES] = "XSAVES",
	[EXIT_REASON_XRSTORS] = "XRSTORS"
};
216
217 static const char *
vmexit_vmx_desc(uint32_t exit_reason)218 vmexit_vmx_desc(uint32_t exit_reason)
219 {
220
221 if (exit_reason >= nitems(vmx_exit_reason_desc) ||
222 vmx_exit_reason_desc[exit_reason] == NULL)
223 return ("Unknown");
224 return (vmx_exit_reason_desc[exit_reason]);
225 }
226
/*
 * DEBUG_EPT_MISCONFIG is defined unconditionally here, so the extra
 * EPT-misconfiguration dump in vmexit_vmx() is always compiled in.
 */
#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
/*
 * Identifier passed through VMCS_IDENT() to vm_get_register() in
 * vmexit_vmx() to fetch the guest-physical address that gets printed.
 */
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400

/* Scratch storage filled in and printed by vmexit_vmx(). */
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
234
235 static int
vmexit_vmx(struct vmctx * ctx,struct vcpu * vcpu,struct vm_run * vmrun)236 vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
237 {
238 struct vm_exit *vme;
239
240 vme = vmrun->vm_exit;
241
242 EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
243 EPRINTLN("\treason\t\tVMX");
244 EPRINTLN("\trip\t\t0x%016lx", vme->rip);
245 EPRINTLN("\tinst_length\t%d", vme->inst_length);
246 EPRINTLN("\tstatus\t\t%d", vme->u.vmx.status);
247 EPRINTLN("\texit_reason\t%u (%s)", vme->u.vmx.exit_reason,
248 vmexit_vmx_desc(vme->u.vmx.exit_reason));
249 EPRINTLN("\tqualification\t0x%016lx",
250 vme->u.vmx.exit_qualification);
251 EPRINTLN("\tinst_type\t\t%d", vme->u.vmx.inst_type);
252 EPRINTLN("\tinst_error\t\t%d", vme->u.vmx.inst_error);
253 #ifdef DEBUG_EPT_MISCONFIG
254 if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
255 vm_get_register(vcpu,
256 VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
257 &ept_misconfig_gpa);
258 vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
259 &ept_misconfig_ptenum);
260 EPRINTLN("\tEPT misconfiguration:");
261 EPRINTLN("\t\tGPA: %#lx", ept_misconfig_gpa);
262 EPRINTLN("\t\tPTE(%d): %#lx %#lx %#lx %#lx",
263 ept_misconfig_ptenum, ept_misconfig_pte[0],
264 ept_misconfig_pte[1], ept_misconfig_pte[2],
265 ept_misconfig_pte[3]);
266 }
267 #endif /* DEBUG_EPT_MISCONFIG */
268 return (VMEXIT_ABORT);
269 }
270
271 static int
vmexit_svm(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)272 vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
273 {
274 struct vm_exit *vme;
275
276 vme = vmrun->vm_exit;
277
278 EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
279 EPRINTLN("\treason\t\tSVM");
280 EPRINTLN("\trip\t\t0x%016lx", vme->rip);
281 EPRINTLN("\tinst_length\t%d", vme->inst_length);
282 EPRINTLN("\texitcode\t%#lx", vme->u.svm.exitcode);
283 EPRINTLN("\texitinfo1\t%#lx", vme->u.svm.exitinfo1);
284 EPRINTLN("\texitinfo2\t%#lx", vme->u.svm.exitinfo2);
285 return (VMEXIT_ABORT);
286 }
287
288 static int
vmexit_bogus(struct vmctx * ctx __unused,struct vcpu * vcpu __unused,struct vm_run * vmrun)289 vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
290 struct vm_run *vmrun)
291 {
292 assert(vmrun->vm_exit->inst_length == 0);
293
294 return (VMEXIT_CONTINUE);
295 }
296
297 static int
vmexit_reqidle(struct vmctx * ctx __unused,struct vcpu * vcpu __unused,struct vm_run * vmrun)298 vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
299 struct vm_run *vmrun)
300 {
301 assert(vmrun->vm_exit->inst_length == 0);
302
303 return (VMEXIT_CONTINUE);
304 }
305
/*
 * Handler for VM_EXITCODE_HLT.  No arguments are used; the guest is
 * always resumed.
 */
static int
vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	/*
	 * Just continue execution with the next instruction.  We use
	 * the HLT VM exit as a way to be friendly with the host
	 * scheduler.
	 */
	return (VMEXIT_CONTINUE);
}
317
/*
 * Handler for VM_EXITCODE_PAUSE.  No arguments are used and no work is
 * done; the guest is always resumed.
 */
static int
vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	return (VMEXIT_CONTINUE);
}
324
325 static int
vmexit_mtrap(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)326 vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
327 struct vm_run *vmrun)
328 {
329 assert(vmrun->vm_exit->inst_length == 0);
330
331 #ifdef BHYVE_SNAPSHOT
332 checkpoint_cpu_suspend(vcpu_id(vcpu));
333 #endif
334 gdb_cpu_mtrap(vcpu);
335 #ifdef BHYVE_SNAPSHOT
336 checkpoint_cpu_resume(vcpu_id(vcpu));
337 #endif
338
339 return (VMEXIT_CONTINUE);
340 }
341
342 static int
vmexit_inst_emul(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)343 vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
344 struct vm_run *vmrun)
345 {
346 struct vm_exit *vme;
347 struct vie *vie;
348 int err, i, cs_d;
349 enum vm_cpu_mode mode;
350
351 vme = vmrun->vm_exit;
352
353 vie = &vme->u.inst_emul.vie;
354 if (!vie->decoded) {
355 /*
356 * Attempt to decode in userspace as a fallback. This allows
357 * updating instruction decode in bhyve without rebooting the
358 * kernel (rapid prototyping), albeit with much slower
359 * emulation.
360 */
361 vie_restart(vie);
362 mode = vme->u.inst_emul.paging.cpu_mode;
363 cs_d = vme->u.inst_emul.cs_d;
364 if (vmm_decode_instruction(mode, cs_d, vie) != 0)
365 goto fail;
366 if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
367 vme->rip + vie->num_processed) != 0)
368 goto fail;
369 }
370
371 err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
372 &vme->u.inst_emul.paging);
373 if (err) {
374 if (err == ESRCH) {
375 EPRINTLN("Unhandled memory access to 0x%lx\n",
376 vme->u.inst_emul.gpa);
377 }
378 goto fail;
379 }
380
381 return (VMEXIT_CONTINUE);
382
383 fail:
384 fprintf(stderr, "Failed to emulate instruction sequence [ ");
385 for (i = 0; i < vie->num_valid; i++)
386 fprintf(stderr, "%02x", vie->inst[i]);
387 FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
388 return (VMEXIT_ABORT);
389 }
390
391 static int
vmexit_suspend(struct vmctx * ctx,struct vcpu * vcpu,struct vm_run * vmrun)392 vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
393 {
394 struct vm_exit *vme;
395 enum vm_suspend_how how;
396 int vcpuid = vcpu_id(vcpu);
397
398 vme = vmrun->vm_exit;
399
400 how = vme->u.suspended.how;
401
402 fbsdrun_deletecpu(vcpuid);
403
404 switch (how) {
405 case VM_SUSPEND_RESET:
406 exit(0);
407 case VM_SUSPEND_POWEROFF:
408 if (get_config_bool_default("destroy_on_poweroff", false))
409 vm_destroy(ctx);
410 exit(1);
411 case VM_SUSPEND_HALT:
412 exit(2);
413 case VM_SUSPEND_TRIPLEFAULT:
414 exit(3);
415 default:
416 EPRINTLN("vmexit_suspend: invalid reason %d", how);
417 exit(100);
418 }
419 return (0); /* NOTREACHED */
420 }
421
422 static int
vmexit_debug(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun __unused)423 vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
424 struct vm_run *vmrun __unused)
425 {
426
427 #ifdef BHYVE_SNAPSHOT
428 checkpoint_cpu_suspend(vcpu_id(vcpu));
429 #endif
430 gdb_cpu_suspend(vcpu);
431 #ifdef BHYVE_SNAPSHOT
432 checkpoint_cpu_resume(vcpu_id(vcpu));
433 #endif
434 /*
435 * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
436 * window between activation of the vCPU thread and the STARTUP IPI.
437 */
438 usleep(1000);
439 return (VMEXIT_CONTINUE);
440 }
441
442 static int
vmexit_db(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)443 vmexit_db(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
444 {
445
446 #ifdef BHYVE_SNAPSHOT
447 checkpoint_cpu_suspend(vcpu_id(vcpu));
448 #endif
449 gdb_cpu_debug(vcpu, vmrun->vm_exit);
450 #ifdef BHYVE_SNAPSHOT
451 checkpoint_cpu_resume(vcpu_id(vcpu));
452 #endif
453 return (VMEXIT_CONTINUE);
454 }
455
456 static int
vmexit_breakpoint(struct vmctx * ctx __unused,struct vcpu * vcpu,struct vm_run * vmrun)457 vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
458 struct vm_run *vmrun)
459 {
460 gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
461 return (VMEXIT_CONTINUE);
462 }
463
464 static int
vmexit_ipi(struct vmctx * ctx __unused,struct vcpu * vcpu __unused,struct vm_run * vmrun)465 vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
466 struct vm_run *vmrun)
467 {
468 struct vm_exit *vme;
469 cpuset_t *dmask;
470 int error = -1;
471 int i;
472
473 dmask = vmrun->cpuset;
474 vme = vmrun->vm_exit;
475
476 switch (vme->u.ipi.mode) {
477 case APIC_DELMODE_INIT:
478 CPU_FOREACH_ISSET(i, dmask) {
479 error = fbsdrun_suspendcpu(i);
480 if (error) {
481 warnx("failed to suspend cpu %d", i);
482 break;
483 }
484 }
485 break;
486 case APIC_DELMODE_STARTUP:
487 CPU_FOREACH_ISSET(i, dmask) {
488 spinup_ap(fbsdrun_vcpu(i),
489 vme->u.ipi.vector << PAGE_SHIFT);
490 }
491 error = 0;
492 break;
493 default:
494 break;
495 }
496
497 return (error);
498 }
499
/* Declared here; its definition is not in this part of the file. */
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);

/*
 * Dispatch table mapping VM_EXITCODE_* values to their handlers.  Exit
 * codes without an initializer are left as NULL entries.
 */
const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT] = vmexit_inout,
	[VM_EXITCODE_INOUT_STR] = vmexit_inout,
	[VM_EXITCODE_VMX] = vmexit_vmx,
	[VM_EXITCODE_SVM] = vmexit_svm,
	[VM_EXITCODE_BOGUS] = vmexit_bogus,
	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
	[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP] = vmexit_mtrap,
	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
	[VM_EXITCODE_DEBUG] = vmexit_debug,
	[VM_EXITCODE_BPT] = vmexit_breakpoint,
	[VM_EXITCODE_IPI] = vmexit_ipi,
	[VM_EXITCODE_HLT] = vmexit_hlt,
	[VM_EXITCODE_PAUSE] = vmexit_pause,
	[VM_EXITCODE_DB] = vmexit_db,
};
522