xref: /freebsd/usr.sbin/bhyve/amd64/vmexit.c (revision 257405d707d77bc55b38e7c2bb83b8a9247a86ae)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/types.h>
30 
31 #include <machine/vmm.h>
32 #include <machine/vmm_dev.h>
33 #include <machine/vmm_instruction_emul.h>
34 #include <amd64/vmm/intel/vmcs.h>
35 #include <x86/apicreg.h>
36 
37 #include <assert.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <stdlib.h>
41 #include <strings.h>
42 #include <unistd.h>
43 
44 #include <vmmapi.h>
45 
46 #include "bhyverun.h"
47 #include "config.h"
48 #include "debug.h"
49 #include "gdb.h"
50 #include "inout.h"
51 #include "mem.h"
52 #ifdef BHYVE_SNAPSHOT
53 #include "snapshot.h"
54 #endif
55 #include "spinup_ap.h"
56 #include "vmexit.h"
57 #include "xmsr.h"
58 
/*
 * Inject exception 'vector' into 'vcpu' through vm_inject_exception(),
 * forwarding 'errcode_valid' and 'errcode' unchanged.  The
 * restart_instruction argument is always passed as 1.  A non-zero
 * return from vm_inject_exception() trips the assertion.
 */
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	const int restart_instruction = 1;
	int error;

	error = vm_inject_exception(vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}
71 
72 static int
73 vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
74 {
75 	struct vm_exit *vme;
76 	int error;
77 	int bytes, port, in;
78 
79 	vme = vmrun->vm_exit;
80 	port = vme->u.inout.port;
81 	bytes = vme->u.inout.bytes;
82 	in = vme->u.inout.in;
83 
84 	error = emulate_inout(ctx, vcpu, vme);
85 	if (error) {
86 		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
87 		    in ? "in" : "out",
88 		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
89 		    port, vme->rip);
90 		return (VMEXIT_ABORT);
91 	} else {
92 		return (VMEXIT_CONTINUE);
93 	}
94 }
95 
96 static int
97 vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
98     struct vm_run *vmrun)
99 {
100 	struct vm_exit *vme;
101 	uint64_t val;
102 	uint32_t eax, edx;
103 	int error;
104 
105 	vme = vmrun->vm_exit;
106 
107 	val = 0;
108 	error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
109 	if (error != 0) {
110 		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
111 		    vme->u.msr.code, vcpu_id(vcpu));
112 		if (get_config_bool("x86.strictmsr")) {
113 			vm_inject_gp(vcpu);
114 			return (VMEXIT_CONTINUE);
115 		}
116 	}
117 
118 	eax = val;
119 	error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
120 	assert(error == 0);
121 
122 	edx = val >> 32;
123 	error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
124 	assert(error == 0);
125 
126 	return (VMEXIT_CONTINUE);
127 }
128 
129 static int
130 vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
131     struct vm_run *vmrun)
132 {
133 	struct vm_exit *vme;
134 	int error;
135 
136 	vme = vmrun->vm_exit;
137 
138 	error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
139 	if (error != 0) {
140 		fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
141 		    vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
142 		if (get_config_bool("x86.strictmsr")) {
143 			vm_inject_gp(vcpu);
144 			return (VMEXIT_CONTINUE);
145 		}
146 	}
147 	return (VMEXIT_CONTINUE);
148 }
149 
/*
 * Human-readable descriptions of VMX exit reasons, indexed by the
 * EXIT_REASON_* constants.  Indices without a designated initializer
 * are NULL; vmexit_vmx_desc() reports those as "Unknown".
 */
static const char * const vmx_exit_reason_desc[] = {
	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
	[EXIT_REASON_EXT_INTR] = "External interrupt",
	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
	[EXIT_REASON_INIT] = "INIT signal",
	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
	[EXIT_REASON_SMI] = "Other SMI",
	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
	[EXIT_REASON_NMI_WINDOW] = "NMI window",
	[EXIT_REASON_TASK_SWITCH] = "Task switch",
	[EXIT_REASON_CPUID] = "CPUID",
	[EXIT_REASON_GETSEC] = "GETSEC",
	[EXIT_REASON_HLT] = "HLT",
	[EXIT_REASON_INVD] = "INVD",
	[EXIT_REASON_INVLPG] = "INVLPG",
	[EXIT_REASON_RDPMC] = "RDPMC",
	[EXIT_REASON_RDTSC] = "RDTSC",
	[EXIT_REASON_RSM] = "RSM",
	[EXIT_REASON_VMCALL] = "VMCALL",
	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
	[EXIT_REASON_VMPTRST] = "VMPTRST",
	[EXIT_REASON_VMREAD] = "VMREAD",
	[EXIT_REASON_VMRESUME] = "VMRESUME",
	[EXIT_REASON_VMWRITE] = "VMWRITE",
	[EXIT_REASON_VMXOFF] = "VMXOFF",
	[EXIT_REASON_VMXON] = "VMXON",
	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
	[EXIT_REASON_DR_ACCESS] = "MOV DR",
	[EXIT_REASON_INOUT] = "I/O instruction",
	[EXIT_REASON_RDMSR] = "RDMSR",
	[EXIT_REASON_WRMSR] = "WRMSR",
	[EXIT_REASON_INVAL_VMCS] =
	    "VM-entry failure due to invalid guest state",
	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
	[EXIT_REASON_MWAIT] = "MWAIT",
	[EXIT_REASON_MTF] = "Monitor trap flag",
	[EXIT_REASON_MONITOR] = "MONITOR",
	[EXIT_REASON_PAUSE] = "PAUSE",
	[EXIT_REASON_MCE_DURING_ENTRY] =
	    "VM-entry failure due to machine-check event",
	[EXIT_REASON_TPR] = "TPR below threshold",
	[EXIT_REASON_APIC_ACCESS] = "APIC access",
	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
	[EXIT_REASON_EPT_FAULT] = "EPT violation",
	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
	[EXIT_REASON_INVEPT] = "INVEPT",
	[EXIT_REASON_RDTSCP] = "RDTSCP",
	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
	[EXIT_REASON_INVVPID] = "INVVPID",
	[EXIT_REASON_WBINVD] = "WBINVD",
	[EXIT_REASON_XSETBV] = "XSETBV",
	[EXIT_REASON_APIC_WRITE] = "APIC write",
	[EXIT_REASON_RDRAND] = "RDRAND",
	[EXIT_REASON_INVPCID] = "INVPCID",
	[EXIT_REASON_VMFUNC] = "VMFUNC",
	[EXIT_REASON_ENCLS] = "ENCLS",
	[EXIT_REASON_RDSEED] = "RDSEED",
	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
	[EXIT_REASON_XSAVES] = "XSAVES",
	[EXIT_REASON_XRSTORS] = "XRSTORS"
};
216 
217 static const char *
218 vmexit_vmx_desc(uint32_t exit_reason)
219 {
220 
221 	if (exit_reason >= nitems(vmx_exit_reason_desc) ||
222 	    vmx_exit_reason_desc[exit_reason] == NULL)
223 		return ("Unknown");
224 	return (vmx_exit_reason_desc[exit_reason]);
225 }
226 
/*
 * DEBUG_EPT_MISCONFIG enables the extra EPT-misconfiguration dump in
 * vmexit_vmx().  It is defined unconditionally here.
 */
#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
/* Value wrapped in VMCS_IDENT() and passed to vm_get_register(). */
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400

/*
 * Scratch storage filled in by vmexit_vmx(): the guest physical address
 * read back from the vCPU, up to four page-table entries returned by
 * vm_get_gpa_pmap(), and the entry count it reports.
 */
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif
234 
235 static int
236 vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
237 {
238 	struct vm_exit *vme;
239 
240 	vme = vmrun->vm_exit;
241 
242 	fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
243 	fprintf(stderr, "\treason\t\tVMX\n");
244 	fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
245 	fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
246 	fprintf(stderr, "\tstatus\t\t%d\n", vme->u.vmx.status);
247 	fprintf(stderr, "\texit_reason\t%u (%s)\n", vme->u.vmx.exit_reason,
248 	    vmexit_vmx_desc(vme->u.vmx.exit_reason));
249 	fprintf(stderr, "\tqualification\t0x%016lx\n",
250 	    vme->u.vmx.exit_qualification);
251 	fprintf(stderr, "\tinst_type\t\t%d\n", vme->u.vmx.inst_type);
252 	fprintf(stderr, "\tinst_error\t\t%d\n", vme->u.vmx.inst_error);
253 #ifdef DEBUG_EPT_MISCONFIG
254 	if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
255 		vm_get_register(vcpu,
256 		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
257 		    &ept_misconfig_gpa);
258 		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
259 		    &ept_misconfig_ptenum);
260 		fprintf(stderr, "\tEPT misconfiguration:\n");
261 		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
262 		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
263 		    ept_misconfig_ptenum, ept_misconfig_pte[0],
264 		    ept_misconfig_pte[1], ept_misconfig_pte[2],
265 		    ept_misconfig_pte[3]);
266 	}
267 #endif	/* DEBUG_EPT_MISCONFIG */
268 	return (VMEXIT_ABORT);
269 }
270 
271 static int
272 vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
273 {
274 	struct vm_exit *vme;
275 
276 	vme = vmrun->vm_exit;
277 
278 	fprintf(stderr, "vm exit[%d]\n", vcpu_id(vcpu));
279 	fprintf(stderr, "\treason\t\tSVM\n");
280 	fprintf(stderr, "\trip\t\t0x%016lx\n", vme->rip);
281 	fprintf(stderr, "\tinst_length\t%d\n", vme->inst_length);
282 	fprintf(stderr, "\texitcode\t%#lx\n", vme->u.svm.exitcode);
283 	fprintf(stderr, "\texitinfo1\t%#lx\n", vme->u.svm.exitinfo1);
284 	fprintf(stderr, "\texitinfo2\t%#lx\n", vme->u.svm.exitinfo2);
285 	return (VMEXIT_ABORT);
286 }
287 
/*
 * Handler for VM_EXITCODE_BOGUS.  Nothing is emulated: the handler only
 * asserts that the exit reports a zero instruction length and then
 * resumes the guest.
 */
static int
vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}
296 
/*
 * Handler for VM_EXITCODE_REQIDLE.  Nothing is emulated: the handler
 * only asserts that the exit reports a zero instruction length and then
 * resumes the guest.
 */
static int
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}
305 
306 static int
307 vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
308     struct vm_run *vmrun __unused)
309 {
310 	/*
311 	 * Just continue execution with the next instruction. We use
312 	 * the HLT VM exit as a way to be friendly with the host
313 	 * scheduler.
314 	 */
315 	return (VMEXIT_CONTINUE);
316 }
317 
318 static int
319 vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
320     struct vm_run *vmrun __unused)
321 {
322 	return (VMEXIT_CONTINUE);
323 }
324 
325 static int
326 vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
327     struct vm_run *vmrun)
328 {
329 	assert(vmrun->vm_exit->inst_length == 0);
330 
331 #ifdef BHYVE_SNAPSHOT
332 	checkpoint_cpu_suspend(vcpu_id(vcpu));
333 #endif
334 	gdb_cpu_mtrap(vcpu);
335 #ifdef BHYVE_SNAPSHOT
336 	checkpoint_cpu_resume(vcpu_id(vcpu));
337 #endif
338 
339 	return (VMEXIT_CONTINUE);
340 }
341 
342 static int
343 vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
344     struct vm_run *vmrun)
345 {
346 	struct vm_exit *vme;
347 	struct vie *vie;
348 	int err, i, cs_d;
349 	enum vm_cpu_mode mode;
350 
351 	vme = vmrun->vm_exit;
352 
353 	vie = &vme->u.inst_emul.vie;
354 	if (!vie->decoded) {
355 		/*
356 		 * Attempt to decode in userspace as a fallback.  This allows
357 		 * updating instruction decode in bhyve without rebooting the
358 		 * kernel (rapid prototyping), albeit with much slower
359 		 * emulation.
360 		 */
361 		vie_restart(vie);
362 		mode = vme->u.inst_emul.paging.cpu_mode;
363 		cs_d = vme->u.inst_emul.cs_d;
364 		if (vmm_decode_instruction(mode, cs_d, vie) != 0)
365 			goto fail;
366 		if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
367 		    vme->rip + vie->num_processed) != 0)
368 			goto fail;
369 	}
370 
371 	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
372 	    &vme->u.inst_emul.paging);
373 	if (err) {
374 		if (err == ESRCH) {
375 			EPRINTLN("Unhandled memory access to 0x%lx\n",
376 			    vme->u.inst_emul.gpa);
377 		}
378 		goto fail;
379 	}
380 
381 	return (VMEXIT_CONTINUE);
382 
383 fail:
384 	fprintf(stderr, "Failed to emulate instruction sequence [ ");
385 	for (i = 0; i < vie->num_valid; i++)
386 		fprintf(stderr, "%02x", vie->inst[i]);
387 	FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
388 	return (VMEXIT_ABORT);
389 }
390 
391 static int
392 vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
393 {
394 	struct vm_exit *vme;
395 	enum vm_suspend_how how;
396 	int vcpuid = vcpu_id(vcpu);
397 
398 	vme = vmrun->vm_exit;
399 
400 	how = vme->u.suspended.how;
401 
402 	fbsdrun_deletecpu(vcpuid);
403 
404 	switch (how) {
405 	case VM_SUSPEND_RESET:
406 		exit(0);
407 	case VM_SUSPEND_POWEROFF:
408 		if (get_config_bool_default("destroy_on_poweroff", false))
409 			vm_destroy(ctx);
410 		exit(1);
411 	case VM_SUSPEND_HALT:
412 		exit(2);
413 	case VM_SUSPEND_TRIPLEFAULT:
414 		exit(3);
415 	default:
416 		fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
417 		exit(100);
418 	}
419 	return (0);	/* NOTREACHED */
420 }
421 
422 static int
423 vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
424     struct vm_run *vmrun __unused)
425 {
426 
427 #ifdef BHYVE_SNAPSHOT
428 	checkpoint_cpu_suspend(vcpu_id(vcpu));
429 #endif
430 	gdb_cpu_suspend(vcpu);
431 #ifdef BHYVE_SNAPSHOT
432 	checkpoint_cpu_resume(vcpu_id(vcpu));
433 #endif
434 	/*
435 	 * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
436 	 * window between activation of the vCPU thread and the STARTUP IPI.
437 	 */
438 	usleep(1000);
439 	return (VMEXIT_CONTINUE);
440 }
441 
442 static int
443 vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
444     struct vm_run *vmrun)
445 {
446 	gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
447 	return (VMEXIT_CONTINUE);
448 }
449 
450 static int
451 vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
452     struct vm_run *vmrun)
453 {
454 	struct vm_exit *vme;
455 	cpuset_t *dmask;
456 	int error = -1;
457 	int i;
458 
459 	dmask = vmrun->cpuset;
460 	vme = vmrun->vm_exit;
461 
462 	switch (vme->u.ipi.mode) {
463 	case APIC_DELMODE_INIT:
464 		CPU_FOREACH_ISSET(i, dmask) {
465 			error = fbsdrun_suspendcpu(i);
466 			if (error) {
467 				warnx("failed to suspend cpu %d", i);
468 				break;
469 			}
470 		}
471 		break;
472 	case APIC_DELMODE_STARTUP:
473 		CPU_FOREACH_ISSET(i, dmask) {
474 			spinup_ap(fbsdrun_vcpu(i),
475 			    vme->u.ipi.vector << PAGE_SHIFT);
476 		}
477 		error = 0;
478 		break;
479 	default:
480 		break;
481 	}
482 
483 	return (error);
484 }
485 
/*
 * Task-switch handler; only declared in this file.
 * NOTE(review): presumably defined in another translation unit -- confirm.
 */
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);

/*
 * Dispatch table mapping VM_EXITCODE_* values to their handlers.  Exit
 * codes without a designated initializer are left as NULL entries.
 * Both INOUT and INOUT_STR are served by vmexit_inout().
 */
const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT]  = vmexit_inout,
	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
	[VM_EXITCODE_VMX]    = vmexit_vmx,
	[VM_EXITCODE_SVM]    = vmexit_svm,
	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
	[VM_EXITCODE_DEBUG] = vmexit_debug,
	[VM_EXITCODE_BPT] = vmexit_breakpoint,
	[VM_EXITCODE_IPI] = vmexit_ipi,
	[VM_EXITCODE_HLT] = vmexit_hlt,
	[VM_EXITCODE_PAUSE] = vmexit_pause,
};
507