1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

/*
 * VM-exit handling for bhyve on x86: one handler per VM_EXITCODE_* value,
 * dispatched through the vmexit_handlers[] table at the bottom of this file.
 * Handlers return VMEXIT_CONTINUE to resume the guest or VMEXIT_ABORT to
 * terminate it.
 */

#include <sys/types.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <amd64/vmm/intel/vmcs.h>
#include <x86/apicreg.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>

#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "gdb.h"
#include "inout.h"
#include "mem.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
#endif
#include "spinup_ap.h"
#include "vmexit.h"
#include "xmsr.h"

/*
 * Inject an exception into the guest vcpu, requesting that the faulting
 * instruction be restarted so it re-executes after the fault is delivered.
 * Injection is expected to always succeed; a failure is a programming error.
 */
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	int error, restart_instruction;

	restart_instruction = 1;

	error = vm_inject_exception(vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}

/*
 * Handle an I/O port access exit by delegating to the in/out emulation
 * layer.  On failure, log the access (direction, width suffix b/w/l,
 * port, guest %rip) and abort the guest.
 */
static int
vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;
	int error;
	int bytes, port, in;

	vme = vmrun->vm_exit;
	port = vme->u.inout.port;
	bytes = vme->u.inout.bytes;
	in = vme->u.inout.in;

	error = emulate_inout(ctx, vcpu, vme);
	if (error) {
		EPRINTLN("Unhandled %s%c 0x%04x at 0x%lx",
		    in ? "in" : "out",
		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
		    port, vme->rip);
		return (VMEXIT_ABORT);
	} else {
		return (VMEXIT_CONTINUE);
	}
}

/*
 * Handle a RDMSR exit.  Unknown MSRs read as 0 unless "x86.strictmsr" is
 * configured, in which case a #GP is injected instead and the registers
 * are left untouched.  The 64-bit result is split across guest %eax
 * (low 32 bits) and %edx (high 32 bits) per the RDMSR convention.
 */
static int
vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	uint64_t val;
	uint32_t eax, edx;
	int error;

	vme = vmrun->vm_exit;

	val = 0;
	error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
	if (error != 0) {
		EPRINTLN("rdmsr to register %#x on vcpu %d",
		    vme->u.msr.code, vcpu_id(vcpu));
		if (get_config_bool("x86.strictmsr")) {
			vm_inject_gp(vcpu);
			return (VMEXIT_CONTINUE);
		}
	}

	eax = val;
	error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
	assert(error == 0);

	edx = val >> 32;
	error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
	assert(error == 0);

	return (VMEXIT_CONTINUE);
}

/*
 * Handle a WRMSR exit.  Writes to unknown MSRs are ignored unless
 * "x86.strictmsr" is configured, in which case a #GP is injected.
 */
static int
vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	int error;

	vme = vmrun->vm_exit;

	error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
	if (error != 0) {
		EPRINTLN("wrmsr to register %#x(%#lx) on vcpu %d",
		    vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
		if (get_config_bool("x86.strictmsr")) {
			vm_inject_gp(vcpu);
			return (VMEXIT_CONTINUE);
		}
	}
	return (VMEXIT_CONTINUE);
}

/*
 * Human-readable descriptions of VMX basic exit reasons, indexed by the
 * EXIT_REASON_* value reported in the VMCS.  Used only for diagnostics
 * in vmexit_vmx(); gaps in the index space are NULL.
 */
static const char * const vmx_exit_reason_desc[] = {
	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
	[EXIT_REASON_EXT_INTR] = "External interrupt",
	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
	[EXIT_REASON_INIT] = "INIT signal",
	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
	[EXIT_REASON_SMI] = "Other SMI",
	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
	[EXIT_REASON_NMI_WINDOW] = "NMI window",
	[EXIT_REASON_TASK_SWITCH] = "Task switch",
	[EXIT_REASON_CPUID] = "CPUID",
	[EXIT_REASON_GETSEC] = "GETSEC",
	[EXIT_REASON_HLT] = "HLT",
	[EXIT_REASON_INVD] = "INVD",
	[EXIT_REASON_INVLPG] = "INVLPG",
	[EXIT_REASON_RDPMC] = "RDPMC",
	[EXIT_REASON_RDTSC] = "RDTSC",
	[EXIT_REASON_RSM] = "RSM",
	[EXIT_REASON_VMCALL] = "VMCALL",
	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
	[EXIT_REASON_VMPTRST] = "VMPTRST",
	[EXIT_REASON_VMREAD] = "VMREAD",
	[EXIT_REASON_VMRESUME] = "VMRESUME",
	[EXIT_REASON_VMWRITE] = "VMWRITE",
	[EXIT_REASON_VMXOFF] = "VMXOFF",
	[EXIT_REASON_VMXON] = "VMXON",
	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
	[EXIT_REASON_DR_ACCESS] = "MOV DR",
	[EXIT_REASON_INOUT] = "I/O instruction",
	[EXIT_REASON_RDMSR] = "RDMSR",
	[EXIT_REASON_WRMSR] = "WRMSR",
	[EXIT_REASON_INVAL_VMCS] =
	    "VM-entry failure due to invalid guest state",
	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
	[EXIT_REASON_MWAIT] = "MWAIT",
	[EXIT_REASON_MTF] = "Monitor trap flag",
	[EXIT_REASON_MONITOR] = "MONITOR",
	[EXIT_REASON_PAUSE] = "PAUSE",
	[EXIT_REASON_MCE_DURING_ENTRY] =
	    "VM-entry failure due to machine-check event",
	[EXIT_REASON_TPR] = "TPR below threshold",
	[EXIT_REASON_APIC_ACCESS] = "APIC access",
	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
	[EXIT_REASON_EPT_FAULT] = "EPT violation",
	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
	[EXIT_REASON_INVEPT] = "INVEPT",
	[EXIT_REASON_RDTSCP] = "RDTSCP",
	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
	[EXIT_REASON_INVVPID] = "INVVPID",
	[EXIT_REASON_WBINVD] = "WBINVD",
	[EXIT_REASON_XSETBV] = "XSETBV",
	[EXIT_REASON_APIC_WRITE] = "APIC write",
	[EXIT_REASON_RDRAND] = "RDRAND",
	[EXIT_REASON_INVPCID] = "INVPCID",
	[EXIT_REASON_VMFUNC] = "VMFUNC",
	[EXIT_REASON_ENCLS] = "ENCLS",
	[EXIT_REASON_RDSEED] = "RDSEED",
	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
	[EXIT_REASON_XSAVES] = "XSAVES",
	[EXIT_REASON_XRSTORS] = "XRSTORS"
};

/*
 * Map a VMX exit reason to its description, returning "Unknown" for
 * out-of-range values or gaps in the table.
 */
static const char *
vmexit_vmx_desc(uint32_t exit_reason)
{

	if (exit_reason >= nitems(vmx_exit_reason_desc) ||
	    vmx_exit_reason_desc[exit_reason] == NULL)
		return ("Unknown");
	return (vmx_exit_reason_desc[exit_reason]);
}

#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
/* VMCS field encoding for the guest-physical address (Intel SDM). */
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400

/* Scratch state filled in by vmexit_vmx() when dumping an EPT misconfig. */
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif

/*
 * Catch-all for VMX exits that the kernel could not handle: dump the exit
 * state for debugging (including EPT misconfiguration details when
 * enabled) and abort the guest.
 */
static int
vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;

	vme = vmrun->vm_exit;

	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
	EPRINTLN("\treason\t\tVMX");
	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
	EPRINTLN("\tinst_length\t%d", vme->inst_length);
	EPRINTLN("\tstatus\t\t%d", vme->u.vmx.status);
	EPRINTLN("\texit_reason\t%u (%s)", vme->u.vmx.exit_reason,
	    vmexit_vmx_desc(vme->u.vmx.exit_reason));
	EPRINTLN("\tqualification\t0x%016lx",
	    vme->u.vmx.exit_qualification);
	EPRINTLN("\tinst_type\t\t%d", vme->u.vmx.inst_type);
	EPRINTLN("\tinst_error\t\t%d", vme->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
	if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
		vm_get_register(vcpu,
		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
		    &ept_misconfig_gpa);
		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
		    &ept_misconfig_ptenum);
		EPRINTLN("\tEPT misconfiguration:");
		EPRINTLN("\t\tGPA: %#lx", ept_misconfig_gpa);
		EPRINTLN("\t\tPTE(%d): %#lx %#lx %#lx %#lx",
		    ept_misconfig_ptenum, ept_misconfig_pte[0],
		    ept_misconfig_pte[1], ept_misconfig_pte[2],
		    ept_misconfig_pte[3]);
	}
#endif	/* DEBUG_EPT_MISCONFIG */
	return (VMEXIT_ABORT);
}

/*
 * Catch-all for SVM exits that the kernel could not handle: dump the exit
 * state and abort the guest.
 */
static int
vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;

	vme = vmrun->vm_exit;

	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
	EPRINTLN("\treason\t\tSVM");
	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
	EPRINTLN("\tinst_length\t%d", vme->inst_length);
	EPRINTLN("\texitcode\t%#lx", vme->u.svm.exitcode);
	EPRINTLN("\texitinfo1\t%#lx", vme->u.svm.exitinfo1);
	EPRINTLN("\texitinfo2\t%#lx", vme->u.svm.exitinfo2);
	return (VMEXIT_ABORT);
}

/*
 * Spurious exit; nothing to do, simply resume the guest.  Such exits
 * must not have consumed an instruction.
 */
static int
vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}

/*
 * The kernel requested that this vcpu return to userspace (e.g. so the
 * loop can notice pending work); just resume.
 */
static int
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}

static int
vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	/*
	 * Just continue execution with the next instruction. We use
	 * the HLT VM exit as a way to be friendly with the host
	 * scheduler.
	 */
	return (VMEXIT_CONTINUE);
}

/* Guest executed PAUSE; nothing to emulate, resume immediately. */
static int
vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	return (VMEXIT_CONTINUE);
}

/*
 * Monitor-trap-flag exit, used for single-stepping under the gdb stub.
 * With snapshot support the vcpu also participates in checkpoint
 * suspend/resume around the gdb callout.
 */
static int
vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_mtrap(vcpu);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif

	return (VMEXIT_CONTINUE);
}

/*
 * Emulate an instruction that faulted on an emulated memory region.
 * If the kernel failed to decode the instruction, retry the decode in
 * userspace before emulating the access.  On any failure, dump the raw
 * instruction bytes and abort the guest.
 */
static int
vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	struct vie *vie;
	int err, i, cs_d;
	enum vm_cpu_mode mode;

	vme = vmrun->vm_exit;

	vie = &vme->u.inst_emul.vie;
	if (!vie->decoded) {
		/*
		 * Attempt to decode in userspace as a fallback. This allows
		 * updating instruction decode in bhyve without rebooting the
		 * kernel (rapid prototyping), albeit with much slower
		 * emulation.
		 */
		vie_restart(vie);
		mode = vme->u.inst_emul.paging.cpu_mode;
		cs_d = vme->u.inst_emul.cs_d;
		if (vmm_decode_instruction(mode, cs_d, vie) != 0)
			goto fail;
		/* Advance %rip past the bytes consumed by the decoder. */
		if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
		    vme->rip + vie->num_processed) != 0)
			goto fail;
	}

	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
	    &vme->u.inst_emul.paging);
	if (err) {
		if (err == ESRCH) {
			EPRINTLN("Unhandled memory access to 0x%lx\n",
			    vme->u.inst_emul.gpa);
		}
		goto fail;
	}

	return (VMEXIT_CONTINUE);

fail:
	fprintf(stderr, "Failed to emulate instruction sequence [ ");
	for (i = 0; i < vie->num_valid; i++)
		fprintf(stderr, "%02x", vie->inst[i]);
	FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
	return (VMEXIT_ABORT);
}

/*
 * The VM was suspended; remove this vcpu from the run set and exit the
 * process with a status code encoding the suspend reason (0 = reset,
 * 1 = poweroff, 2 = halt, 3 = triple fault).  Optionally destroys the
 * VM on poweroff when "destroy_on_poweroff" is set.
 */
static int
vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;
	enum vm_suspend_how how;
	int vcpuid = vcpu_id(vcpu);

	vme = vmrun->vm_exit;

	how = vme->u.suspended.how;

	fbsdrun_deletecpu(vcpuid);

	switch (how) {
	case VM_SUSPEND_RESET:
		exit(0);
	case VM_SUSPEND_POWEROFF:
		if (get_config_bool_default("destroy_on_poweroff", false))
			vm_destroy(ctx);
		exit(1);
	case VM_SUSPEND_HALT:
		exit(2);
	case VM_SUSPEND_TRIPLEFAULT:
		exit(3);
	default:
		EPRINTLN("vmexit_suspend: invalid reason %d", how);
		exit(100);
	}
	return (0);	/* NOTREACHED */
}

/*
 * The vcpu was stopped for the gdb stub (or a snapshot); hand control to
 * the debugger and resume when it releases the vcpu.
 */
static int
vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun __unused)
{

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_suspend(vcpu);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
	/*
	 * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
	 * window between activation of the vCPU thread and the STARTUP IPI.
	 */
	usleep(1000);
	return (VMEXIT_CONTINUE);
}

/* Guest hit a hardware debug exception (#DB); forward it to the gdb stub. */
static int
vmexit_db(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_debug(vcpu, vmrun->vm_exit);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
	return (VMEXIT_CONTINUE);
}

/* Guest hit a gdb-inserted breakpoint; forward it to the gdb stub. */
static int
vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
	return (VMEXIT_CONTINUE);
}

/*
 * Handle an IPI that the kernel asked userspace to deliver.  INIT
 * suspends each targeted vcpu; STARTUP (SIPI) spins up each targeted AP
 * at the vector-derived start address.  Other delivery modes are
 * unhandled and return -1.
 */
static int
vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	cpuset_t *dmask;
	int error = -1;
	int i;

	dmask = vmrun->cpuset;
	vme = vmrun->vm_exit;

	switch (vme->u.ipi.mode) {
	case APIC_DELMODE_INIT:
		CPU_FOREACH_ISSET(i, dmask) {
			error = fbsdrun_suspendcpu(i);
			if (error) {
				warnx("failed to suspend cpu %d", i);
				break;
			}
		}
		break;
	case APIC_DELMODE_STARTUP:
		CPU_FOREACH_ISSET(i, dmask) {
			/* SIPI vector is the start page number. */
			spinup_ap(fbsdrun_vcpu(i),
			    vme->u.ipi.vector << PAGE_SHIFT);
		}
		error = 0;
		break;
	default:
		break;
	}

	return (error);
}

/* Defined in task_switch.c; declared here to populate the table below. */
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);

/*
 * Dispatch table mapping each VM exit code to its handler; consumed by
 * the vcpu run loop.  Unlisted exit codes have NULL entries.
 */
const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT] = vmexit_inout,
	[VM_EXITCODE_INOUT_STR] = vmexit_inout,
	[VM_EXITCODE_VMX] = vmexit_vmx,
	[VM_EXITCODE_SVM] = vmexit_svm,
	[VM_EXITCODE_BOGUS] = vmexit_bogus,
	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
	[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP] = vmexit_mtrap,
	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
	[VM_EXITCODE_DEBUG] = vmexit_debug,
	[VM_EXITCODE_BPT] = vmexit_breakpoint,
	[VM_EXITCODE_IPI] = vmexit_ipi,
	[VM_EXITCODE_HLT] = vmexit_hlt,
	[VM_EXITCODE_PAUSE] = vmexit_pause,
	[VM_EXITCODE_DB] = vmexit_db,
};