/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Userspace handlers for x86 VM exits.  Each handler consumes a
 * 'struct vm_run' (whose vm_exit member describes the exit) and returns
 * VMEXIT_CONTINUE to resume the guest or VMEXIT_ABORT to stop it.  The
 * handlers are collected in the exported 'vmexit_handlers' table at the
 * bottom of this file, indexed by VM_EXITCODE_* (dispatch happens in the
 * vcpu run loop elsewhere — presumably bhyverun.c; not visible here).
 */

#include <sys/types.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <amd64/vmm/intel/vmcs.h>
#include <x86/apicreg.h>

#include <assert.h>
#include <err.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>

#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "gdb.h"
#include "inout.h"
#include "mem.h"
#ifdef BHYVE_SNAPSHOT
#include "snapshot.h"
#endif
#include "spinup_ap.h"
#include "vmexit.h"
#include "xmsr.h"

/*
 * Inject an exception (with optional error code) into the guest vcpu,
 * requesting that the instruction which triggered the fault be restarted
 * once the exception has been delivered.
 */
void
vm_inject_fault(struct vcpu *vcpu, int vector, int errcode_valid,
    int errcode)
{
	int error, restart_instruction;

	restart_instruction = 1;

	error = vm_inject_exception(vcpu, vector, errcode_valid, errcode,
	    restart_instruction);
	assert(error == 0);
}

/*
 * Handle an I/O port access exit by emulating the in/out instruction.
 * An unhandled access is logged (with the operand-size suffix b/w/l)
 * and aborts the guest.
 */
static int
vmexit_inout(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;
	int error;
	int bytes, port, in;

	vme = vmrun->vm_exit;
	port = vme->u.inout.port;
	bytes = vme->u.inout.bytes;
	in = vme->u.inout.in;

	error = emulate_inout(ctx, vcpu, vme);
	if (error) {
		EPRINTLN("Unhandled %s%c 0x%04x at 0x%lx",
		    in ? "in" : "out",
		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
		    port, vme->rip);
		return (VMEXIT_ABORT);
	} else {
		return (VMEXIT_CONTINUE);
	}
}

/*
 * Handle a RDMSR exit.  On emulation failure the access is logged when
 * either "x86.strictmsr" or "x86.verbosemsr" is set; under strictmsr a
 * #GP is injected instead of completing the read.  Otherwise (including
 * the non-strict failure case, where val stays 0) the 64-bit result is
 * split across guest EAX (low half) and EDX (high half).
 */
static int
vmexit_rdmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	uint64_t val;
	uint32_t eax, edx;
	int error;

	vme = vmrun->vm_exit;

	val = 0;
	error = emulate_rdmsr(vcpu, vme->u.msr.code, &val);
	if (error != 0) {
		if (get_config_bool("x86.strictmsr") ||
		    get_config_bool("x86.verbosemsr")) {
			EPRINTLN("rdmsr to register %#x on vcpu %d",
			    vme->u.msr.code, vcpu_id(vcpu));
		}
		if (get_config_bool("x86.strictmsr")) {
			/* Strict mode: fault the guest rather than fake it. */
			vm_inject_gp(vcpu);
			return (VMEXIT_CONTINUE);
		}
	}

	eax = val;
	error = vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
	assert(error == 0);

	edx = val >> 32;
	error = vm_set_register(vcpu, VM_REG_GUEST_RDX, edx);
	assert(error == 0);

	return (VMEXIT_CONTINUE);
}

/*
 * Handle a WRMSR exit.  Failure handling mirrors vmexit_rdmsr: log under
 * strictmsr/verbosemsr, inject #GP under strictmsr, and otherwise
 * silently ignore the write.
 */
static int
vmexit_wrmsr(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	int error;

	vme = vmrun->vm_exit;

	error = emulate_wrmsr(vcpu, vme->u.msr.code, vme->u.msr.wval);
	if (error != 0) {
		if (get_config_bool("x86.strictmsr") ||
		    get_config_bool("x86.verbosemsr")) {
			EPRINTLN("wrmsr to register %#x(%#lx) on vcpu %d",
			    vme->u.msr.code, vme->u.msr.wval, vcpu_id(vcpu));
		}
		if (get_config_bool("x86.strictmsr")) {
			vm_inject_gp(vcpu);
			return (VMEXIT_CONTINUE);
		}
	}
	return (VMEXIT_CONTINUE);
}

/*
 * Human-readable descriptions of the Intel VT-x basic exit reasons,
 * indexed by the EXIT_REASON_* value.  Used only for diagnostic output
 * in vmexit_vmx().
 */
static const char * const vmx_exit_reason_desc[] = {
	[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)",
	[EXIT_REASON_EXT_INTR] = "External interrupt",
	[EXIT_REASON_TRIPLE_FAULT] = "Triple fault",
	[EXIT_REASON_INIT] = "INIT signal",
	[EXIT_REASON_SIPI] = "Start-up IPI (SIPI)",
	[EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)",
	[EXIT_REASON_SMI] = "Other SMI",
	[EXIT_REASON_INTR_WINDOW] = "Interrupt window",
	[EXIT_REASON_NMI_WINDOW] = "NMI window",
	[EXIT_REASON_TASK_SWITCH] = "Task switch",
	[EXIT_REASON_CPUID] = "CPUID",
	[EXIT_REASON_GETSEC] = "GETSEC",
	[EXIT_REASON_HLT] = "HLT",
	[EXIT_REASON_INVD] = "INVD",
	[EXIT_REASON_INVLPG] = "INVLPG",
	[EXIT_REASON_RDPMC] = "RDPMC",
	[EXIT_REASON_RDTSC] = "RDTSC",
	[EXIT_REASON_RSM] = "RSM",
	[EXIT_REASON_VMCALL] = "VMCALL",
	[EXIT_REASON_VMCLEAR] = "VMCLEAR",
	[EXIT_REASON_VMLAUNCH] = "VMLAUNCH",
	[EXIT_REASON_VMPTRLD] = "VMPTRLD",
	[EXIT_REASON_VMPTRST] = "VMPTRST",
	[EXIT_REASON_VMREAD] = "VMREAD",
	[EXIT_REASON_VMRESUME] = "VMRESUME",
	[EXIT_REASON_VMWRITE] = "VMWRITE",
	[EXIT_REASON_VMXOFF] = "VMXOFF",
	[EXIT_REASON_VMXON] = "VMXON",
	[EXIT_REASON_CR_ACCESS] = "Control-register accesses",
	[EXIT_REASON_DR_ACCESS] = "MOV DR",
	[EXIT_REASON_INOUT] = "I/O instruction",
	[EXIT_REASON_RDMSR] = "RDMSR",
	[EXIT_REASON_WRMSR] = "WRMSR",
	[EXIT_REASON_INVAL_VMCS] =
	    "VM-entry failure due to invalid guest state",
	[EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading",
	[EXIT_REASON_MWAIT] = "MWAIT",
	[EXIT_REASON_MTF] = "Monitor trap flag",
	[EXIT_REASON_MONITOR] = "MONITOR",
	[EXIT_REASON_PAUSE] = "PAUSE",
	[EXIT_REASON_MCE_DURING_ENTRY] =
	    "VM-entry failure due to machine-check event",
	[EXIT_REASON_TPR] = "TPR below threshold",
	[EXIT_REASON_APIC_ACCESS] = "APIC access",
	[EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI",
	[EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR",
	[EXIT_REASON_LDTR_TR] = "Access to LDTR or TR",
	[EXIT_REASON_EPT_FAULT] = "EPT violation",
	[EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration",
	[EXIT_REASON_INVEPT] = "INVEPT",
	[EXIT_REASON_RDTSCP] = "RDTSCP",
	[EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired",
	[EXIT_REASON_INVVPID] = "INVVPID",
	[EXIT_REASON_WBINVD] = "WBINVD",
	[EXIT_REASON_XSETBV] = "XSETBV",
	[EXIT_REASON_APIC_WRITE] = "APIC write",
	[EXIT_REASON_RDRAND] = "RDRAND",
	[EXIT_REASON_INVPCID] = "INVPCID",
	[EXIT_REASON_VMFUNC] = "VMFUNC",
	[EXIT_REASON_ENCLS] = "ENCLS",
	[EXIT_REASON_RDSEED] = "RDSEED",
	[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full",
	[EXIT_REASON_XSAVES] = "XSAVES",
	[EXIT_REASON_XRSTORS] = "XRSTORS"
};

/*
 * Map a VT-x exit reason to its description, falling back to "Unknown"
 * for out-of-range values or gaps in the table.
 */
static const char *
vmexit_vmx_desc(uint32_t exit_reason)
{

	if (exit_reason >= nitems(vmx_exit_reason_desc) ||
	    vmx_exit_reason_desc[exit_reason] == NULL)
		return ("Unknown");
	return (vmx_exit_reason_desc[exit_reason]);
}

#define	DEBUG_EPT_MISCONFIG
#ifdef DEBUG_EPT_MISCONFIG
/* VMCS field encoding of the guest-physical address (see Intel SDM). */
#define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400

/* Scratch state filled in by the EPT-misconfiguration diagnostics below. */
static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
static int ept_misconfig_ptenum;
#endif

/*
 * Unexpected VT-x exit: dump the exit state for debugging and abort the
 * guest.  With DEBUG_EPT_MISCONFIG, an EPT-misconfiguration exit also
 * dumps the faulting GPA and its page-table entries.
 */
static int
vmexit_vmx(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;

	vme = vmrun->vm_exit;

	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
	EPRINTLN("\treason\t\tVMX");
	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
	EPRINTLN("\tinst_length\t%d", vme->inst_length);
	EPRINTLN("\tstatus\t\t%d", vme->u.vmx.status);
	EPRINTLN("\texit_reason\t%u (%s)", vme->u.vmx.exit_reason,
	    vmexit_vmx_desc(vme->u.vmx.exit_reason));
	EPRINTLN("\tqualification\t0x%016lx",
	    vme->u.vmx.exit_qualification);
	EPRINTLN("\tinst_type\t\t%d", vme->u.vmx.inst_type);
	EPRINTLN("\tinst_error\t\t%d", vme->u.vmx.inst_error);
#ifdef DEBUG_EPT_MISCONFIG
	if (vme->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
		/* Best-effort diagnostics; errors are deliberately ignored. */
		vm_get_register(vcpu,
		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
		    &ept_misconfig_gpa);
		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
		    &ept_misconfig_ptenum);
		EPRINTLN("\tEPT misconfiguration:");
		EPRINTLN("\t\tGPA: %#lx", ept_misconfig_gpa);
		EPRINTLN("\t\tPTE(%d): %#lx %#lx %#lx %#lx",
		    ept_misconfig_ptenum, ept_misconfig_pte[0],
		    ept_misconfig_pte[1], ept_misconfig_pte[2],
		    ept_misconfig_pte[3]);
	}
#endif	/* DEBUG_EPT_MISCONFIG */
	return (VMEXIT_ABORT);
}

/*
 * Unexpected AMD SVM exit: dump the exit state and abort the guest.
 */
static int
vmexit_svm(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;

	vme = vmrun->vm_exit;

	EPRINTLN("vm exit[%d]", vcpu_id(vcpu));
	EPRINTLN("\treason\t\tSVM");
	EPRINTLN("\trip\t\t0x%016lx", vme->rip);
	EPRINTLN("\tinst_length\t%d", vme->inst_length);
	EPRINTLN("\texitcode\t%#lx", vme->u.svm.exitcode);
	EPRINTLN("\texitinfo1\t%#lx", vme->u.svm.exitinfo1);
	EPRINTLN("\texitinfo2\t%#lx", vme->u.svm.exitinfo2);
	return (VMEXIT_ABORT);
}

/*
 * Spurious exit: nothing to do, just resume the guest.  The instruction
 * length must be zero so that the guest RIP is not advanced.
 */
static int
vmexit_bogus(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}

/*
 * The kernel requested the vcpu to go idle; simply resume (the run loop
 * elsewhere is expected to notice the request — not visible here).
 */
static int
vmexit_reqidle(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

	return (VMEXIT_CONTINUE);
}

static int
vmexit_hlt(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	/*
	 * Just continue execution with the next instruction. We use
	 * the HLT VM exit as a way to be friendly with the host
	 * scheduler.
	 */
	return (VMEXIT_CONTINUE);
}

/* Guest executed PAUSE: nothing to emulate, resume immediately. */
static int
vmexit_pause(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun __unused)
{
	return (VMEXIT_CONTINUE);
}

/*
 * Monitor-trap-flag (single-step) exit: hand control to the gdb stub,
 * bracketed by snapshot suspend/resume so a checkpoint can quiesce this
 * vcpu while it is parked in the debugger.
 */
static int
vmexit_mtrap(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	assert(vmrun->vm_exit->inst_length == 0);

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_mtrap(vcpu);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif

	return (VMEXIT_CONTINUE);
}

/*
 * Emulate an instruction that faulted on a memory access.  If the kernel
 * did not decode the instruction, fall back to decoding it here and
 * advance the guest RIP past the decoded bytes before emulating.
 */
static int
vmexit_inst_emul(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	struct vie *vie;
	int err, i, cs_d;
	enum vm_cpu_mode mode;

	vme = vmrun->vm_exit;

	vie = &vme->u.inst_emul.vie;
	if (!vie->decoded) {
		/*
		 * Attempt to decode in userspace as a fallback. This allows
		 * updating instruction decode in bhyve without rebooting the
		 * kernel (rapid prototyping), albeit with much slower
		 * emulation.
		 */
		vie_restart(vie);
		mode = vme->u.inst_emul.paging.cpu_mode;
		cs_d = vme->u.inst_emul.cs_d;
		if (vmm_decode_instruction(mode, cs_d, vie) != 0)
			goto fail;
		if (vm_set_register(vcpu, VM_REG_GUEST_RIP,
		    vme->rip + vie->num_processed) != 0)
			goto fail;
	}

	err = emulate_mem(vcpu, vme->u.inst_emul.gpa, vie,
	    &vme->u.inst_emul.paging);
	if (err) {
		/* ESRCH: no device claims the guest-physical address. */
		if (err == ESRCH) {
			EPRINTLN("Unhandled memory access to 0x%lx\n",
			    vme->u.inst_emul.gpa);
		}
		goto fail;
	}

	return (VMEXIT_CONTINUE);

fail:
	/* Dump the raw instruction bytes to help diagnose the failure. */
	fprintf(stderr, "Failed to emulate instruction sequence [ ");
	for (i = 0; i < vie->num_valid; i++)
		fprintf(stderr, "%02x", vie->inst[i]);
	FPRINTLN(stderr, " ] at 0x%lx", vme->rip);
	return (VMEXIT_ABORT);
}

/*
 * The VM is suspending: remove this vcpu from the active set and exit
 * the process with a status encoding the suspend reason (0 = reset,
 * 1 = poweroff, 2 = halt, 3 = triple fault).  On poweroff the VM may
 * also be destroyed, controlled by the "destroy_on_poweroff" knob.
 */
static int
vmexit_suspend(struct vmctx *ctx, struct vcpu *vcpu, struct vm_run *vmrun)
{
	struct vm_exit *vme;
	enum vm_suspend_how how;
	int vcpuid = vcpu_id(vcpu);

	vme = vmrun->vm_exit;

	how = vme->u.suspended.how;

	fbsdrun_deletecpu(vcpuid);

	switch (how) {
	case VM_SUSPEND_RESET:
		exit(0);
	case VM_SUSPEND_POWEROFF:
		if (get_config_bool_default("destroy_on_poweroff", false))
			vm_destroy(ctx);
		exit(1);
	case VM_SUSPEND_HALT:
		exit(2);
	case VM_SUSPEND_TRIPLEFAULT:
		exit(3);
	default:
		EPRINTLN("vmexit_suspend: invalid reason %d", how);
		exit(100);
	}
	return (0);	/* NOTREACHED */
}

/*
 * Debug exit: park the vcpu in the gdb stub (snapshot-bracketed, as in
 * vmexit_mtrap) until the debugger releases it.
 */
static int
vmexit_debug(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun __unused)
{

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_suspend(vcpu);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
	/*
	 * XXX-MJ sleep for a short period to avoid chewing up the CPU in the
	 * window between activation of the vCPU thread and the STARTUP IPI.
	 */
	usleep(1000);
	return (VMEXIT_CONTINUE);
}

/*
 * #DB exception exit: forward to the gdb stub (snapshot-bracketed).
 */
static int
vmexit_db(struct vmctx *ctx __unused, struct vcpu *vcpu, struct vm_run *vmrun)
{

#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_suspend(vcpu_id(vcpu));
#endif
	gdb_cpu_debug(vcpu, vmrun->vm_exit);
#ifdef BHYVE_SNAPSHOT
	checkpoint_cpu_resume(vcpu_id(vcpu));
#endif
	return (VMEXIT_CONTINUE);
}

/* Breakpoint exit: forward to the gdb stub. */
static int
vmexit_breakpoint(struct vmctx *ctx __unused, struct vcpu *vcpu,
    struct vm_run *vmrun)
{
	gdb_cpu_breakpoint(vcpu, vmrun->vm_exit);
	return (VMEXIT_CONTINUE);
}

/*
 * Handle an IPI that the kernel deferred to userspace.  INIT suspends
 * every destination vcpu; STARTUP spins up each destination AP at the
 * real-mode entry point derived from the SIPI vector.  Any other
 * delivery mode leaves 'error' at -1 (i.e. the run loop aborts).
 */
static int
vmexit_ipi(struct vmctx *ctx __unused, struct vcpu *vcpu __unused,
    struct vm_run *vmrun)
{
	struct vm_exit *vme;
	cpuset_t *dmask;
	int error = -1;
	int i;

	dmask = vmrun->cpuset;
	vme = vmrun->vm_exit;

	switch (vme->u.ipi.mode) {
	case APIC_DELMODE_INIT:
		CPU_FOREACH_ISSET(i, dmask) {
			error = fbsdrun_suspendcpu(i);
			if (error) {
				warnx("failed to suspend cpu %d", i);
				break;
			}
		}
		break;
	case APIC_DELMODE_STARTUP:
		CPU_FOREACH_ISSET(i, dmask) {
			spinup_ap(fbsdrun_vcpu(i),
			    vme->u.ipi.vector << PAGE_SHIFT);
		}
		error = 0;
		break;
	default:
		break;
	}

	return (error);
}

/* Task-switch handler, defined elsewhere (not in this file). */
int vmexit_task_switch(struct vmctx *, struct vcpu *, struct vm_run *);

/*
 * Dispatch table mapping each VM_EXITCODE_* to its handler; consumed by
 * the vcpu run loop.  Exit codes without an entry are left NULL.
 */
const vmexit_handler_t vmexit_handlers[VM_EXITCODE_MAX] = {
	[VM_EXITCODE_INOUT] = vmexit_inout,
	[VM_EXITCODE_INOUT_STR] = vmexit_inout,
	[VM_EXITCODE_VMX] = vmexit_vmx,
	[VM_EXITCODE_SVM] = vmexit_svm,
	[VM_EXITCODE_BOGUS] = vmexit_bogus,
	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
	[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
	[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
	[VM_EXITCODE_MTRAP] = vmexit_mtrap,
	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
	[VM_EXITCODE_DEBUG] = vmexit_debug,
	[VM_EXITCODE_BPT] = vmexit_breakpoint,
	[VM_EXITCODE_IPI] = vmexit_ipi,
	[VM_EXITCODE_HLT] = vmexit_hlt,
	[VM_EXITCODE_PAUSE] = vmexit_pause,
	[VM_EXITCODE_DB] = vmexit_db,
};