1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_bhyve_snapshot.h" 30 #include "opt_ddb.h" 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/sysctl.h> 35 #include <sys/systm.h> 36 #include <sys/pcpu.h> 37 38 #include <vm/vm.h> 39 #include <vm/pmap.h> 40 41 #include <machine/segments.h> 42 #include <machine/vmm.h> 43 #include <machine/vmm_snapshot.h> 44 #include "vmm_host.h" 45 #include "vmx_cpufunc.h" 46 #include "vmcs.h" 47 #include "ept.h" 48 #include "vmx.h" 49 50 #ifdef DDB 51 #include <ddb/ddb.h> 52 #endif 53 54 SYSCTL_DECL(_hw_vmm_vmx); 55 56 static int no_flush_rsb; 57 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, 58 &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); 59 60 static uint64_t 61 vmcs_fix_regval(uint32_t encoding, uint64_t val) 62 { 63 64 switch (encoding) { 65 case VMCS_GUEST_CR0: 66 val = vmx_fix_cr0(val); 67 break; 68 case VMCS_GUEST_CR4: 69 val = vmx_fix_cr4(val); 70 break; 71 default: 72 break; 73 } 74 return (val); 75 } 76 77 static uint32_t 78 vmcs_field_encoding(int ident) 79 { 80 switch (ident) { 81 case VM_REG_GUEST_CR0: 82 return (VMCS_GUEST_CR0); 83 case VM_REG_GUEST_CR3: 84 return (VMCS_GUEST_CR3); 85 case VM_REG_GUEST_CR4: 86 return (VMCS_GUEST_CR4); 87 case VM_REG_GUEST_DR7: 88 return (VMCS_GUEST_DR7); 89 case VM_REG_GUEST_RSP: 90 return (VMCS_GUEST_RSP); 91 case VM_REG_GUEST_RIP: 92 return (VMCS_GUEST_RIP); 93 case VM_REG_GUEST_RFLAGS: 94 return (VMCS_GUEST_RFLAGS); 95 case VM_REG_GUEST_ES: 96 return (VMCS_GUEST_ES_SELECTOR); 97 case VM_REG_GUEST_CS: 98 return (VMCS_GUEST_CS_SELECTOR); 99 case VM_REG_GUEST_SS: 100 return (VMCS_GUEST_SS_SELECTOR); 101 case VM_REG_GUEST_DS: 102 return (VMCS_GUEST_DS_SELECTOR); 103 case VM_REG_GUEST_FS: 104 return (VMCS_GUEST_FS_SELECTOR); 105 case VM_REG_GUEST_GS: 106 return (VMCS_GUEST_GS_SELECTOR); 107 case VM_REG_GUEST_TR: 108 return (VMCS_GUEST_TR_SELECTOR); 109 case VM_REG_GUEST_LDTR: 110 return (VMCS_GUEST_LDTR_SELECTOR); 111 case VM_REG_GUEST_EFER: 112 return (VMCS_GUEST_IA32_EFER); 113 case VM_REG_GUEST_PDPTE0: 114 return (VMCS_GUEST_PDPTE0); 115 case VM_REG_GUEST_PDPTE1: 116 return (VMCS_GUEST_PDPTE1); 117 case VM_REG_GUEST_PDPTE2: 118 return (VMCS_GUEST_PDPTE2); 119 case VM_REG_GUEST_PDPTE3: 120 return (VMCS_GUEST_PDPTE3); 121 case VM_REG_GUEST_ENTRY_INST_LENGTH: 122 return (VMCS_ENTRY_INST_LENGTH); 123 default: 124 return (-1); 125 } 126 127 } 128 129 static int 130 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) 131 { 132 133 switch (seg) { 134 case VM_REG_GUEST_ES: 135 *base = VMCS_GUEST_ES_BASE; 136 *lim = VMCS_GUEST_ES_LIMIT; 137 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS; 138 break; 139 case VM_REG_GUEST_CS: 140 *base = VMCS_GUEST_CS_BASE; 141 *lim = VMCS_GUEST_CS_LIMIT; 142 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS; 143 break; 144 case VM_REG_GUEST_SS: 145 *base = VMCS_GUEST_SS_BASE; 146 *lim = VMCS_GUEST_SS_LIMIT; 147 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS; 148 break; 149 case VM_REG_GUEST_DS: 150 *base = VMCS_GUEST_DS_BASE; 151 *lim = VMCS_GUEST_DS_LIMIT; 152 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS; 153 break; 154 case VM_REG_GUEST_FS: 155 *base = VMCS_GUEST_FS_BASE; 156 *lim = VMCS_GUEST_FS_LIMIT; 157 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS; 158 break; 159 case VM_REG_GUEST_GS: 160 *base = VMCS_GUEST_GS_BASE; 161 *lim = VMCS_GUEST_GS_LIMIT; 162 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS; 163 break; 164 case VM_REG_GUEST_TR: 165 *base = VMCS_GUEST_TR_BASE; 166 *lim = VMCS_GUEST_TR_LIMIT; 167 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS; 168 break; 169 case VM_REG_GUEST_LDTR: 170 *base = VMCS_GUEST_LDTR_BASE; 171 *lim = VMCS_GUEST_LDTR_LIMIT; 172 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS; 173 break; 174 case VM_REG_GUEST_IDTR: 175 *base = VMCS_GUEST_IDTR_BASE; 176 *lim = VMCS_GUEST_IDTR_LIMIT; 177 *acc = VMCS_INVALID_ENCODING; 178 break; 179 case VM_REG_GUEST_GDTR: 180 *base = VMCS_GUEST_GDTR_BASE; 181 *lim = VMCS_GUEST_GDTR_LIMIT; 182 *acc = VMCS_INVALID_ENCODING; 183 break; 184 default: 185 return (EINVAL); 186 } 187 188 return (0); 189 } 190 191 int 192 vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) 193 { 194 int error; 195 uint32_t encoding; 196 197 /* 198 * If we need to get at vmx-specific state in the VMCS we can bypass 199 * the translation of 'ident' to 'encoding' by simply setting the 200 * sign bit. As it so happens the upper 16 bits are reserved (i.e 201 * set to 0) in the encodings for the VMCS so we are free to use the 202 * sign bit. 203 */ 204 if (ident < 0) 205 encoding = ident & 0x7fffffff; 206 else 207 encoding = vmcs_field_encoding(ident); 208 209 if (encoding == (uint32_t)-1) 210 return (EINVAL); 211 212 if (!running) 213 VMPTRLD(vmcs); 214 215 error = vmread(encoding, retval); 216 217 if (!running) 218 VMCLEAR(vmcs); 219 220 return (error); 221 } 222 223 int 224 vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) 225 { 226 int error; 227 uint32_t encoding; 228 229 if (ident < 0) 230 encoding = ident & 0x7fffffff; 231 else 232 encoding = vmcs_field_encoding(ident); 233 234 if (encoding == (uint32_t)-1) 235 return (EINVAL); 236 237 val = vmcs_fix_regval(encoding, val); 238 239 if (!running) 240 VMPTRLD(vmcs); 241 242 error = vmwrite(encoding, val); 243 244 if (!running) 245 VMCLEAR(vmcs); 246 247 return (error); 248 } 249 250 int 251 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 252 { 253 int error; 254 uint32_t base, limit, access; 255 256 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 257 if (error != 0) 258 panic("vmcs_setdesc: invalid segment register %d", seg); 259 260 if (!running) 261 VMPTRLD(vmcs); 262 if ((error = vmwrite(base, desc->base)) != 0) 263 goto done; 264 265 if ((error = vmwrite(limit, desc->limit)) != 0) 266 goto done; 267 268 if (access != VMCS_INVALID_ENCODING) { 269 if ((error = vmwrite(access, desc->access)) != 0) 270 goto done; 271 } 272 done: 273 if (!running) 274 VMCLEAR(vmcs); 275 return (error); 276 } 277 278 int 279 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 280 { 281 int error; 282 uint32_t base, limit, access; 283 uint64_t u64; 284 285 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 286 if (error != 0) 287 panic("vmcs_getdesc: invalid segment register %d", seg); 288 289 if (!running) 290 VMPTRLD(vmcs); 291 if ((error = vmread(base, &u64)) != 0) 292 goto done; 293 desc->base = u64; 294 295 if ((error = vmread(limit, &u64)) != 0) 296 goto done; 297 desc->limit = u64; 298 299 if (access != VMCS_INVALID_ENCODING) { 300 if ((error = vmread(access, &u64)) != 0) 301 goto done; 302 desc->access = u64; 303 } 304 done: 305 if (!running) 306 VMCLEAR(vmcs); 307 return (error); 308 } 309 310 int 311 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) 312 { 313 int error; 314 315 VMPTRLD(vmcs); 316 317 /* 318 * Guest MSRs are saved in the VM-exit MSR-store area. 319 * Guest MSRs are loaded from the VM-entry MSR-load area. 320 * Both areas point to the same location in memory. 321 */ 322 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) 323 goto done; 324 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) 325 goto done; 326 327 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) 328 goto done; 329 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) 330 goto done; 331 332 error = 0; 333 done: 334 VMCLEAR(vmcs); 335 return (error); 336 } 337 338 int 339 vmcs_init(struct vmcs *vmcs) 340 { 341 int error, codesel, datasel, tsssel; 342 u_long cr0, cr4, efer; 343 uint64_t pat, fsbase, idtrbase; 344 345 codesel = vmm_get_host_codesel(); 346 datasel = vmm_get_host_datasel(); 347 tsssel = vmm_get_host_tsssel(); 348 349 /* 350 * Make sure we have a "current" VMCS to work with. 351 */ 352 VMPTRLD(vmcs); 353 354 /* Host state */ 355 356 /* Initialize host IA32_PAT MSR */ 357 pat = vmm_get_host_pat(); 358 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) 359 goto done; 360 361 /* Load the IA32_EFER MSR */ 362 efer = vmm_get_host_efer(); 363 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) 364 goto done; 365 366 /* Load the control registers */ 367 368 cr0 = vmm_get_host_cr0(); 369 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) 370 goto done; 371 372 cr4 = vmm_get_host_cr4() | CR4_VMXE; 373 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) 374 goto done; 375 376 /* Load the segment selectors */ 377 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) 378 goto done; 379 380 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) 381 goto done; 382 383 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) 384 goto done; 385 386 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) 387 goto done; 388 389 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) 390 goto done; 391 392 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) 393 goto done; 394 395 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) 396 goto done; 397 398 /* 399 * Load the Base-Address for %fs and idtr. 400 * 401 * Note that we exclude %gs, tss and gdtr here because their base 402 * address is pcpu specific. 403 */ 404 fsbase = vmm_get_host_fsbase(); 405 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) 406 goto done; 407 408 idtrbase = vmm_get_host_idtrbase(); 409 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) 410 goto done; 411 412 /* instruction pointer */ 413 if (no_flush_rsb) { 414 if ((error = vmwrite(VMCS_HOST_RIP, 415 (u_long)vmx_exit_guest)) != 0) 416 goto done; 417 } else { 418 if ((error = vmwrite(VMCS_HOST_RIP, 419 (u_long)vmx_exit_guest_flush_rsb)) != 0) 420 goto done; 421 } 422 423 /* link pointer */ 424 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) 425 goto done; 426 done: 427 VMCLEAR(vmcs); 428 return (error); 429 } 430 431 #ifdef BHYVE_SNAPSHOT 432 int 433 vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val) 434 { 435 int error; 436 437 if (!running) 438 VMPTRLD(vmcs); 439 440 error = vmread(ident, val); 441 442 if (!running) 443 VMCLEAR(vmcs); 444 445 return (error); 446 } 447 448 int 449 vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val) 450 { 451 int error; 452 453 if (!running) 454 VMPTRLD(vmcs); 455 456 error = vmwrite(ident, val); 457 458 if (!running) 459 VMCLEAR(vmcs); 460 461 return (error); 462 } 463 464 int 465 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident, 466 struct vm_snapshot_meta *meta) 467 { 468 int ret; 469 uint64_t val; 470 471 if (meta->op == VM_SNAPSHOT_SAVE) { 472 ret = vmcs_getreg(vmcs, running, ident, &val); 473 if (ret != 0) 474 goto done; 475 476 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 477 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 478 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 479 480 ret = vmcs_setreg(vmcs, running, ident, val); 481 if (ret != 0) 482 goto done; 483 } else { 484 ret = EINVAL; 485 goto done; 486 } 487 488 done: 489 return (ret); 490 } 491 492 int 493 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg, 494 struct vm_snapshot_meta *meta) 495 { 496 int ret; 497 struct seg_desc desc; 498 499 if (meta->op == VM_SNAPSHOT_SAVE) { 500 ret = vmcs_getdesc(vmcs, running, seg, &desc); 501 if (ret != 0) 502 goto done; 503 504 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 505 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 506 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 507 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 508 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 509 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 510 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 511 512 ret = vmcs_setdesc(vmcs, running, seg, &desc); 513 if (ret != 0) 514 goto done; 515 } else { 516 ret = EINVAL; 517 goto done; 518 } 519 520 done: 521 return (ret); 522 } 523 524 int 525 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident, 526 struct vm_snapshot_meta *meta) 527 { 528 int ret; 529 uint64_t val; 530 531 if (meta->op == VM_SNAPSHOT_SAVE) { 532 ret = vmcs_getany(vmcs, running, ident, &val); 533 if (ret != 0) 534 goto done; 535 536 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 537 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 538 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 539 540 ret = vmcs_setany(vmcs, running, ident, val); 541 if (ret != 0) 542 goto done; 543 } else { 544 ret = EINVAL; 545 goto done; 546 } 547 548 done: 549 return (ret); 550 } 551 #endif 552 553 #ifdef DDB 554 extern int vmxon_enabled[]; 555 556 DB_SHOW_COMMAND(vmcs, db_show_vmcs) 557 { 558 uint64_t cur_vmcs, val; 559 uint32_t exit; 560 561 if (!vmxon_enabled[curcpu]) { 562 db_printf("VMX not enabled\n"); 563 return; 564 } 565 566 if (have_addr) { 567 db_printf("Only current VMCS supported\n"); 568 return; 569 } 570 571 vmptrst(&cur_vmcs); 572 if (cur_vmcs == VMCS_INITIAL) { 573 db_printf("No current VM context\n"); 574 return; 575 } 576 db_printf("VMCS: %jx\n", cur_vmcs); 577 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); 578 db_printf("Activity: "); 579 val = vmcs_read(VMCS_GUEST_ACTIVITY); 580 switch (val) { 581 case 0: 582 db_printf("Active"); 583 break; 584 case 1: 585 db_printf("HLT"); 586 break; 587 case 2: 588 db_printf("Shutdown"); 589 break; 590 case 3: 591 db_printf("Wait for SIPI"); 592 break; 593 default: 594 db_printf("Unknown: %#lx", val); 595 } 596 db_printf("\n"); 597 exit = vmcs_read(VMCS_EXIT_REASON); 598 if (exit & 0x80000000) 599 db_printf("Entry Failure Reason: %u\n", exit & 0xffff); 600 else 601 db_printf("Exit Reason: %u\n", exit & 0xffff); 602 db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); 603 db_printf("Guest Linear Address: %#lx\n", 604 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 605 switch (exit & 0x8000ffff) { 606 case EXIT_REASON_EXCEPTION: 607 case EXIT_REASON_EXT_INTR: 608 val = vmcs_read(VMCS_EXIT_INTR_INFO); 609 db_printf("Interrupt Type: "); 610 switch (val >> 8 & 0x7) { 611 case 0: 612 db_printf("external"); 613 break; 614 case 2: 615 db_printf("NMI"); 616 break; 617 case 3: 618 db_printf("HW exception"); 619 break; 620 case 4: 621 db_printf("SW exception"); 622 break; 623 default: 624 db_printf("?? %lu", val >> 8 & 0x7); 625 break; 626 } 627 db_printf(" Vector: %lu", val & 0xff); 628 if (val & 0x800) 629 db_printf(" Error Code: %lx", 630 vmcs_read(VMCS_EXIT_INTR_ERRCODE)); 631 db_printf("\n"); 632 break; 633 case EXIT_REASON_EPT_FAULT: 634 case EXIT_REASON_EPT_MISCONFIG: 635 db_printf("Guest Physical Address: %#lx\n", 636 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); 637 break; 638 } 639 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); 640 } 641 #endif 642