1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_bhyve_snapshot.h" 30 #include "opt_ddb.h" 31 32 #include <sys/param.h> 33 #include <sys/sysctl.h> 34 #include <sys/systm.h> 35 #include <sys/pcpu.h> 36 37 #include <vm/vm.h> 38 #include <vm/pmap.h> 39 40 #include <machine/segments.h> 41 #include <machine/vmm.h> 42 #include <machine/vmm_snapshot.h> 43 #include "vmm_host.h" 44 #include "vmx_cpufunc.h" 45 #include "vmcs.h" 46 #include "ept.h" 47 #include "vmx.h" 48 49 #ifdef DDB 50 #include <ddb/ddb.h> 51 #endif 52 53 SYSCTL_DECL(_hw_vmm_vmx); 54 55 static int no_flush_rsb; 56 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, 57 &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); 58 59 static uint64_t 60 vmcs_fix_regval(uint32_t encoding, uint64_t val) 61 { 62 63 switch (encoding) { 64 case VMCS_GUEST_CR0: 65 val = vmx_fix_cr0(val); 66 break; 67 case VMCS_GUEST_CR4: 68 val = vmx_fix_cr4(val); 69 break; 70 default: 71 break; 72 } 73 return (val); 74 } 75 76 static uint32_t 77 vmcs_field_encoding(int ident) 78 { 79 switch (ident) { 80 case VM_REG_GUEST_CR0: 81 return (VMCS_GUEST_CR0); 82 case VM_REG_GUEST_CR3: 83 return (VMCS_GUEST_CR3); 84 case VM_REG_GUEST_CR4: 85 return (VMCS_GUEST_CR4); 86 case VM_REG_GUEST_DR7: 87 return (VMCS_GUEST_DR7); 88 case VM_REG_GUEST_RSP: 89 return (VMCS_GUEST_RSP); 90 case VM_REG_GUEST_RIP: 91 return (VMCS_GUEST_RIP); 92 case VM_REG_GUEST_RFLAGS: 93 return (VMCS_GUEST_RFLAGS); 94 case VM_REG_GUEST_ES: 95 return (VMCS_GUEST_ES_SELECTOR); 96 case VM_REG_GUEST_CS: 97 return (VMCS_GUEST_CS_SELECTOR); 98 case VM_REG_GUEST_SS: 99 return (VMCS_GUEST_SS_SELECTOR); 100 case VM_REG_GUEST_DS: 101 return (VMCS_GUEST_DS_SELECTOR); 102 case VM_REG_GUEST_FS: 103 return (VMCS_GUEST_FS_SELECTOR); 104 case VM_REG_GUEST_GS: 105 return (VMCS_GUEST_GS_SELECTOR); 106 case VM_REG_GUEST_TR: 107 return (VMCS_GUEST_TR_SELECTOR); 108 case VM_REG_GUEST_LDTR: 109 return (VMCS_GUEST_LDTR_SELECTOR); 110 case VM_REG_GUEST_EFER: 111 return (VMCS_GUEST_IA32_EFER); 112 case VM_REG_GUEST_PDPTE0: 113 return (VMCS_GUEST_PDPTE0); 114 case VM_REG_GUEST_PDPTE1: 115 return (VMCS_GUEST_PDPTE1); 116 case VM_REG_GUEST_PDPTE2: 117 return (VMCS_GUEST_PDPTE2); 118 case VM_REG_GUEST_PDPTE3: 119 return (VMCS_GUEST_PDPTE3); 120 case VM_REG_GUEST_ENTRY_INST_LENGTH: 121 return (VMCS_ENTRY_INST_LENGTH); 122 case VM_REG_GUEST_FS_BASE: 123 return (VMCS_GUEST_FS_BASE); 124 case VM_REG_GUEST_GS_BASE: 125 return (VMCS_GUEST_GS_BASE); 126 default: 127 return (-1); 128 } 129 } 130 131 static int 132 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) 133 { 134 135 switch (seg) { 136 case VM_REG_GUEST_ES: 137 *base = VMCS_GUEST_ES_BASE; 138 *lim = VMCS_GUEST_ES_LIMIT; 139 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS; 140 break; 141 case VM_REG_GUEST_CS: 142 *base = VMCS_GUEST_CS_BASE; 143 *lim = VMCS_GUEST_CS_LIMIT; 144 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS; 145 break; 146 case VM_REG_GUEST_SS: 147 *base = VMCS_GUEST_SS_BASE; 148 *lim = VMCS_GUEST_SS_LIMIT; 149 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS; 150 break; 151 case VM_REG_GUEST_DS: 152 *base = VMCS_GUEST_DS_BASE; 153 *lim = VMCS_GUEST_DS_LIMIT; 154 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS; 155 break; 156 case VM_REG_GUEST_FS: 157 *base = VMCS_GUEST_FS_BASE; 158 *lim = VMCS_GUEST_FS_LIMIT; 159 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS; 160 break; 161 case VM_REG_GUEST_GS: 162 *base = VMCS_GUEST_GS_BASE; 163 *lim = VMCS_GUEST_GS_LIMIT; 164 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS; 165 break; 166 case VM_REG_GUEST_TR: 167 *base = VMCS_GUEST_TR_BASE; 168 *lim = VMCS_GUEST_TR_LIMIT; 169 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS; 170 break; 171 case VM_REG_GUEST_LDTR: 172 *base = VMCS_GUEST_LDTR_BASE; 173 *lim = VMCS_GUEST_LDTR_LIMIT; 174 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS; 175 break; 176 case VM_REG_GUEST_IDTR: 177 *base = VMCS_GUEST_IDTR_BASE; 178 *lim = VMCS_GUEST_IDTR_LIMIT; 179 *acc = VMCS_INVALID_ENCODING; 180 break; 181 case VM_REG_GUEST_GDTR: 182 *base = VMCS_GUEST_GDTR_BASE; 183 *lim = VMCS_GUEST_GDTR_LIMIT; 184 *acc = VMCS_INVALID_ENCODING; 185 break; 186 default: 187 return (EINVAL); 188 } 189 190 return (0); 191 } 192 193 int 194 vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) 195 { 196 int error; 197 uint32_t encoding; 198 199 /* 200 * If we need to get at vmx-specific state in the VMCS we can bypass 201 * the translation of 'ident' to 'encoding' by simply setting the 202 * sign bit. As it so happens the upper 16 bits are reserved (i.e 203 * set to 0) in the encodings for the VMCS so we are free to use the 204 * sign bit. 205 */ 206 if (ident < 0) 207 encoding = ident & 0x7fffffff; 208 else 209 encoding = vmcs_field_encoding(ident); 210 211 if (encoding == (uint32_t)-1) 212 return (EINVAL); 213 214 if (!running) 215 VMPTRLD(vmcs); 216 217 error = vmread(encoding, retval); 218 219 if (!running) 220 VMCLEAR(vmcs); 221 222 return (error); 223 } 224 225 int 226 vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) 227 { 228 int error; 229 uint32_t encoding; 230 231 if (ident < 0) 232 encoding = ident & 0x7fffffff; 233 else 234 encoding = vmcs_field_encoding(ident); 235 236 if (encoding == (uint32_t)-1) 237 return (EINVAL); 238 239 val = vmcs_fix_regval(encoding, val); 240 241 if (!running) 242 VMPTRLD(vmcs); 243 244 error = vmwrite(encoding, val); 245 246 if (!running) 247 VMCLEAR(vmcs); 248 249 return (error); 250 } 251 252 int 253 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 254 { 255 int error; 256 uint32_t base, limit, access; 257 258 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 259 if (error != 0) 260 panic("vmcs_setdesc: invalid segment register %d", seg); 261 262 if (!running) 263 VMPTRLD(vmcs); 264 if ((error = vmwrite(base, desc->base)) != 0) 265 goto done; 266 267 if ((error = vmwrite(limit, desc->limit)) != 0) 268 goto done; 269 270 if (access != VMCS_INVALID_ENCODING) { 271 if ((error = vmwrite(access, desc->access)) != 0) 272 goto done; 273 } 274 done: 275 if (!running) 276 VMCLEAR(vmcs); 277 return (error); 278 } 279 280 int 281 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 282 { 283 int error; 284 uint32_t base, limit, access; 285 uint64_t u64; 286 287 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 288 if (error != 0) 289 panic("vmcs_getdesc: invalid segment register %d", seg); 290 291 if (!running) 292 VMPTRLD(vmcs); 293 if ((error = vmread(base, &u64)) != 0) 294 goto done; 295 desc->base = u64; 296 297 if ((error = vmread(limit, &u64)) != 0) 298 goto done; 299 desc->limit = u64; 300 301 if (access != VMCS_INVALID_ENCODING) { 302 if ((error = vmread(access, &u64)) != 0) 303 goto done; 304 desc->access = u64; 305 } 306 done: 307 if (!running) 308 VMCLEAR(vmcs); 309 return (error); 310 } 311 312 int 313 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) 314 { 315 int error; 316 317 VMPTRLD(vmcs); 318 319 /* 320 * Guest MSRs are saved in the VM-exit MSR-store area. 321 * Guest MSRs are loaded from the VM-entry MSR-load area. 322 * Both areas point to the same location in memory. 323 */ 324 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) 325 goto done; 326 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) 327 goto done; 328 329 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) 330 goto done; 331 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) 332 goto done; 333 334 error = 0; 335 done: 336 VMCLEAR(vmcs); 337 return (error); 338 } 339 340 int 341 vmcs_init(struct vmcs *vmcs) 342 { 343 int error, codesel, datasel, tsssel; 344 u_long cr0, cr4, efer; 345 uint64_t pat, fsbase, idtrbase; 346 347 codesel = vmm_get_host_codesel(); 348 datasel = vmm_get_host_datasel(); 349 tsssel = vmm_get_host_tsssel(); 350 351 /* 352 * Make sure we have a "current" VMCS to work with. 353 */ 354 VMPTRLD(vmcs); 355 356 /* Host state */ 357 358 /* Initialize host IA32_PAT MSR */ 359 pat = vmm_get_host_pat(); 360 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) 361 goto done; 362 363 /* Load the IA32_EFER MSR */ 364 efer = vmm_get_host_efer(); 365 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) 366 goto done; 367 368 /* Load the control registers */ 369 370 cr0 = vmm_get_host_cr0(); 371 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) 372 goto done; 373 374 cr4 = vmm_get_host_cr4() | CR4_VMXE; 375 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) 376 goto done; 377 378 /* Load the segment selectors */ 379 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) 380 goto done; 381 382 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) 383 goto done; 384 385 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) 386 goto done; 387 388 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) 389 goto done; 390 391 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) 392 goto done; 393 394 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) 395 goto done; 396 397 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) 398 goto done; 399 400 /* 401 * Load the Base-Address for %fs and idtr. 402 * 403 * Note that we exclude %gs, tss and gdtr here because their base 404 * address is pcpu specific. 405 */ 406 fsbase = vmm_get_host_fsbase(); 407 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) 408 goto done; 409 410 idtrbase = vmm_get_host_idtrbase(); 411 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) 412 goto done; 413 414 /* instruction pointer */ 415 if (no_flush_rsb) { 416 if ((error = vmwrite(VMCS_HOST_RIP, 417 (u_long)vmx_exit_guest)) != 0) 418 goto done; 419 } else { 420 if ((error = vmwrite(VMCS_HOST_RIP, 421 (u_long)vmx_exit_guest_flush_rsb)) != 0) 422 goto done; 423 } 424 425 /* link pointer */ 426 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) 427 goto done; 428 done: 429 VMCLEAR(vmcs); 430 return (error); 431 } 432 433 #ifdef BHYVE_SNAPSHOT 434 int 435 vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val) 436 { 437 int error; 438 439 if (!running) 440 VMPTRLD(vmcs); 441 442 error = vmread(ident, val); 443 444 if (!running) 445 VMCLEAR(vmcs); 446 447 return (error); 448 } 449 450 int 451 vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val) 452 { 453 int error; 454 455 if (!running) 456 VMPTRLD(vmcs); 457 458 error = vmwrite(ident, val); 459 460 if (!running) 461 VMCLEAR(vmcs); 462 463 return (error); 464 } 465 466 int 467 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident, 468 struct vm_snapshot_meta *meta) 469 { 470 int ret; 471 uint64_t val; 472 473 if (meta->op == VM_SNAPSHOT_SAVE) { 474 ret = vmcs_getreg(vmcs, running, ident, &val); 475 if (ret != 0) 476 goto done; 477 478 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 479 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 480 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 481 482 ret = vmcs_setreg(vmcs, running, ident, val); 483 if (ret != 0) 484 goto done; 485 } else { 486 ret = EINVAL; 487 goto done; 488 } 489 490 done: 491 return (ret); 492 } 493 494 int 495 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg, 496 struct vm_snapshot_meta *meta) 497 { 498 int ret; 499 struct seg_desc desc; 500 501 if (meta->op == VM_SNAPSHOT_SAVE) { 502 ret = vmcs_getdesc(vmcs, running, seg, &desc); 503 if (ret != 0) 504 goto done; 505 506 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 507 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 508 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 509 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 510 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 511 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 512 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 513 514 ret = vmcs_setdesc(vmcs, running, seg, &desc); 515 if (ret != 0) 516 goto done; 517 } else { 518 ret = EINVAL; 519 goto done; 520 } 521 522 done: 523 return (ret); 524 } 525 526 int 527 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident, 528 struct vm_snapshot_meta *meta) 529 { 530 int ret; 531 uint64_t val; 532 533 if (meta->op == VM_SNAPSHOT_SAVE) { 534 ret = vmcs_getany(vmcs, running, ident, &val); 535 if (ret != 0) 536 goto done; 537 538 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 539 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 540 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 541 542 ret = vmcs_setany(vmcs, running, ident, val); 543 if (ret != 0) 544 goto done; 545 } else { 546 ret = EINVAL; 547 goto done; 548 } 549 550 done: 551 return (ret); 552 } 553 #endif 554 555 #ifdef DDB 556 extern int vmxon_enabled[]; 557 558 DB_SHOW_COMMAND(vmcs, db_show_vmcs) 559 { 560 uint64_t cur_vmcs, val; 561 uint32_t exit; 562 563 if (!vmxon_enabled[curcpu]) { 564 db_printf("VMX not enabled\n"); 565 return; 566 } 567 568 if (have_addr) { 569 db_printf("Only current VMCS supported\n"); 570 return; 571 } 572 573 vmptrst(&cur_vmcs); 574 if (cur_vmcs == VMCS_INITIAL) { 575 db_printf("No current VM context\n"); 576 return; 577 } 578 db_printf("VMCS: %jx\n", cur_vmcs); 579 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); 580 db_printf("Activity: "); 581 val = vmcs_read(VMCS_GUEST_ACTIVITY); 582 switch (val) { 583 case 0: 584 db_printf("Active"); 585 break; 586 case 1: 587 db_printf("HLT"); 588 break; 589 case 2: 590 db_printf("Shutdown"); 591 break; 592 case 3: 593 db_printf("Wait for SIPI"); 594 break; 595 default: 596 db_printf("Unknown: %#lx", val); 597 } 598 db_printf("\n"); 599 exit = vmcs_read(VMCS_EXIT_REASON); 600 if (exit & 0x80000000) 601 db_printf("Entry Failure Reason: %u\n", exit & 0xffff); 602 else 603 db_printf("Exit Reason: %u\n", exit & 0xffff); 604 db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); 605 db_printf("Guest Linear Address: %#lx\n", 606 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 607 switch (exit & 0x8000ffff) { 608 case EXIT_REASON_EXCEPTION: 609 case EXIT_REASON_EXT_INTR: 610 val = vmcs_read(VMCS_EXIT_INTR_INFO); 611 db_printf("Interrupt Type: "); 612 switch (val >> 8 & 0x7) { 613 case 0: 614 db_printf("external"); 615 break; 616 case 2: 617 db_printf("NMI"); 618 break; 619 case 3: 620 db_printf("HW exception"); 621 break; 622 case 4: 623 db_printf("SW exception"); 624 break; 625 default: 626 db_printf("?? %lu", val >> 8 & 0x7); 627 break; 628 } 629 db_printf(" Vector: %lu", val & 0xff); 630 if (val & 0x800) 631 db_printf(" Error Code: %lx", 632 vmcs_read(VMCS_EXIT_INTR_ERRCODE)); 633 db_printf("\n"); 634 break; 635 case EXIT_REASON_EPT_FAULT: 636 case EXIT_REASON_EPT_MISCONFIG: 637 db_printf("Guest Physical Address: %#lx\n", 638 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); 639 break; 640 } 641 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); 642 } 643 #endif 644