1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include "opt_bhyve_snapshot.h" 32 #include "opt_ddb.h" 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/sysctl.h> 39 #include <sys/systm.h> 40 #include <sys/pcpu.h> 41 42 #include <vm/vm.h> 43 #include <vm/pmap.h> 44 45 #include <machine/segments.h> 46 #include <machine/vmm.h> 47 #include <machine/vmm_snapshot.h> 48 #include "vmm_host.h" 49 #include "vmx_cpufunc.h" 50 #include "vmcs.h" 51 #include "ept.h" 52 #include "vmx.h" 53 54 #ifdef DDB 55 #include <ddb/ddb.h> 56 #endif 57 58 SYSCTL_DECL(_hw_vmm_vmx); 59 60 static int no_flush_rsb; 61 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, 62 &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); 63 64 static uint64_t 65 vmcs_fix_regval(uint32_t encoding, uint64_t val) 66 { 67 68 switch (encoding) { 69 case VMCS_GUEST_CR0: 70 val = vmx_fix_cr0(val); 71 break; 72 case VMCS_GUEST_CR4: 73 val = vmx_fix_cr4(val); 74 break; 75 default: 76 break; 77 } 78 return (val); 79 } 80 81 static uint32_t 82 vmcs_field_encoding(int ident) 83 { 84 switch (ident) { 85 case VM_REG_GUEST_CR0: 86 return (VMCS_GUEST_CR0); 87 case VM_REG_GUEST_CR3: 88 return (VMCS_GUEST_CR3); 89 case VM_REG_GUEST_CR4: 90 return (VMCS_GUEST_CR4); 91 case VM_REG_GUEST_DR7: 92 return (VMCS_GUEST_DR7); 93 case VM_REG_GUEST_RSP: 94 return (VMCS_GUEST_RSP); 95 case VM_REG_GUEST_RIP: 96 return (VMCS_GUEST_RIP); 97 case VM_REG_GUEST_RFLAGS: 98 return (VMCS_GUEST_RFLAGS); 99 case VM_REG_GUEST_ES: 100 return (VMCS_GUEST_ES_SELECTOR); 101 case VM_REG_GUEST_CS: 102 return (VMCS_GUEST_CS_SELECTOR); 103 case VM_REG_GUEST_SS: 104 return (VMCS_GUEST_SS_SELECTOR); 105 case VM_REG_GUEST_DS: 106 return (VMCS_GUEST_DS_SELECTOR); 107 case VM_REG_GUEST_FS: 108 return (VMCS_GUEST_FS_SELECTOR); 109 case VM_REG_GUEST_GS: 110 return (VMCS_GUEST_GS_SELECTOR); 111 case VM_REG_GUEST_TR: 112 return (VMCS_GUEST_TR_SELECTOR); 113 case VM_REG_GUEST_LDTR: 114 return (VMCS_GUEST_LDTR_SELECTOR); 115 case VM_REG_GUEST_EFER: 116 return (VMCS_GUEST_IA32_EFER); 117 case VM_REG_GUEST_PDPTE0: 118 return (VMCS_GUEST_PDPTE0); 119 case VM_REG_GUEST_PDPTE1: 120 return (VMCS_GUEST_PDPTE1); 121 case VM_REG_GUEST_PDPTE2: 122 return (VMCS_GUEST_PDPTE2); 123 case VM_REG_GUEST_PDPTE3: 124 return (VMCS_GUEST_PDPTE3); 125 case VM_REG_GUEST_ENTRY_INST_LENGTH: 126 return (VMCS_ENTRY_INST_LENGTH); 127 default: 128 return (-1); 129 } 130 131 } 132 133 static int 134 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) 135 { 136 137 switch (seg) { 138 case VM_REG_GUEST_ES: 139 *base = VMCS_GUEST_ES_BASE; 140 *lim = VMCS_GUEST_ES_LIMIT; 141 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS; 142 break; 143 case VM_REG_GUEST_CS: 144 *base = VMCS_GUEST_CS_BASE; 145 *lim = VMCS_GUEST_CS_LIMIT; 146 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS; 147 break; 148 case VM_REG_GUEST_SS: 149 *base = VMCS_GUEST_SS_BASE; 150 *lim = VMCS_GUEST_SS_LIMIT; 151 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS; 152 break; 153 case VM_REG_GUEST_DS: 154 *base = VMCS_GUEST_DS_BASE; 155 *lim = VMCS_GUEST_DS_LIMIT; 156 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS; 157 break; 158 case VM_REG_GUEST_FS: 159 *base = VMCS_GUEST_FS_BASE; 160 *lim = VMCS_GUEST_FS_LIMIT; 161 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS; 162 break; 163 case VM_REG_GUEST_GS: 164 *base = VMCS_GUEST_GS_BASE; 165 *lim = VMCS_GUEST_GS_LIMIT; 166 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS; 167 break; 168 case VM_REG_GUEST_TR: 169 *base = VMCS_GUEST_TR_BASE; 170 *lim = VMCS_GUEST_TR_LIMIT; 171 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS; 172 break; 173 case VM_REG_GUEST_LDTR: 174 *base = VMCS_GUEST_LDTR_BASE; 175 *lim = VMCS_GUEST_LDTR_LIMIT; 176 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS; 177 break; 178 case VM_REG_GUEST_IDTR: 179 *base = VMCS_GUEST_IDTR_BASE; 180 *lim = VMCS_GUEST_IDTR_LIMIT; 181 *acc = VMCS_INVALID_ENCODING; 182 break; 183 case VM_REG_GUEST_GDTR: 184 *base = VMCS_GUEST_GDTR_BASE; 185 *lim = VMCS_GUEST_GDTR_LIMIT; 186 *acc = VMCS_INVALID_ENCODING; 187 break; 188 default: 189 return (EINVAL); 190 } 191 192 return (0); 193 } 194 195 int 196 vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) 197 { 198 int error; 199 uint32_t encoding; 200 201 /* 202 * If we need to get at vmx-specific state in the VMCS we can bypass 203 * the translation of 'ident' to 'encoding' by simply setting the 204 * sign bit. As it so happens the upper 16 bits are reserved (i.e 205 * set to 0) in the encodings for the VMCS so we are free to use the 206 * sign bit. 207 */ 208 if (ident < 0) 209 encoding = ident & 0x7fffffff; 210 else 211 encoding = vmcs_field_encoding(ident); 212 213 if (encoding == (uint32_t)-1) 214 return (EINVAL); 215 216 if (!running) 217 VMPTRLD(vmcs); 218 219 error = vmread(encoding, retval); 220 221 if (!running) 222 VMCLEAR(vmcs); 223 224 return (error); 225 } 226 227 int 228 vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) 229 { 230 int error; 231 uint32_t encoding; 232 233 if (ident < 0) 234 encoding = ident & 0x7fffffff; 235 else 236 encoding = vmcs_field_encoding(ident); 237 238 if (encoding == (uint32_t)-1) 239 return (EINVAL); 240 241 val = vmcs_fix_regval(encoding, val); 242 243 if (!running) 244 VMPTRLD(vmcs); 245 246 error = vmwrite(encoding, val); 247 248 if (!running) 249 VMCLEAR(vmcs); 250 251 return (error); 252 } 253 254 int 255 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 256 { 257 int error; 258 uint32_t base, limit, access; 259 260 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 261 if (error != 0) 262 panic("vmcs_setdesc: invalid segment register %d", seg); 263 264 if (!running) 265 VMPTRLD(vmcs); 266 if ((error = vmwrite(base, desc->base)) != 0) 267 goto done; 268 269 if ((error = vmwrite(limit, desc->limit)) != 0) 270 goto done; 271 272 if (access != VMCS_INVALID_ENCODING) { 273 if ((error = vmwrite(access, desc->access)) != 0) 274 goto done; 275 } 276 done: 277 if (!running) 278 VMCLEAR(vmcs); 279 return (error); 280 } 281 282 int 283 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 284 { 285 int error; 286 uint32_t base, limit, access; 287 uint64_t u64; 288 289 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 290 if (error != 0) 291 panic("vmcs_getdesc: invalid segment register %d", seg); 292 293 if (!running) 294 VMPTRLD(vmcs); 295 if ((error = vmread(base, &u64)) != 0) 296 goto done; 297 desc->base = u64; 298 299 if ((error = vmread(limit, &u64)) != 0) 300 goto done; 301 desc->limit = u64; 302 303 if (access != VMCS_INVALID_ENCODING) { 304 if ((error = vmread(access, &u64)) != 0) 305 goto done; 306 desc->access = u64; 307 } 308 done: 309 if (!running) 310 VMCLEAR(vmcs); 311 return (error); 312 } 313 314 int 315 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) 316 { 317 int error; 318 319 VMPTRLD(vmcs); 320 321 /* 322 * Guest MSRs are saved in the VM-exit MSR-store area. 323 * Guest MSRs are loaded from the VM-entry MSR-load area. 324 * Both areas point to the same location in memory. 325 */ 326 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) 327 goto done; 328 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) 329 goto done; 330 331 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) 332 goto done; 333 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) 334 goto done; 335 336 error = 0; 337 done: 338 VMCLEAR(vmcs); 339 return (error); 340 } 341 342 int 343 vmcs_init(struct vmcs *vmcs) 344 { 345 int error, codesel, datasel, tsssel; 346 u_long cr0, cr4, efer; 347 uint64_t pat, fsbase, idtrbase; 348 349 codesel = vmm_get_host_codesel(); 350 datasel = vmm_get_host_datasel(); 351 tsssel = vmm_get_host_tsssel(); 352 353 /* 354 * Make sure we have a "current" VMCS to work with. 355 */ 356 VMPTRLD(vmcs); 357 358 /* Host state */ 359 360 /* Initialize host IA32_PAT MSR */ 361 pat = vmm_get_host_pat(); 362 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) 363 goto done; 364 365 /* Load the IA32_EFER MSR */ 366 efer = vmm_get_host_efer(); 367 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) 368 goto done; 369 370 /* Load the control registers */ 371 372 cr0 = vmm_get_host_cr0(); 373 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) 374 goto done; 375 376 cr4 = vmm_get_host_cr4() | CR4_VMXE; 377 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) 378 goto done; 379 380 /* Load the segment selectors */ 381 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) 382 goto done; 383 384 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) 385 goto done; 386 387 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) 388 goto done; 389 390 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) 391 goto done; 392 393 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) 394 goto done; 395 396 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) 397 goto done; 398 399 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) 400 goto done; 401 402 /* 403 * Load the Base-Address for %fs and idtr. 404 * 405 * Note that we exclude %gs, tss and gdtr here because their base 406 * address is pcpu specific. 407 */ 408 fsbase = vmm_get_host_fsbase(); 409 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) 410 goto done; 411 412 idtrbase = vmm_get_host_idtrbase(); 413 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) 414 goto done; 415 416 /* instruction pointer */ 417 if (no_flush_rsb) { 418 if ((error = vmwrite(VMCS_HOST_RIP, 419 (u_long)vmx_exit_guest)) != 0) 420 goto done; 421 } else { 422 if ((error = vmwrite(VMCS_HOST_RIP, 423 (u_long)vmx_exit_guest_flush_rsb)) != 0) 424 goto done; 425 } 426 427 /* link pointer */ 428 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) 429 goto done; 430 done: 431 VMCLEAR(vmcs); 432 return (error); 433 } 434 435 #ifdef BHYVE_SNAPSHOT 436 int 437 vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val) 438 { 439 int error; 440 441 if (!running) 442 VMPTRLD(vmcs); 443 444 error = vmread(ident, val); 445 446 if (!running) 447 VMCLEAR(vmcs); 448 449 return (error); 450 } 451 452 int 453 vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val) 454 { 455 int error; 456 457 if (!running) 458 VMPTRLD(vmcs); 459 460 error = vmwrite(ident, val); 461 462 if (!running) 463 VMCLEAR(vmcs); 464 465 return (error); 466 } 467 468 int 469 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident, 470 struct vm_snapshot_meta *meta) 471 { 472 int ret; 473 uint64_t val; 474 475 if (meta->op == VM_SNAPSHOT_SAVE) { 476 ret = vmcs_getreg(vmcs, running, ident, &val); 477 if (ret != 0) 478 goto done; 479 480 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 481 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 482 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 483 484 ret = vmcs_setreg(vmcs, running, ident, val); 485 if (ret != 0) 486 goto done; 487 } else { 488 ret = EINVAL; 489 goto done; 490 } 491 492 done: 493 return (ret); 494 } 495 496 int 497 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg, 498 struct vm_snapshot_meta *meta) 499 { 500 int ret; 501 struct seg_desc desc; 502 503 if (meta->op == VM_SNAPSHOT_SAVE) { 504 ret = vmcs_getdesc(vmcs, running, seg, &desc); 505 if (ret != 0) 506 goto done; 507 508 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 509 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 510 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 511 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 512 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 513 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 514 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 515 516 ret = vmcs_setdesc(vmcs, running, seg, &desc); 517 if (ret != 0) 518 goto done; 519 } else { 520 ret = EINVAL; 521 goto done; 522 } 523 524 done: 525 return (ret); 526 } 527 528 int 529 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident, 530 struct vm_snapshot_meta *meta) 531 { 532 int ret; 533 uint64_t val; 534 535 if (meta->op == VM_SNAPSHOT_SAVE) { 536 ret = vmcs_getany(vmcs, running, ident, &val); 537 if (ret != 0) 538 goto done; 539 540 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 541 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 542 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 543 544 ret = vmcs_setany(vmcs, running, ident, val); 545 if (ret != 0) 546 goto done; 547 } else { 548 ret = EINVAL; 549 goto done; 550 } 551 552 done: 553 return (ret); 554 } 555 #endif 556 557 #ifdef DDB 558 extern int vmxon_enabled[]; 559 560 DB_SHOW_COMMAND(vmcs, db_show_vmcs) 561 { 562 uint64_t cur_vmcs, val; 563 uint32_t exit; 564 565 if (!vmxon_enabled[curcpu]) { 566 db_printf("VMX not enabled\n"); 567 return; 568 } 569 570 if (have_addr) { 571 db_printf("Only current VMCS supported\n"); 572 return; 573 } 574 575 vmptrst(&cur_vmcs); 576 if (cur_vmcs == VMCS_INITIAL) { 577 db_printf("No current VM context\n"); 578 return; 579 } 580 db_printf("VMCS: %jx\n", cur_vmcs); 581 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); 582 db_printf("Activity: "); 583 val = vmcs_read(VMCS_GUEST_ACTIVITY); 584 switch (val) { 585 case 0: 586 db_printf("Active"); 587 break; 588 case 1: 589 db_printf("HLT"); 590 break; 591 case 2: 592 db_printf("Shutdown"); 593 break; 594 case 3: 595 db_printf("Wait for SIPI"); 596 break; 597 default: 598 db_printf("Unknown: %#lx", val); 599 } 600 db_printf("\n"); 601 exit = vmcs_read(VMCS_EXIT_REASON); 602 if (exit & 0x80000000) 603 db_printf("Entry Failure Reason: %u\n", exit & 0xffff); 604 else 605 db_printf("Exit Reason: %u\n", exit & 0xffff); 606 db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); 607 db_printf("Guest Linear Address: %#lx\n", 608 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 609 switch (exit & 0x8000ffff) { 610 case EXIT_REASON_EXCEPTION: 611 case EXIT_REASON_EXT_INTR: 612 val = vmcs_read(VMCS_EXIT_INTR_INFO); 613 db_printf("Interrupt Type: "); 614 switch (val >> 8 & 0x7) { 615 case 0: 616 db_printf("external"); 617 break; 618 case 2: 619 db_printf("NMI"); 620 break; 621 case 3: 622 db_printf("HW exception"); 623 break; 624 case 4: 625 db_printf("SW exception"); 626 break; 627 default: 628 db_printf("?? %lu", val >> 8 & 0x7); 629 break; 630 } 631 db_printf(" Vector: %lu", val & 0xff); 632 if (val & 0x800) 633 db_printf(" Error Code: %lx", 634 vmcs_read(VMCS_EXIT_INTR_ERRCODE)); 635 db_printf("\n"); 636 break; 637 case EXIT_REASON_EPT_FAULT: 638 case EXIT_REASON_EPT_MISCONFIG: 639 db_printf("Guest Physical Address: %#lx\n", 640 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); 641 break; 642 } 643 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); 644 } 645 #endif 646