1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_bhyve_snapshot.h" 30 #include "opt_ddb.h" 31 32 #include <sys/param.h> 33 #include <sys/sysctl.h> 34 #include <sys/systm.h> 35 #include <sys/pcpu.h> 36 37 #include <vm/vm.h> 38 #include <vm/pmap.h> 39 40 #include <machine/segments.h> 41 #include <machine/vmm.h> 42 #include <machine/vmm_snapshot.h> 43 #include "vmm_host.h" 44 #include "vmx_cpufunc.h" 45 #include "vmcs.h" 46 #include "ept.h" 47 #include "vmx.h" 48 49 #ifdef DDB 50 #include <ddb/ddb.h> 51 #endif 52 53 SYSCTL_DECL(_hw_vmm_vmx); 54 55 static int no_flush_rsb; 56 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, 57 &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); 58 59 static uint64_t 60 vmcs_fix_regval(uint32_t encoding, uint64_t val) 61 { 62 63 switch (encoding) { 64 case VMCS_GUEST_CR0: 65 val = vmx_fix_cr0(val); 66 break; 67 case VMCS_GUEST_CR4: 68 val = vmx_fix_cr4(val); 69 break; 70 default: 71 break; 72 } 73 return (val); 74 } 75 76 static uint32_t 77 vmcs_field_encoding(int ident) 78 { 79 switch (ident) { 80 case VM_REG_GUEST_CR0: 81 return (VMCS_GUEST_CR0); 82 case VM_REG_GUEST_CR3: 83 return (VMCS_GUEST_CR3); 84 case VM_REG_GUEST_CR4: 85 return (VMCS_GUEST_CR4); 86 case VM_REG_GUEST_DR7: 87 return (VMCS_GUEST_DR7); 88 case VM_REG_GUEST_RSP: 89 return (VMCS_GUEST_RSP); 90 case VM_REG_GUEST_RIP: 91 return (VMCS_GUEST_RIP); 92 case VM_REG_GUEST_RFLAGS: 93 return (VMCS_GUEST_RFLAGS); 94 case VM_REG_GUEST_ES: 95 return (VMCS_GUEST_ES_SELECTOR); 96 case VM_REG_GUEST_CS: 97 return (VMCS_GUEST_CS_SELECTOR); 98 case VM_REG_GUEST_SS: 99 return (VMCS_GUEST_SS_SELECTOR); 100 case VM_REG_GUEST_DS: 101 return (VMCS_GUEST_DS_SELECTOR); 102 case VM_REG_GUEST_FS: 103 return (VMCS_GUEST_FS_SELECTOR); 104 case VM_REG_GUEST_GS: 105 return (VMCS_GUEST_GS_SELECTOR); 106 case VM_REG_GUEST_TR: 107 return (VMCS_GUEST_TR_SELECTOR); 108 case VM_REG_GUEST_LDTR: 109 return (VMCS_GUEST_LDTR_SELECTOR); 110 case VM_REG_GUEST_EFER: 111 return (VMCS_GUEST_IA32_EFER); 112 case VM_REG_GUEST_PDPTE0: 113 return (VMCS_GUEST_PDPTE0); 114 case VM_REG_GUEST_PDPTE1: 115 return (VMCS_GUEST_PDPTE1); 116 case VM_REG_GUEST_PDPTE2: 117 return (VMCS_GUEST_PDPTE2); 118 case VM_REG_GUEST_PDPTE3: 119 return (VMCS_GUEST_PDPTE3); 120 case VM_REG_GUEST_ENTRY_INST_LENGTH: 121 return (VMCS_ENTRY_INST_LENGTH); 122 default: 123 return (-1); 124 } 125 126 } 127 128 static int 129 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) 130 { 131 132 switch (seg) { 133 case VM_REG_GUEST_ES: 134 *base = VMCS_GUEST_ES_BASE; 135 *lim = VMCS_GUEST_ES_LIMIT; 136 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS; 137 break; 138 case VM_REG_GUEST_CS: 139 *base = VMCS_GUEST_CS_BASE; 140 *lim = VMCS_GUEST_CS_LIMIT; 141 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS; 142 break; 143 case VM_REG_GUEST_SS: 144 *base = VMCS_GUEST_SS_BASE; 145 *lim = VMCS_GUEST_SS_LIMIT; 146 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS; 147 break; 148 case VM_REG_GUEST_DS: 149 *base = VMCS_GUEST_DS_BASE; 150 *lim = VMCS_GUEST_DS_LIMIT; 151 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS; 152 break; 153 case VM_REG_GUEST_FS: 154 *base = VMCS_GUEST_FS_BASE; 155 *lim = VMCS_GUEST_FS_LIMIT; 156 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS; 157 break; 158 case VM_REG_GUEST_GS: 159 *base = VMCS_GUEST_GS_BASE; 160 *lim = VMCS_GUEST_GS_LIMIT; 161 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS; 162 break; 163 case VM_REG_GUEST_TR: 164 *base = VMCS_GUEST_TR_BASE; 165 *lim = VMCS_GUEST_TR_LIMIT; 166 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS; 167 break; 168 case VM_REG_GUEST_LDTR: 169 *base = VMCS_GUEST_LDTR_BASE; 170 *lim = VMCS_GUEST_LDTR_LIMIT; 171 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS; 172 break; 173 case VM_REG_GUEST_IDTR: 174 *base = VMCS_GUEST_IDTR_BASE; 175 *lim = VMCS_GUEST_IDTR_LIMIT; 176 *acc = VMCS_INVALID_ENCODING; 177 break; 178 case VM_REG_GUEST_GDTR: 179 *base = VMCS_GUEST_GDTR_BASE; 180 *lim = VMCS_GUEST_GDTR_LIMIT; 181 *acc = VMCS_INVALID_ENCODING; 182 break; 183 default: 184 return (EINVAL); 185 } 186 187 return (0); 188 } 189 190 int 191 vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) 192 { 193 int error; 194 uint32_t encoding; 195 196 /* 197 * If we need to get at vmx-specific state in the VMCS we can bypass 198 * the translation of 'ident' to 'encoding' by simply setting the 199 * sign bit. As it so happens the upper 16 bits are reserved (i.e 200 * set to 0) in the encodings for the VMCS so we are free to use the 201 * sign bit. 202 */ 203 if (ident < 0) 204 encoding = ident & 0x7fffffff; 205 else 206 encoding = vmcs_field_encoding(ident); 207 208 if (encoding == (uint32_t)-1) 209 return (EINVAL); 210 211 if (!running) 212 VMPTRLD(vmcs); 213 214 error = vmread(encoding, retval); 215 216 if (!running) 217 VMCLEAR(vmcs); 218 219 return (error); 220 } 221 222 int 223 vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) 224 { 225 int error; 226 uint32_t encoding; 227 228 if (ident < 0) 229 encoding = ident & 0x7fffffff; 230 else 231 encoding = vmcs_field_encoding(ident); 232 233 if (encoding == (uint32_t)-1) 234 return (EINVAL); 235 236 val = vmcs_fix_regval(encoding, val); 237 238 if (!running) 239 VMPTRLD(vmcs); 240 241 error = vmwrite(encoding, val); 242 243 if (!running) 244 VMCLEAR(vmcs); 245 246 return (error); 247 } 248 249 int 250 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 251 { 252 int error; 253 uint32_t base, limit, access; 254 255 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 256 if (error != 0) 257 panic("vmcs_setdesc: invalid segment register %d", seg); 258 259 if (!running) 260 VMPTRLD(vmcs); 261 if ((error = vmwrite(base, desc->base)) != 0) 262 goto done; 263 264 if ((error = vmwrite(limit, desc->limit)) != 0) 265 goto done; 266 267 if (access != VMCS_INVALID_ENCODING) { 268 if ((error = vmwrite(access, desc->access)) != 0) 269 goto done; 270 } 271 done: 272 if (!running) 273 VMCLEAR(vmcs); 274 return (error); 275 } 276 277 int 278 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) 279 { 280 int error; 281 uint32_t base, limit, access; 282 uint64_t u64; 283 284 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); 285 if (error != 0) 286 panic("vmcs_getdesc: invalid segment register %d", seg); 287 288 if (!running) 289 VMPTRLD(vmcs); 290 if ((error = vmread(base, &u64)) != 0) 291 goto done; 292 desc->base = u64; 293 294 if ((error = vmread(limit, &u64)) != 0) 295 goto done; 296 desc->limit = u64; 297 298 if (access != VMCS_INVALID_ENCODING) { 299 if ((error = vmread(access, &u64)) != 0) 300 goto done; 301 desc->access = u64; 302 } 303 done: 304 if (!running) 305 VMCLEAR(vmcs); 306 return (error); 307 } 308 309 int 310 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) 311 { 312 int error; 313 314 VMPTRLD(vmcs); 315 316 /* 317 * Guest MSRs are saved in the VM-exit MSR-store area. 318 * Guest MSRs are loaded from the VM-entry MSR-load area. 319 * Both areas point to the same location in memory. 320 */ 321 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) 322 goto done; 323 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) 324 goto done; 325 326 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) 327 goto done; 328 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) 329 goto done; 330 331 error = 0; 332 done: 333 VMCLEAR(vmcs); 334 return (error); 335 } 336 337 int 338 vmcs_init(struct vmcs *vmcs) 339 { 340 int error, codesel, datasel, tsssel; 341 u_long cr0, cr4, efer; 342 uint64_t pat, fsbase, idtrbase; 343 344 codesel = vmm_get_host_codesel(); 345 datasel = vmm_get_host_datasel(); 346 tsssel = vmm_get_host_tsssel(); 347 348 /* 349 * Make sure we have a "current" VMCS to work with. 350 */ 351 VMPTRLD(vmcs); 352 353 /* Host state */ 354 355 /* Initialize host IA32_PAT MSR */ 356 pat = vmm_get_host_pat(); 357 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) 358 goto done; 359 360 /* Load the IA32_EFER MSR */ 361 efer = vmm_get_host_efer(); 362 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) 363 goto done; 364 365 /* Load the control registers */ 366 367 cr0 = vmm_get_host_cr0(); 368 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) 369 goto done; 370 371 cr4 = vmm_get_host_cr4() | CR4_VMXE; 372 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) 373 goto done; 374 375 /* Load the segment selectors */ 376 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) 377 goto done; 378 379 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) 380 goto done; 381 382 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) 383 goto done; 384 385 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) 386 goto done; 387 388 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) 389 goto done; 390 391 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) 392 goto done; 393 394 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) 395 goto done; 396 397 /* 398 * Load the Base-Address for %fs and idtr. 399 * 400 * Note that we exclude %gs, tss and gdtr here because their base 401 * address is pcpu specific. 402 */ 403 fsbase = vmm_get_host_fsbase(); 404 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) 405 goto done; 406 407 idtrbase = vmm_get_host_idtrbase(); 408 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) 409 goto done; 410 411 /* instruction pointer */ 412 if (no_flush_rsb) { 413 if ((error = vmwrite(VMCS_HOST_RIP, 414 (u_long)vmx_exit_guest)) != 0) 415 goto done; 416 } else { 417 if ((error = vmwrite(VMCS_HOST_RIP, 418 (u_long)vmx_exit_guest_flush_rsb)) != 0) 419 goto done; 420 } 421 422 /* link pointer */ 423 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) 424 goto done; 425 done: 426 VMCLEAR(vmcs); 427 return (error); 428 } 429 430 #ifdef BHYVE_SNAPSHOT 431 int 432 vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val) 433 { 434 int error; 435 436 if (!running) 437 VMPTRLD(vmcs); 438 439 error = vmread(ident, val); 440 441 if (!running) 442 VMCLEAR(vmcs); 443 444 return (error); 445 } 446 447 int 448 vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val) 449 { 450 int error; 451 452 if (!running) 453 VMPTRLD(vmcs); 454 455 error = vmwrite(ident, val); 456 457 if (!running) 458 VMCLEAR(vmcs); 459 460 return (error); 461 } 462 463 int 464 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident, 465 struct vm_snapshot_meta *meta) 466 { 467 int ret; 468 uint64_t val; 469 470 if (meta->op == VM_SNAPSHOT_SAVE) { 471 ret = vmcs_getreg(vmcs, running, ident, &val); 472 if (ret != 0) 473 goto done; 474 475 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 476 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 477 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 478 479 ret = vmcs_setreg(vmcs, running, ident, val); 480 if (ret != 0) 481 goto done; 482 } else { 483 ret = EINVAL; 484 goto done; 485 } 486 487 done: 488 return (ret); 489 } 490 491 int 492 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg, 493 struct vm_snapshot_meta *meta) 494 { 495 int ret; 496 struct seg_desc desc; 497 498 if (meta->op == VM_SNAPSHOT_SAVE) { 499 ret = vmcs_getdesc(vmcs, running, seg, &desc); 500 if (ret != 0) 501 goto done; 502 503 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 504 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 505 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 506 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 507 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); 508 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); 509 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); 510 511 ret = vmcs_setdesc(vmcs, running, seg, &desc); 512 if (ret != 0) 513 goto done; 514 } else { 515 ret = EINVAL; 516 goto done; 517 } 518 519 done: 520 return (ret); 521 } 522 523 int 524 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident, 525 struct vm_snapshot_meta *meta) 526 { 527 int ret; 528 uint64_t val; 529 530 if (meta->op == VM_SNAPSHOT_SAVE) { 531 ret = vmcs_getany(vmcs, running, ident, &val); 532 if (ret != 0) 533 goto done; 534 535 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 536 } else if (meta->op == VM_SNAPSHOT_RESTORE) { 537 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done); 538 539 ret = vmcs_setany(vmcs, running, ident, val); 540 if (ret != 0) 541 goto done; 542 } else { 543 ret = EINVAL; 544 goto done; 545 } 546 547 done: 548 return (ret); 549 } 550 #endif 551 552 #ifdef DDB 553 extern int vmxon_enabled[]; 554 555 DB_SHOW_COMMAND(vmcs, db_show_vmcs) 556 { 557 uint64_t cur_vmcs, val; 558 uint32_t exit; 559 560 if (!vmxon_enabled[curcpu]) { 561 db_printf("VMX not enabled\n"); 562 return; 563 } 564 565 if (have_addr) { 566 db_printf("Only current VMCS supported\n"); 567 return; 568 } 569 570 vmptrst(&cur_vmcs); 571 if (cur_vmcs == VMCS_INITIAL) { 572 db_printf("No current VM context\n"); 573 return; 574 } 575 db_printf("VMCS: %jx\n", cur_vmcs); 576 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); 577 db_printf("Activity: "); 578 val = vmcs_read(VMCS_GUEST_ACTIVITY); 579 switch (val) { 580 case 0: 581 db_printf("Active"); 582 break; 583 case 1: 584 db_printf("HLT"); 585 break; 586 case 2: 587 db_printf("Shutdown"); 588 break; 589 case 3: 590 db_printf("Wait for SIPI"); 591 break; 592 default: 593 db_printf("Unknown: %#lx", val); 594 } 595 db_printf("\n"); 596 exit = vmcs_read(VMCS_EXIT_REASON); 597 if (exit & 0x80000000) 598 db_printf("Entry Failure Reason: %u\n", exit & 0xffff); 599 else 600 db_printf("Exit Reason: %u\n", exit & 0xffff); 601 db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); 602 db_printf("Guest Linear Address: %#lx\n", 603 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 604 switch (exit & 0x8000ffff) { 605 case EXIT_REASON_EXCEPTION: 606 case EXIT_REASON_EXT_INTR: 607 val = vmcs_read(VMCS_EXIT_INTR_INFO); 608 db_printf("Interrupt Type: "); 609 switch (val >> 8 & 0x7) { 610 case 0: 611 db_printf("external"); 612 break; 613 case 2: 614 db_printf("NMI"); 615 break; 616 case 3: 617 db_printf("HW exception"); 618 break; 619 case 4: 620 db_printf("SW exception"); 621 break; 622 default: 623 db_printf("?? %lu", val >> 8 & 0x7); 624 break; 625 } 626 db_printf(" Vector: %lu", val & 0xff); 627 if (val & 0x800) 628 db_printf(" Error Code: %lx", 629 vmcs_read(VMCS_EXIT_INTR_ERRCODE)); 630 db_printf("\n"); 631 break; 632 case EXIT_REASON_EPT_FAULT: 633 case EXIT_REASON_EPT_MISCONFIG: 634 db_printf("Guest Physical Address: %#lx\n", 635 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); 636 break; 637 } 638 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); 639 } 640 #endif 641