1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 #include <stdio.h> 17 #include <unistd.h> 18 #include <stdlib.h> 19 #include <strings.h> 20 #include <assert.h> 21 #include <errno.h> 22 23 #include <sys/types.h> 24 #include <sys/segments.h> 25 #include <sys/psw.h> 26 #include <sys/controlregs.h> 27 #include <sys/sysmacros.h> 28 #include <sys/varargs.h> 29 #include <sys/debug.h> 30 #include <sys/mman.h> 31 32 #include <sys/vmm.h> 33 #include <sys/vmm_dev.h> 34 #include <vmmapi.h> 35 36 #include "in_guest.h" 37 38 39 #define PT_VALID 0x01 40 #define PT_WRITABLE 0x02 41 #define PT_WRITETHRU 0x08 42 #define PT_NOCACHE 0x10 43 #define PT_PAGESIZE 0x80 44 45 #define SEG_ACCESS_TYPE_MASK 0x1f 46 #define SEG_ACCESS_DPL_MASK 0x60 47 #define SEG_ACCESS_P (1 << 7) 48 #define SEG_ACCESS_AVL (1 << 12) 49 #define SEG_ACCESS_L (1 << 13) 50 #define SEG_ACCESS_D (1 << 14) 51 #define SEG_ACCESS_G (1 << 15) 52 #define SEG_ACCESS_UNUSABLE (1 << 16) 53 54 55 /* 56 * Keep the test name and VM context around so the consumer is not required to 57 * pass either of them to us for subsequent test-related operations after the 58 * initialization has been performed. 59 * 60 * The test code is not designed to be reentrant at this point. 61 */ 62 static struct vmctx *test_vmctx = NULL; 63 static const char *test_name = NULL; 64 65 static uint64_t test_msg_addr = 0; 66 67 static int 68 setup_rom(struct vmctx *ctx) 69 { 70 const size_t seg_sz = 0x1000; 71 const uintptr_t seg_addr = MEM_LOC_ROM; 72 const int fd = vm_get_device_fd(ctx); 73 int err; 74 75 struct vm_memseg memseg = { 76 .segid = VM_BOOTROM, 77 .len = 0x1000, 78 }; 79 (void) strlcpy(memseg.name, "testrom", sizeof (memseg.name)); 80 err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg); 81 if (err != 0) { 82 return (err); 83 } 84 err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz, 85 PROT_READ | PROT_EXEC); 86 return (err); 87 } 88 89 static void 90 populate_identity_table(struct vmctx *ctx) 91 { 92 uint64_t gpa, pte_loc; 93 94 /* Set up 2MiB PTEs for everything up through 0xffffffff */ 95 for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M; 96 gpa < 0x100000000; 97 pte_loc += PAGE_SIZE) { 98 uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE); 99 100 for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) { 101 *ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE; 102 /* Make traditional MMIO space uncachable */ 103 if (gpa >= 0xc0000000) { 104 *ptep |= PT_WRITETHRU | PT_NOCACHE; 105 } 106 } 107 } 108 assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G); 109 110 uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE); 111 pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE; 112 pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE; 113 pdep[2] = 114 (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 115 pdep[3] = 116 (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 117 118 pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE); 119 pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE; 120 } 121 122 static void 123 populate_desc_tables(struct vmctx *ctx) 124 { 125 126 } 127 128 void 129 test_cleanup(bool is_failure) 130 { 131 if (test_vmctx != NULL) { 132 bool keep_on_fail = false; 133 134 const char *keep_var; 135 if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) { 136 if (strlen(keep_var) != 0 && 137 strcmp(keep_var, "0") != 0) { 138 keep_on_fail = true; 139 } 140 } 141 142 /* 143 * Destroy the instance unless the test failed and it was 144 * requested that we keep it around. 145 */ 146 if (!is_failure || !keep_on_fail) { 147 vm_destroy(test_vmctx); 148 } 149 test_name = NULL; 150 test_vmctx = NULL; 151 } 152 } 153 154 static void fail_finish(void) 155 { 156 assert(test_name != NULL); 157 (void) printf("FAIL %s\n", test_name); 158 159 test_cleanup(true); 160 exit(EXIT_FAILURE); 161 } 162 163 void 164 test_fail(void) 165 { 166 fail_finish(); 167 } 168 169 void 170 test_fail_errno(int err, const char *msg) 171 { 172 const char *err_str = strerror(err); 173 174 (void) fprintf(stderr, "%s: %s\n", msg, err_str); 175 fail_finish(); 176 } 177 178 void 179 test_fail_msg(const char *fmt, ...) 180 { 181 va_list ap; 182 183 va_start(ap, fmt); 184 (void) vfprintf(stderr, fmt, ap); 185 186 fail_finish(); 187 } 188 189 void 190 test_fail_vmexit(const struct vm_exit *vexit) 191 { 192 const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n"; 193 194 switch (vexit->exitcode) { 195 case VM_EXITCODE_INOUT: 196 (void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip); 197 (void) fprintf(stderr, 198 "\teax: %08x\n" 199 "\tport: %04x\n" 200 "\tbytes: %u\n" 201 "\tflags: %x\n", 202 vexit->u.inout.eax, 203 vexit->u.inout.port, 204 vexit->u.inout.bytes, 205 vexit->u.inout.flags); 206 break; 207 case VM_EXITCODE_RDMSR: 208 (void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip); 209 (void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code); 210 break; 211 case VM_EXITCODE_WRMSR: 212 (void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip); 213 (void) fprintf(stderr, 214 "\tcode: %08x\n" 215 "\twval: %016lx\n", 216 vexit->u.msr.code, vexit->u.msr.wval); 217 break; 218 case VM_EXITCODE_MMIO: 219 (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); 220 (void) fprintf(stderr, 221 "\tbytes: %u\n" 222 "\ttype: %s\n" 223 "\tgpa: %x\n" 224 "\tdata: %016x\n", 225 vexit->u.mmio.bytes, 226 vexit->u.mmio.read == 0 ? "write" : "read", 227 vexit->u.mmio.gpa, 228 vexit->u.mmio.data); 229 break; 230 case VM_EXITCODE_VMX: 231 (void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip); 232 (void) fprintf(stderr, 233 "\tstatus: %x\n" 234 "\treason: %x\n" 235 "\tqualification: %lx\n" 236 "\tinst_type: %x\n" 237 "\tinst_error: %x\n", 238 vexit->u.vmx.status, 239 vexit->u.vmx.exit_reason, 240 vexit->u.vmx.exit_qualification, 241 vexit->u.vmx.inst_type, 242 vexit->u.vmx.inst_error); 243 break; 244 case VM_EXITCODE_SVM: 245 (void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip); 246 break; 247 case VM_EXITCODE_INST_EMUL: 248 (void) fprintf(stderr, hdr_fmt, "instruction emulation", 249 vexit->rip); 250 const uint_t len = vexit->u.inst_emul.num_valid > 0 ? 251 vexit->u.inst_emul.num_valid : 15; 252 (void) fprintf(stderr, "\tinstruction bytes: ["); 253 for (uint_t i = 0; i < len; i++) { 254 (void) fprintf(stderr, "%s%02x", 255 i == 0 ? "" : ", ", 256 vexit->u.inst_emul.inst[i]); 257 } 258 (void) fprintf(stderr, "]\n"); 259 break; 260 case VM_EXITCODE_SUSPENDED: 261 (void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip); 262 switch (vexit->u.suspended.how) { 263 case VM_SUSPEND_RESET: 264 (void) fprintf(stderr, "\thow: reset"); 265 break; 266 case VM_SUSPEND_POWEROFF: 267 (void) fprintf(stderr, "\thow: poweroff"); 268 break; 269 case VM_SUSPEND_HALT: 270 (void) fprintf(stderr, "\thow: halt"); 271 break; 272 case VM_SUSPEND_TRIPLEFAULT: 273 (void) fprintf(stderr, "\thow: triple-fault"); 274 break; 275 default: 276 (void) fprintf(stderr, "\thow: unknown - %d", 277 vexit->u.suspended.how); 278 break; 279 } 280 break; 281 default: 282 (void) fprintf(stderr, "Unexpected code %d exit:\n" 283 "\t%%rip: %lx\n", vexit->exitcode, vexit->rip); 284 break; 285 } 286 fail_finish(); 287 } 288 289 void 290 test_pass(void) 291 { 292 assert(test_name != NULL); 293 (void) printf("PASS %s\n", test_name); 294 test_cleanup(false); 295 exit(EXIT_SUCCESS); 296 } 297 298 const char * 299 test_msg_get(struct vmctx *ctx) 300 { 301 /* Disregard if the message address is still NULL */ 302 const uint64_t msg_addr = test_msg_addr; 303 if (msg_addr == 0) { 304 return (NULL); 305 } 306 307 /* 308 * We want to try to map up to one page after the specified message 309 * address, keeping in mind the end of lowmem. (The payload, and 310 * thus message, is assumed to be in lowmem at this time.) 311 */ 312 const uint64_t lowmem_end = vm_get_lowmem_size(ctx); 313 const uint64_t msg_map_end = MIN(msg_addr + PAGE_SIZE, lowmem_end); 314 315 if (msg_map_end >= lowmem_end || msg_map_end <= msg_addr) { 316 return (NULL); 317 } 318 const uint64_t max_msg_len = msg_map_end - msg_addr; 319 320 /* 321 * Get the mapping to that guest memory. This assumes that the payload 322 * has provided a guest-physical address to us. 323 */ 324 const char *result = vm_map_gpa(ctx, msg_addr, max_msg_len); 325 if (result == NULL) { 326 return (NULL); 327 } 328 329 /* Demand a NUL-terminated string shorter than the map limit */ 330 if (strnlen(result, max_msg_len) >= max_msg_len) { 331 return (NULL); 332 } 333 334 return (result); 335 } 336 337 void 338 test_msg_print(struct vmctx *ctx) 339 { 340 const char *payload_msg = test_msg_get(ctx); 341 342 if (payload_msg != NULL) { 343 (void) fprintf(stderr, "MSG: %s\n", payload_msg); 344 } 345 } 346 347 static int 348 load_payload(struct vmctx *ctx) 349 { 350 extern uint8_t payload_data; 351 extern uint32_t payload_size; 352 353 const uint32_t len = payload_size; 354 const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD); 355 356 if (len > cap) { 357 test_fail_msg("Payload size %u > capacity %u\n", len, cap); 358 } 359 360 const size_t map_len = P2ROUNDUP(len, PAGE_SIZE); 361 void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len); 362 bcopy(&payload_data, outp, len); 363 364 return (0); 365 } 366 367 static struct vmctx * 368 test_initialize_opts(const char *tname, uint64_t create_flags, bool is_plain) 369 { 370 char vm_name[VM_MAX_NAMELEN]; 371 int err; 372 struct vmctx *ctx; 373 374 assert(test_vmctx == NULL); 375 assert(test_name == NULL); 376 377 test_name = strdup(tname); 378 (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", 379 test_name, getpid()); 380 381 err = vm_create(vm_name, create_flags); 382 if (err != 0) { 383 test_fail_errno(err, "Could not create VM"); 384 } 385 386 ctx = vm_open(vm_name); 387 if (ctx == NULL) { 388 test_fail_errno(errno, "Could not open VM"); 389 } 390 test_vmctx = ctx; 391 392 /* No further setup required for a "plain" instance */ 393 if (is_plain) { 394 return (ctx); 395 } 396 397 err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL); 398 if (err != 0) { 399 test_fail_errno(err, "Could not set up VM memory"); 400 } 401 402 err = setup_rom(ctx); 403 if (err != 0) { 404 test_fail_errno(err, "Could not set up VM ROM segment"); 405 } 406 407 populate_identity_table(ctx); 408 populate_desc_tables(ctx); 409 410 err = load_payload(ctx); 411 if (err != 0) { 412 test_fail_errno(err, "Could not load payload"); 413 } 414 415 return (ctx); 416 } 417 418 struct vmctx * 419 test_initialize(const char *tname) 420 { 421 return (test_initialize_opts(tname, 0, false)); 422 } 423 424 struct vmctx * 425 test_initialize_plain(const char *tname) 426 { 427 return (test_initialize_opts(tname, 0, true)); 428 } 429 430 struct vmctx * 431 test_initialize_flags(const char *tname, uint64_t create_flags) 432 { 433 return (test_initialize_opts(tname, create_flags, false)); 434 } 435 436 void 437 test_reinitialize(struct vmctx *ctx, uint64_t flags) 438 { 439 int err; 440 441 if ((err = vm_reinit(ctx, flags)) != 0) { 442 test_fail_errno(err, "Could not reinit VM"); 443 } 444 445 /* Reload tables and payload in case they were altered */ 446 447 populate_identity_table(ctx); 448 populate_desc_tables(ctx); 449 450 err = load_payload(ctx); 451 if (err != 0) { 452 test_fail_errno(err, "Could not load payload"); 453 } 454 } 455 456 int 457 test_setup_vcpu(struct vcpu *vcpu, uint64_t rip, uint64_t rsp) 458 { 459 int err; 460 461 err = vm_activate_cpu(vcpu); 462 if (err != 0 && err != EBUSY) { 463 return (err); 464 } 465 466 /* 467 * Granularity bit important here for VMX validity: 468 * "If any bit in the limit field in the range 31:20 is 1, G must be 1" 469 */ 470 err = vm_set_desc(vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX, 471 SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G); 472 if (err != 0) { 473 return (err); 474 } 475 476 err = vm_set_desc(vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX, 477 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L | 478 SEG_ACCESS_D | SEG_ACCESS_G); 479 if (err != 0) { 480 return (err); 481 } 482 483 err = vm_set_desc(vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX, 484 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G); 485 if (err != 0) { 486 return (err); 487 } 488 489 /* 490 * While SVM will happilly run with an otherwise unusable TR, VMX 491 * includes it among its entry checks. 492 */ 493 err = vm_set_desc(vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff, 494 SDT_SYSTSSBSY | SEG_ACCESS_P); 495 if (err != 0) { 496 return (err); 497 } 498 err = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0); 499 if (err != 0) { 500 return (err); 501 } 502 err = vm_set_desc(vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0); 503 if (err != 0) { 504 return (err); 505 } 506 507 /* Mark unused segments as explicitly unusable (for VMX) */ 508 const int unsable_segs[] = { 509 VM_REG_GUEST_ES, 510 VM_REG_GUEST_FS, 511 VM_REG_GUEST_GS, 512 VM_REG_GUEST_LDTR, 513 }; 514 for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) { 515 err = vm_set_desc(vcpu, unsable_segs[i], 0, 0, 516 SEG_ACCESS_UNUSABLE); 517 if (err != 0) { 518 return (err); 519 } 520 } 521 522 /* Place CPU directly in long mode */ 523 const int regnums[] = { 524 VM_REG_GUEST_CR0, 525 VM_REG_GUEST_CR3, 526 VM_REG_GUEST_CR4, 527 VM_REG_GUEST_EFER, 528 VM_REG_GUEST_RFLAGS, 529 VM_REG_GUEST_RIP, 530 VM_REG_GUEST_RSP, 531 VM_REG_GUEST_CS, 532 VM_REG_GUEST_SS, 533 VM_REG_GUEST_DS, 534 VM_REG_GUEST_TR, 535 }; 536 uint64_t regvals[] = { 537 CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS | 538 CR0_MP | CR0_PE, 539 MEM_LOC_PAGE_TABLE_512G, 540 CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE, 541 AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE, 542 /* start with interrupts disabled */ 543 PS_MB1, 544 rip, 545 rsp, 546 (GDT_KCODE << 3), 547 (GDT_KDATA << 3), 548 (GDT_KDATA << 3), 549 (GDT_KTSS << 3), 550 }; 551 assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals)); 552 553 err = vm_set_register_set(vcpu, ARRAY_SIZE(regnums), regnums, 554 regvals); 555 if (err != 0) { 556 return (err); 557 } 558 559 err = vm_set_run_state(vcpu, VRS_RUN, 0); 560 if (err != 0) { 561 return (err); 562 } 563 564 return (0); 565 } 566 567 static enum vm_exit_kind 568 which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) 569 { 570 const struct vm_inout *inout = &vexit->u.inout; 571 572 switch (vexit->exitcode) { 573 case VM_EXITCODE_BOGUS: 574 bzero(ventry, sizeof (ventry)); 575 return (VEK_REENTR); 576 case VM_EXITCODE_INOUT: 577 if (inout->port == IOP_TEST_RESULT && 578 (inout->flags & INOUT_IN) == 0) { 579 if (inout->eax == TEST_RESULT_PASS) { 580 return (VEK_TEST_PASS); 581 } else { 582 return (VEK_TEST_FAIL); 583 } 584 } 585 if (inout->port == IOP_TEST_MSG && 586 (inout->flags & INOUT_IN) == 0 && 587 inout->bytes == 4) { 588 test_msg_addr = inout->eax; 589 ventry_fulfill_inout(vexit, ventry, 0); 590 return (VEK_TEST_MSG); 591 } 592 break; 593 default: 594 break; 595 } 596 return (VEK_UNHANDLED); 597 } 598 599 enum vm_exit_kind 600 test_run_vcpu(struct vcpu *vcpu, struct vm_entry *ventry, struct vm_exit *vexit) 601 { 602 int err; 603 604 err = vm_run(vcpu, ventry, vexit); 605 if (err != 0) { 606 test_fail_errno(err, "Failure during vcpu entry"); 607 } 608 609 return (which_exit_kind(ventry, vexit)); 610 } 611 612 void 613 ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry, 614 uint32_t data) 615 { 616 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT); 617 618 ventry->cmd = VEC_FULFILL_INOUT; 619 bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout)); 620 if ((ventry->u.inout.flags & INOUT_IN) != 0) { 621 ventry->u.inout.eax = data; 622 } 623 } 624 625 void 626 ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry, 627 uint64_t data) 628 { 629 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO); 630 631 ventry->cmd = VEC_FULFILL_MMIO; 632 bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio)); 633 if (ventry->u.mmio.read != 0) { 634 ventry->u.mmio.data = data; 635 } 636 } 637 638 bool 639 vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port, 640 uint_t len, uint32_t *valp) 641 { 642 if (vexit->exitcode != VM_EXITCODE_INOUT) { 643 return (false); 644 } 645 646 const uint_t flag = is_read ? INOUT_IN : 0; 647 if (vexit->u.inout.port != port || 648 vexit->u.inout.bytes != len || 649 (vexit->u.inout.flags & INOUT_IN) != flag) { 650 return (false); 651 } 652 653 if (!is_read && valp != NULL) { 654 *valp = vexit->u.inout.eax; 655 } 656 return (true); 657 } 658 659 bool 660 vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr, 661 uint_t len, uint64_t *valp) 662 { 663 if (vexit->exitcode != VM_EXITCODE_MMIO) { 664 return (false); 665 } 666 667 if (vexit->u.mmio.gpa != addr || 668 vexit->u.mmio.bytes != len || 669 (vexit->u.mmio.read != 0) != is_read) { 670 return (false); 671 } 672 673 if (!is_read && valp != NULL) { 674 *valp = vexit->u.mmio.data; 675 } 676 return (true); 677 } 678