1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2023 Oxide Computer Company 14 */ 15 16 #include <stdio.h> 17 #include <unistd.h> 18 #include <stdlib.h> 19 #include <strings.h> 20 #include <assert.h> 21 #include <errno.h> 22 23 #include <sys/types.h> 24 #include <sys/segments.h> 25 #include <sys/psw.h> 26 #include <sys/controlregs.h> 27 #include <sys/sysmacros.h> 28 #include <sys/varargs.h> 29 #include <sys/debug.h> 30 #include <sys/mman.h> 31 32 #include <sys/vmm.h> 33 #include <sys/vmm_dev.h> 34 #include <vmmapi.h> 35 36 #include "in_guest.h" 37 38 39 #define PT_VALID 0x01 40 #define PT_WRITABLE 0x02 41 #define PT_WRITETHRU 0x08 42 #define PT_NOCACHE 0x10 43 #define PT_PAGESIZE 0x80 44 45 #define SEG_ACCESS_TYPE_MASK 0x1f 46 #define SEG_ACCESS_DPL_MASK 0x60 47 #define SEG_ACCESS_P (1 << 7) 48 #define SEG_ACCESS_AVL (1 << 12) 49 #define SEG_ACCESS_L (1 << 13) 50 #define SEG_ACCESS_D (1 << 14) 51 #define SEG_ACCESS_G (1 << 15) 52 #define SEG_ACCESS_UNUSABLE (1 << 16) 53 54 55 /* 56 * Keep the test name and VM context around so the consumer is not required to 57 * pass either of them to us for subsequent test-related operations after the 58 * initialization has been performed. 59 * 60 * The test code is not designed to be reentrant at this point. 61 */ 62 static struct vmctx *test_vmctx = NULL; 63 static const char *test_name = NULL; 64 65 static uint64_t test_msg_addr = 0; 66 67 static int 68 setup_rom(struct vmctx *ctx) 69 { 70 const size_t seg_sz = 0x1000; 71 const uintptr_t seg_addr = MEM_LOC_ROM; 72 const int fd = vm_get_device_fd(ctx); 73 int err; 74 75 struct vm_memseg memseg = { 76 .segid = VM_BOOTROM, 77 .len = 0x1000, 78 }; 79 (void) strlcpy(memseg.name, "testrom", sizeof (memseg.name)); 80 err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg); 81 if (err != 0) { 82 return (err); 83 } 84 err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz, 85 PROT_READ | PROT_EXEC); 86 return (err); 87 } 88 89 static void 90 populate_identity_table(struct vmctx *ctx) 91 { 92 uint64_t gpa, pte_loc; 93 94 /* Set up 2MiB PTEs for everything up through 0xffffffff */ 95 for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M; 96 gpa < 0x100000000; 97 pte_loc += PAGE_SIZE) { 98 uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE); 99 100 for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) { 101 *ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE; 102 /* Make traditional MMIO space uncachable */ 103 if (gpa >= 0xc0000000) { 104 *ptep |= PT_WRITETHRU | PT_NOCACHE; 105 } 106 } 107 } 108 assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G); 109 110 uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE); 111 pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE; 112 pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE; 113 pdep[2] = 114 (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 115 pdep[3] = 116 (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 117 118 pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE); 119 pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE; 120 } 121 122 static void 123 populate_desc_tables(struct vmctx *ctx) 124 { 125 126 } 127 128 void 129 test_cleanup(bool is_failure) 130 { 131 if (test_vmctx != NULL) { 132 bool keep_on_fail = false; 133 134 const char *keep_var; 135 if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) { 136 if (strlen(keep_var) != 0 && 137 strcmp(keep_var, "0") != 0) { 138 keep_on_fail = true; 139 } 140 } 141 142 /* 143 * Destroy the instance unless the test failed and it was 144 * requested that we keep it around. 145 */ 146 if (!is_failure || !keep_on_fail) { 147 vm_destroy(test_vmctx); 148 } 149 test_name = NULL; 150 test_vmctx = NULL; 151 } 152 } 153 154 static void fail_finish(void) 155 { 156 assert(test_name != NULL); 157 (void) printf("FAIL %s\n", test_name); 158 159 test_cleanup(true); 160 exit(EXIT_FAILURE); 161 } 162 163 void 164 test_fail(void) 165 { 166 fail_finish(); 167 } 168 169 void 170 test_fail_errno(int err, const char *msg) 171 { 172 const char *err_str = strerror(err); 173 174 (void) fprintf(stderr, "%s: %s\n", msg, err_str); 175 fail_finish(); 176 } 177 178 void 179 test_fail_msg(const char *fmt, ...) 180 { 181 va_list ap; 182 183 va_start(ap, fmt); 184 (void) vfprintf(stderr, fmt, ap); 185 186 fail_finish(); 187 } 188 189 void 190 test_fail_vmexit(const struct vm_exit *vexit) 191 { 192 const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n"; 193 194 switch (vexit->exitcode) { 195 case VM_EXITCODE_INOUT: 196 (void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip); 197 (void) fprintf(stderr, 198 "\teax: %08x\n" 199 "\tport: %04x\n" 200 "\tbytes: %u\n" 201 "\tflags: %x\n", 202 vexit->u.inout.eax, 203 vexit->u.inout.port, 204 vexit->u.inout.bytes, 205 vexit->u.inout.flags); 206 break; 207 case VM_EXITCODE_RDMSR: 208 (void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip); 209 (void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code); 210 break; 211 case VM_EXITCODE_WRMSR: 212 (void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip); 213 (void) fprintf(stderr, 214 "\tcode: %08x\n" 215 "\twval: %016lx\n", 216 vexit->u.msr.code, vexit->u.msr.wval); 217 break; 218 case VM_EXITCODE_MMIO: 219 (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); 220 (void) fprintf(stderr, 221 "\tbytes: %u\n" 222 "\ttype: %s\n" 223 "\tgpa: %x\n" 224 "\tdata: %016x\n", 225 vexit->u.mmio.bytes, 226 vexit->u.mmio.read == 0 ? "write" : "read", 227 vexit->u.mmio.gpa, 228 vexit->u.mmio.data); 229 break; 230 case VM_EXITCODE_VMX: 231 (void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip); 232 (void) fprintf(stderr, 233 "\tstatus: %x\n" 234 "\treason: %x\n" 235 "\tqualification: %lx\n" 236 "\tinst_type: %x\n" 237 "\tinst_error: %x\n", 238 vexit->u.vmx.status, 239 vexit->u.vmx.exit_reason, 240 vexit->u.vmx.exit_qualification, 241 vexit->u.vmx.inst_type, 242 vexit->u.vmx.inst_error); 243 break; 244 case VM_EXITCODE_SVM: 245 (void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip); 246 break; 247 case VM_EXITCODE_INST_EMUL: 248 (void) fprintf(stderr, hdr_fmt, "instruction emulation", 249 vexit->rip); 250 const uint_t len = vexit->u.inst_emul.num_valid > 0 ? 251 vexit->u.inst_emul.num_valid : 15; 252 (void) fprintf(stderr, "\tinstruction bytes: ["); 253 for (uint_t i = 0; i < len; i++) { 254 (void) fprintf(stderr, "%s%02x", 255 i == 0 ? "" : ", ", 256 vexit->u.inst_emul.inst[i]); 257 } 258 (void) fprintf(stderr, "]\n"); 259 break; 260 case VM_EXITCODE_SUSPENDED: 261 (void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip); 262 switch (vexit->u.suspended.how) { 263 case VM_SUSPEND_RESET: 264 (void) fprintf(stderr, "\thow: reset"); 265 break; 266 case VM_SUSPEND_POWEROFF: 267 (void) fprintf(stderr, "\thow: poweroff"); 268 break; 269 case VM_SUSPEND_HALT: 270 (void) fprintf(stderr, "\thow: halt"); 271 break; 272 case VM_SUSPEND_TRIPLEFAULT: 273 (void) fprintf(stderr, "\thow: triple-fault"); 274 break; 275 default: 276 (void) fprintf(stderr, "\thow: unknown - %d", 277 vexit->u.suspended.how); 278 break; 279 } 280 break; 281 default: 282 (void) fprintf(stderr, "Unexpected code %d exit:\n" 283 "\t%%rip: %lx\n", vexit->exitcode, vexit->rip); 284 break; 285 } 286 fail_finish(); 287 } 288 289 void 290 test_pass(void) 291 { 292 assert(test_name != NULL); 293 (void) printf("PASS %s\n", test_name); 294 test_cleanup(false); 295 exit(EXIT_SUCCESS); 296 } 297 298 const char * 299 test_msg_get(struct vmctx *ctx) 300 { 301 /* Disregard if the message address is still NULL */ 302 const uint64_t msg_addr = test_msg_addr; 303 if (msg_addr == 0) { 304 return (NULL); 305 } 306 307 /* 308 * We want to try to map up to one page after the specified message 309 * address, keeping in mind the end of lowmem. (The payload, and 310 * thus message, is assumed to be in lowmem at this time.) 311 */ 312 const uint64_t lowmem_end = vm_get_lowmem_size(ctx); 313 const uint64_t msg_map_end = MIN(msg_addr + PAGE_SIZE, lowmem_end); 314 315 if (msg_map_end >= lowmem_end || msg_map_end <= msg_addr) { 316 return (NULL); 317 } 318 const uint64_t max_msg_len = msg_map_end - msg_addr; 319 320 /* 321 * Get the mapping to that guest memory. This assumes that the payload 322 * has provided a guest-physical address to us. 323 */ 324 const char *result = vm_map_gpa(ctx, msg_addr, max_msg_len); 325 if (result == NULL) { 326 return (NULL); 327 } 328 329 /* Demand a NUL-terminated string shorter than the map limit */ 330 if (strnlen(result, max_msg_len) >= max_msg_len) { 331 return (NULL); 332 } 333 334 return (result); 335 } 336 337 void 338 test_msg_print(struct vmctx *ctx) 339 { 340 const char *payload_msg = test_msg_get(ctx); 341 342 if (payload_msg != NULL) { 343 (void) fprintf(stderr, "MSG: %s\n", payload_msg); 344 } 345 } 346 347 static int 348 load_payload(struct vmctx *ctx) 349 { 350 extern uint8_t payload_data; 351 extern uint32_t payload_size; 352 353 const uint32_t len = payload_size; 354 const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD); 355 356 if (len > cap) { 357 test_fail_msg("Payload size %u > capacity %u\n", len, cap); 358 } 359 360 const size_t map_len = P2ROUNDUP(len, PAGE_SIZE); 361 void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len); 362 bcopy(&payload_data, outp, len); 363 364 return (0); 365 } 366 367 struct vmctx * 368 test_initialize(const char *tname) 369 { 370 return (test_initialize_flags(tname, 0)); 371 } 372 373 struct vmctx * 374 test_initialize_flags(const char *tname, uint64_t create_flags) 375 { 376 char vm_name[VM_MAX_NAMELEN]; 377 int err; 378 struct vmctx *ctx; 379 380 assert(test_vmctx == NULL); 381 assert(test_name == NULL); 382 383 test_name = strdup(tname); 384 (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", 385 test_name, getpid()); 386 387 err = vm_create(vm_name, create_flags); 388 if (err != 0) { 389 test_fail_errno(err, "Could not create VM"); 390 } 391 392 ctx = vm_open(vm_name); 393 if (ctx == NULL) { 394 test_fail_errno(errno, "Could not open VM"); 395 } 396 test_vmctx = ctx; 397 398 err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL); 399 if (err != 0) { 400 test_fail_errno(err, "Could not set up VM memory"); 401 } 402 403 err = setup_rom(ctx); 404 if (err != 0) { 405 test_fail_errno(err, "Could not set up VM ROM segment"); 406 } 407 408 populate_identity_table(ctx); 409 populate_desc_tables(ctx); 410 411 err = load_payload(ctx); 412 if (err != 0) { 413 test_fail_errno(err, "Could not load payload"); 414 } 415 416 return (ctx); 417 } 418 419 void 420 test_reinitialize(struct vmctx *ctx, uint64_t flags) 421 { 422 int err; 423 424 if ((err = vm_reinit(ctx, flags)) != 0) { 425 test_fail_errno(err, "Could not reinit VM"); 426 } 427 428 /* Reload tables and payload in case they were altered */ 429 430 populate_identity_table(ctx); 431 populate_desc_tables(ctx); 432 433 err = load_payload(ctx); 434 if (err != 0) { 435 test_fail_errno(err, "Could not load payload"); 436 } 437 } 438 439 int 440 test_setup_vcpu(struct vcpu *vcpu, uint64_t rip, uint64_t rsp) 441 { 442 int err; 443 444 err = vm_activate_cpu(vcpu); 445 if (err != 0 && err != EBUSY) { 446 return (err); 447 } 448 449 /* 450 * Granularity bit important here for VMX validity: 451 * "If any bit in the limit field in the range 31:20 is 1, G must be 1" 452 */ 453 err = vm_set_desc(vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX, 454 SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G); 455 if (err != 0) { 456 return (err); 457 } 458 459 err = vm_set_desc(vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX, 460 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L | 461 SEG_ACCESS_D | SEG_ACCESS_G); 462 if (err != 0) { 463 return (err); 464 } 465 466 err = vm_set_desc(vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX, 467 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G); 468 if (err != 0) { 469 return (err); 470 } 471 472 /* 473 * While SVM will happilly run with an otherwise unusable TR, VMX 474 * includes it among its entry checks. 475 */ 476 err = vm_set_desc(vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff, 477 SDT_SYSTSSBSY | SEG_ACCESS_P); 478 if (err != 0) { 479 return (err); 480 } 481 err = vm_set_desc(vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0); 482 if (err != 0) { 483 return (err); 484 } 485 err = vm_set_desc(vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0); 486 if (err != 0) { 487 return (err); 488 } 489 490 /* Mark unused segments as explicitly unusable (for VMX) */ 491 const int unsable_segs[] = { 492 VM_REG_GUEST_ES, 493 VM_REG_GUEST_FS, 494 VM_REG_GUEST_GS, 495 VM_REG_GUEST_LDTR, 496 }; 497 for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) { 498 err = vm_set_desc(vcpu, unsable_segs[i], 0, 0, 499 SEG_ACCESS_UNUSABLE); 500 if (err != 0) { 501 return (err); 502 } 503 } 504 505 /* Place CPU directly in long mode */ 506 const int regnums[] = { 507 VM_REG_GUEST_CR0, 508 VM_REG_GUEST_CR3, 509 VM_REG_GUEST_CR4, 510 VM_REG_GUEST_EFER, 511 VM_REG_GUEST_RFLAGS, 512 VM_REG_GUEST_RIP, 513 VM_REG_GUEST_RSP, 514 VM_REG_GUEST_CS, 515 VM_REG_GUEST_SS, 516 VM_REG_GUEST_DS, 517 VM_REG_GUEST_TR, 518 }; 519 uint64_t regvals[] = { 520 CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS | 521 CR0_MP | CR0_PE, 522 MEM_LOC_PAGE_TABLE_512G, 523 CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE, 524 AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE, 525 /* start with interrupts disabled */ 526 PS_MB1, 527 rip, 528 rsp, 529 (GDT_KCODE << 3), 530 (GDT_KDATA << 3), 531 (GDT_KDATA << 3), 532 (GDT_KTSS << 3), 533 }; 534 assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals)); 535 536 err = vm_set_register_set(vcpu, ARRAY_SIZE(regnums), regnums, 537 regvals); 538 if (err != 0) { 539 return (err); 540 } 541 542 err = vm_set_run_state(vcpu, VRS_RUN, 0); 543 if (err != 0) { 544 return (err); 545 } 546 547 return (0); 548 } 549 550 static enum vm_exit_kind 551 which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) 552 { 553 const struct vm_inout *inout = &vexit->u.inout; 554 555 switch (vexit->exitcode) { 556 case VM_EXITCODE_BOGUS: 557 bzero(ventry, sizeof (ventry)); 558 return (VEK_REENTR); 559 case VM_EXITCODE_INOUT: 560 if (inout->port == IOP_TEST_RESULT && 561 (inout->flags & INOUT_IN) == 0) { 562 if (inout->eax == TEST_RESULT_PASS) { 563 return (VEK_TEST_PASS); 564 } else { 565 return (VEK_TEST_FAIL); 566 } 567 } 568 if (inout->port == IOP_TEST_MSG && 569 (inout->flags & INOUT_IN) == 0 && 570 inout->bytes == 4) { 571 test_msg_addr = inout->eax; 572 ventry_fulfill_inout(vexit, ventry, 0); 573 return (VEK_TEST_MSG); 574 } 575 break; 576 default: 577 break; 578 } 579 return (VEK_UNHANDLED); 580 } 581 582 enum vm_exit_kind 583 test_run_vcpu(struct vcpu *vcpu, struct vm_entry *ventry, struct vm_exit *vexit) 584 { 585 int err; 586 587 err = vm_run(vcpu, ventry, vexit); 588 if (err != 0) { 589 test_fail_errno(err, "Failure during vcpu entry"); 590 } 591 592 return (which_exit_kind(ventry, vexit)); 593 } 594 595 void 596 ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry, 597 uint32_t data) 598 { 599 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT); 600 601 ventry->cmd = VEC_FULFILL_INOUT; 602 bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout)); 603 if ((ventry->u.inout.flags & INOUT_IN) != 0) { 604 ventry->u.inout.eax = data; 605 } 606 } 607 608 void 609 ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry, 610 uint64_t data) 611 { 612 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO); 613 614 ventry->cmd = VEC_FULFILL_MMIO; 615 bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio)); 616 if (ventry->u.mmio.read != 0) { 617 ventry->u.mmio.data = data; 618 } 619 } 620 621 bool 622 vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port, 623 uint_t len, uint32_t *valp) 624 { 625 if (vexit->exitcode != VM_EXITCODE_INOUT) { 626 return (false); 627 } 628 629 const uint_t flag = is_read ? INOUT_IN : 0; 630 if (vexit->u.inout.port != port || 631 vexit->u.inout.bytes != len || 632 (vexit->u.inout.flags & INOUT_IN) != flag) { 633 return (false); 634 } 635 636 if (!is_read && valp != NULL) { 637 *valp = vexit->u.inout.eax; 638 } 639 return (true); 640 } 641 642 bool 643 vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr, 644 uint_t len, uint64_t *valp) 645 { 646 if (vexit->exitcode != VM_EXITCODE_MMIO) { 647 return (false); 648 } 649 650 if (vexit->u.mmio.gpa != addr || 651 vexit->u.mmio.bytes != len || 652 (vexit->u.mmio.read != 0) != is_read) { 653 return (false); 654 } 655 656 if (!is_read && valp != NULL) { 657 *valp = vexit->u.mmio.data; 658 } 659 return (true); 660 } 661