1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2022 Oxide Computer Company 14 */ 15 16 #include <stdio.h> 17 #include <unistd.h> 18 #include <stdlib.h> 19 #include <strings.h> 20 #include <assert.h> 21 #include <errno.h> 22 23 #include <sys/types.h> 24 #include <sys/segments.h> 25 #include <sys/psw.h> 26 #include <sys/controlregs.h> 27 #include <sys/sysmacros.h> 28 #include <sys/varargs.h> 29 #include <sys/debug.h> 30 #include <sys/mman.h> 31 32 #include <sys/vmm.h> 33 #include <sys/vmm_dev.h> 34 #include <vmmapi.h> 35 36 #include "in_guest.h" 37 38 39 #define PT_VALID 0x01 40 #define PT_WRITABLE 0x02 41 #define PT_WRITETHRU 0x08 42 #define PT_NOCACHE 0x10 43 #define PT_PAGESIZE 0x80 44 45 #define SEG_ACCESS_TYPE_MASK 0x1f 46 #define SEG_ACCESS_DPL_MASK 0x60 47 #define SEG_ACCESS_P (1 << 7) 48 #define SEG_ACCESS_AVL (1 << 12) 49 #define SEG_ACCESS_L (1 << 13) 50 #define SEG_ACCESS_D (1 << 14) 51 #define SEG_ACCESS_G (1 << 15) 52 #define SEG_ACCESS_UNUSABLE (1 << 16) 53 54 55 /* 56 * Keep the test name and VM context around so the consumer is not required to 57 * pass either of them to us for subsequent test-related operations after the 58 * initialization has been performed. 59 * 60 * The test code is not designed to be reentrant at this point. 61 */ 62 static struct vmctx *test_vmctx = NULL; 63 static const char *test_name = NULL; 64 65 static int 66 setup_rom(struct vmctx *ctx) 67 { 68 const size_t seg_sz = 0x1000; 69 const uintptr_t seg_addr = MEM_LOC_ROM; 70 const int fd = vm_get_device_fd(ctx); 71 int err; 72 73 struct vm_memseg memseg = { 74 .segid = VM_BOOTROM, 75 .len = 0x1000, 76 }; 77 (void) strlcpy(memseg.name, "testrom", sizeof (memseg.name)); 78 err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg); 79 if (err != 0) { 80 return (err); 81 } 82 err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz, 83 PROT_READ | PROT_EXEC); 84 return (err); 85 } 86 87 static void 88 populate_identity_table(struct vmctx *ctx) 89 { 90 uint64_t gpa, pte_loc; 91 92 /* Set up 2MiB PTEs for everything up through 0xffffffff */ 93 for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M; 94 gpa < 0x100000000; 95 pte_loc += PAGE_SIZE) { 96 uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE); 97 98 for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) { 99 *ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE; 100 /* Make traditional MMIO space uncachable */ 101 if (gpa >= 0xc0000000) { 102 *ptep |= PT_WRITETHRU | PT_NOCACHE; 103 } 104 } 105 } 106 assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G); 107 108 uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE); 109 pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE; 110 pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE; 111 pdep[2] = 112 (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 113 pdep[3] = 114 (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE; 115 116 pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE); 117 pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE; 118 } 119 120 static void 121 populate_desc_tables(struct vmctx *ctx) 122 { 123 124 } 125 126 static void 127 test_cleanup(bool is_failure) 128 { 129 if (test_vmctx != NULL) { 130 bool keep_on_fail = false; 131 132 const char *keep_var; 133 if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) { 134 if (strlen(keep_var) != 0 && 135 strcmp(keep_var, "0") != 0) { 136 keep_on_fail = true; 137 } 138 } 139 140 /* 141 * Destroy the instance unless the test failed and it was 142 * requested that we keep it around. 143 */ 144 if (!is_failure || !keep_on_fail) { 145 vm_destroy(test_vmctx); 146 } 147 test_vmctx = NULL; 148 } 149 } 150 151 static void fail_finish(void) 152 { 153 assert(test_name != NULL); 154 (void) printf("FAIL %s\n", test_name); 155 156 test_cleanup(true); 157 exit(EXIT_FAILURE); 158 } 159 160 void 161 test_fail_errno(int err, const char *msg) 162 { 163 const char *err_str = strerror(err); 164 165 (void) fprintf(stderr, "%s: %s\n", msg, err_str); 166 fail_finish(); 167 } 168 169 void 170 test_fail_msg(const char *fmt, ...) 171 { 172 va_list ap; 173 174 va_start(ap, fmt); 175 (void) vfprintf(stderr, fmt, ap); 176 177 fail_finish(); 178 } 179 180 void 181 test_fail_vmexit(const struct vm_exit *vexit) 182 { 183 const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n"; 184 185 switch (vexit->exitcode) { 186 case VM_EXITCODE_INOUT: 187 (void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip); 188 (void) fprintf(stderr, 189 "\teax: %08x\n" 190 "\tport: %04x\n" 191 "\tbytes: %u\n" 192 "\tflags: %x\n", 193 vexit->u.inout.eax, 194 vexit->u.inout.port, 195 vexit->u.inout.bytes, 196 vexit->u.inout.flags); 197 break; 198 case VM_EXITCODE_RDMSR: 199 (void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip); 200 (void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code); 201 break; 202 case VM_EXITCODE_WRMSR: 203 (void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip); 204 (void) fprintf(stderr, 205 "\tcode: %08x\n" 206 "\twval: %016lx\n", 207 vexit->u.msr.code, vexit->u.msr.wval); 208 break; 209 case VM_EXITCODE_MMIO: 210 (void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip); 211 (void) fprintf(stderr, 212 "\tbytes: %u\n" 213 "\ttype: %s\n" 214 "\tgpa: %x\n" 215 "\tdata: %016x\n", 216 vexit->u.mmio.bytes, 217 vexit->u.mmio.read == 0 ? "write" : "read", 218 vexit->u.mmio.gpa, 219 vexit->u.mmio.data); 220 break; 221 case VM_EXITCODE_VMX: 222 (void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip); 223 (void) fprintf(stderr, 224 "\tstatus: %x\n" 225 "\treason: %x\n" 226 "\tqualification: %lx\n" 227 "\tinst_type: %x\n" 228 "\tinst_error: %x\n", 229 vexit->u.vmx.status, 230 vexit->u.vmx.exit_reason, 231 vexit->u.vmx.exit_qualification, 232 vexit->u.vmx.inst_type, 233 vexit->u.vmx.inst_error); 234 break; 235 case VM_EXITCODE_SVM: 236 (void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip); 237 break; 238 case VM_EXITCODE_INST_EMUL: 239 (void) fprintf(stderr, hdr_fmt, "instruction emulation", 240 vexit->rip); 241 const uint_t len = vexit->u.inst_emul.num_valid > 0 ? 242 vexit->u.inst_emul.num_valid : 15; 243 (void) fprintf(stderr, "\tinstruction bytes: ["); 244 for (uint_t i = 0; i < len; i++) { 245 (void) fprintf(stderr, "%s%02x", 246 i == 0 ? "" : ", ", 247 vexit->u.inst_emul.inst[i]); 248 } 249 (void) fprintf(stderr, "]\n"); 250 break; 251 case VM_EXITCODE_SUSPENDED: 252 (void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip); 253 switch (vexit->u.suspended.how) { 254 case VM_SUSPEND_RESET: 255 (void) fprintf(stderr, "\thow: reset"); 256 break; 257 case VM_SUSPEND_POWEROFF: 258 (void) fprintf(stderr, "\thow: poweroff"); 259 break; 260 case VM_SUSPEND_HALT: 261 (void) fprintf(stderr, "\thow: halt"); 262 break; 263 case VM_SUSPEND_TRIPLEFAULT: 264 (void) fprintf(stderr, "\thow: triple-fault"); 265 break; 266 default: 267 (void) fprintf(stderr, "\thow: unknown - %d", 268 vexit->u.suspended.how); 269 break; 270 } 271 break; 272 default: 273 (void) fprintf(stderr, "Unexpected code %d exit:\n" 274 "\t%%rip: %lx\n", vexit->exitcode, vexit->rip); 275 break; 276 } 277 fail_finish(); 278 } 279 280 void 281 test_pass(void) 282 { 283 assert(test_name != NULL); 284 (void) printf("PASS %s\n", test_name); 285 test_cleanup(false); 286 exit(EXIT_SUCCESS); 287 } 288 289 static int 290 load_payload(struct vmctx *ctx) 291 { 292 extern uint8_t payload_data; 293 extern uint32_t payload_size; 294 295 const uint32_t len = payload_size; 296 const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD); 297 298 if (len > cap) { 299 test_fail_msg("Payload size %u > capacity %u\n", len, cap); 300 } 301 302 const size_t map_len = P2ROUNDUP(len, PAGE_SIZE); 303 void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len); 304 bcopy(&payload_data, outp, len); 305 306 return (0); 307 } 308 309 struct vmctx * 310 test_initialize(const char *tname) 311 { 312 char vm_name[VM_MAX_NAMELEN]; 313 int err; 314 struct vmctx *ctx; 315 316 assert(test_vmctx == NULL); 317 assert(test_name == NULL); 318 319 test_name = strdup(tname); 320 (void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d", 321 test_name, getpid()); 322 323 err = vm_create(vm_name, 0); 324 if (err != 0) { 325 test_fail_errno(err, "Could not create VM"); 326 } 327 328 ctx = vm_open(vm_name); 329 if (ctx == NULL) { 330 test_fail_errno(errno, "Could not open VM"); 331 } 332 test_vmctx = ctx; 333 334 err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL); 335 if (err != 0) { 336 test_fail_errno(err, "Could not set up VM memory"); 337 } 338 339 err = setup_rom(ctx); 340 if (err != 0) { 341 test_fail_errno(err, "Could not set up VM ROM segment"); 342 } 343 344 populate_identity_table(ctx); 345 populate_desc_tables(ctx); 346 347 err = load_payload(ctx); 348 if (err != 0) { 349 test_fail_errno(err, "Could not load payload"); 350 } 351 352 return (ctx); 353 } 354 355 int 356 test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp) 357 { 358 int err; 359 360 err = vm_activate_cpu(ctx, vcpu); 361 if (err != 0 && err != EBUSY) { 362 return (err); 363 } 364 365 /* 366 * Granularity bit important here for VMX validity: 367 * "If any bit in the limit field in the range 31:20 is 1, G must be 1" 368 */ 369 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX, 370 SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G); 371 if (err != 0) { 372 return (err); 373 } 374 375 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX, 376 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L | 377 SEG_ACCESS_D | SEG_ACCESS_G); 378 if (err != 0) { 379 return (err); 380 } 381 382 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX, 383 SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G); 384 if (err != 0) { 385 return (err); 386 } 387 388 /* 389 * While SVM will happilly run with an otherwise unusable TR, VMX 390 * includes it among its entry checks. 391 */ 392 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff, 393 SDT_SYSTSSBSY | SEG_ACCESS_P); 394 if (err != 0) { 395 return (err); 396 } 397 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0); 398 if (err != 0) { 399 return (err); 400 } 401 err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0); 402 if (err != 0) { 403 return (err); 404 } 405 406 /* Mark unused segments as explicitly unusable (for VMX) */ 407 const int unsable_segs[] = { 408 VM_REG_GUEST_ES, 409 VM_REG_GUEST_FS, 410 VM_REG_GUEST_GS, 411 VM_REG_GUEST_LDTR, 412 }; 413 for (uint_t i = 0; i < ARRAY_SIZE(unsable_segs); i++) { 414 err = vm_set_desc(ctx, vcpu, unsable_segs[i], 0, 0, 415 SEG_ACCESS_UNUSABLE); 416 if (err != 0) { 417 return (err); 418 } 419 } 420 421 /* Place CPU directly in long mode */ 422 const int regnums[] = { 423 VM_REG_GUEST_CR0, 424 VM_REG_GUEST_CR3, 425 VM_REG_GUEST_CR4, 426 VM_REG_GUEST_EFER, 427 VM_REG_GUEST_RFLAGS, 428 VM_REG_GUEST_RIP, 429 VM_REG_GUEST_RSP, 430 VM_REG_GUEST_CS, 431 VM_REG_GUEST_SS, 432 VM_REG_GUEST_DS, 433 VM_REG_GUEST_TR, 434 }; 435 uint64_t regvals[] = { 436 CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS | 437 CR0_MP | CR0_PE, 438 MEM_LOC_PAGE_TABLE_512G, 439 CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE, 440 AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE, 441 /* start with interrupts disabled */ 442 PS_MB1, 443 rip, 444 rsp, 445 (GDT_KCODE << 3), 446 (GDT_KDATA << 3), 447 (GDT_KDATA << 3), 448 (GDT_KTSS << 3), 449 }; 450 assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals)); 451 452 err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums, 453 regvals); 454 if (err != 0) { 455 return (err); 456 } 457 458 err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0); 459 if (err != 0) { 460 return (err); 461 } 462 463 return (0); 464 } 465 466 static enum vm_exit_kind 467 which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit) 468 { 469 const struct vm_inout *inout = &vexit->u.inout; 470 471 switch (vexit->exitcode) { 472 case VM_EXITCODE_BOGUS: 473 case VM_EXITCODE_REQIDLE: 474 bzero(ventry, sizeof (ventry)); 475 return (VEK_REENTR); 476 case VM_EXITCODE_INOUT: 477 if (inout->port == IOP_TEST_RESULT && 478 (inout->flags & INOUT_IN) == 0) { 479 if (inout->eax == TEST_RESULT_PASS) { 480 return (VEK_TEST_PASS); 481 } else { 482 return (VEK_TEST_FAIL); 483 } 484 } 485 break; 486 default: 487 break; 488 } 489 return (VEK_UNHANDLED); 490 } 491 492 enum vm_exit_kind 493 test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry, 494 struct vm_exit *vexit) 495 { 496 int err; 497 498 err = vm_run(ctx, vcpu, ventry, vexit); 499 if (err != 0) { 500 test_fail_errno(err, "Failure during vcpu entry"); 501 } 502 503 return (which_exit_kind(ventry, vexit)); 504 } 505 506 void 507 ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry, 508 uint32_t data) 509 { 510 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT); 511 512 ventry->cmd = VEC_FULFILL_INOUT; 513 bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout)); 514 if ((ventry->u.inout.flags & INOUT_IN) != 0) { 515 ventry->u.inout.eax = data; 516 } 517 } 518 519 void 520 ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry, 521 uint64_t data) 522 { 523 VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO); 524 525 ventry->cmd = VEC_FULFILL_MMIO; 526 bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio)); 527 if (ventry->u.mmio.read != 0) { 528 ventry->u.mmio.data = data; 529 } 530 } 531 532 bool 533 vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port, 534 uint_t len, uint32_t *valp) 535 { 536 if (vexit->exitcode != VM_EXITCODE_INOUT) { 537 return (false); 538 } 539 540 const uint_t flag = is_read ? INOUT_IN : 0; 541 if (vexit->u.inout.port != port || 542 vexit->u.inout.bytes != len || 543 (vexit->u.inout.flags & INOUT_IN) != flag) { 544 return (false); 545 } 546 547 if (!is_read && valp != NULL) { 548 *valp = vexit->u.inout.eax; 549 } 550 return (true); 551 } 552 553 bool 554 vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr, 555 uint_t len, uint64_t *valp) 556 { 557 if (vexit->exitcode != VM_EXITCODE_MMIO) { 558 return (false); 559 } 560 561 if (vexit->u.mmio.gpa != addr || 562 vexit->u.mmio.bytes != len || 563 (vexit->u.mmio.read != 0) != is_read) { 564 return (false); 565 } 566 567 if (!is_read && valp != NULL) { 568 *valp = vexit->u.mmio.data; 569 } 570 return (true); 571 } 572