/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Library for native code to access bhyve VMs, without the need to use
 * FreeBSD compat headers
 */

#include <sys/param.h>
#include <sys/list.h>
#include <sys/stddef.h>
#include <sys/mman.h>
#include <sys/kdi_regs.h>
#include <sys/sysmacros.h>
#include <sys/controlregs.h>
#include <sys/note.h>
#include <sys/debug.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include <libvmm.h>

typedef struct vmm_memseg vmm_memseg_t;

#define	VMM_MEMSEG_DEVMEM	0x1

struct vmm_memseg {
	list_node_t vms_list;
	int vms_segid;
	int vms_prot;
	int vms_flags;
	uintptr_t vms_gpa;
	off_t vms_segoff;
	size_t vms_seglen;
	size_t vms_maplen;
	char vms_name[64];
};

struct vmm {
	struct vmctx *vmm_ctx;
	list_t vmm_memlist;
	char *vmm_mem;
	size_t vmm_memsize;
	size_t vmm_ncpu;
};


/*
 * This code relies on two assumptions:
 * - CPUs are never removed from the "active set", not even when suspended.
 *   A CPU being active just means that it has been used by the guest OS.
 * - The CPU numbering is consecutive.
 */
static void
vmm_update_ncpu(vmm_t *vmm)
{
	cpuset_t cpuset;

	assert(vm_active_cpus(vmm->vmm_ctx, &cpuset) == 0);

	for (vmm->vmm_ncpu = 0;
	    CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
	    vmm->vmm_ncpu++)
		;
}

vmm_t *
vmm_open_vm(const char *name)
{
	vmm_t *vmm = NULL;

	vmm = malloc(sizeof (vmm_t));
	if (vmm == NULL)
		return (NULL);

	bzero(vmm, sizeof (vmm_t));
	vmm->vmm_mem = MAP_FAILED;

	list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
	    offsetof(vmm_memseg_t, vms_list));

	vmm->vmm_ctx = vm_open(name);
	if (vmm->vmm_ctx == NULL) {
		free(vmm);
		return (NULL);
	}

	vmm_update_ncpu(vmm);

	/*
	 * If we open a VM that has just been created we may see a state
	 * where it has no CPUs configured yet. We'll just wait for 10ms
	 * and retry until we get a non-zero CPU count.
	 */
	if (vmm->vmm_ncpu == 0) {
		do {
			(void) usleep(10000);
			vmm_update_ncpu(vmm);
		} while (vmm->vmm_ncpu == 0);
	}

	return (vmm);
}

void
vmm_close_vm(vmm_t *vmm)
{
	vmm_unmap(vmm);

	list_destroy(&vmm->vmm_memlist);

	if (vmm->vmm_ctx != NULL)
		vm_close(vmm->vmm_ctx);

	free(vmm);
}
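
/*
 * Illustrative sketch of the open/close lifecycle (not part of the library).
 * The VM name "testvm" is hypothetical and error handling is abbreviated:
 *
 *	#include <stdio.h>
 *	#include <libvmm.h>
 *
 *	int
 *	main(void)
 *	{
 *		vmm_t *vmm = vmm_open_vm("testvm");
 *
 *		if (vmm == NULL)
 *			return (1);
 *
 *		(void) printf("vcpus: %zu\n", vmm_ncpu(vmm));
 *		vmm_close_vm(vmm);
 *		return (0);
 *	}
 */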

static vmm_memseg_t *
vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
{
	vmm_memseg_t ms, *ret;
	int error, flags;

	bzero(&ms, sizeof (vmm_memseg_t));
	ms.vms_gpa = gpa;
	error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
	    &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
	if (error)
		return (NULL);

	error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
	    ms.vms_name, sizeof (ms.vms_name));
	if (error)
		return (NULL);

	/*
	 * Regular memory segments don't have a name, but devmem segments do.
	 * We can use that information to set the DEVMEM flag if necessary.
	 */
	ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;

	ret = malloc(sizeof (vmm_memseg_t));
	if (ret == NULL)
		return (NULL);

	*ret = ms;

	return (ret);
}

int
vmm_map(vmm_t *vmm, boolean_t writable)
{
	uintptr_t last_gpa = 0;
	vmm_memseg_t *ms;
	int prot_write = writable ? PROT_WRITE : 0;

	if (vmm->vmm_mem != MAP_FAILED) {
		errno = EINVAL;
		return (-1);
	}

	assert(list_is_empty(&vmm->vmm_memlist));

	for (;;) {
		ms = vmm_get_memseg(vmm, last_gpa);

		if (ms == NULL)
			break;

		last_gpa = ms->vms_gpa + ms->vms_maplen;
		list_insert_tail(&vmm->vmm_memlist, ms);
	}

	/*
	 * Reserve a contiguous, inaccessible mapping covering the whole
	 * guest-physical address space, then map each memory segment from
	 * the vmm device over it at its guest-physical address.
	 */
	vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
	    MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);

	if (vmm->vmm_mem == MAP_FAILED)
		goto fail;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		off_t mapoff = ms->vms_gpa;

		if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) &&
		    vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid, &mapoff)
		    != 0)
			goto fail;

		vmm->vmm_memsize += ms->vms_maplen;

		if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
		    PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
		    vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
			goto fail;
	}

	return (0);

fail:
	vmm_unmap(vmm);

	return (-1);
}

void
vmm_unmap(vmm_t *vmm)
{
	while (!list_is_empty(&vmm->vmm_memlist)) {
		vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);

		if (vmm->vmm_mem != MAP_FAILED) {
			(void) munmap(vmm->vmm_mem + ms->vms_gpa,
			    ms->vms_maplen);
		}

		free(ms);
	}

	if (vmm->vmm_mem != MAP_FAILED)
		(void) munmap(vmm->vmm_mem, vmm->vmm_memsize);

	vmm->vmm_mem = MAP_FAILED;
	vmm->vmm_memsize = 0;
}

ssize_t
vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL && len != 0;
	    ms = list_next(&vmm->vmm_memlist, ms)) {

		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(vmm->vmm_mem + addr, buf, len - res);
			count += len - res;
			addr += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}

ssize_t
vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
{
	ssize_t count = 0;
	vmm_memseg_t *ms;
	ssize_t res = len;

	for (ms = list_head(&vmm->vmm_memlist);
	    ms != NULL;
	    ms = list_next(&vmm->vmm_memlist, ms)) {
		if (addr >= ms->vms_gpa &&
		    addr < ms->vms_gpa + ms->vms_maplen) {
			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);

			if (res < 0)
				res = 0;

			bcopy(buf, vmm->vmm_mem + addr, len - res);
			count += len - res;
			addr += len - res;
			len = res;
		}
	}

	if (res)
		errno = EFAULT;
	else
		errno = 0;

	return (count);
}
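
/*
 * Illustrative sketch of a guest-physical read (not part of the library).
 * Assumes "vmm" came from vmm_open_vm(); error handling is abbreviated.
 * A short return with errno set to EFAULT means part of the requested
 * range was not backed by any memory segment:
 *
 *	uint8_t page[4096];
 *
 *	if (vmm_map(vmm, B_FALSE) != 0)
 *		return (-1);
 *
 *	if (vmm_pread(vmm, page, sizeof (page), 0) != sizeof (page))
 *		(void) fprintf(stderr, "short read of guest page 0\n");
 *
 *	vmm_unmap(vmm);
 */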

size_t
vmm_ncpu(vmm_t *vmm)
{
	return (vmm->vmm_ncpu);
}

size_t
vmm_memsize(vmm_t *vmm)
{
	return (vmm->vmm_memsize);
}

int
vmm_cont(vmm_t *vmm)
{
	return (vm_resume_cpu(vmm->vmm_ctx, -1));
}

int
vmm_step(vmm_t *vmm, int vcpu)
{
	cpuset_t cpuset;
	int ret;

	if (vcpu >= vmm->vmm_ncpu) {
		errno = EINVAL;
		return (-1);
	}

	ret = vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
	if (ret != 0)
		return (-1);

	assert(vm_resume_cpu(vmm->vmm_ctx, vcpu) == 0);

	do {
		(void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
	} while (!CPU_ISSET(vcpu, &cpuset));

	(void) vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);

	return (ret);
}

int
vmm_stop(vmm_t *vmm)
{
	int ret = vm_suspend_cpu(vmm->vmm_ctx, -1);

	if (ret == 0)
		vmm_update_ncpu(vmm);

	return (ret);
}

/*
 * Mapping of KDI-defined registers to vmmapi-defined registers.
 * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
 * causes an error in vm_{get,set}_register_set().
 *
 * This array must be kept in sync with the definitions in kdi_regs.h.
 */
static int vmm_kdi_regmap[] = {
	VM_REG_LAST,		/* KDIREG_SAVFP */
	VM_REG_LAST,		/* KDIREG_SAVPC */
	VM_REG_GUEST_RDI,	/* KDIREG_RDI */
	VM_REG_GUEST_RSI,	/* KDIREG_RSI */
	VM_REG_GUEST_RDX,	/* KDIREG_RDX */
	VM_REG_GUEST_RCX,	/* KDIREG_RCX */
	VM_REG_GUEST_R8,	/* KDIREG_R8 */
	VM_REG_GUEST_R9,	/* KDIREG_R9 */
	VM_REG_GUEST_RAX,	/* KDIREG_RAX */
	VM_REG_GUEST_RBX,	/* KDIREG_RBX */
	VM_REG_GUEST_RBP,	/* KDIREG_RBP */
	VM_REG_GUEST_R10,	/* KDIREG_R10 */
	VM_REG_GUEST_R11,	/* KDIREG_R11 */
	VM_REG_GUEST_R12,	/* KDIREG_R12 */
	VM_REG_GUEST_R13,	/* KDIREG_R13 */
	VM_REG_GUEST_R14,	/* KDIREG_R14 */
	VM_REG_GUEST_R15,	/* KDIREG_R15 */
	VM_REG_LAST,		/* KDIREG_FSBASE */
	VM_REG_LAST,		/* KDIREG_GSBASE */
	VM_REG_LAST,		/* KDIREG_KGSBASE */
	VM_REG_GUEST_CR2,	/* KDIREG_CR2 */
	VM_REG_GUEST_CR3,	/* KDIREG_CR3 */
	VM_REG_GUEST_DS,	/* KDIREG_DS */
	VM_REG_GUEST_ES,	/* KDIREG_ES */
	VM_REG_GUEST_FS,	/* KDIREG_FS */
	VM_REG_GUEST_GS,	/* KDIREG_GS */
	VM_REG_LAST,		/* KDIREG_TRAPNO */
	VM_REG_LAST,		/* KDIREG_ERR */
	VM_REG_GUEST_RIP,	/* KDIREG_RIP */
	VM_REG_GUEST_CS,	/* KDIREG_CS */
	VM_REG_GUEST_RFLAGS,	/* KDIREG_RFLAGS */
	VM_REG_GUEST_RSP,	/* KDIREG_RSP */
	VM_REG_GUEST_SS		/* KDIREG_SS */
};
CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);

/*
 * Mapping of libvmm-defined registers to vmmapi-defined registers.
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_sys_regmap[] = {
	VM_REG_GUEST_CR0,	/* VMM_REG_CR0 */
	VM_REG_GUEST_CR2,	/* VMM_REG_CR2 */
	VM_REG_GUEST_CR3,	/* VMM_REG_CR3 */
	VM_REG_GUEST_CR4,	/* VMM_REG_CR4 */
	VM_REG_GUEST_DR0,	/* VMM_REG_DR0 */
	VM_REG_GUEST_DR1,	/* VMM_REG_DR1 */
	VM_REG_GUEST_DR2,	/* VMM_REG_DR2 */
	VM_REG_GUEST_DR3,	/* VMM_REG_DR3 */
	VM_REG_GUEST_DR6,	/* VMM_REG_DR6 */
	VM_REG_GUEST_DR7,	/* VMM_REG_DR7 */
	VM_REG_GUEST_EFER,	/* VMM_REG_EFER */
	VM_REG_GUEST_PDPTE0,	/* VMM_REG_PDPTE0 */
	VM_REG_GUEST_PDPTE1,	/* VMM_REG_PDPTE1 */
	VM_REG_GUEST_PDPTE2,	/* VMM_REG_PDPTE2 */
	VM_REG_GUEST_PDPTE3,	/* VMM_REG_PDPTE3 */
	VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
};
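
/*
 * Illustrative sketch of reading registers through these maps (not part of
 * the library); vCPU 0 is assumed and error handling is abbreviated.
 * KDI register numbers and VMM_REG_* numbers may be mixed freely, since
 * vmm_mapreg() below translates both kinds:
 *
 *	uint64_t rip, crs[2];
 *	const int crnums[] = { VMM_REG_CR0, VMM_REG_CR3 };
 *
 *	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) != 0)
 *		return (-1);
 *
 *	if (vmm_get_regset(vmm, 0, ARRAY_SIZE(crnums), crnums, crs) != 0)
 *		return (-1);
 */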

/*
 * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
 *
 * This array must be kept in sync with the definitions in libvmm.h
 */
static int vmm_descmap[] = {
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_SS
};

static int
vmm_mapreg(int reg)
{
	errno = 0;

	if (reg < 0)
		goto fail;

	if (reg < KDIREG_NGREG)
		return (vmm_kdi_regmap[reg]);

	if (reg >= VMM_REG_OFFSET &&
	    reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
		return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);

fail:
	errno = EINVAL;
	return (VM_REG_LAST);
}

static int
vmm_mapdesc(int desc)
{
	errno = 0;

	if (desc >= VMM_DESC_OFFSET &&
	    desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
		return (vmm_descmap[desc - VMM_DESC_OFFSET]);

	errno = EINVAL;
	return (VM_REG_LAST);
}

int
vmm_getreg(vmm_t *vmm, int vcpu, int reg, uint64_t *val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_get_register(vmm->vmm_ctx, vcpu, reg, val));
}

int
vmm_setreg(vmm_t *vmm, int vcpu, int reg, uint64_t val)
{
	reg = vmm_mapreg(reg);

	if (reg == VM_REG_LAST)
		return (-1);

	return (vm_set_register(vmm->vmm_ctx, vcpu, reg, val));
}

int
vmm_get_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_get_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_set_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
    uint64_t *regvals)
{
	int *vm_regnums;
	int i;
	int ret = -1;

	vm_regnums = malloc(sizeof (int) * nregs);
	if (vm_regnums == NULL)
		return (ret);

	for (i = 0; i != nregs; i++) {
		vm_regnums[i] = vmm_mapreg(regnums[i]);
		if (vm_regnums[i] == VM_REG_LAST)
			goto fail;
	}

	ret = vm_set_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
	    regvals);

fail:
	free(vm_regnums);
	return (ret);
}

int
vmm_get_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_get_desc(vmm->vmm_ctx, vcpu, desc, &vd->vd_base, &vd->vd_lim,
	    &vd->vd_acc));
}

int
vmm_set_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
{
	desc = vmm_mapdesc(desc);
	if (desc == VM_REG_LAST)
		return (-1);

	return (vm_set_desc(vmm->vmm_ctx, vcpu, desc, vd->vd_base, vd->vd_lim,
	    vd->vd_acc));
}
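
/*
 * Illustrative sketch of reading a segment descriptor (not part of the
 * library); vCPU 0 is assumed, error handling is abbreviated, and the
 * vmm_desc_t field names follow their use in vmm_get_desc() above:
 *
 *	vmm_desc_t cs;
 *
 *	if (vmm_get_desc(vmm, 0, VMM_DESC_CS, &cs) != 0)
 *		return (-1);
 *
 *	(void) printf("%%cs base 0x%llx limit 0x%x\n",
 *	    (u_longlong_t)cs.vd_base, cs.vd_lim);
 */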

/*
 * Structure to hold MMU state during address translation.
 * The contents of vmm_mmu_regnum[] must be kept in sync with this.
 */
typedef struct vmm_mmu {
	uint64_t vm_cr0;
	uint64_t vm_cr3;
	uint64_t vm_cr4;
	uint64_t vm_efer;
} vmm_mmu_t;

static const int vmm_mmu_regnum[] = {
	VMM_REG_CR0,
	VMM_REG_CR3,
	VMM_REG_CR4,
	VMM_REG_EFER
};

#define	X86_PTE_P		0x001ULL
#define	X86_PTE_PS		0x080ULL

#define	X86_PTE_PHYSMASK	0x000ffffffffff000ULL
#define	X86_PAGE_SHIFT		12
#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)

#define	X86_SEG_CODE_DATA	(1ULL << 4)
#define	X86_SEG_PRESENT		(1ULL << 7)
#define	X86_SEG_LONG		(1ULL << 13)
#define	X86_SEG_BIG		(1ULL << 14)
#define	X86_SEG_GRANULARITY	(1ULL << 15)
#define	X86_SEG_UNUSABLE	(1ULL << 16)

#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)

/*
 * vmm_pte2paddr:
 *
 * Recursively calculate the physical address from a virtual address,
 * starting at the given PTE level using the given PTE.
 */
static int
vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
    uint64_t vaddr, uint64_t *paddr)
{
	int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
	int off_bits = ia32 ? 10 : 9;
	boolean_t hugepage = B_FALSE;
	uint64_t offset;
	uint64_t off_mask, off_shift;

	if (level < 4 && (pte & X86_PTE_P) == 0) {
		errno = EFAULT;
		return (-1);
	}

	off_shift = X86_PAGE_SHIFT + off_bits * level;
	off_mask = (1ULL << off_shift) - 1;

	offset = vaddr & off_mask;

	if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
		hugepage = B_TRUE;
	} else {
		if (level > 0) {
			offset >>= off_shift - off_bits;
			offset <<= X86_PAGE_SHIFT - off_bits;
		}
		off_mask = 0xfff;
	}

	*paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;

	if (level == 0 || hugepage)
		return (0);

	pte = 0;
	if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
		return (-1);
	return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
}
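
/*
 * For illustration, the arithmetic above decomposes a long-mode virtual
 * address (ia32 == B_FALSE, off_bits == 9, starting at level 4 with %cr3
 * passed in as "pte") as follows:
 *
 *	level 4: pte = %cr3,  vaddr bits 47..39 select the PML4 entry
 *	level 3: pte = PML4E, bits 38..30 select the PDPT entry
 *	level 2: pte = PDPTE, bits 29..21 select the PD entry
 *	         (or, with PS set, bits 29..0 are the 1 GB page offset)
 *	level 1: pte = PDE,   bits 20..12 select the PT entry
 *	         (or, with PS set, bits 20..0 are the 2 MB page offset)
 *	level 0: pte = PTE,   bits 11..0 are the offset into the 4 KB page
 *
 * At each non-leaf level the selected index is scaled to a byte offset into
 * the next table (right shift by off_shift - off_bits, then left shift by
 * X86_PAGE_SHIFT - off_bits, i.e. index * 8 for 64-bit PTEs), added to the
 * table base taken from the current PTE, and the entry found there is
 * fetched with vmm_pread() before recursing one level down.
 */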

static vmm_mode_t
vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpu, vmm_mmu_t *mmu)
{
	if ((mmu->vm_cr0 & CR0_PE) == 0)
		return (VMM_MODE_REAL);
	else if ((mmu->vm_cr4 & CR4_PAE) == 0)
		return (VMM_MODE_PROT);
	else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
		return (VMM_MODE_PAE);
	else
		return (VMM_MODE_LONG);
}

vmm_mode_t
vmm_vcpu_mode(vmm_t *vmm, int vcpu)
{
	vmm_mmu_t mmu = { 0 };

	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (VMM_MODE_UNKNOWN);

	return (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu));
}

vmm_isa_t
vmm_vcpu_isa(vmm_t *vmm, int vcpu)
{
	vmm_desc_t cs;

	if (vmm_get_desc(vmm, vcpu, VMM_DESC_CS, &cs) != 0)
		return (VMM_ISA_UNKNOWN);

	switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
	case 0x0:		/* 16b code segment */
		return (VMM_ISA_16);
	case X86_SEG_LONG:	/* 64b code segment */
		return (VMM_ISA_64);
	case X86_SEG_BIG:	/* 32b code segment */
		return (VMM_ISA_32);
	}

	return (VMM_ISA_UNKNOWN);
}

/*
 * vmm_vtol:
 *
 * Translate a virtual address to a linear address on a certain vCPU,
 * using the specified segment register or descriptor according to the mode.
 */
int
vmm_vtol(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *laddr)
{
	vmm_desc_t desc;
	uint64_t limit;

	if (vmm_get_desc(vmm, vcpu, seg, &desc) != 0)
		return (-1);

	switch (vmm_vcpu_mode(vmm, vcpu)) {
	case VMM_MODE_REAL:
		if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_PROT:
	case VMM_MODE_PAE:
		if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
			/* unusable, system segment, or not present */
			goto fault;

		limit = desc.vd_lim;
		if (desc.vd_acc & X86_SEG_GRANULARITY)
			limit *= 4096;

		if (vaddr > limit)
			goto fault;
		/* FALLTHRU */
	case VMM_MODE_LONG:
		*laddr = desc.vd_base + vaddr;
		return (0);

	default:
	fault:
		errno = EFAULT;
		return (-1);
	}
}

/*
 * vmm_vtop:
 *
 * Translate a virtual address to a guest physical address on a certain vCPU,
 * according to the mode the vCPU is in.
 */
int
vmm_vtop(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *paddr)
{
	vmm_mmu_t mmu = { 0 };
	int ret = 0;

	if (vmm_vtol(vmm, vcpu, seg, vaddr, &vaddr) != 0)
		return (-1);

	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
		return (-1);

	if ((mmu.vm_cr0 & CR0_PG) == 0) {
		/* no paging, physical equals virtual */
		*paddr = vaddr;
		return (0);
	}

	switch (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu)) {
	case VMM_MODE_PROT:
		/* protected mode, no PAE: 2-level paging, 32bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
		break;
	case VMM_MODE_PAE:
		/* protected mode with PAE: 3-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
		break;
	case VMM_MODE_LONG:
		/* long mode: 4-level paging, 64bit PTEs */
		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
		break;
	default:
		ret = -1;
	}

	return (ret);
}

ssize_t
vmm_vread(vmm_t *vmm, int vcpu, int seg, void *buf, size_t len, uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pread(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}

ssize_t
vmm_vwrite(vmm_t *vmm, int vcpu, int seg, const void *buf, size_t len,
    uintptr_t addr)
{
	ssize_t res = 0;
	uint64_t paddr;
	size_t plen;
	uint64_t boundary;

	while (len != 0) {
		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
			errno = EFAULT;
			return (0);
		}

		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
		if (addr + len > boundary)
			plen = boundary - addr;
		else
			plen = len;

		if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
			return (0);
		len -= plen;
		addr += plen;
		buf += plen;
		res += plen;
	}

	return (res);
}
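
/*
 * Illustrative sketch tying the pieces together (not part of the library):
 * read the instruction bytes at vCPU 0's current %rip through the %cs
 * segment. Assumes guest memory has already been mapped with vmm_map()
 * and that the vCPU is stopped; error handling is abbreviated.
 *
 *	uint64_t rip;
 *	uint8_t insn[16];
 *
 *	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) != 0)
 *		return (-1);
 *
 *	if (vmm_vread(vmm, 0, VMM_DESC_CS, insn, sizeof (insn), rip) !=
 *	    sizeof (insn))
 *		return (-1);
 */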