/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/linker.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>

#include <x86/segments.h>
#include <machine/specialreg.h>

#include <capsicum_helpers.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#include <libutil.h>

#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_snapshot.h>

#include "vmmapi.h"
#include "internal.h"

#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

/*
 * Size of the guard region before and after the virtual address space
 * mapping the guest physical memory. This must be a multiple of the
 * superpage size for performance reasons.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)

#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

struct vmctx {
	int	fd;
	uint32_t lowmem_limit;
	int	memflags;
	size_t	lowmem;
	size_t	highmem;
	char	*baseaddr;
	char	*name;
};

#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))

static int
vm_device_open(const char *name)
{
	int fd, len;
	char *vmfile;

	len = strlen("/dev/vmm/") + strlen(name) + 1;
	vmfile = malloc(len);
	assert(vmfile != NULL);
	snprintf(vmfile, len, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	free(vmfile);
	return (fd);
}

int
vm_create(const char *name)
{
	/* Try to load vmm(4) module before creating a guest. */
	if (modfind("vmm") < 0)
		kldload("vmm");
	return (CREATE(name));
}
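
/*
 * Usage sketch (illustrative comment only, not part of the library): the
 * typical lifecycle of a guest built on vm_create(), vm_open() and
 * vm_destroy().  The VM name "example" is a placeholder and the err(3)
 * calls assume <err.h>.
 *
 *	struct vmctx *ctx;
 *
 *	if (vm_create("example") != 0)
 *		err(1, "vm_create");
 *	if ((ctx = vm_open("example")) == NULL)
 *		err(1, "vm_open");
 *	// ... configure memory and vcpus, run the guest ...
 *	vm_destroy(ctx);	// or vm_close(ctx) to keep the VM alive
 */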

struct vmctx *
vm_open(const char *name)
{
	struct vmctx *vm;
	int saved_errno;

	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
	assert(vm != NULL);

	vm->fd = -1;
	vm->memflags = 0;
	vm->lowmem_limit = 3 * GB;
	vm->name = (char *)(vm + 1);
	strcpy(vm->name, name);

	if ((vm->fd = vm_device_open(vm->name)) < 0)
		goto err;

	return (vm);
err:
	saved_errno = errno;
	free(vm);
	errno = saved_errno;
	return (NULL);
}

void
vm_close(struct vmctx *vm)
{
	assert(vm != NULL);

	close(vm->fd);
	free(vm);
}

void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0)
		close(vm->fd);
	DESTROY(vm->name);

	free(vm);
}

struct vcpu *
vm_vcpu_open(struct vmctx *ctx, int vcpuid)
{
	struct vcpu *vcpu;

	vcpu = malloc(sizeof(*vcpu));
	vcpu->ctx = ctx;
	vcpu->vcpuid = vcpuid;
	return (vcpu);
}

void
vm_vcpu_close(struct vcpu *vcpu)
{
	free(vcpu);
}

int
vcpu_id(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

int
vm_parse_memsize(const char *opt, size_t *ret_memsize)
{
	char *endptr;
	size_t optval;
	int error;

	optval = strtoul(opt, &endptr, 0);
	if (*opt != '\0' && *endptr == '\0') {
		/*
		 * For the sake of backward compatibility if the memory size
		 * specified on the command line is less than a megabyte then
		 * it is interpreted as being in units of MB.
		 */
		if (optval < MB)
			optval *= MB;
		*ret_memsize = optval;
		error = 0;
	} else
		error = expand_number(opt, ret_memsize);

	return (error);
}
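
/*
 * Examples of the parsing rules above (illustrative comment only):
 *
 *	vm_parse_memsize("256", &sz);		// sz = 256 MB (backward compat)
 *	vm_parse_memsize("1073741824", &sz);	// sz = 1 GB, taken as bytes
 *	vm_parse_memsize("4G", &sz);		// suffix handled by expand_number(3)
 */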
265 */ 266 error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags); 267 if (error == 0 && gpa == memmap.gpa) { 268 if (segid != memmap.segid || off != memmap.segoff || 269 prot != memmap.prot || flags != memmap.flags) { 270 errno = EEXIST; 271 return (-1); 272 } else { 273 return (0); 274 } 275 } 276 277 error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap); 278 return (error); 279 } 280 281 int 282 vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr, 283 size_t *lowmem_size, size_t *highmem_size) 284 { 285 286 *guest_baseaddr = ctx->baseaddr; 287 *lowmem_size = ctx->lowmem; 288 *highmem_size = ctx->highmem; 289 return (0); 290 } 291 292 int 293 vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len) 294 { 295 struct vm_munmap munmap; 296 int error; 297 298 munmap.gpa = gpa; 299 munmap.len = len; 300 301 error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap); 302 return (error); 303 } 304 305 int 306 vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid, 307 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) 308 { 309 struct vm_memmap memmap; 310 int error; 311 312 bzero(&memmap, sizeof(struct vm_memmap)); 313 memmap.gpa = *gpa; 314 error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap); 315 if (error == 0) { 316 *gpa = memmap.gpa; 317 *segid = memmap.segid; 318 *segoff = memmap.segoff; 319 *len = memmap.len; 320 *prot = memmap.prot; 321 *flags = memmap.flags; 322 } 323 return (error); 324 } 325 326 /* 327 * Return 0 if the segments are identical and non-zero otherwise. 328 * 329 * This is slightly complicated by the fact that only device memory segments 330 * are named. 331 */ 332 static int 333 cmpseg(size_t len, const char *str, size_t len2, const char *str2) 334 { 335 336 if (len == len2) { 337 if ((!str && !str2) || (str && str2 && !strcmp(str, str2))) 338 return (0); 339 } 340 return (-1); 341 } 342 343 static int 344 vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) 345 { 346 struct vm_memseg memseg; 347 size_t n; 348 int error; 349 350 /* 351 * If the memory segment has already been created then just return. 352 * This is the usual case for the SYSMEM segment created by userspace 353 * loaders like bhyveload(8). 
354 */ 355 error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name, 356 sizeof(memseg.name)); 357 if (error) 358 return (error); 359 360 if (memseg.len != 0) { 361 if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) { 362 errno = EINVAL; 363 return (-1); 364 } else { 365 return (0); 366 } 367 } 368 369 bzero(&memseg, sizeof(struct vm_memseg)); 370 memseg.segid = segid; 371 memseg.len = len; 372 if (name != NULL) { 373 n = strlcpy(memseg.name, name, sizeof(memseg.name)); 374 if (n >= sizeof(memseg.name)) { 375 errno = ENAMETOOLONG; 376 return (-1); 377 } 378 } 379 380 error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg); 381 return (error); 382 } 383 384 int 385 vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, 386 size_t bufsize) 387 { 388 struct vm_memseg memseg; 389 size_t n; 390 int error; 391 392 memseg.segid = segid; 393 error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg); 394 if (error == 0) { 395 *lenp = memseg.len; 396 n = strlcpy(namebuf, memseg.name, bufsize); 397 if (n >= bufsize) { 398 errno = ENAMETOOLONG; 399 error = -1; 400 } 401 } 402 return (error); 403 } 404 405 static int 406 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) 407 { 408 char *ptr; 409 int error, flags; 410 411 /* Map 'len' bytes starting at 'gpa' in the guest address space */ 412 error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL); 413 if (error) 414 return (error); 415 416 flags = MAP_SHARED | MAP_FIXED; 417 if ((ctx->memflags & VM_MEM_F_INCORE) == 0) 418 flags |= MAP_NOCORE; 419 420 /* mmap into the process address space on the host */ 421 ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa); 422 if (ptr == MAP_FAILED) 423 return (-1); 424 425 return (0); 426 } 427 428 int 429 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) 430 { 431 size_t objsize, len; 432 vm_paddr_t gpa; 433 char *baseaddr, *ptr; 434 int error; 435 436 assert(vms == VM_MMAP_ALL); 437 438 /* 439 * If 'memsize' cannot fit entirely in the 'lowmem' segment then 440 * create another 'highmem' segment above 4GB for the remainder. 441 */ 442 if (memsize > ctx->lowmem_limit) { 443 ctx->lowmem = ctx->lowmem_limit; 444 ctx->highmem = memsize - ctx->lowmem_limit; 445 objsize = 4*GB + ctx->highmem; 446 } else { 447 ctx->lowmem = memsize; 448 ctx->highmem = 0; 449 objsize = ctx->lowmem; 450 } 451 452 error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); 453 if (error) 454 return (error); 455 456 /* 457 * Stake out a contiguous region covering the guest physical memory 458 * and the adjoining guard regions. 459 */ 460 len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE; 461 ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0); 462 if (ptr == MAP_FAILED) 463 return (-1); 464 465 baseaddr = ptr + VM_MMAP_GUARD_SIZE; 466 if (ctx->highmem > 0) { 467 gpa = 4*GB; 468 len = ctx->highmem; 469 error = setup_memory_segment(ctx, gpa, len, baseaddr); 470 if (error) 471 return (error); 472 } 473 474 if (ctx->lowmem > 0) { 475 gpa = 0; 476 len = ctx->lowmem; 477 error = setup_memory_segment(ctx, gpa, len, baseaddr); 478 if (error) 479 return (error); 480 } 481 482 ctx->baseaddr = baseaddr; 483 484 return (0); 485 } 486 487 /* 488 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in 489 * the lowmem or highmem regions. 490 * 491 * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region. 492 * The instruction emulation code depends on this behavior. 
493 */ 494 void * 495 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) 496 { 497 498 if (ctx->lowmem > 0) { 499 if (gaddr < ctx->lowmem && len <= ctx->lowmem && 500 gaddr + len <= ctx->lowmem) 501 return (ctx->baseaddr + gaddr); 502 } 503 504 if (ctx->highmem > 0) { 505 if (gaddr >= 4*GB) { 506 if (gaddr < 4*GB + ctx->highmem && 507 len <= ctx->highmem && 508 gaddr + len <= 4*GB + ctx->highmem) 509 return (ctx->baseaddr + gaddr); 510 } 511 } 512 513 return (NULL); 514 } 515 516 vm_paddr_t 517 vm_rev_map_gpa(struct vmctx *ctx, void *addr) 518 { 519 vm_paddr_t offaddr; 520 521 offaddr = (char *)addr - ctx->baseaddr; 522 523 if (ctx->lowmem > 0) 524 if (offaddr <= ctx->lowmem) 525 return (offaddr); 526 527 if (ctx->highmem > 0) 528 if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem) 529 return (offaddr); 530 531 return ((vm_paddr_t)-1); 532 } 533 534 const char * 535 vm_get_name(struct vmctx *ctx) 536 { 537 538 return (ctx->name); 539 } 540 541 size_t 542 vm_get_lowmem_size(struct vmctx *ctx) 543 { 544 545 return (ctx->lowmem); 546 } 547 548 size_t 549 vm_get_highmem_size(struct vmctx *ctx) 550 { 551 552 return (ctx->highmem); 553 } 554 555 void * 556 vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len) 557 { 558 char pathname[MAXPATHLEN]; 559 size_t len2; 560 char *base, *ptr; 561 int fd, error, flags; 562 563 fd = -1; 564 ptr = MAP_FAILED; 565 if (name == NULL || strlen(name) == 0) { 566 errno = EINVAL; 567 goto done; 568 } 569 570 error = vm_alloc_memseg(ctx, segid, len, name); 571 if (error) 572 goto done; 573 574 strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname)); 575 strlcat(pathname, ctx->name, sizeof(pathname)); 576 strlcat(pathname, ".", sizeof(pathname)); 577 strlcat(pathname, name, sizeof(pathname)); 578 579 fd = open(pathname, O_RDWR); 580 if (fd < 0) 581 goto done; 582 583 /* 584 * Stake out a contiguous region covering the device memory and the 585 * adjoining guard regions. 586 */ 587 len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE; 588 base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 589 0); 590 if (base == MAP_FAILED) 591 goto done; 592 593 flags = MAP_SHARED | MAP_FIXED; 594 if ((ctx->memflags & VM_MEM_F_INCORE) == 0) 595 flags |= MAP_NOCORE; 596 597 /* mmap the devmem region in the host address space */ 598 ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0); 599 done: 600 if (fd >= 0) 601 close(fd); 602 return (ptr); 603 } 604 605 static int 606 vcpu_ioctl(struct vcpu *vcpu, u_long cmd, void *arg) 607 { 608 /* 609 * XXX: fragile, handle with care 610 * Assumes that the first field of the ioctl data 611 * is the vcpuid. 
612 */ 613 *(int *)arg = vcpu->vcpuid; 614 return (ioctl(vcpu->ctx->fd, cmd, arg)); 615 } 616 617 int 618 vm_set_desc(struct vcpu *vcpu, int reg, 619 uint64_t base, uint32_t limit, uint32_t access) 620 { 621 int error; 622 struct vm_seg_desc vmsegdesc; 623 624 bzero(&vmsegdesc, sizeof(vmsegdesc)); 625 vmsegdesc.regnum = reg; 626 vmsegdesc.desc.base = base; 627 vmsegdesc.desc.limit = limit; 628 vmsegdesc.desc.access = access; 629 630 error = vcpu_ioctl(vcpu, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); 631 return (error); 632 } 633 634 int 635 vm_get_desc(struct vcpu *vcpu, int reg, uint64_t *base, uint32_t *limit, 636 uint32_t *access) 637 { 638 int error; 639 struct vm_seg_desc vmsegdesc; 640 641 bzero(&vmsegdesc, sizeof(vmsegdesc)); 642 vmsegdesc.regnum = reg; 643 644 error = vcpu_ioctl(vcpu, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); 645 if (error == 0) { 646 *base = vmsegdesc.desc.base; 647 *limit = vmsegdesc.desc.limit; 648 *access = vmsegdesc.desc.access; 649 } 650 return (error); 651 } 652 653 int 654 vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *seg_desc) 655 { 656 int error; 657 658 error = vm_get_desc(vcpu, reg, &seg_desc->base, &seg_desc->limit, 659 &seg_desc->access); 660 return (error); 661 } 662 663 int 664 vm_set_register(struct vcpu *vcpu, int reg, uint64_t val) 665 { 666 int error; 667 struct vm_register vmreg; 668 669 bzero(&vmreg, sizeof(vmreg)); 670 vmreg.regnum = reg; 671 vmreg.regval = val; 672 673 error = vcpu_ioctl(vcpu, VM_SET_REGISTER, &vmreg); 674 return (error); 675 } 676 677 int 678 vm_get_register(struct vcpu *vcpu, int reg, uint64_t *ret_val) 679 { 680 int error; 681 struct vm_register vmreg; 682 683 bzero(&vmreg, sizeof(vmreg)); 684 vmreg.regnum = reg; 685 686 error = vcpu_ioctl(vcpu, VM_GET_REGISTER, &vmreg); 687 *ret_val = vmreg.regval; 688 return (error); 689 } 690 691 int 692 vm_set_register_set(struct vcpu *vcpu, unsigned int count, 693 const int *regnums, uint64_t *regvals) 694 { 695 int error; 696 struct vm_register_set vmregset; 697 698 bzero(&vmregset, sizeof(vmregset)); 699 vmregset.count = count; 700 vmregset.regnums = regnums; 701 vmregset.regvals = regvals; 702 703 error = vcpu_ioctl(vcpu, VM_SET_REGISTER_SET, &vmregset); 704 return (error); 705 } 706 707 int 708 vm_get_register_set(struct vcpu *vcpu, unsigned int count, 709 const int *regnums, uint64_t *regvals) 710 { 711 int error; 712 struct vm_register_set vmregset; 713 714 bzero(&vmregset, sizeof(vmregset)); 715 vmregset.count = count; 716 vmregset.regnums = regnums; 717 vmregset.regvals = regvals; 718 719 error = vcpu_ioctl(vcpu, VM_GET_REGISTER_SET, &vmregset); 720 return (error); 721 } 722 723 int 724 vm_run(struct vcpu *vcpu, struct vm_exit *vmexit) 725 { 726 int error; 727 struct vm_run vmrun; 728 729 bzero(&vmrun, sizeof(vmrun)); 730 731 error = vcpu_ioctl(vcpu, VM_RUN, &vmrun); 732 bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); 733 return (error); 734 } 735 736 int 737 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) 738 { 739 struct vm_suspend vmsuspend; 740 741 bzero(&vmsuspend, sizeof(vmsuspend)); 742 vmsuspend.how = how; 743 return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend)); 744 } 745 746 int 747 vm_reinit(struct vmctx *ctx) 748 { 749 750 return (ioctl(ctx->fd, VM_REINIT, 0)); 751 } 752 753 int 754 vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid, 755 uint32_t errcode, int restart_instruction) 756 { 757 struct vm_exception exc; 758 759 exc.vector = vector; 760 exc.error_code = errcode; 761 exc.error_code_valid = errcode_valid; 762 

int
vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
{
	struct vm_suspend vmsuspend;

	bzero(&vmsuspend, sizeof(vmsuspend));
	vmsuspend.how = how;
	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
}

int
vm_reinit(struct vmctx *ctx)
{

	return (ioctl(ctx->fd, VM_REINIT, 0));
}

int
vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vm_exception exc;

	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = restart_instruction;

	return (vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &exc));
}

int
vm_apicid2vcpu(struct vmctx *ctx __unused, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}

int
vm_lapic_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_IRQ, &vmirq));
}

int
vm_lapic_local_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_LOCAL_IRQ, &vmirq));
}

int
vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
{
	struct vm_lapic_msi vmmsi;

	bzero(&vmmsi, sizeof(vmmsi));
	vmmsi.addr = addr;
	vmmsi.msg = msg;

	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
}

int
vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
}

int
vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
{

	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
}

int
vm_readwrite_kernemu_device(struct vcpu *vcpu, vm_paddr_t gpa,
    bool write, int size, uint64_t *value)
{
	struct vm_readwrite_kernemu_device irp = {
		.access_width = fls(size) - 1,
		.gpa = gpa,
		.value = write ? *value : ~0ul,
	};
	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
	int rc;

	rc = vcpu_ioctl(vcpu, cmd, &irp);
	if (rc == 0 && !write)
		*value = irp.value;
	return (rc);
}

int
vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
}

int
vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
}

int
vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
}

int
vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
    enum vm_intr_trigger trigger)
{
	struct vm_isa_irq_trigger isa_irq_trigger;

	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
	isa_irq_trigger.atpic_irq = atpic_irq;
	isa_irq_trigger.trigger = trigger;

	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
}

int
vm_inject_nmi(struct vcpu *vcpu)
{
	struct vm_nmi vmnmi;

	bzero(&vmnmi, sizeof(vmnmi));

	return (vcpu_ioctl(vcpu, VM_INJECT_NMI, &vmnmi));
}

static const char *capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
};

int
vm_capability_name2type(const char *capname)
{
	int i;

	for (i = 0; i < (int)nitems(capstrmap); i++) {
		if (strcmp(capstrmap[i], capname) == 0)
			return (i);
	}

	return (-1);
}

const char *
vm_capability_type2name(int type)
{
	if (type >= 0 && type < (int)nitems(capstrmap))
		return (capstrmap[type]);

	return (NULL);
}

int
vm_get_capability(struct vcpu *vcpu, enum vm_cap_type cap, int *retval)
{
	int error;
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.captype = cap;

	error = vcpu_ioctl(vcpu, VM_GET_CAPABILITY, &vmcap);
	*retval = vmcap.capval;
	return (error);
}

int
vm_set_capability(struct vcpu *vcpu, enum vm_cap_type cap, int val)
{
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.captype = cap;
	vmcap.capval = val;

	return (vcpu_ioctl(vcpu, VM_SET_CAPABILITY, &vmcap));
}

int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;

	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;

	return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int bus, int slot, int func,
    uint64_t addr, uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	bzero(&pptmsi, sizeof(pptmsi));
	pptmsi.bus = bus;
	pptmsi.slot = slot;
	pptmsi.func = func;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;

	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func,
    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	bzero(&pptmsix, sizeof(pptmsix));
	pptmsix.bus = bus;
	pptmsix.slot = slot;
	pptmsix.func = func;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;

	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev ppt;

	bzero(&ppt, sizeof(ppt));
	ppt.bus = bus;
	ppt.slot = slot;
	ppt.func = func;

	return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt);
}

uint64_t *
vm_get_stats(struct vcpu *vcpu, struct timeval *ret_tv,
    int *ret_entries)
{
	static _Thread_local uint64_t *stats_buf;
	static _Thread_local u_int stats_count;
	uint64_t *new_stats;
	struct vm_stats vmstats;
	u_int count, index;
	bool have_stats;

	have_stats = false;
	count = 0;
	for (index = 0;; index += nitems(vmstats.statbuf)) {
		vmstats.index = index;
		if (vcpu_ioctl(vcpu, VM_STATS, &vmstats) != 0)
			break;
		if (stats_count < index + vmstats.num_entries) {
			new_stats = realloc(stats_buf,
			    (index + vmstats.num_entries) * sizeof(uint64_t));
			if (new_stats == NULL) {
				errno = ENOMEM;
				return (NULL);
			}
			stats_count = index + vmstats.num_entries;
			stats_buf = new_stats;
		}
		memcpy(stats_buf + index, vmstats.statbuf,
		    vmstats.num_entries * sizeof(uint64_t));
		count += vmstats.num_entries;
		have_stats = true;

		if (vmstats.num_entries != nitems(vmstats.statbuf))
			break;
	}
	if (have_stats) {
		if (ret_entries)
			*ret_entries = count;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (stats_buf);
	} else
		return (NULL);
}
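
/*
 * Sketch of dumping every per-vcpu statistic (illustrative comment only;
 * 'ctx' and 'vcpu' are assumed to be already opened).  The returned buffer
 * is thread-local and owned by the library, so the caller must not free it.
 *
 *	struct timeval tv;
 *	uint64_t *stats;
 *	int i, num;
 *
 *	stats = vm_get_stats(vcpu, &tv, &num);
 *	if (stats != NULL) {
 *		for (i = 0; i < num; i++)
 *			printf("%s: %lu\n",
 *			    vm_get_stat_desc(ctx, i), stats[i]);
 *	}
 */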

const char *
vm_get_stat_desc(struct vmctx *ctx, int index)
{
	static struct vm_stat_desc statdesc;

	statdesc.index = index;
	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
		return (statdesc.desc);
	else
		return (NULL);
}

int
vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));

	error = vcpu_ioctl(vcpu, VM_GET_X2APIC_STATE, &x2apic);
	*state = x2apic.state;
	return (error);
}

int
vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.state = state;

	error = vcpu_ioctl(vcpu, VM_SET_X2APIC_STATE, &x2apic);

	return (error);
}

/*
 * From Intel Vol 3a:
 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
int
vcpu_reset(struct vcpu *vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	rflags = 0x2;
	error = vm_set_register(vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	/*
	 * According to Intel's Software Developer's Manual, CR0 should be
	 * initialized with CR0_ET | CR0_NW | CR0_CD but that crashes some
	 * guests like Windows.
	 */
	cr0 = CR0_NE;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR2, zero)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_EFER, zero)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R8, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R9, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R10, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R11, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R12, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R13, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R14, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R15, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	error = vm_set_desc(vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR6,
	    0xffff0ff0)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR7, 0x400)) !=
	    0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW,
	    zero)) != 0)
		goto done;

	error = 0;
done:
	return (error);
}

int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}

int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}

int
vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

#ifndef min
#define	min(a,b)	(((a) < (b)) ? (a) : (b))
#endif

int
vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa, off;
	int error, i, n;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		off = gpa & PAGE_MASK;
		n = MIN(len, PAGE_SIZE - off);

		va = vm_map_gpa(vcpu->ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}
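
/*
 * Sketch of reading guest memory through a guest linear address
 * (illustrative comment only; 'paging' must describe the vcpu's current
 * paging mode, e.g. as captured from a VM exit, and 'gla' is a placeholder).
 *
 *	struct iovec iov[2];
 *	uint64_t buf[1];
 *	int error, fault;
 *
 *	error = vm_copy_setup(vcpu, &paging, gla, sizeof(buf), PROT_READ,
 *	    iov, nitems(iov), &fault);
 *	if (error == 0 && !fault)
 *		vm_copyin(iov, buf, sizeof(buf));
 */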
1491 */ 1492 } 1493 1494 void 1495 vm_copyin(struct iovec *iov, void *vp, size_t len) 1496 { 1497 const char *src; 1498 char *dst; 1499 size_t n; 1500 1501 dst = vp; 1502 while (len) { 1503 assert(iov->iov_len); 1504 n = min(len, iov->iov_len); 1505 src = iov->iov_base; 1506 bcopy(src, dst, n); 1507 1508 iov++; 1509 dst += n; 1510 len -= n; 1511 } 1512 } 1513 1514 void 1515 vm_copyout(const void *vp, struct iovec *iov, size_t len) 1516 { 1517 const char *src; 1518 char *dst; 1519 size_t n; 1520 1521 src = vp; 1522 while (len) { 1523 assert(iov->iov_len); 1524 n = min(len, iov->iov_len); 1525 dst = iov->iov_base; 1526 bcopy(src, dst, n); 1527 1528 iov++; 1529 src += n; 1530 len -= n; 1531 } 1532 } 1533 1534 static int 1535 vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus) 1536 { 1537 struct vm_cpuset vm_cpuset; 1538 int error; 1539 1540 bzero(&vm_cpuset, sizeof(struct vm_cpuset)); 1541 vm_cpuset.which = which; 1542 vm_cpuset.cpusetsize = sizeof(cpuset_t); 1543 vm_cpuset.cpus = cpus; 1544 1545 error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset); 1546 return (error); 1547 } 1548 1549 int 1550 vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus) 1551 { 1552 1553 return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus)); 1554 } 1555 1556 int 1557 vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus) 1558 { 1559 1560 return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus)); 1561 } 1562 1563 int 1564 vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus) 1565 { 1566 1567 return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus)); 1568 } 1569 1570 int 1571 vm_activate_cpu(struct vcpu *vcpu) 1572 { 1573 struct vm_activate_cpu ac; 1574 int error; 1575 1576 bzero(&ac, sizeof(struct vm_activate_cpu)); 1577 error = vcpu_ioctl(vcpu, VM_ACTIVATE_CPU, &ac); 1578 return (error); 1579 } 1580 1581 int 1582 vm_suspend_all_cpus(struct vmctx *ctx) 1583 { 1584 struct vm_activate_cpu ac; 1585 int error; 1586 1587 bzero(&ac, sizeof(struct vm_activate_cpu)); 1588 ac.vcpuid = -1; 1589 error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac); 1590 return (error); 1591 } 1592 1593 int 1594 vm_suspend_cpu(struct vcpu *vcpu) 1595 { 1596 struct vm_activate_cpu ac; 1597 int error; 1598 1599 bzero(&ac, sizeof(struct vm_activate_cpu)); 1600 error = vcpu_ioctl(vcpu, VM_SUSPEND_CPU, &ac); 1601 return (error); 1602 } 1603 1604 int 1605 vm_resume_cpu(struct vcpu *vcpu) 1606 { 1607 struct vm_activate_cpu ac; 1608 int error; 1609 1610 bzero(&ac, sizeof(struct vm_activate_cpu)); 1611 error = vcpu_ioctl(vcpu, VM_RESUME_CPU, &ac); 1612 return (error); 1613 } 1614 1615 int 1616 vm_resume_all_cpus(struct vmctx *ctx) 1617 { 1618 struct vm_activate_cpu ac; 1619 int error; 1620 1621 bzero(&ac, sizeof(struct vm_activate_cpu)); 1622 ac.vcpuid = -1; 1623 error = ioctl(ctx->fd, VM_RESUME_CPU, &ac); 1624 return (error); 1625 } 1626 1627 int 1628 vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2) 1629 { 1630 struct vm_intinfo vmii; 1631 int error; 1632 1633 bzero(&vmii, sizeof(struct vm_intinfo)); 1634 error = vcpu_ioctl(vcpu, VM_GET_INTINFO, &vmii); 1635 if (error == 0) { 1636 *info1 = vmii.info1; 1637 *info2 = vmii.info2; 1638 } 1639 return (error); 1640 } 1641 1642 int 1643 vm_set_intinfo(struct vcpu *vcpu, uint64_t info1) 1644 { 1645 struct vm_intinfo vmii; 1646 int error; 1647 1648 bzero(&vmii, sizeof(struct vm_intinfo)); 1649 vmii.info1 = info1; 1650 error = vcpu_ioctl(vcpu, VM_SET_INTINFO, &vmii); 1651 return (error); 1652 } 1653 1654 int 1655 vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value) 1656 { 1657 struct vm_rtc_data rtcdata; 1658 int error; 1659 1660 

int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	rtcdata.value = value;
	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
	return (error);
}

int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
	if (error == 0)
		*retval = rtcdata.value;
	return (error);
}

int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	rtctime.secs = secs;
	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
	return (error);
}

int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
	if (error == 0)
		*secs = rtctime.secs;
	return (error);
}

int
vm_restart_instruction(struct vcpu *vcpu)
{
	int arg;

	return (vcpu_ioctl(vcpu, VM_RESTART_INSTRUCTION, &arg));
}

int
vm_snapshot_req(struct vm_snapshot_meta *meta)
{

	if (ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, meta) == -1) {
#ifdef SNAPSHOT_DEBUG
		fprintf(stderr, "%s: snapshot failed for %s: %d\r\n",
		    __func__, meta->dev_name, errno);
#endif
		return (-1);
	}
	return (0);
}

int
vm_restore_time(struct vmctx *ctx)
{
	int dummy;

	dummy = 0;
	return (ioctl(ctx->fd, VM_RESTORE_TIME, &dummy));
}

int
vm_set_topology(struct vmctx *ctx,
    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
	struct vm_cpu_topology topology;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	topology.sockets = sockets;
	topology.cores = cores;
	topology.threads = threads;
	topology.maxcpus = maxcpus;
	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}

int
vm_get_topology(struct vmctx *ctx,
    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
	struct vm_cpu_topology topology;
	int error;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
	if (error == 0) {
		*sockets = topology.sockets;
		*cores = topology.cores;
		*threads = topology.threads;
		*maxcpus = topology.maxcpus;
	}
	return (error);
}

/* Keep in sync with machine/vmm_dev.h. */
static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
    VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
    VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
    VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
    VM_GLA2GPA_NOFAULT,
    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
    VM_SET_INTINFO, VM_GET_INTINFO,
    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY,
    VM_SNAPSHOT_REQ, VM_RESTORE_TIME
};

int
vm_limit_rights(struct vmctx *ctx)
{
	cap_rights_t rights;
	size_t ncmds;

	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
	if (caph_rights_limit(ctx->fd, &rights) != 0)
		return (-1);
	ncmds = nitems(vm_ioctl_cmds);
	if (caph_ioctls_limit(ctx->fd, vm_ioctl_cmds, ncmds) != 0)
		return (-1);
	return (0);
}

/*
 * Avoid using in new code.  Operations on the fd should be wrapped here so
 * that capability rights can be kept in sync.
 */
int
vm_get_device_fd(struct vmctx *ctx)
{

	return (ctx->fd);
}

/* Legacy interface, do not use. */
const cap_ioctl_t *
vm_get_ioctls(size_t *len)
{
	cap_ioctl_t *cmds;

	if (len == NULL) {
		cmds = malloc(sizeof(vm_ioctl_cmds));
		if (cmds == NULL)
			return (NULL);
		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
		return (cmds);
	}

	*len = nitems(vm_ioctl_cmds);
	return (NULL);
}
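
/*
 * Sketch of sandboxing a process that drives a VM (illustrative comment
 * only; assumes capsicum_helpers(3)).  Rights on the device fd are narrowed
 * with vm_limit_rights() before entering capability mode.
 *
 *	if (vm_limit_rights(ctx) != 0)
 *		err(1, "vm_limit_rights");
 *	if (caph_enter() != 0)
 *		err(1, "cap_enter");
 */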