/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/linker.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/_iovec.h>
#include <sys/cpuset.h>

#include <capsicum_helpers.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

#include <libutil.h>

#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_snapshot.h>

#include "vmmapi.h"
#include "internal.h"

#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)

/*
 * Size of the guard region before and after the virtual address space
 * mapping the guest physical memory. This must be a multiple of the
 * superpage size for performance reasons.
 */
#define	VM_MMAP_GUARD_SIZE	(4 * MB)

#define	PROT_RW		(PROT_READ | PROT_WRITE)
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)

struct vmctx {
	int	fd;
	uint32_t lowmem_limit;
	int	memflags;
	size_t	lowmem;
	size_t	highmem;
	char	*baseaddr;
	char	*name;
};

#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))

static int
vm_device_open(const char *name)
{
	int fd, len;
	char *vmfile;

	len = strlen("/dev/vmm/") + strlen(name) + 1;
	vmfile = malloc(len);
	assert(vmfile != NULL);
	snprintf(vmfile, len, "/dev/vmm/%s", name);

	/* Open the device file */
	fd = open(vmfile, O_RDWR, 0);

	free(vmfile);
	return (fd);
}
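
/*
 * Illustrative sketch of typical library usage (not part of this file's API;
 * error handling and exit-reason processing are omitted for brevity, and the
 * guest name and memory size are placeholders):
 *
 *	struct vmctx *ctx;
 *	struct vcpu *vcpu;
 *	struct vm_run vmrun;
 *
 *	if (vm_create("testvm") != 0 && errno != EEXIST)
 *		err(1, "vm_create");
 *	ctx = vm_open("testvm");
 *	vm_setup_memory(ctx, 1024 * MB, VM_MMAP_ALL);
 *	vcpu = vm_vcpu_open(ctx, 0);
 *	vcpu_reset(vcpu);
 *	...
 *	vm_run(vcpu, &vmrun);	// examine the exit reason reported via 'vmrun'
 *	...
 *	vm_vcpu_close(vcpu);
 *	vm_destroy(ctx);
 */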

int
vm_create(const char *name)
{
	/* Try to load vmm(4) module before creating a guest. */
	if (modfind("vmm") < 0)
		kldload("vmm");
	return (CREATE(name));
}

struct vmctx *
vm_open(const char *name)
{
	struct vmctx *vm;
	int saved_errno;

	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
	assert(vm != NULL);

	vm->fd = -1;
	vm->memflags = 0;
	vm->lowmem_limit = 3 * GB;
	vm->name = (char *)(vm + 1);
	strcpy(vm->name, name);

	if ((vm->fd = vm_device_open(vm->name)) < 0)
		goto err;

	return (vm);
err:
	saved_errno = errno;
	free(vm);
	errno = saved_errno;
	return (NULL);
}

void
vm_close(struct vmctx *vm)
{
	assert(vm != NULL);

	close(vm->fd);
	free(vm);
}

void
vm_destroy(struct vmctx *vm)
{
	assert(vm != NULL);

	if (vm->fd >= 0)
		close(vm->fd);
	DESTROY(vm->name);

	free(vm);
}

struct vcpu *
vm_vcpu_open(struct vmctx *ctx, int vcpuid)
{
	struct vcpu *vcpu;

	vcpu = malloc(sizeof(*vcpu));
	vcpu->ctx = ctx;
	vcpu->vcpuid = vcpuid;
	return (vcpu);
}

void
vm_vcpu_close(struct vcpu *vcpu)
{
	free(vcpu);
}

int
vcpu_id(struct vcpu *vcpu)
{
	return (vcpu->vcpuid);
}

int
vm_parse_memsize(const char *opt, size_t *ret_memsize)
{
	char *endptr;
	size_t optval;
	int error;

	optval = strtoul(opt, &endptr, 0);
	if (*opt != '\0' && *endptr == '\0') {
		/*
		 * For the sake of backward compatibility if the memory size
		 * specified on the command line is less than a megabyte then
		 * it is interpreted as being in units of MB.
		 */
		if (optval < MB)
			optval *= MB;
		*ret_memsize = optval;
		error = 0;
	} else
		error = expand_number(opt, ret_memsize);

	return (error);
}

uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{

	return (ctx->lowmem_limit);
}

void
vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
{

	ctx->lowmem_limit = limit;
}

void
vm_set_memflags(struct vmctx *ctx, int flags)
{

	ctx->memflags = flags;
}

int
vm_get_memflags(struct vmctx *ctx)
{

	return (ctx->memflags);
}
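
/*
 * Illustrative sketch of how a caller might walk the guest's memory map with
 * vm_mmap_getnext() (defined below).  The kernel reports the first mapping at
 * or above the requested guest physical address, so advancing by the returned
 * length visits each mapping in turn.  Shown only as an example.
 *
 *	vm_paddr_t gpa = 0;
 *	vm_ooffset_t segoff;
 *	size_t len;
 *	int segid, prot, flags;
 *
 *	while (vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &len,
 *	    &prot, &flags) == 0) {
 *		// [gpa, gpa + len) is backed by segment 'segid'
 *		gpa += len;
 *	}
 */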

/*
 * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
 */
int
vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
    size_t len, int prot)
{
	struct vm_memmap memmap;
	int error, flags;

	memmap.gpa = gpa;
	memmap.segid = segid;
	memmap.segoff = off;
	memmap.len = len;
	memmap.prot = prot;
	memmap.flags = 0;

	if (ctx->memflags & VM_MEM_F_WIRED)
		memmap.flags |= VM_MEMMAP_F_WIRED;

	/*
	 * If this mapping already exists then don't create it again. This
	 * is the common case for SYSMEM mappings created by bhyveload(8).
	 */
	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
	if (error == 0 && gpa == memmap.gpa) {
		if (segid != memmap.segid || off != memmap.segoff ||
		    prot != memmap.prot || flags != memmap.flags) {
			errno = EEXIST;
			return (-1);
		} else {
			return (0);
		}
	}

	error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
	return (error);
}

int
vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
    size_t *lowmem_size, size_t *highmem_size)
{

	*guest_baseaddr = ctx->baseaddr;
	*lowmem_size = ctx->lowmem;
	*highmem_size = ctx->highmem;
	return (0);
}

int
vm_munmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, size_t len)
{
	struct vm_munmap munmap;
	int error;

	munmap.gpa = gpa;
	munmap.len = len;

	error = ioctl(ctx->fd, VM_MUNMAP_MEMSEG, &munmap);
	return (error);
}

int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
	struct vm_memmap memmap;
	int error;

	bzero(&memmap, sizeof(struct vm_memmap));
	memmap.gpa = *gpa;
	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
	if (error == 0) {
		*gpa = memmap.gpa;
		*segid = memmap.segid;
		*segoff = memmap.segoff;
		*len = memmap.len;
		*prot = memmap.prot;
		*flags = memmap.flags;
	}
	return (error);
}

/*
 * Return 0 if the segments are identical and non-zero otherwise.
 *
 * This is slightly complicated by the fact that only device memory segments
 * are named.
 */
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{

	if (len == len2) {
		if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
			return (0);
	}
	return (-1);
}

static int
vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	/*
	 * If the memory segment has already been created then just return.
	 * This is the usual case for the SYSMEM segment created by userspace
	 * loaders like bhyveload(8).
	 */
	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
	    sizeof(memseg.name));
	if (error)
		return (error);

	if (memseg.len != 0) {
		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
			errno = EINVAL;
			return (-1);
		} else {
			return (0);
		}
	}

	bzero(&memseg, sizeof(struct vm_memseg));
	memseg.segid = segid;
	memseg.len = len;
	if (name != NULL) {
		n = strlcpy(memseg.name, name, sizeof(memseg.name));
		if (n >= sizeof(memseg.name)) {
			errno = ENAMETOOLONG;
			return (-1);
		}
	}

	error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
	return (error);
}

int
vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
    size_t bufsize)
{
	struct vm_memseg memseg;
	size_t n;
	int error;

	memseg.segid = segid;
	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
	if (error == 0) {
		*lenp = memseg.len;
		n = strlcpy(namebuf, memseg.name, bufsize);
		if (n >= bufsize) {
			errno = ENAMETOOLONG;
			error = -1;
		}
	}
	return (error);
}

static int
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
{
	char *ptr;
	int error, flags;

	/* Map 'len' bytes starting at 'gpa' in the guest address space */
	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
	if (error)
		return (error);

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap into the process address space on the host */
	ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
	if (ptr == MAP_FAILED)
		return (-1);

	return (0);
}

int
vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
{
	size_t objsize, len;
	vm_paddr_t gpa;
	char *baseaddr, *ptr;
	int error;

	assert(vms == VM_MMAP_ALL);

	/*
	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
	 * create another 'highmem' segment above 4GB for the remainder.
	 */
	if (memsize > ctx->lowmem_limit) {
		ctx->lowmem = ctx->lowmem_limit;
		ctx->highmem = memsize - ctx->lowmem_limit;
		objsize = 4*GB + ctx->highmem;
	} else {
		ctx->lowmem = memsize;
		ctx->highmem = 0;
		objsize = ctx->lowmem;
	}

	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
	if (error)
		return (error);

	/*
	 * Stake out a contiguous region covering the guest physical memory
	 * and the adjoining guard regions.
	 */
	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
	ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
	if (ptr == MAP_FAILED)
		return (-1);

	baseaddr = ptr + VM_MMAP_GUARD_SIZE;
	if (ctx->highmem > 0) {
		gpa = 4*GB;
		len = ctx->highmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	if (ctx->lowmem > 0) {
		gpa = 0;
		len = ctx->lowmem;
		error = setup_memory_segment(ctx, gpa, len, baseaddr);
		if (error)
			return (error);
	}

	ctx->baseaddr = baseaddr;

	return (0);
}
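
/*
 * Resulting host virtual address layout after vm_setup_memory(), a
 * descriptive sketch derived from the code above (the highmem region exists
 * only when 'memsize' exceeds the lowmem limit):
 *
 *	[ guard (4MB, PROT_NONE)                                 ]
 *	[ lowmem:  GPA 0 .. lowmem          -> baseaddr + GPA    ]
 *	[ hole:    lowmem .. 4GB (PROT_NONE, guest MMIO space)   ]
 *	[ highmem: GPA 4GB .. 4GB + highmem -> baseaddr + GPA    ]
 *	[ guard (4MB, PROT_NONE)                                 ]
 *
 * A guest physical address translates to a host virtual address simply by
 * adding it to 'baseaddr', which is what vm_map_gpa() below relies on.
 */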

/*
 * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
 * the lowmem or highmem regions.
 *
 * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
 * The instruction emulation code depends on this behavior.
 */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{

	if (ctx->lowmem > 0) {
		if (gaddr < ctx->lowmem && len <= ctx->lowmem &&
		    gaddr + len <= ctx->lowmem)
			return (ctx->baseaddr + gaddr);
	}

	if (ctx->highmem > 0) {
		if (gaddr >= 4*GB) {
			if (gaddr < 4*GB + ctx->highmem &&
			    len <= ctx->highmem &&
			    gaddr + len <= 4*GB + ctx->highmem)
				return (ctx->baseaddr + gaddr);
		}
	}

	return (NULL);
}

vm_paddr_t
vm_rev_map_gpa(struct vmctx *ctx, void *addr)
{
	vm_paddr_t offaddr;

	offaddr = (char *)addr - ctx->baseaddr;

	if (ctx->lowmem > 0)
		if (offaddr <= ctx->lowmem)
			return (offaddr);

	if (ctx->highmem > 0)
		if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem)
			return (offaddr);

	return ((vm_paddr_t)-1);
}

const char *
vm_get_name(struct vmctx *ctx)
{

	return (ctx->name);
}

size_t
vm_get_lowmem_size(struct vmctx *ctx)
{

	return (ctx->lowmem);
}

size_t
vm_get_highmem_size(struct vmctx *ctx)
{

	return (ctx->highmem);
}

void *
vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
{
	char pathname[MAXPATHLEN];
	size_t len2;
	char *base, *ptr;
	int fd, error, flags;

	fd = -1;
	ptr = MAP_FAILED;
	if (name == NULL || strlen(name) == 0) {
		errno = EINVAL;
		goto done;
	}

	error = vm_alloc_memseg(ctx, segid, len, name);
	if (error)
		goto done;

	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
	strlcat(pathname, ctx->name, sizeof(pathname));
	strlcat(pathname, ".", sizeof(pathname));
	strlcat(pathname, name, sizeof(pathname));

	fd = open(pathname, O_RDWR);
	if (fd < 0)
		goto done;

	/*
	 * Stake out a contiguous region covering the device memory and the
	 * adjoining guard regions.
	 */
	len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
	base = mmap(NULL, len2, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1,
	    0);
	if (base == MAP_FAILED)
		goto done;

	flags = MAP_SHARED | MAP_FIXED;
	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
		flags |= MAP_NOCORE;

	/* mmap the devmem region in the host address space */
	ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
done:
	if (fd >= 0)
		close(fd);
	return (ptr);
}
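
/*
 * Illustrative sketch of how a device model might use vm_create_devmem():
 * the segment is created and mapped into the host process, and then made
 * visible to the guest at a chosen guest physical address with
 * vm_mmap_memseg().  The segment id, size and guest address below are
 * examples only.
 *
 *	void *fbmem;
 *
 *	fbmem = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer", 16 * MB);
 *	if (fbmem == MAP_FAILED)
 *		err(1, "vm_create_devmem");
 *	if (vm_mmap_memseg(ctx, 0xC0000000, VM_FRAMEBUFFER, 0, 16 * MB,
 *	    PROT_READ | PROT_WRITE) != 0)
 *		err(1, "vm_mmap_memseg");
 */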

static int
vcpu_ioctl(struct vcpu *vcpu, u_long cmd, void *arg)
{
	/*
	 * XXX: fragile, handle with care
	 * Assumes that the first field of the ioctl data
	 * is the vcpuid.
	 */
	*(int *)arg = vcpu->vcpuid;
	return (ioctl(vcpu->ctx->fd, cmd, arg));
}

int
vm_set_desc(struct vcpu *vcpu, int reg,
    uint64_t base, uint32_t limit, uint32_t access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.regnum = reg;
	vmsegdesc.desc.base = base;
	vmsegdesc.desc.limit = limit;
	vmsegdesc.desc.access = access;

	error = vcpu_ioctl(vcpu, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	return (error);
}

int
vm_get_desc(struct vcpu *vcpu, int reg, uint64_t *base, uint32_t *limit,
    uint32_t *access)
{
	int error;
	struct vm_seg_desc vmsegdesc;

	bzero(&vmsegdesc, sizeof(vmsegdesc));
	vmsegdesc.regnum = reg;

	error = vcpu_ioctl(vcpu, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
	if (error == 0) {
		*base = vmsegdesc.desc.base;
		*limit = vmsegdesc.desc.limit;
		*access = vmsegdesc.desc.access;
	}
	return (error);
}

int
vm_get_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *seg_desc)
{
	int error;

	error = vm_get_desc(vcpu, reg, &seg_desc->base, &seg_desc->limit,
	    &seg_desc->access);
	return (error);
}

int
vm_set_register(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.regnum = reg;
	vmreg.regval = val;

	error = vcpu_ioctl(vcpu, VM_SET_REGISTER, &vmreg);
	return (error);
}

int
vm_get_register(struct vcpu *vcpu, int reg, uint64_t *ret_val)
{
	int error;
	struct vm_register vmreg;

	bzero(&vmreg, sizeof(vmreg));
	vmreg.regnum = reg;

	error = vcpu_ioctl(vcpu, VM_GET_REGISTER, &vmreg);
	*ret_val = vmreg.regval;
	return (error);
}

int
vm_set_register_set(struct vcpu *vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = vcpu_ioctl(vcpu, VM_SET_REGISTER_SET, &vmregset);
	return (error);
}

int
vm_get_register_set(struct vcpu *vcpu, unsigned int count,
    const int *regnums, uint64_t *regvals)
{
	int error;
	struct vm_register_set vmregset;

	bzero(&vmregset, sizeof(vmregset));
	vmregset.count = count;
	vmregset.regnums = regnums;
	vmregset.regvals = regvals;

	error = vcpu_ioctl(vcpu, VM_GET_REGISTER_SET, &vmregset);
	return (error);
}

int
vm_run(struct vcpu *vcpu, struct vm_run *vmrun)
{
	return (vcpu_ioctl(vcpu, VM_RUN, vmrun));
}

int
vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
{
	struct vm_suspend vmsuspend;

	bzero(&vmsuspend, sizeof(vmsuspend));
	vmsuspend.how = how;
	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
}

int
vm_reinit(struct vmctx *ctx)
{

	return (ioctl(ctx->fd, VM_REINIT, 0));
}

int
vm_inject_exception(struct vcpu *vcpu, int vector, int errcode_valid,
    uint32_t errcode, int restart_instruction)
{
	struct vm_exception exc;

	exc.vector = vector;
	exc.error_code = errcode;
	exc.error_code_valid = errcode_valid;
	exc.restart_instruction = restart_instruction;

	return (vcpu_ioctl(vcpu, VM_INJECT_EXCEPTION, &exc));
}

int
vm_apicid2vcpu(struct vmctx *ctx __unused, int apicid)
{
	/*
	 * The apic id associated with the 'vcpu' has the same numerical value
	 * as the 'vcpu' itself.
	 */
	return (apicid);
}

int
vm_lapic_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_IRQ, &vmirq));
}

int
vm_lapic_local_irq(struct vcpu *vcpu, int vector)
{
	struct vm_lapic_irq vmirq;

	bzero(&vmirq, sizeof(vmirq));
	vmirq.vector = vector;

	return (vcpu_ioctl(vcpu, VM_LAPIC_LOCAL_IRQ, &vmirq));
}

int
vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
{
	struct vm_lapic_msi vmmsi;

	bzero(&vmmsi, sizeof(vmmsi));
	vmmsi.addr = addr;
	vmmsi.msg = msg;

	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
}

int
vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
}

int
vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
{
	struct vm_ioapic_irq ioapic_irq;

	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
	ioapic_irq.irq = irq;

	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
}

int
vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
{

	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
}
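
/*
 * Access a register of a device that is emulated inside vmm(4) itself, such
 * as the local APIC, I/O APIC or HPET.  'size' is the access width in bytes;
 * the fls(size) - 1 encoding below assumes it is a power of two between
 * 1 and 8.
 */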

int
vm_readwrite_kernemu_device(struct vcpu *vcpu, vm_paddr_t gpa,
    bool write, int size, uint64_t *value)
{
	struct vm_readwrite_kernemu_device irp = {
		.access_width = fls(size) - 1,
		.gpa = gpa,
		.value = write ? *value : ~0ul,
	};
	long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV);
	int rc;

	rc = vcpu_ioctl(vcpu, cmd, &irp);
	if (rc == 0 && !write)
		*value = irp.value;
	return (rc);
}

int
vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
}

int
vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
}

int
vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
{
	struct vm_isa_irq isa_irq;

	bzero(&isa_irq, sizeof(struct vm_isa_irq));
	isa_irq.atpic_irq = atpic_irq;
	isa_irq.ioapic_irq = ioapic_irq;

	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
}

int
vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
    enum vm_intr_trigger trigger)
{
	struct vm_isa_irq_trigger isa_irq_trigger;

	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
	isa_irq_trigger.atpic_irq = atpic_irq;
	isa_irq_trigger.trigger = trigger;

	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
}

int
vm_inject_nmi(struct vcpu *vcpu)
{
	struct vm_nmi vmnmi;

	bzero(&vmnmi, sizeof(vmnmi));

	return (vcpu_ioctl(vcpu, VM_INJECT_NMI, &vmnmi));
}

static const char *capstrmap[] = {
	[VM_CAP_HALT_EXIT]  = "hlt_exit",
	[VM_CAP_MTRAP_EXIT] = "mtrap_exit",
	[VM_CAP_PAUSE_EXIT] = "pause_exit",
	[VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest",
	[VM_CAP_ENABLE_INVPCID] = "enable_invpcid",
	[VM_CAP_BPT_EXIT] = "bpt_exit",
};

int
vm_capability_name2type(const char *capname)
{
	int i;

	for (i = 0; i < (int)nitems(capstrmap); i++) {
		if (strcmp(capstrmap[i], capname) == 0)
			return (i);
	}

	return (-1);
}

const char *
vm_capability_type2name(int type)
{
	if (type >= 0 && type < (int)nitems(capstrmap))
		return (capstrmap[type]);

	return (NULL);
}

int
vm_get_capability(struct vcpu *vcpu, enum vm_cap_type cap, int *retval)
{
	int error;
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.captype = cap;

	error = vcpu_ioctl(vcpu, VM_GET_CAPABILITY, &vmcap);
	*retval = vmcap.capval;
	return (error);
}

int
vm_set_capability(struct vcpu *vcpu, enum vm_cap_type cap, int val)
{
	struct vm_capability vmcap;

	bzero(&vmcap, sizeof(vmcap));
	vmcap.captype = cap;
	vmcap.capval = val;

	return (vcpu_ioctl(vcpu, VM_SET_CAPABILITY, &vmcap));
}
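
/*
 * Illustrative sketch of the usual PCI pass-through setup sequence using the
 * helpers below (the bus/slot/function numbers and addresses are
 * placeholders):
 *
 *	vm_assign_pptdev(ctx, 2, 0, 0);
 *	vm_map_pptdev_mmio(ctx, 2, 0, 0, bar_gpa, bar_len, bar_hpa);
 *	// later, per MSI-X table entry programmed by the guest:
 *	vm_setup_pptdev_msix(ctx, 2, 0, 0, idx, addr, msg, vector_control);
 *
 * The device is released again with vm_unassign_pptdev().
 */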

int
vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
}

int
vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev pptdev;

	bzero(&pptdev, sizeof(pptdev));
	pptdev.bus = bus;
	pptdev.slot = slot;
	pptdev.func = func;

	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
}

int
vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;
	pptmmio.hpa = hpa;

	return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_unmap_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
    vm_paddr_t gpa, size_t len)
{
	struct vm_pptdev_mmio pptmmio;

	bzero(&pptmmio, sizeof(pptmmio));
	pptmmio.bus = bus;
	pptmmio.slot = slot;
	pptmmio.func = func;
	pptmmio.gpa = gpa;
	pptmmio.len = len;

	return (ioctl(ctx->fd, VM_UNMAP_PPTDEV_MMIO, &pptmmio));
}

int
vm_setup_pptdev_msi(struct vmctx *ctx, int bus, int slot, int func,
    uint64_t addr, uint64_t msg, int numvec)
{
	struct vm_pptdev_msi pptmsi;

	bzero(&pptmsi, sizeof(pptmsi));
	pptmsi.bus = bus;
	pptmsi.slot = slot;
	pptmsi.func = func;
	pptmsi.msg = msg;
	pptmsi.addr = addr;
	pptmsi.numvec = numvec;

	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
}

int
vm_setup_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func,
    int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
{
	struct vm_pptdev_msix pptmsix;

	bzero(&pptmsix, sizeof(pptmsix));
	pptmsix.bus = bus;
	pptmsix.slot = slot;
	pptmsix.func = func;
	pptmsix.idx = idx;
	pptmsix.msg = msg;
	pptmsix.addr = addr;
	pptmsix.vector_control = vector_control;

	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
}

int
vm_disable_pptdev_msix(struct vmctx *ctx, int bus, int slot, int func)
{
	struct vm_pptdev ppt;

	bzero(&ppt, sizeof(ppt));
	ppt.bus = bus;
	ppt.slot = slot;
	ppt.func = func;

	return ioctl(ctx->fd, VM_PPTDEV_DISABLE_MSIX, &ppt);
}

uint64_t *
vm_get_stats(struct vcpu *vcpu, struct timeval *ret_tv,
    int *ret_entries)
{
	static _Thread_local uint64_t *stats_buf;
	static _Thread_local u_int stats_count;
	uint64_t *new_stats;
	struct vm_stats vmstats;
	u_int count, index;
	bool have_stats;

	have_stats = false;
	count = 0;
	for (index = 0;; index += nitems(vmstats.statbuf)) {
		vmstats.index = index;
		if (vcpu_ioctl(vcpu, VM_STATS, &vmstats) != 0)
			break;
		if (stats_count < index + vmstats.num_entries) {
			new_stats = realloc(stats_buf,
			    (index + vmstats.num_entries) * sizeof(uint64_t));
			if (new_stats == NULL) {
				errno = ENOMEM;
				return (NULL);
			}
			stats_count = index + vmstats.num_entries;
			stats_buf = new_stats;
		}
		memcpy(stats_buf + index, vmstats.statbuf,
		    vmstats.num_entries * sizeof(uint64_t));
		count += vmstats.num_entries;
		have_stats = true;

		if (vmstats.num_entries != nitems(vmstats.statbuf))
			break;
	}
	if (have_stats) {
		if (ret_entries)
			*ret_entries = count;
		if (ret_tv)
			*ret_tv = vmstats.tv;
		return (stats_buf);
	} else
		return (NULL);
}

const char *
vm_get_stat_desc(struct vmctx *ctx, int index)
{
	static struct vm_stat_desc statdesc;

	statdesc.index = index;
	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0)
		return (statdesc.desc);
	else
		return (NULL);
}
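
/*
 * Illustrative sketch of dumping all statistics for a vcpu by combining
 * vm_get_stats() with vm_get_stat_desc(), similar to what bhyvectl(8) does:
 *
 *	struct timeval tv;
 *	uint64_t *stats;
 *	int i, num_stats;
 *
 *	stats = vm_get_stats(vcpu, &tv, &num_stats);
 *	if (stats != NULL) {
 *		for (i = 0; i < num_stats; i++)
 *			printf("%s\t%ju\n", vm_get_stat_desc(ctx, i),
 *			    (uintmax_t)stats[i]);
 *	}
 */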

int
vm_get_x2apic_state(struct vcpu *vcpu, enum x2apic_state *state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));

	error = vcpu_ioctl(vcpu, VM_GET_X2APIC_STATE, &x2apic);
	*state = x2apic.state;
	return (error);
}

int
vm_set_x2apic_state(struct vcpu *vcpu, enum x2apic_state state)
{
	int error;
	struct vm_x2apic x2apic;

	bzero(&x2apic, sizeof(x2apic));
	x2apic.state = state;

	error = vcpu_ioctl(vcpu, VM_SET_X2APIC_STATE, &x2apic);

	return (error);
}

/*
 * From Intel Vol 3a:
 * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
 */
int
vcpu_reset(struct vcpu *vcpu)
{
	int error;
	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
	uint32_t desc_access, desc_limit;
	uint16_t sel;

	zero = 0;

	rflags = 0x2;
	error = vm_set_register(vcpu, VM_REG_GUEST_RFLAGS, rflags);
	if (error)
		goto done;

	rip = 0xfff0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RIP, rip)) != 0)
		goto done;

	/*
	 * According to Intel's Software Developer's Manual, CR0 should be
	 * initialized with CR0_ET | CR0_NW | CR0_CD but that crashes some
	 * guests like Windows.
	 */
	cr0 = CR0_NE;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR2, zero)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR3, zero)) != 0)
		goto done;

	cr4 = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
		goto done;

	/*
	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
	 */
	desc_base = 0xffff0000;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_CS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0xf000;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_CS, sel)) != 0)
		goto done;

	/*
	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
	 */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0093;
	error = vm_set_desc(vcpu, VM_REG_GUEST_SS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_DS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_ES,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_FS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_GS,
	    desc_base, desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_SS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_ES, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_FS, sel)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_GS, sel)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_EFER, zero)) != 0)
		goto done;

	/* General purpose registers */
	rdx = 0xf00;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RAX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RCX, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RDI, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RBP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_RSP, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R8, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R9, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R10, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R11, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R12, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R13, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R14, zero)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_R15, zero)) != 0)
		goto done;

	/* GDTR, IDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0;
	error = vm_set_desc(vcpu, VM_REG_GUEST_GDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	error = vm_set_desc(vcpu, VM_REG_GUEST_IDTR,
	    desc_base, desc_limit, desc_access);
	if (error != 0)
		goto done;

	/* TR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x0000008b;
	error = vm_set_desc(vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_TR, sel)) != 0)
		goto done;

	/* LDTR */
	desc_base = 0;
	desc_limit = 0xffff;
	desc_access = 0x00000082;
	error = vm_set_desc(vcpu, VM_REG_GUEST_LDTR, desc_base,
	    desc_limit, desc_access);
	if (error)
		goto done;

	sel = 0;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR6,
	    0xffff0ff0)) != 0)
		goto done;
	if ((error = vm_set_register(vcpu, VM_REG_GUEST_DR7, 0x400)) != 0)
		goto done;

	if ((error = vm_set_register(vcpu, VM_REG_GUEST_INTR_SHADOW,
	    zero)) != 0)
		goto done;

	error = 0;
done:
	return (error);
}

int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
	int error, i;
	struct vm_gpa_pte gpapte;

	bzero(&gpapte, sizeof(gpapte));
	gpapte.gpa = gpa;

	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

	if (error == 0) {
		*num = gpapte.ptenum;
		for (i = 0; i < gpapte.ptenum; i++)
			pte[i] = gpapte.pte[i];
	}

	return (error);
}

int
vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
{
	int error;
	struct vm_hpet_cap cap;

	bzero(&cap, sizeof(struct vm_hpet_cap));
	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
	if (capabilities != NULL)
		*capabilities = cap.capabilities;
	return (error);
}

int
vm_gla2gpa(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

int
vm_gla2gpa_nofault(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *fault)
{
	struct vm_gla2gpa gg;
	int error;

	bzero(&gg, sizeof(struct vm_gla2gpa));
	gg.prot = prot;
	gg.gla = gla;
	gg.paging = *paging;

	error = vcpu_ioctl(vcpu, VM_GLA2GPA_NOFAULT, &gg);
	if (error == 0) {
		*fault = gg.fault;
		*gpa = gg.gpa;
	}
	return (error);
}

#ifndef min
#define	min(a,b)	(((a) < (b)) ? (a) : (b))
#endif

int
vm_copy_setup(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
    int *fault)
{
	void *va;
	uint64_t gpa, off;
	int error, i, n;

	for (i = 0; i < iovcnt; i++) {
		iov[i].iov_base = 0;
		iov[i].iov_len = 0;
	}

	while (len) {
		assert(iovcnt > 0);
		error = vm_gla2gpa(vcpu, paging, gla, prot, &gpa, fault);
		if (error || *fault)
			return (error);

		off = gpa & PAGE_MASK;
		n = MIN(len, PAGE_SIZE - off);

		va = vm_map_gpa(vcpu->ctx, gpa, n);
		if (va == NULL)
			return (EFAULT);

		iov->iov_base = va;
		iov->iov_len = n;
		iov++;
		iovcnt--;

		gla += n;
		len -= n;
	}
	return (0);
}

void
vm_copy_teardown(struct iovec *iov __unused, int iovcnt __unused)
{
	/*
	 * Intentionally empty. This is used by the instruction
	 * emulation code shared with the kernel. The in-kernel
	 * version of this is non-empty.
	 */
}
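
/*
 * Illustrative sketch of reading guest memory through the iovec interface:
 * vm_copy_setup() translates a guest linear address into host iovecs and
 * vm_copyin() (below) then copies the bytes out.  'paging' and 'gla' are
 * assumed to describe the guest context; error and fault handling are
 * abbreviated.
 *
 *	struct iovec iov[2];
 *	uint64_t buf[1];
 *	int error, fault;
 *
 *	error = vm_copy_setup(vcpu, &paging, gla, sizeof(buf), PROT_READ,
 *	    iov, nitems(iov), &fault);
 *	if (error == 0 && !fault)
 *		vm_copyin(iov, buf, sizeof(buf));
 */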

void
vm_copyin(struct iovec *iov, void *vp, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	dst = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		src = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		dst += n;
		len -= n;
	}
}

void
vm_copyout(const void *vp, struct iovec *iov, size_t len)
{
	const char *src;
	char *dst;
	size_t n;

	src = vp;
	while (len) {
		assert(iov->iov_len);
		n = min(len, iov->iov_len);
		dst = iov->iov_base;
		bcopy(src, dst, n);

		iov++;
		src += n;
		len -= n;
	}
}

static int
vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
{
	struct vm_cpuset vm_cpuset;
	int error;

	bzero(&vm_cpuset, sizeof(struct vm_cpuset));
	vm_cpuset.which = which;
	vm_cpuset.cpusetsize = sizeof(cpuset_t);
	vm_cpuset.cpus = cpus;

	error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset);
	return (error);
}

int
vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
}

int
vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
}

int
vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
{

	return (vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus));
}

int
vm_activate_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	error = vcpu_ioctl(vcpu, VM_ACTIVATE_CPU, &ac);
	return (error);
}

int
vm_suspend_all_cpus(struct vmctx *ctx)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = -1;
	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_suspend_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	error = vcpu_ioctl(vcpu, VM_SUSPEND_CPU, &ac);
	return (error);
}

int
vm_resume_cpu(struct vcpu *vcpu)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	error = vcpu_ioctl(vcpu, VM_RESUME_CPU, &ac);
	return (error);
}

int
vm_resume_all_cpus(struct vmctx *ctx)
{
	struct vm_activate_cpu ac;
	int error;

	bzero(&ac, sizeof(struct vm_activate_cpu));
	ac.vcpuid = -1;
	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
	return (error);
}

int
vm_get_intinfo(struct vcpu *vcpu, uint64_t *info1, uint64_t *info2)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	error = vcpu_ioctl(vcpu, VM_GET_INTINFO, &vmii);
	if (error == 0) {
		*info1 = vmii.info1;
		*info2 = vmii.info2;
	}
	return (error);
}

int
vm_set_intinfo(struct vcpu *vcpu, uint64_t info1)
{
	struct vm_intinfo vmii;
	int error;

	bzero(&vmii, sizeof(struct vm_intinfo));
	vmii.info1 = info1;
	error = vcpu_ioctl(vcpu, VM_SET_INTINFO, &vmii);
	return (error);
}

int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	rtcdata.value = value;
	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
	return (error);
}

int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
	struct vm_rtc_data rtcdata;
	int error;

	bzero(&rtcdata, sizeof(struct vm_rtc_data));
	rtcdata.offset = offset;
	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
	if (error == 0)
		*retval = rtcdata.value;
	return (error);
}

int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	rtctime.secs = secs;
	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
	return (error);
}

int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
	struct vm_rtc_time rtctime;
	int error;

	bzero(&rtctime, sizeof(struct vm_rtc_time));
	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
	if (error == 0)
		*secs = rtctime.secs;
	return (error);
}

int
vm_restart_instruction(struct vcpu *vcpu)
{
	int arg;

	return (vcpu_ioctl(vcpu, VM_RESTART_INSTRUCTION, &arg));
}

int
vm_snapshot_req(struct vmctx *ctx, struct vm_snapshot_meta *meta)
{

	if (ioctl(ctx->fd, VM_SNAPSHOT_REQ, meta) == -1) {
#ifdef SNAPSHOT_DEBUG
		fprintf(stderr, "%s: snapshot failed for %s: %d\r\n",
		    __func__, meta->dev_name, errno);
#endif
		return (-1);
	}
	return (0);
}

int
vm_restore_time(struct vmctx *ctx)
{
	int dummy;

	dummy = 0;
	return (ioctl(ctx->fd, VM_RESTORE_TIME, &dummy));
}

int
vm_set_topology(struct vmctx *ctx,
    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
{
	struct vm_cpu_topology topology;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	topology.sockets = sockets;
	topology.cores = cores;
	topology.threads = threads;
	topology.maxcpus = maxcpus;
	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
}

int
vm_get_topology(struct vmctx *ctx,
    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
{
	struct vm_cpu_topology topology;
	int error;

	bzero(&topology, sizeof (struct vm_cpu_topology));
	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
	if (error == 0) {
		*sockets = topology.sockets;
		*cores = topology.cores;
		*threads = topology.threads;
		*maxcpus = topology.maxcpus;
	}
	return (error);
}
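
/*
 * Illustrative sketch of sandboxing a consumer of this library with
 * Capsicum: limit the VM descriptor to the whitelisted ioctls below and then
 * enter capability mode using the capsicum_helpers(3) wrappers.
 *
 *	if (vm_limit_rights(ctx) != 0)
 *		err(1, "vm_limit_rights");
 *	if (caph_enter() != 0)
 *		err(1, "cap_enter");
 */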

/* Keep in sync with machine/vmm_dev.h. */
static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
    VM_MMAP_GETNEXT, VM_MUNMAP_MEMSEG, VM_SET_REGISTER, VM_GET_REGISTER,
    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
    VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV,
    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
    VM_PPTDEV_MSIX, VM_UNMAP_PPTDEV_MMIO, VM_PPTDEV_DISABLE_MSIX,
    VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
    VM_GLA2GPA_NOFAULT,
    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
    VM_SET_INTINFO, VM_GET_INTINFO,
    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY,
    VM_SNAPSHOT_REQ, VM_RESTORE_TIME
};

int
vm_limit_rights(struct vmctx *ctx)
{
	cap_rights_t rights;
	size_t ncmds;

	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
	if (caph_rights_limit(ctx->fd, &rights) != 0)
		return (-1);
	ncmds = nitems(vm_ioctl_cmds);
	if (caph_ioctls_limit(ctx->fd, vm_ioctl_cmds, ncmds) != 0)
		return (-1);
	return (0);
}

/*
 * Avoid using in new code. Operations on the fd should be wrapped here so that
 * capability rights can be kept in sync.
 */
int
vm_get_device_fd(struct vmctx *ctx)
{

	return (ctx->fd);
}

/* Legacy interface, do not use. */
const cap_ioctl_t *
vm_get_ioctls(size_t *len)
{
	cap_ioctl_t *cmds;

	if (len == NULL) {
		cmds = malloc(sizeof(vm_ioctl_cmds));
		if (cmds == NULL)
			return (NULL);
		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
		return (cmds);
	}

	*len = nitems(vm_ioctl_cmds);
	return (NULL);
}