/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
#else	/* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <vmmapi.h>
#endif	/* _KERNEL */

enum cpu_mode {
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */

static const struct vie_op one_byte_opcodes[256] = {
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
};
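
/*
 * Decode example (illustrative, not part of the original source): the
 * two-byte sequence 89 03 is 'mov %eax,(%rbx)'.  Byte 0x89 selects the
 * VIE_OP_TYPE_MOV entry in the table above; the ModRM byte 0x03 then
 * decodes as mod=0 (indirect), reg=0 (%eax) and r/m=3 (%rbx).
 */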

/* struct vie.mod */
#define	VIE_MOD_INDIRECT	0
#define	VIE_MOD_INDIRECT_DISP8	1
#define	VIE_MOD_INDIRECT_DISP32	2
#define	VIE_MOD_DIRECT		3

/* struct vie.rm */
#define	VIE_RM_SIB		4
#define	VIE_RM_DISP32		5

#define	GB			(1024 * 1024 * 1024)

static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};

static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
	int error;

	error = vm_get_register(vm, vcpuid, reg, rval);

	return (error);
}

static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
	uint64_t val;
	int error, rshift;
	enum vm_reg_name reg;

	rshift = 0;
	reg = gpr_map[vie->reg];

	/*
	 * 64-bit mode imposes limitations on accessing legacy byte registers.
	 *
	 * The legacy high-byte registers cannot be addressed if the REX
	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
	 *
	 * If the REX prefix is not present then the values 4, 5, 6 and 7
	 * of the 'ModRM:reg' field address the legacy high-byte registers,
	 * %ah, %ch, %dh and %bh respectively.
	 */
	if (!vie->rex_present) {
		if (vie->reg & 0x4) {
			/*
			 * Obtain the value of %ah by reading %rax and shifting
			 * right by 8 bits (same for %bh, %ch and %dh).
			 */
			rshift = 8;
			reg = gpr_map[vie->reg & 0x3];
		}
	}

	error = vm_get_register(vm, vcpuid, reg, &val);
	*rval = val >> rshift;
	return (error);
}

static int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
		    uint64_t val, int size)
{
	int error;
	uint64_t origval;

	switch (size) {
	case 1:
	case 2:
		error = vie_read_register(vm, vcpuid, reg, &origval);
		if (error)
			return (error);
		val &= size2mask[size];
		val |= origval & ~size2mask[size];
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	error = vm_set_register(vm, vcpuid, reg, val);
	return (error);
}
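
/*
 * Example of the size handling above (behavior per the x86-64 architecture,
 * values chosen for illustration): with a register holding
 * 0xaaaabbbbccccdddd, a 2-byte write of 0x1234 yields 0xaaaabbbbcccc1234
 * (upper bits preserved), whereas a 4-byte write of 0x1234 yields
 * 0x0000000000001234, because 32-bit destinations are zero-extended to
 * 64 bits.
 */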

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r: mov r/m64, r64
		 */
		if (vie->rex_w)
			size = 8;
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8B/r:	mov r32, r/m32
		 * REX.W + 8B/r: mov r64, r/m64
		 */
		if (vie->rex_w)
			size = 8;
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xC7:
		/*
		 * MOV from imm32 to mem (ModRM:r/m)
		 * C7 /0:	mov r/m32, imm32
		 * REX.W + C7 /0: mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate;		/* already sign-extended */

		if (vie->rex_w)
			size = 8;

		if (size != 8)
			val &= size2mask[size];

		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}

static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val1, val2;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x23:
		/*
		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
		 * result in reg.
		 *
		 * 23/r:	and r32, r/m32
		 * REX.W + 23/r: and r64, r/m64
		 */
		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		val1 &= val2;
		error = vie_update_register(vm, vcpuid, reg, val1, size);
		break;
	case 0x81:
		/*
		 * AND mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 81 /4:	and r/m32, imm32
		 * REX.W + 81 /4: and r/m64, imm32 (sign-extended to 64)
		 *
		 * Currently, only the AND operation of the 0x81 opcode
		 * is implemented (ModRM:reg = b100).
		 */
		if ((vie->reg & 7) != 4)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 &= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}
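
/*
 * Note on the ModRM:reg checks in emulate_and() above and emulate_or()
 * below: opcodes 0x81 and 0x83 are "Group 1" instructions, where ModRM:reg
 * selects the ALU operation (/0 ADD, /1 OR, /2 ADC, /3 SBB, /4 AND,
 * /5 SUB, /6 XOR, /7 CMP).  That is why only reg value 4 (AND) is accepted
 * for 0x81 and only reg value 1 (OR) for 0x83.
 */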

static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	   mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t val1;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x83:
		/*
		 * OR mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 83 /1:	or r/m32, imm8 (sign-extended to 32)
		 * REX.W + 83 /1: or r/m64, imm8 (sign-extended to 64)
		 *
		 * Currently, only the OR operation of the 0x83 opcode
		 * is implemented (ModRM:reg = b001).
		 */
		if ((vie->reg & 7) != 1)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 |= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
			mem_region_read_t memread, mem_region_write_t memwrite,
			void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				   memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

#ifdef _KERNEL
static void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}

static int
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
	uint64_t *gpa, uint64_t *gpaend)
{
	vm_paddr_t hpa;
	int nlevels, ptpshift, ptpindex;
	uint64_t *ptpbase, pte, pgsize;

	/*
	 * XXX assumes 64-bit guest with 4 page walk levels
	 */
	nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
		if (hpa == -1)
			goto error;

		ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0)
			goto error;

		if (pte & PG_PS) {
			if (pgsize > 1 * GB)
				goto error;
			else
				break;
		}

		ptpphys = pte;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
	*gpaend = pte + pgsize;
	return (0);

error:
	return (-1);
}
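
/*
 * Worked example of the page-walk arithmetic in gla2gpa() above: on the
 * first iteration nlevels has been decremented to 3, so
 * ptpshift = 12 + 3 * 9 = 39 and bits 47:39 of 'gla' index the top-level
 * page table (pgsize = 512GB).  Each subsequent iteration peels off 9 bits
 * until ptpshift reaches 12 (4KB pages).  A PG_PS entry encountered at
 * ptpshift = 21 or 30 ends the walk early with a 2MB or 1GB superpage;
 * anything larger fails the 'pgsize > 1 * GB' check.
 */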

int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
		      uint64_t cr3, struct vie *vie)
{
	int n, err;
	uint64_t hpa, gpa, gpaend, off;

	/*
	 * XXX cache previously fetched instructions using 'rip' as the tag
	 */

	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	vie_init(vie);

	/* Copy the instruction into 'vie' */
	while (vie->num_valid < inst_length) {
		err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
		if (err)
			break;

		off = gpa & PAGE_MASK;
		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

		hpa = vm_gpa2hpa(vm, gpa, n);
		if (hpa == -1)
			break;

		bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);

		rip += n;
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
		return (0);
	else
		return (-1);
}

static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid) {
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}

static int
decode_rex(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	if (x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;

		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;

		vie_advance(vie);
	}

	return (0);
}

static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

static int
decode_modrm(struct vie *vie)
{
	uint8_t x;
	enum cpu_mode cpu_mode;

	/*
	 * XXX assuming that guest is in IA-32E 64-bit mode
	 */
	cpu_mode = CPU_MODE_64BIT;

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * these cases.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB addressing is decoded separately in decode_sib() */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */
			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM)
		vie->imm_bytes = 4;
	else if (vie->op.op_flags & VIE_OP_F_IMM8)
		vie->imm_bytes = 1;

done:
	vie_advance(vie);

	return (0);
}
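
/*
 * Putting the REX and ModRM decoding above together (illustrative example):
 * the byte sequence 48 89 08 is 'mov %rcx,(%rax)'.  REX byte 0x48 sets
 * rex_w (64-bit operand size); opcode 0x89 is MOV; ModRM byte 0x08 decodes
 * as mod=0, reg=1 (%rcx) and r/m=0 (%rax), so there is no SIB byte,
 * displacement or immediate left to consume.
 */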

static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT && (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}
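
/*
 * SIB example for decode_sib() above (illustrative): the byte sequence
 * 89 04 88 is 'mov %eax,(%rax,%rcx,4)'.  ModRM byte 0x04 (mod=0, r/m=4)
 * signals a SIB byte; SIB byte 0x88 decodes as ss=2 (scale = 1 << 2 = 4),
 * index=1 (%rcx) and base=0 (%rax).
 */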

static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->imm_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_immediate: invalid imm_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->immediate = u.signed8;		/* sign-extended */
	else
		vie->immediate = u.signed32;		/* sign-extended */

	return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
				error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
				error, vie->index_register);
			return (-1);
		}
	}

	if (base + vie->scale * idx + vie->displacement != gla) {
		printf("verify_gla mismatch: "
		       "base(0x%0lx), scale(%d), index(0x%0lx), "
		       "disp(0x%0lx), gla(0x%0lx)\n",
		       base, vie->scale, idx, vie->displacement, gla);
		return (-1);
	}

	return (0);
}

int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{

	if (decode_rex(vie))
		return (-1);

	if (decode_opcode(vie))
		return (-1);

	if (decode_modrm(vie))
		return (-1);

	if (decode_sib(vie))
		return (-1);

	if (decode_displacement(vie))
		return (-1);

	if (decode_immediate(vie))
		return (-1);

	if (verify_inst_length(vie))
		return (-1);

	if (verify_gla(vm, cpuid, gla, vie))
		return (-1);

	vie->decoded = 1;	/* success */

	return (0);
}
#endif	/* _KERNEL */
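
/*
 * Typical kernel-side call sequence on a nested page table fault (a sketch
 * for illustration only; the surrounding handler, the 'mmio_read' and
 * 'mmio_write' callbacks and the local variable names are assumptions, not
 * part of this file):
 *
 *	struct vie vie;
 *
 *	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
 *	    &vie) == 0 &&
 *	    vmm_decode_instruction(vm, vcpuid, gla, &vie) == 0)
 *		error = vmm_emulate_instruction(vm, vcpuid, gpa, &vie,
 *		    mmio_read, mmio_write, arg);
 */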