/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#else	/* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <vmmapi.h>
#endif	/* _KERNEL */

/* struct vie_op.op_type */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,
	VIE_OP_TYPE_MOVSX,
	VIE_OP_TYPE_MOVZX,
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_TWO_BYTE,
	VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)	/* immediate operand present */
#define	VIE_OP_F_IMM8		(1 << 1)	/* 8-bit immediate operand */

static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xBE] = {
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,
	},
};

static const struct vie_op one_byte_opcodes[256] = {
	[0x0F] = {
		.op_byte = 0x0F,
		.op_type = VIE_OP_TYPE_TWO_BYTE
	},
	[0x88] = {
		.op_byte = 0x88,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x89] = {
		.op_byte = 0x89,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8A] = {
		.op_byte = 0x8A,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0x8B] = {
		.op_byte = 0x8B,
		.op_type = VIE_OP_TYPE_MOV,
	},
	[0xC7] = {
		.op_byte = 0xC7,
		.op_type = VIE_OP_TYPE_MOV,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x23] = {
		.op_byte = 0x23,
		.op_type = VIE_OP_TYPE_AND,
	},
	[0x81] = {
		/* XXX Group 1 extended opcode - not just AND */
		.op_byte = 0x81,
		.op_type = VIE_OP_TYPE_AND,
		.op_flags = VIE_OP_F_IMM,
	},
	[0x83] = {
		/* XXX Group 1 extended opcode - not just OR */
		.op_byte = 0x83,
		.op_type = VIE_OP_TYPE_OR,
		.op_flags = VIE_OP_F_IMM8,
	},
};

/* struct vie.mod */
#define	VIE_MOD_INDIRECT	0
#define	VIE_MOD_INDIRECT_DISP8	1
#define	VIE_MOD_INDIRECT_DISP32	2
#define	VIE_MOD_DIRECT		3
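
/*
 * Example of how a ModRM byte maps onto the fields above (register names
 * per gpr_map below): 0x4B is 01 001 011 in binary, i.e. mod = 1
 * (VIE_MOD_INDIRECT_DISP8), reg = 1 (%rcx) and rm = 3 (%rbx).  Thus the
 * byte sequence "89 4B 10" decodes to "mov %ecx, 0x10(%rbx)".
 */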

/* struct vie.rm */
#define	VIE_RM_SIB		4
#define	VIE_RM_DISP32		5

#define	GB			(1024 * 1024 * 1024)

static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};

static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};

static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
	int error;

	error = vm_get_register(vm, vcpuid, reg, rval);

	return (error);
}

static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
	uint64_t val;
	int error, rshift;
	enum vm_reg_name reg;

	rshift = 0;
	reg = gpr_map[vie->reg];

	/*
	 * 64-bit mode imposes limitations on accessing legacy byte registers.
	 *
	 * The legacy high-byte registers cannot be addressed if the REX
	 * prefix is present.  In this case the values 4, 5, 6 and 7 of the
	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
	 *
	 * If the REX prefix is not present then the values 4, 5, 6 and 7
	 * of the 'ModRM:reg' field address the legacy high-byte registers,
	 * %ah, %ch, %dh and %bh respectively.
	 */
	if (!vie->rex_present) {
		if (vie->reg & 0x4) {
			/*
			 * Obtain the value of %ah by reading %rax and shifting
			 * right by 8 bits (same for %bh, %ch and %dh).
			 */
			rshift = 8;
			reg = gpr_map[vie->reg & 0x3];
		}
	}

	error = vm_get_register(vm, vcpuid, reg, &val);
	*rval = val >> rshift;
	return (error);
}

static int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
		    uint64_t val, int size)
{
	int error;
	uint64_t origval;

	switch (size) {
	case 1:
	case 2:
		error = vie_read_register(vm, vcpuid, reg, &origval);
		if (error)
			return (error);
		val &= size2mask[size];
		val |= origval & ~size2mask[size];
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	error = vm_set_register(vm, vcpuid, reg, val);
	return (error);
}
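
/*
 * Worked example of the partial-width semantics above, with %rax
 * initially 0x1122334455667788 and val 0xAAAAAAAAAAAAAAAA:
 *
 *	size 1:	%rax = 0x11223344556677AA	(low byte merged)
 *	size 2:	%rax = 0x112233445566AAAA	(low word merged)
 *	size 4:	%rax = 0x00000000AAAAAAAA	(upper 32 bits cleared, as
 *						 hardware does for 32-bit
 *						 destinations in 64-bit mode)
 *	size 8:	%rax = 0xAAAAAAAAAAAAAAAA	(full overwrite)
 */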

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 * - default address size is 64-bits
 * - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r: mov r/m64, r64
		 */
		if (vie->rex_w)
			size = 8;
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r8, r/m8
		 * REX + 8A/r:	mov r8, r/m8
		 * 8B/r:	mov r32, r/m32
		 * REX.W + 8B/r: mov r64, r/m64
		 */
		if (vie->op.op_byte == 0x8A)
			size = 1;
		else if (vie->rex_w)
			size = 8;
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xC7:
		/*
		 * MOV from imm32 to mem (ModRM:r/m)
		 * C7 /0	mov r/m32, imm32
		 * REX.W + C7 /0 mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate;		/* already sign-extended */

		if (vie->rex_w)
			size = 8;

		if (size != 8)
			val &= size2mask[size];

		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 * - default address size is 64-bits
 * - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	     mem_region_read_t memread, mem_region_write_t memwrite,
	     void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0xB6:
		/*
		 * MOV and zero extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B6/r		movzx r32, r/m8
		 * REX.W + 0F B6/r	movzx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		if (vie->rex_w)
			size = 8;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xBE:
		/*
		 * MOV and sign extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F BE/r		movsx r32, r/m8
		 * REX.W + 0F BE/r	movsx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		if (vie->rex_w)
			size = 8;

		/* sign extend byte */
		val = (int8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	default:
		break;
	}
	return (error);
}
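
/*
 * Example of the difference between the two cases above for a memory
 * byte of 0x80 moved into %eax/%rax:
 *
 *	movzbl:			%rax = 0x0000000000000080
 *	movsbl:			%rax = 0x00000000FFFFFF80
 *	movsbq (REX.W):		%rax = 0xFFFFFFFFFFFFFF80
 *
 * The movsbl result has its upper 32 bits clear because a 32-bit
 * destination is zero-extended to 64 bits by vie_update_register().
 */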

static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val1, val2;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x23:
		/*
		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
		 * result in reg.
		 *
		 * 23/r		and r32, r/m32
		 * REX.W + 23/r	and r64, r/m64
		 */
		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		val1 &= val2;
		error = vie_update_register(vm, vcpuid, reg, val1, size);
		break;
	case 0x81:
		/*
		 * AND mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 81 /4		and r/m32, imm32
		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
		 *
		 * Currently, only the AND operation of the 0x81 opcode
		 * is implemented (ModRM:reg = b100).
		 */
		if ((vie->reg & 7) != 4)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 &= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}
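
/*
 * In Group 1 the ModRM:reg field selects the operation rather than a
 * register, which is why the 0x81 case above and the 0x83 case below
 * must check it.  For example, "81 20 FF 00 00 00" has ModRM 0x20
 * (mod = 0, reg = 4, rm = 0) and decodes to "andl $0xff, (%rax)", while
 * "83 08 0F" has ModRM 0x08 (mod = 0, reg = 1, rm = 0) and decodes to
 * "orl $0xf, (%rax)".
 */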

static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	   mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t val1;

	size = 4;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x83:
		/*
		 * OR mem (ModRM:r/m) with immediate and store the
		 * result in mem.
		 *
		 * 83 /1		or r/m32, imm8 sign-extended to 32
		 * REX.W + 83 /1	or r/m64, imm8 sign-extended to 64
		 *
		 * Currently, only the OR operation of the 0x83 opcode
		 * is implemented (ModRM:reg = b001).
		 */
		if ((vie->reg & 7) != 1)
			break;

		if (vie->rex_w)
			size = 8;

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val1, size, arg);
		if (error)
			break;

		/*
		 * perform the operation with the pre-fetched immediate
		 * operand and write the result
		 */
		val1 |= vie->immediate;
		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
		break;
	default:
		break;
	}
	return (error);
}

int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
			mem_region_read_t memread, mem_region_write_t memwrite,
			void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVSX:
	case VIE_OP_TYPE_MOVZX:
		error = emulate_movx(vm, vcpuid, gpa, vie,
				     memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				   memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

#ifdef _KERNEL
void
vie_init(struct vie *vie)
{

	bzero(vie, sizeof(struct vie));

	vie->base_register = VM_REG_LAST;
	vie->index_register = VM_REG_LAST;
}
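
/*
 * Worked example for the two-level 32-bit (non-PAE) walk in gla2gpa()
 * below, for gla 0xBFC01234 with 4KB pages:
 *
 *	page directory index:	(gla >> 22) & 0x3FF = 0x2FF
 *	page table index:	(gla >> 12) & 0x3FF = 0x001
 *	page offset:		gla & 0xFFF	    = 0x234
 *
 * If the PDE has PG_PS set the walk stops one level early and the low
 * 22 bits of the gla become the offset into a 4MB page.
 */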

static int
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys, uint64_t *gpa,
	enum vie_paging_mode paging_mode, int cpl)
{
	int nlevels, ptpshift, ptpindex, usermode;
	uint64_t *ptpbase, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	usermode = (cpl == 3 ? 1 : 0);

	if (paging_mode == PAGING_MODE_FLAT) {
		*gpa = gla;
		return (0);
	}

	if (paging_mode == PAGING_MODE_32) {
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE,
						VM_PROT_READ, &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			vm_gpa_release(cookie);

			if ((pte32 & PG_V) == 0)
				goto error;

			if (usermode && (pte32 & PG_U) == 0)
				goto error;

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		return (0);
	}

	if (paging_mode == PAGING_MODE_PAE) {
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4,
				      VM_PROT_READ, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		vm_gpa_release(cookie);

		if ((pte & PG_V) == 0)
			goto error;

		ptpphys = pte;

		nlevels = 2;
	} else
		nlevels = 4;
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
				      &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		vm_gpa_release(cookie);

		if ((pte & PG_V) == 0)
			goto error;

		if (usermode && (pte & PG_U) == 0)
			goto error;

		if (nlevels > 0 && (pte & PG_PS) != 0) {
			if (pgsize > 1 * GB)
				goto error;
			else
				break;
		}

		ptpphys = pte;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
	return (0);

error:
	return (-1);
}

int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
		      uint64_t cr3, enum vie_paging_mode paging_mode, int cpl,
		      struct vie *vie)
{
	int n, err, prot;
	uint64_t gpa, off;
	void *hpa, *cookie;

	/*
	 * XXX cache previously fetched instructions using 'rip' as the tag
	 */

	prot = VM_PROT_READ | VM_PROT_EXECUTE;
	if (inst_length > VIE_INST_SIZE)
		panic("vmm_fetch_instruction: invalid length %d", inst_length);

	/* Copy the instruction into 'vie' */
	while (vie->num_valid < inst_length) {
		err = gla2gpa(vm, rip, cr3, &gpa, paging_mode, cpl);
		if (err)
			break;

		off = gpa & PAGE_MASK;
		n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

		if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
			break;

		bcopy(hpa, &vie->inst[vie->num_valid], n);

		vm_gpa_release(cookie);

		rip += n;
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
		return (0);
	else
		return (-1);
}

static int
vie_peek(struct vie *vie, uint8_t *x)
{

	if (vie->num_processed < vie->num_valid) {
		*x = vie->inst[vie->num_processed];
		return (0);
	} else
		return (-1);
}

static void
vie_advance(struct vie *vie)
{

	vie->num_processed++;
}
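
/*
 * A REX prefix is any byte in the range 0x40-0x4F; its low four bits
 * are the W, R, X and B flags extracted below.  For example, 0x48 is
 * 0100 1000 in binary, i.e. REX.W, so "48 89 03" decodes to
 * "mov %rax, (%rbx)" rather than "mov %eax, (%rbx)".
 */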

static int
decode_rex(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	if (x >= 0x40 && x <= 0x4F) {
		vie->rex_present = 1;

		vie->rex_w = x & 0x8 ? 1 : 0;
		vie->rex_r = x & 0x4 ? 1 : 0;
		vie->rex_x = x & 0x2 ? 1 : 0;
		vie->rex_b = x & 0x1 ? 1 : 0;

		vie_advance(vie);
	}

	return (0);
}

static int
decode_two_byte_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = two_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);
	return (0);
}

static int
decode_opcode(struct vie *vie)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->op = one_byte_opcodes[x];

	if (vie->op.op_type == VIE_OP_TYPE_NONE)
		return (-1);

	vie_advance(vie);

	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
		return (decode_two_byte_opcode(vie));

	return (0);
}
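
/*
 * The 0x0F escape byte selects the two-byte opcode table, so the byte
 * sequence "0F B6 06" decodes as opcode 0F B6 (movzx) with ModRM 0x06
 * (mod = 0, reg = 0, rm = 6), i.e. "movzbl (%rsi), %eax".
 */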

static int
decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
{
	uint8_t x;

	if (vie_peek(vie, &x))
		return (-1);

	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault.  There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is a don't-care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}

static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}

static int
decode_displacement(struct vie *vie)
{
	int n, i;
	uint8_t x;

	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	if ((n = vie->disp_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_displacement: invalid disp_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->displacement = u.signed8;		/* sign-extended */
	else
		vie->displacement = u.signed32;		/* sign-extended */

	return (0);
}

static int
decode_immediate(struct vie *vie)
{
	int i, n;
	uint8_t x;
	union {
		char	buf[4];
		int8_t	signed8;
		int32_t	signed32;
	} u;

	/* Figure out immediate operand size (if any) */
	if (vie->op.op_flags & VIE_OP_F_IMM)
		vie->imm_bytes = 4;
	else if (vie->op.op_flags & VIE_OP_F_IMM8)
		vie->imm_bytes = 1;

	if ((n = vie->imm_bytes) == 0)
		return (0);

	if (n != 1 && n != 4)
		panic("decode_immediate: invalid imm_bytes %d", n);

	for (i = 0; i < n; i++) {
		if (vie_peek(vie, &x))
			return (-1);

		u.buf[i] = x;
		vie_advance(vie);
	}

	if (n == 1)
		vie->immediate = u.signed8;		/* sign-extended */
	else
		vie->immediate = u.signed32;		/* sign-extended */

	return (0);
}
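
/*
 * The sign extension above is what makes the imm8 form work: for
 * "83 08 F0" the immediate byte 0xF0 becomes vie->immediate =
 * 0xFFFFFFFFFFFFFFF0 (-16), so emulate_or() computes the same result
 * as the hardware would for "orl $0xfffffff0, (%rax)".
 */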

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

	if (vie->num_processed == vie->num_valid)
		return (0);
	else
		return (-1);
}

/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
	int error;
	uint64_t base, idx;

	/* Skip 'gla' verification */
	if (gla == VIE_INVALID_GLA)
		return (0);

	base = 0;
	if (vie->base_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->base_register, &base);
		if (error) {
			printf("verify_gla: error %d getting base reg %d\n",
				error, vie->base_register);
			return (-1);
		}

		/*
		 * RIP-relative addressing starts from the following
		 * instruction
		 */
		if (vie->base_register == VM_REG_GUEST_RIP)
			base += vie->num_valid;
	}

	idx = 0;
	if (vie->index_register != VM_REG_LAST) {
		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
		if (error) {
			printf("verify_gla: error %d getting index reg %d\n",
				error, vie->index_register);
			return (-1);
		}
	}

	if (base + vie->scale * idx + vie->displacement != gla) {
		printf("verify_gla mismatch: "
		       "base(0x%0lx), scale(%d), index(0x%0lx), "
		       "disp(0x%0lx), gla(0x%0lx)\n",
		       base, vie->scale, idx, vie->displacement, gla);
		return (-1);
	}

	return (0);
}

int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
		       enum vie_cpu_mode cpu_mode, struct vie *vie)
{

	if (cpu_mode == CPU_MODE_64BIT) {
		if (decode_rex(vie))
			return (-1);
	}

	if (decode_opcode(vie))
		return (-1);

	if (decode_modrm(vie, cpu_mode))
		return (-1);

	if (decode_sib(vie))
		return (-1);

	if (decode_displacement(vie))
		return (-1);

	if (decode_immediate(vie))
		return (-1);

	if (verify_inst_length(vie))
		return (-1);

	if (verify_gla(vm, cpuid, gla, vie))
		return (-1);

	vie->decoded = 1;	/* success */

	return (0);
}
#endif	/* _KERNEL */
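
/*
 * Putting it together: a sketch of how an EPT-fault handler might drive
 * this file (illustrative only; the memread/memwrite callbacks and the
 * rip/cr3/gla/gpa fault collateral are supplied by the caller):
 *
 *	struct vie vie;
 *
 *	vie_init(&vie);
 *	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
 *	    paging_mode, cpl, &vie) != 0)
 *		return (EFAULT);
 *	if (vmm_decode_instruction(vm, vcpuid, gla, CPU_MODE_64BIT, &vie) != 0)
 *		return (EINVAL);
 *	error = vmm_emulate_instruction(vm, vcpuid, gpa, &vie,
 *	    memread, memwrite, memarg);
 */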