/*-
 * Copyright (c) 2012 Sandvine, Inc.
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/pcpu.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/vmparam.h>
#include <machine/vmm.h>
#else /* !_KERNEL */
#include <sys/types.h>
#include <sys/errno.h>

#include <machine/vmm.h>

#include <vmmapi.h>
#endif /* _KERNEL */

/* struct vie_op.op_type */
enum {
        VIE_OP_TYPE_NONE = 0,
        VIE_OP_TYPE_MOV,
        VIE_OP_TYPE_MOVZX,
        VIE_OP_TYPE_AND,
        VIE_OP_TYPE_OR,
        VIE_OP_TYPE_TWO_BYTE,
        VIE_OP_TYPE_LAST
};

/* struct vie_op.op_flags */
#define VIE_OP_F_IMM            (1 << 0)        /* immediate operand present */
#define VIE_OP_F_IMM8           (1 << 1)        /* 8-bit immediate operand */

static const struct vie_op two_byte_opcodes[256] = {
        [0xB6] = {
                .op_byte = 0xB6,
                .op_type = VIE_OP_TYPE_MOVZX,
        },
};

static const struct vie_op one_byte_opcodes[256] = {
        [0x0F] = {
                .op_byte = 0x0F,
                .op_type = VIE_OP_TYPE_TWO_BYTE
        },
        [0x88] = {
                .op_byte = 0x88,
                .op_type = VIE_OP_TYPE_MOV,
        },
        [0x89] = {
                .op_byte = 0x89,
                .op_type = VIE_OP_TYPE_MOV,
        },
        [0x8A] = {
                .op_byte = 0x8A,
                .op_type = VIE_OP_TYPE_MOV,
        },
        [0x8B] = {
                .op_byte = 0x8B,
                .op_type = VIE_OP_TYPE_MOV,
        },
        [0xC7] = {
                .op_byte = 0xC7,
                .op_type = VIE_OP_TYPE_MOV,
                .op_flags = VIE_OP_F_IMM,
        },
        [0x23] = {
                .op_byte = 0x23,
                .op_type = VIE_OP_TYPE_AND,
        },
        [0x81] = {
                /* XXX Group 1 extended opcode - not just AND */
                .op_byte = 0x81,
                .op_type = VIE_OP_TYPE_AND,
                .op_flags = VIE_OP_F_IMM,
        },
        [0x83] = {
                /* XXX Group 1 extended opcode - not just OR */
                .op_byte = 0x83,
                .op_type = VIE_OP_TYPE_OR,
                .op_flags = VIE_OP_F_IMM8,
        },
};

/* struct vie.mod */
#define VIE_MOD_INDIRECT        0
#define VIE_MOD_INDIRECT_DISP8  1
#define VIE_MOD_INDIRECT_DISP32 2
#define VIE_MOD_DIRECT          3

/* struct vie.rm */
#define VIE_RM_SIB              4
#define VIE_RM_DISP32           5
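/*
 * For reference, the ModRM byte decoded below is laid out as
 * mod[7:6] reg[5:3] r/m[2:0].  An illustrative encoding (not from the
 * original source): "mov %eax,(%rbx)" is 89 03, i.e. opcode 0x89
 * followed by ModRM 0x03 with mod=0 (indirect), reg=0 (%eax) and
 * r/m=3 (%rbx).
 */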
#define GB      (1024 * 1024 * 1024)

static enum vm_reg_name gpr_map[16] = {
        VM_REG_GUEST_RAX,
        VM_REG_GUEST_RCX,
        VM_REG_GUEST_RDX,
        VM_REG_GUEST_RBX,
        VM_REG_GUEST_RSP,
        VM_REG_GUEST_RBP,
        VM_REG_GUEST_RSI,
        VM_REG_GUEST_RDI,
        VM_REG_GUEST_R8,
        VM_REG_GUEST_R9,
        VM_REG_GUEST_R10,
        VM_REG_GUEST_R11,
        VM_REG_GUEST_R12,
        VM_REG_GUEST_R13,
        VM_REG_GUEST_R14,
        VM_REG_GUEST_R15
};

static uint64_t size2mask[] = {
        [1] = 0xff,
        [2] = 0xffff,
        [4] = 0xffffffff,
        [8] = 0xffffffffffffffff,
};

static int
vie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
{
        int error;

        error = vm_get_register(vm, vcpuid, reg, rval);

        return (error);
}

static int
vie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
{
        uint64_t val;
        int error, rshift;
        enum vm_reg_name reg;

        rshift = 0;
        reg = gpr_map[vie->reg];

        /*
         * 64-bit mode imposes limitations on accessing legacy byte registers.
         *
         * The legacy high-byte registers cannot be addressed if the REX
         * prefix is present. In this case the values 4, 5, 6 and 7 of the
         * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
         *
         * If the REX prefix is not present then the values 4, 5, 6 and 7
         * of the 'ModRM:reg' field address the legacy high-byte registers,
         * %ah, %ch, %dh and %bh respectively.
         */
        if (!vie->rex_present) {
                if (vie->reg & 0x4) {
                        /*
                         * Obtain the value of %ah by reading %rax and shifting
                         * right by 8 bits (same for %bh, %ch and %dh).
                         */
                        rshift = 8;
                        reg = gpr_map[vie->reg & 0x3];
                }
        }

        error = vm_get_register(vm, vcpuid, reg, &val);
        *rval = val >> rshift;
        return (error);
}

static int
vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
                    uint64_t val, int size)
{
        int error;
        uint64_t origval;

        switch (size) {
        case 1:
        case 2:
                error = vie_read_register(vm, vcpuid, reg, &origval);
                if (error)
                        return (error);
                val &= size2mask[size];
                val |= origval & ~size2mask[size];
                break;
        case 4:
                val &= 0xffffffffUL;
                break;
        case 8:
                break;
        default:
                return (EINVAL);
        }

        error = vm_set_register(vm, vcpuid, reg, val);
        return (error);
}
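/*
 * Note that vie_update_register() mirrors hardware behavior: 1- and
 * 2-byte writes preserve the untouched upper bits of the destination,
 * while a 4-byte write zero-extends into the full 64-bit register.
 * For example (illustrative values), with %rax = 0xffffffffffffffff,
 * writing 0x1234 with size 2 yields 0xffffffffffff1234, whereas
 * writing it with size 4 yields 0x0000000000001234.
 */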
/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
            mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
        int error, size;
        enum vm_reg_name reg;
        uint8_t byte;
        uint64_t val;

        size = 4;
        error = EINVAL;

        switch (vie->op.op_byte) {
        case 0x88:
                /*
                 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
                 * 88/r:        mov r/m8, r8
                 * REX + 88/r:  mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
                 */
                size = 1;
                error = vie_read_bytereg(vm, vcpuid, vie, &byte);
                if (error == 0)
                        error = memwrite(vm, vcpuid, gpa, byte, size, arg);
                break;
        case 0x89:
                /*
                 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
                 * 89/r:        mov r/m32, r32
                 * REX.W + 89/r:        mov r/m64, r64
                 */
                if (vie->rex_w)
                        size = 8;
                reg = gpr_map[vie->reg];
                error = vie_read_register(vm, vcpuid, reg, &val);
                if (error == 0) {
                        val &= size2mask[size];
                        error = memwrite(vm, vcpuid, gpa, val, size, arg);
                }
                break;
        case 0x8A:
        case 0x8B:
                /*
                 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
                 * 8A/r:        mov r8, r/m8
                 * REX + 8A/r:  mov r8, r/m8
                 * 8B/r:        mov r32, r/m32
                 * REX.W + 8B/r:        mov r64, r/m64
                 */
                if (vie->op.op_byte == 0x8A)
                        size = 1;
                else if (vie->rex_w)
                        size = 8;
                error = memread(vm, vcpuid, gpa, &val, size, arg);
                if (error == 0) {
                        reg = gpr_map[vie->reg];
                        error = vie_update_register(vm, vcpuid, reg, val, size);
                }
                break;
        case 0xC7:
                /*
                 * MOV from imm32 to mem (ModRM:r/m)
                 * C7/0         mov r/m32, imm32
                 * REX.W + C7/0 mov r/m64, imm32 (sign-extended to 64-bits)
                 */
                val = vie->immediate;           /* already sign-extended */

                if (vie->rex_w)
                        size = 8;

                if (size != 8)
                        val &= size2mask[size];

                error = memwrite(vm, vcpuid, gpa, val, size, arg);
                break;
        default:
                break;
        }

        return (error);
}

/*
 * The following simplifying assumptions are made during emulation:
 *
 * - guest is in 64-bit mode
 *   - default address size is 64-bits
 *   - default operand size is 32-bits
 *
 * - operand size override is not supported
 *
 * - address size override is not supported
 */
static int
emulate_movzx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
              mem_region_read_t memread, mem_region_write_t memwrite,
              void *arg)
{
        int error, size;
        enum vm_reg_name reg;
        uint64_t val;

        size = 4;
        error = EINVAL;

        switch (vie->op.op_byte) {
        case 0xB6:
                /*
                 * MOV and zero extend byte from mem (ModRM:r/m) to
                 * reg (ModRM:reg).
                 *
                 * 0F B6/r              movzx r32, r/m8
                 * REX.W + 0F B6/r      movzx r64, r/m8
                 */

                /* get the first operand */
                error = memread(vm, vcpuid, gpa, &val, 1, arg);
                if (error)
                        break;

                /* get the second operand */
                reg = gpr_map[vie->reg];

                if (vie->rex_w)
                        size = 8;

                /* write the result */
                error = vie_update_register(vm, vcpuid, reg, val, size);
                break;
        default:
                break;
        }
        return (error);
}
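/*
 * An illustrative encoding (not from the original source): the guest
 * instruction "movzbl (%rdx),%eax" is 0F B6 02 - the two-byte opcode
 * 0F B6 followed by ModRM 0x02 (mod=0, reg=0 for %eax, r/m=2 for
 * %rdx) - and is handled by the 0xB6 case above with size 4.
 */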
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
            mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
        int error, size;
        enum vm_reg_name reg;
        uint64_t val1, val2;

        size = 4;
        error = EINVAL;

        switch (vie->op.op_byte) {
        case 0x23:
                /*
                 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
                 * result in reg.
                 *
                 * 23/r         and r32, r/m32
                 * REX.W + 23/r and r64, r/m64
                 */
                if (vie->rex_w)
                        size = 8;

                /* get the first operand */
                reg = gpr_map[vie->reg];
                error = vie_read_register(vm, vcpuid, reg, &val1);
                if (error)
                        break;

                /* get the second operand */
                error = memread(vm, vcpuid, gpa, &val2, size, arg);
                if (error)
                        break;

                /* perform the operation and write the result */
                val1 &= val2;
                error = vie_update_register(vm, vcpuid, reg, val1, size);
                break;
        case 0x81:
                /*
                 * AND mem (ModRM:r/m) with immediate and store the
                 * result in mem.
                 *
                 * 81/4         and r/m32, imm32
                 * REX.W + 81/4 and r/m64, imm32 sign-extended to 64
                 *
                 * Currently, only the AND operation of the 0x81 opcode
                 * is implemented (ModRM:reg = b100).
                 */
                if ((vie->reg & 7) != 4)
                        break;

                if (vie->rex_w)
                        size = 8;

                /* get the first operand */
                error = memread(vm, vcpuid, gpa, &val1, size, arg);
                if (error)
                        break;

                /*
                 * perform the operation with the pre-fetched immediate
                 * operand and write the result
                 */
                val1 &= vie->immediate;
                error = memwrite(vm, vcpuid, gpa, val1, size, arg);
                break;
        default:
                break;
        }
        return (error);
}

static int
emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
           mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
        int error, size;
        uint64_t val1;

        size = 4;
        error = EINVAL;

        switch (vie->op.op_byte) {
        case 0x83:
                /*
                 * OR mem (ModRM:r/m) with immediate and store the
                 * result in mem.
                 *
                 * 83/1         or r/m32, imm8 sign-extended to 32
                 * REX.W + 83/1 or r/m64, imm8 sign-extended to 64
                 *
                 * Currently, only the OR operation of the 0x83 opcode
                 * is implemented (ModRM:reg = b001).
                 */
                if ((vie->reg & 7) != 1)
                        break;

                if (vie->rex_w)
                        size = 8;

                /* get the first operand */
                error = memread(vm, vcpuid, gpa, &val1, size, arg);
                if (error)
                        break;

                /*
                 * perform the operation with the pre-fetched immediate
                 * operand and write the result
                 */
                val1 |= vie->immediate;
                error = memwrite(vm, vcpuid, gpa, val1, size, arg);
                break;
        default:
                break;
        }
        return (error);
}

int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
                        mem_region_read_t memread, mem_region_write_t memwrite,
                        void *memarg)
{
        int error;

        if (!vie->decoded)
                return (EINVAL);

        switch (vie->op.op_type) {
        case VIE_OP_TYPE_MOV:
                error = emulate_mov(vm, vcpuid, gpa, vie,
                                    memread, memwrite, memarg);
                break;
        case VIE_OP_TYPE_MOVZX:
                error = emulate_movzx(vm, vcpuid, gpa, vie,
                                      memread, memwrite, memarg);
                break;
        case VIE_OP_TYPE_AND:
                error = emulate_and(vm, vcpuid, gpa, vie,
                                    memread, memwrite, memarg);
                break;
        case VIE_OP_TYPE_OR:
                error = emulate_or(vm, vcpuid, gpa, vie,
                                   memread, memwrite, memarg);
                break;
        default:
                error = EINVAL;
                break;
        }

        return (error);
}

#ifdef _KERNEL
void
vie_init(struct vie *vie)
{

        bzero(vie, sizeof(struct vie));

        vie->base_register = VM_REG_LAST;
        vie->index_register = VM_REG_LAST;
}
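/*
 * A worked example of the page-walk arithmetic used below (assuming
 * 4-level paging and PAGE_SHIFT = 12): each level's page-table index
 * is 9 bits wide, so 'ptpshift' takes the values 39, 30, 21 and 12 as
 * 'nlevels' counts down, and the walk extracts gla[47:39], gla[38:30],
 * gla[29:21] and gla[20:12] in turn.  The 32-bit walk is analogous
 * with 10-bit indices (shifts of 22 and 12).
 */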
static int
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
        uint64_t *gpa, enum vie_paging_mode paging_mode)
{
        int nlevels, ptpshift, ptpindex;
        uint64_t *ptpbase, pte, pgsize;
        uint32_t *ptpbase32, pte32;
        void *cookie;

        if (paging_mode == PAGING_MODE_FLAT) {
                *gpa = gla;
                return (0);
        }

        if (paging_mode == PAGING_MODE_32) {
                nlevels = 2;
                while (--nlevels >= 0) {
                        /* Zero out the lower 12 bits. */
                        ptpphys &= ~0xfff;

                        ptpbase32 = vm_gpa_hold(vm, ptpphys, PAGE_SIZE,
                                                VM_PROT_READ, &cookie);

                        if (ptpbase32 == NULL)
                                goto error;

                        ptpshift = PAGE_SHIFT + nlevels * 10;
                        ptpindex = (gla >> ptpshift) & 0x3FF;
                        pgsize = 1UL << ptpshift;

                        pte32 = ptpbase32[ptpindex];

                        vm_gpa_release(cookie);

                        if ((pte32 & PG_V) == 0)
                                goto error;

                        if (pte32 & PG_PS)
                                break;

                        ptpphys = pte32;
                }

                /* Zero out the lower 'ptpshift' bits */
                pte32 >>= ptpshift; pte32 <<= ptpshift;
                *gpa = pte32 | (gla & (pgsize - 1));
                return (0);
        }

        if (paging_mode == PAGING_MODE_PAE) {
                /* Zero out the lower 5 bits and the upper 12 bits */
                ptpphys >>= 5; ptpphys <<= 17; ptpphys >>= 12;

                ptpbase = vm_gpa_hold(vm, ptpphys, sizeof(*ptpbase) * 4,
                                      VM_PROT_READ, &cookie);
                if (ptpbase == NULL)
                        goto error;

                ptpindex = (gla >> 30) & 0x3;

                pte = ptpbase[ptpindex];

                vm_gpa_release(cookie);

                if ((pte & PG_V) == 0)
                        goto error;

                ptpphys = pte;

                nlevels = 2;
        } else
                nlevels = 4;
        while (--nlevels >= 0) {
                /* Zero out the lower 12 bits and the upper 12 bits */
                ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

                ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
                                      &cookie);
                if (ptpbase == NULL)
                        goto error;

                ptpshift = PAGE_SHIFT + nlevels * 9;
                ptpindex = (gla >> ptpshift) & 0x1FF;
                pgsize = 1UL << ptpshift;

                pte = ptpbase[ptpindex];

                vm_gpa_release(cookie);

                if ((pte & PG_V) == 0)
                        goto error;

                if (pte & PG_PS) {
                        if (pgsize > 1 * GB)
                                goto error;
                        else
                                break;
                }

                ptpphys = pte;
        }

        /* Zero out the lower 'ptpshift' bits and the upper 12 bits */
        pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
        *gpa = pte | (gla & (pgsize - 1));
        return (0);

error:
        return (-1);
}

int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
                      uint64_t cr3, enum vie_paging_mode paging_mode,
                      struct vie *vie)
{
        int n, err, prot;
        uint64_t gpa, off;
        void *hpa, *cookie;

        /*
         * XXX cache previously fetched instructions using 'rip' as the tag
         */

        prot = VM_PROT_READ | VM_PROT_EXECUTE;
        if (inst_length > VIE_INST_SIZE)
                panic("vmm_fetch_instruction: invalid length %d", inst_length);

        /* Copy the instruction into 'vie' */
        while (vie->num_valid < inst_length) {
                err = gla2gpa(vm, rip, cr3, &gpa, paging_mode);
                if (err)
                        break;

                off = gpa & PAGE_MASK;
                n = min(inst_length - vie->num_valid, PAGE_SIZE - off);

                if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
                        break;

                bcopy(hpa, &vie->inst[vie->num_valid], n);

                vm_gpa_release(cookie);

                rip += n;
                vie->num_valid += n;
        }

        if (vie->num_valid == inst_length)
                return (0);
        else
                return (-1);
}

static int
vie_peek(struct vie *vie, uint8_t *x)
{

        if (vie->num_processed < vie->num_valid) {
                *x = vie->inst[vie->num_processed];
                return (0);
        } else
                return (-1);
}

static void
vie_advance(struct vie *vie)
{

        vie->num_processed++;
}
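/*
 * REX prefixes occupy the byte range 0x40-0x4F and are laid out as
 * 0100WRXB.  For example (illustrative, not from the original source),
 * "mov %rax,(%rbx)" encodes as 48 89 03, where 0x48 sets REX.W to
 * select the 64-bit operand size, and "mov %r8d,(%rbx)" encodes as
 * 44 89 03, where 0x44 sets REX.R to extend the ModRM 'reg' field
 * to %r8d.
 */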
static int
decode_rex(struct vie *vie)
{
        uint8_t x;

        if (vie_peek(vie, &x))
                return (-1);

        if (x >= 0x40 && x <= 0x4F) {
                vie->rex_present = 1;

                vie->rex_w = x & 0x8 ? 1 : 0;
                vie->rex_r = x & 0x4 ? 1 : 0;
                vie->rex_x = x & 0x2 ? 1 : 0;
                vie->rex_b = x & 0x1 ? 1 : 0;

                vie_advance(vie);
        }

        return (0);
}

static int
decode_two_byte_opcode(struct vie *vie)
{
        uint8_t x;

        if (vie_peek(vie, &x))
                return (-1);

        vie->op = two_byte_opcodes[x];

        if (vie->op.op_type == VIE_OP_TYPE_NONE)
                return (-1);

        vie_advance(vie);
        return (0);
}

static int
decode_opcode(struct vie *vie)
{
        uint8_t x;

        if (vie_peek(vie, &x))
                return (-1);

        vie->op = one_byte_opcodes[x];

        if (vie->op.op_type == VIE_OP_TYPE_NONE)
                return (-1);

        vie_advance(vie);

        if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
                return (decode_two_byte_opcode(vie));

        return (0);
}

static int
decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
{
        uint8_t x;

        if (vie_peek(vie, &x))
                return (-1);

        vie->mod = (x >> 6) & 0x3;
        vie->rm =  (x >> 0) & 0x7;
        vie->reg = (x >> 3) & 0x7;

        /*
         * A direct addressing mode makes no sense in the context of an EPT
         * fault. There has to be a memory access involved to cause the
         * EPT fault.
         */
        if (vie->mod == VIE_MOD_DIRECT)
                return (-1);

        if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
            (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
                /*
                 * Table 2-5: Special Cases of REX Encodings
                 *
                 * mod=0, r/m=5 is used in the compatibility mode to
                 * indicate a disp32 without a base register.
                 *
                 * mod!=3, r/m=4 is used in the compatibility mode to
                 * indicate that the SIB byte is present.
                 *
                 * The 'b' bit in the REX prefix is don't care in
                 * this case.
                 */
        } else {
                vie->rm |= (vie->rex_b << 3);
        }

        vie->reg |= (vie->rex_r << 3);

        /* SIB */
        if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
                goto done;

        vie->base_register = gpr_map[vie->rm];

        switch (vie->mod) {
        case VIE_MOD_INDIRECT_DISP8:
                vie->disp_bytes = 1;
                break;
        case VIE_MOD_INDIRECT_DISP32:
                vie->disp_bytes = 4;
                break;
        case VIE_MOD_INDIRECT:
                if (vie->rm == VIE_RM_DISP32) {
                        vie->disp_bytes = 4;
                        /*
                         * Table 2-7. RIP-Relative Addressing
                         *
                         * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
                         * whereas in compatibility mode it just implies disp32.
                         */

                        if (cpu_mode == CPU_MODE_64BIT)
                                vie->base_register = VM_REG_GUEST_RIP;
                        else
                                vie->base_register = VM_REG_LAST;
                }
                break;
        }

done:
        vie_advance(vie);

        return (0);
}
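/*
 * A worked SIB example (illustrative, not from the original source):
 * "mov 0x8(%rsi,%rbx,4),%eax" encodes as 8B 44 9E 08.  ModRM 0x44
 * gives mod=1 (disp8), reg=0 (%eax) and r/m=4 (SIB follows); SIB 0x9E
 * gives ss=2 (scale 4), index=3 (%rbx) and base=6 (%rsi); the trailing
 * 0x08 is the 8-bit displacement.
 */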
static int
decode_sib(struct vie *vie)
{
        uint8_t x;

        /* Proceed only if SIB byte is present */
        if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
                return (0);

        if (vie_peek(vie, &x))
                return (-1);

        /* De-construct the SIB byte */
        vie->ss = (x >> 6) & 0x3;
        vie->index = (x >> 3) & 0x7;
        vie->base = (x >> 0) & 0x7;

        /* Apply the REX prefix modifiers */
        vie->index |= vie->rex_x << 3;
        vie->base |= vie->rex_b << 3;

        switch (vie->mod) {
        case VIE_MOD_INDIRECT_DISP8:
                vie->disp_bytes = 1;
                break;
        case VIE_MOD_INDIRECT_DISP32:
                vie->disp_bytes = 4;
                break;
        }

        if (vie->mod == VIE_MOD_INDIRECT &&
            (vie->base == 5 || vie->base == 13)) {
                /*
                 * Special case when base register is unused if mod = 0
                 * and base = %rbp or %r13.
                 *
                 * Documented in:
                 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
                 * Table 2-5: Special Cases of REX Encodings
                 */
                vie->disp_bytes = 4;
        } else {
                vie->base_register = gpr_map[vie->base];
        }

        /*
         * All encodings of 'index' are valid except for %rsp (4).
         *
         * Documented in:
         * Table 2-3: 32-bit Addressing Forms with the SIB Byte
         * Table 2-5: Special Cases of REX Encodings
         */
        if (vie->index != 4)
                vie->index_register = gpr_map[vie->index];

        /* 'scale' makes sense only in the context of an index register */
        if (vie->index_register < VM_REG_LAST)
                vie->scale = 1 << vie->ss;

        vie_advance(vie);

        return (0);
}

static int
decode_displacement(struct vie *vie)
{
        int n, i;
        uint8_t x;

        union {
                char    buf[4];
                int8_t  signed8;
                int32_t signed32;
        } u;

        if ((n = vie->disp_bytes) == 0)
                return (0);

        if (n != 1 && n != 4)
                panic("decode_displacement: invalid disp_bytes %d", n);

        for (i = 0; i < n; i++) {
                if (vie_peek(vie, &x))
                        return (-1);

                u.buf[i] = x;
                vie_advance(vie);
        }

        if (n == 1)
                vie->displacement = u.signed8;          /* sign-extended */
        else
                vie->displacement = u.signed32;         /* sign-extended */

        return (0);
}

static int
decode_immediate(struct vie *vie)
{
        int i, n;
        uint8_t x;
        union {
                char    buf[4];
                int8_t  signed8;
                int32_t signed32;
        } u;

        /* Figure out immediate operand size (if any) */
        if (vie->op.op_flags & VIE_OP_F_IMM)
                vie->imm_bytes = 4;
        else if (vie->op.op_flags & VIE_OP_F_IMM8)
                vie->imm_bytes = 1;

        if ((n = vie->imm_bytes) == 0)
                return (0);

        if (n != 1 && n != 4)
                panic("decode_immediate: invalid imm_bytes %d", n);

        for (i = 0; i < n; i++) {
                if (vie_peek(vie, &x))
                        return (-1);

                u.buf[i] = x;
                vie_advance(vie);
        }

        if (n == 1)
                vie->immediate = u.signed8;             /* sign-extended */
        else
                vie->immediate = u.signed32;            /* sign-extended */

        return (0);
}

/*
 * Verify that all the bytes in the instruction buffer were consumed.
 */
static int
verify_inst_length(struct vie *vie)
{

        if (vie->num_processed == vie->num_valid)
                return (0);
        else
                return (-1);
}
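/*
 * The decoded operands reconstruct the effective address as
 *
 *      gla = base + scale * index + displacement
 *
 * e.g. "mov 0x10(%rax,%rbx,8),%ecx" yields %rax + 8 * %rbx + 0x10
 * (an illustrative instruction, not from the original source).  For
 * RIP-relative addressing the base is the address of the following
 * instruction, which is why verify_gla() below adds the instruction
 * length to the saved %rip.
 */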
/*
 * Verify that the 'guest linear address' provided as collateral of the nested
 * page table fault matches with our instruction decoding.
 */
static int
verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
{
        int error;
        uint64_t base, idx;

        /* Skip 'gla' verification */
        if (gla == VIE_INVALID_GLA)
                return (0);

        base = 0;
        if (vie->base_register != VM_REG_LAST) {
                error = vm_get_register(vm, cpuid, vie->base_register, &base);
                if (error) {
                        printf("verify_gla: error %d getting base reg %d\n",
                                error, vie->base_register);
                        return (-1);
                }

                /*
                 * RIP-relative addressing starts from the following
                 * instruction
                 */
                if (vie->base_register == VM_REG_GUEST_RIP)
                        base += vie->num_valid;
        }

        idx = 0;
        if (vie->index_register != VM_REG_LAST) {
                error = vm_get_register(vm, cpuid, vie->index_register, &idx);
                if (error) {
                        printf("verify_gla: error %d getting index reg %d\n",
                                error, vie->index_register);
                        return (-1);
                }
        }

        if (base + vie->scale * idx + vie->displacement != gla) {
                printf("verify_gla mismatch: "
                       "base(0x%0lx), scale(%d), index(0x%0lx), "
                       "disp(0x%0lx), gla(0x%0lx)\n",
                       base, vie->scale, idx, vie->displacement, gla);
                return (-1);
        }

        return (0);
}

int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
                       enum vie_cpu_mode cpu_mode, struct vie *vie)
{

        if (cpu_mode == CPU_MODE_64BIT) {
                if (decode_rex(vie))
                        return (-1);
        }

        if (decode_opcode(vie))
                return (-1);

        if (decode_modrm(vie, cpu_mode))
                return (-1);

        if (decode_sib(vie))
                return (-1);

        if (decode_displacement(vie))
                return (-1);

        if (decode_immediate(vie))
                return (-1);

        if (verify_inst_length(vie))
                return (-1);

        if (verify_gla(vm, cpuid, gla, vie))
                return (-1);

        vie->decoded = 1;       /* success */

        return (0);
}
#endif  /* _KERNEL */
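/*
 * A sketch of the expected call sequence for a kernel caller handling
 * a nested page table fault (the order is implied by the 'decoded'
 * check in vmm_emulate_instruction(); this is not a verbatim excerpt):
 *
 *      vie_init(&vie);
 *      if (vmm_fetch_instruction(vm, cpuid, rip, inst_length, cr3,
 *          paging_mode, &vie) == 0 &&
 *          vmm_decode_instruction(vm, cpuid, gla, cpu_mode, &vie) == 0)
 *              (void) vmm_emulate_instruction(vm, cpuid, gpa, &vie,
 *                  memread, memwrite, memarg);
 */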