1 //===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is part of the X86 Disassembler. 10 // It contains the public interface of the instruction decoder. 11 // Documentation for the disassembler can be found in X86Disassembler.h. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H 16 #define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/Support/X86DisassemblerDecoderCommon.h" 20 21 namespace llvm { 22 namespace X86Disassembler { 23 24 // Accessor functions for various fields of an Intel instruction 25 #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) 26 #define regFromModRM(modRM) (((modRM) & 0x38) >> 3) 27 #define rmFromModRM(modRM) ((modRM) & 0x7) 28 #define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) 29 #define indexFromSIB(sib) (((sib) & 0x38) >> 3) 30 #define baseFromSIB(sib) ((sib) & 0x7) 31 #define wFromREX(rex) (((rex) & 0x8) >> 3) 32 #define rFromREX(rex) (((rex) & 0x4) >> 2) 33 #define xFromREX(rex) (((rex) & 0x2) >> 1) 34 #define bFromREX(rex) ((rex) & 0x1) 35 36 #define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7) 37 #define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6) 38 #define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5) 39 #define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4) 40 #define mmFromEVEX2of4(evex) ((evex) & 0x3) 41 #define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7) 42 #define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3) 43 #define ppFromEVEX3of4(evex) ((evex) & 0x3) 44 #define zFromEVEX4of4(evex) (((evex) & 0x80) >> 
7) 45 #define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6) 46 #define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5) 47 #define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4) 48 #define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3) 49 #define aaaFromEVEX4of4(evex) ((evex) & 0x7) 50 51 #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) 52 #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) 53 #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) 54 #define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) 55 #define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) 56 #define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) 57 #define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) 58 #define ppFromVEX3of3(vex) ((vex) & 0x3) 59 60 #define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) 61 #define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) 62 #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) 63 #define ppFromVEX2of2(vex) ((vex) & 0x3) 64 65 #define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7) 66 #define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6) 67 #define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5) 68 #define mmmmmFromXOP2of3(xop) ((xop) & 0x1f) 69 #define wFromXOP3of3(xop) (((xop) & 0x80) >> 7) 70 #define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3) 71 #define lFromXOP3of3(xop) (((xop) & 0x4) >> 2) 72 #define ppFromXOP3of3(xop) ((xop) & 0x3) 73 74 // These enums represent Intel registers for use by the decoder. 
// Register lists, written as X-macros: the user defines ENTRY(x) and then
// expands one of the lists below. The order of the entries determines the
// enumerator values of every enum generated from them, so it must not change.

// 8-bit registers.
#define REGS_8BIT \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL) \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH) \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B) \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B) \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)

// Effective-address bases for 16-bit addressing.
#define EA_BASES_16BIT \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI) \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX) \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W) \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)

// 16-bit registers.
#define REGS_16BIT \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX) \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI) \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W) \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)

// Effective-address bases for 32-bit addressing. Same order as REGS_32BIT,
// except that the slot ESP occupies there holds the `sib` placeholder here.
#define EA_BASES_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX) \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI) \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)

// 32-bit registers.
#define REGS_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX) \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI) \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)

// Effective-address bases for 64-bit addressing. Same order as REGS_64BIT,
// except that the slot RSP occupies there holds the `sib64` placeholder here.
#define EA_BASES_64BIT \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)

// 64-bit registers.
#define REGS_64BIT \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)

// MMX registers.
#define REGS_MMX \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3) \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)

// XMM registers 0-31.
#define REGS_XMM \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3) \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7) \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11) \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15) \
  ENTRY(XMM16) ENTRY(XMM17) ENTRY(XMM18) ENTRY(XMM19) \
  ENTRY(XMM20) ENTRY(XMM21) ENTRY(XMM22) ENTRY(XMM23) \
  ENTRY(XMM24) ENTRY(XMM25) ENTRY(XMM26) ENTRY(XMM27) \
  ENTRY(XMM28) ENTRY(XMM29) ENTRY(XMM30) ENTRY(XMM31)

// YMM registers 0-31.
#define REGS_YMM \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3) \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7) \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11) \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15) \
  ENTRY(YMM16) ENTRY(YMM17) ENTRY(YMM18) ENTRY(YMM19) \
  ENTRY(YMM20) ENTRY(YMM21) ENTRY(YMM22) ENTRY(YMM23) \
  ENTRY(YMM24) ENTRY(YMM25) ENTRY(YMM26) ENTRY(YMM27) \
  ENTRY(YMM28) ENTRY(YMM29) ENTRY(YMM30) ENTRY(YMM31)

// ZMM registers 0-31.
#define REGS_ZMM \
  ENTRY(ZMM0)  ENTRY(ZMM1)  ENTRY(ZMM2)  ENTRY(ZMM3) \
  ENTRY(ZMM4)  ENTRY(ZMM5)  ENTRY(ZMM6)  ENTRY(ZMM7) \
  ENTRY(ZMM8)  ENTRY(ZMM9)  ENTRY(ZMM10) ENTRY(ZMM11) \
  ENTRY(ZMM12) ENTRY(ZMM13) ENTRY(ZMM14) ENTRY(ZMM15) \
  ENTRY(ZMM16) ENTRY(ZMM17) ENTRY(ZMM18) ENTRY(ZMM19) \
  ENTRY(ZMM20) ENTRY(ZMM21) ENTRY(ZMM22) ENTRY(ZMM23) \
  ENTRY(ZMM24) ENTRY(ZMM25) ENTRY(ZMM26) ENTRY(ZMM27) \
  ENTRY(ZMM28) ENTRY(ZMM29) ENTRY(ZMM30) ENTRY(ZMM31)

// Mask registers.
#define REGS_MASKS \
  ENTRY(K0) ENTRY(K1) ENTRY(K2) ENTRY(K3) \
  ENTRY(K4) ENTRY(K5) ENTRY(K6) ENTRY(K7)

// Mask register pairs.
#define REGS_MASK_PAIRS \
  ENTRY(K0_K1) ENTRY(K2_K3) ENTRY(K4_K5) ENTRY(K6_K7)

// Segment registers.
#define REGS_SEGMENT \
  ENTRY(ES) ENTRY(CS) ENTRY(SS) ENTRY(DS) ENTRY(FS) ENTRY(GS)

// Debug registers.
#define REGS_DEBUG \
  ENTRY(DR0)  ENTRY(DR1)  ENTRY(DR2)  ENTRY(DR3) \
  ENTRY(DR4)  ENTRY(DR5)  ENTRY(DR6)  ENTRY(DR7) \
  ENTRY(DR8)  ENTRY(DR9)  ENTRY(DR10) ENTRY(DR11) \
  ENTRY(DR12) ENTRY(DR13) ENTRY(DR14) ENTRY(DR15)

// Control registers.
#define REGS_CONTROL \
  ENTRY(CR0)  ENTRY(CR1)  ENTRY(CR2)  ENTRY(CR3) \
  ENTRY(CR4)  ENTRY(CR5)  ENTRY(CR6)  ENTRY(CR7) \
  ENTRY(CR8)  ENTRY(CR9)  ENTRY(CR10) ENTRY(CR11) \
  ENTRY(CR12) ENTRY(CR13) ENTRY(CR14) ENTRY(CR15)

// Bound registers.
#define REGS_BOUND \
  ENTRY(BND0) ENTRY(BND1) ENTRY(BND2) ENTRY(BND3)

// Tile registers. Any earlier definition of REGS_TMM is discarded first.
#undef REGS_TMM
#define REGS_TMM \
  ENTRY(TMM0) ENTRY(TMM1) ENTRY(TMM2) ENTRY(TMM3) \
  ENTRY(TMM4) ENTRY(TMM5) ENTRY(TMM6) ENTRY(TMM7)

// Every effective-address base, 16-bit first, then 32-bit, then 64-bit.
#define ALL_EA_BASES \
  EA_BASES_16BIT \
  EA_BASES_32BIT \
  EA_BASES_64BIT

// Every SIB base: the 32-bit registers followed by the 64-bit registers.
#define ALL_SIB_BASES \
  REGS_32BIT \
  REGS_64BIT

// Every register known to the decoder, with RIP as the final entry.
#define ALL_REGS \
  REGS_8BIT \
  REGS_16BIT \
  REGS_32BIT \
  REGS_64BIT \
  REGS_MMX \
  REGS_XMM \
  REGS_YMM \
  REGS_ZMM \
  REGS_MASKS \
  REGS_MASK_PAIRS \
  REGS_SEGMENT \
  REGS_DEBUG \
  REGS_CONTROL \
  REGS_BOUND \
  REGS_TMM \
  ENTRY(RIP)

/// Every value the base of an effective-address computation can take, i.e.
/// what the Mod and R/M fields of the ModR/M byte can select.
/// EA_BASE_* enumerators are genuine bases; EA_REG_* enumerators are the
/// registers that happen to be referred to when Mod == 0b11.
enum EABase {
  EA_BASE_NONE = 0,
#define ENTRY(x) EA_BASE_##x,
  ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) EA_REG_##x,
  ALL_REGS
#undef ENTRY
  EA_max
};

/// Every value of the SIB index field.
/// Reuses the entries of ALL_EA_BASES, with the special case that sib is
/// synonymous with NONE, and appends the XMM, YMM and ZMM registers, which
/// serve as indices for vector SIB.
enum SIBIndex {
  SIB_INDEX_NONE = 0,
#define ENTRY(x) SIB_INDEX_##x,
  ALL_EA_BASES
  REGS_XMM
  REGS_YMM
  REGS_ZMM
#undef ENTRY
  SIB_INDEX_max
};

/// Every value of the SIB base field.
enum SIBBase {
  SIB_BASE_NONE = 0,
#define ENTRY(x) SIB_BASE_##x,
  ALL_SIB_BASES
#undef ENTRY
  SIB_BASE_max
};

/// Displacement kinds for effective-address computations: none, or a
/// displacement of 8, 16 or 32 bits as the enumerator names indicate.
enum EADisplacement { EA_DISP_NONE, EA_DISP_8, EA_DISP_16, EA_DISP_32 };

/// Every value of the reg field in the ModR/M byte.
enum Reg {
#define ENTRY(x) MODRM_REG_##x,
  ALL_REGS
#undef ENTRY
  MODRM_REG_max
};

/// All possible segment overrides.
enum SegmentOverride {
  SEG_OVERRIDE_NONE, // no override
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max   // number of enumerators above; not an override itself
};

/// Values the VEX.m-mmmm field can take. Each enumerator is named after the
/// leading opcode byte(s) it stands for.
enum VEXLeadingOpcodeByte {
  VEX_LOB_0F = 1,
  VEX_LOB_0F38 = 2,
  VEX_LOB_0F3A = 3
};

/// XOP opcode map selectors; the enumerator value equals the map number.
enum XOPMapSelect {
  XOP_MAP_SELECT_8 = 8,
  XOP_MAP_SELECT_9 = 9,
  XOP_MAP_SELECT_A = 10
};

/// Values the VEX.pp / EVEX.pp field can take. Each enumerator is named after
/// the prefix it stands for.
enum VEXPrefixCode {
  VEX_PREFIX_NONE = 0,
  VEX_PREFIX_66 = 1,
  VEX_PREFIX_F3 = 2,
  VEX_PREFIX_F2 = 3
};

/// Which vector-extension prefix, if any, an instruction carries.
enum VectorExtensionType {
  TYPE_NO_VEX_XOP = 0, // no VEX, EVEX or XOP prefix
  TYPE_VEX_2B = 1,
  TYPE_VEX_3B = 2,
  TYPE_EVEX = 3,
  TYPE_XOP = 4
};

/// Describes how a full instruction and its operands are to be extracted and
/// interpreted.
struct InstructionSpecifier {
  uint16_t operands;
};

/// The x86 internal instruction, which is produced by the decoder.
524 struct InternalInstruction { 525 // Opaque value passed to the reader 526 llvm::ArrayRef<uint8_t> bytes; 527 // The address of the next byte to read via the reader 528 uint64_t readerCursor; 529 530 // General instruction information 531 532 // The mode to disassemble for (64-bit, protected, real) 533 DisassemblerMode mode; 534 // The start of the instruction, usable with the reader 535 uint64_t startLocation; 536 // The length of the instruction, in bytes 537 size_t length; 538 539 // Prefix state 540 541 // The possible mandatory prefix 542 uint8_t mandatoryPrefix; 543 // The value of the vector extension prefix(EVEX/VEX/XOP), if present 544 uint8_t vectorExtensionPrefix[4]; 545 // The type of the vector extension prefix 546 VectorExtensionType vectorExtensionType; 547 // The value of the REX prefix, if present 548 uint8_t rexPrefix; 549 // The segment override type 550 SegmentOverride segmentOverride; 551 // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease 552 bool xAcquireRelease; 553 554 // Address-size override 555 bool hasAdSize; 556 // Operand-size override 557 bool hasOpSize; 558 // Lock prefix 559 bool hasLockPrefix; 560 // The repeat prefix if any 561 uint8_t repeatPrefix; 562 563 // Sizes of various critical pieces of data, in bytes 564 uint8_t registerSize; 565 uint8_t addressSize; 566 uint8_t displacementSize; 567 uint8_t immediateSize; 568 569 // Offsets from the start of the instruction to the pieces of data, which is 570 // needed to find relocation entries for adding symbolic operands. 
571 uint8_t displacementOffset; 572 uint8_t immediateOffset; 573 574 // opcode state 575 576 // The last byte of the opcode, not counting any ModR/M extension 577 uint8_t opcode; 578 579 // decode state 580 581 // The type of opcode, used for indexing into the array of decode tables 582 OpcodeType opcodeType; 583 // The instruction ID, extracted from the decode table 584 uint16_t instructionID; 585 // The specifier for the instruction, from the instruction info table 586 const InstructionSpecifier *spec; 587 588 // state for additional bytes, consumed during operand decode. Pattern: 589 // consumed___ indicates that the byte was already consumed and does not 590 // need to be consumed again. 591 592 // The VEX.vvvv field, which contains a third register operand for some AVX 593 // instructions. 594 Reg vvvv; 595 596 // The writemask for AVX-512 instructions which is contained in EVEX.aaa 597 Reg writemask; 598 599 // The ModR/M byte, which contains most register operands and some portion of 600 // all memory operands. 601 bool consumedModRM; 602 uint8_t modRM; 603 604 // The SIB byte, used for more complex 32- or 64-bit memory operands 605 uint8_t sib; 606 607 // The displacement, used for memory operands 608 int32_t displacement; 609 610 // Immediates. There can be two in some cases 611 uint8_t numImmediatesConsumed; 612 uint8_t numImmediatesTranslated; 613 uint64_t immediates[2]; 614 615 // A register or immediate operand encoded into the opcode 616 Reg opcodeRegister; 617 618 // Portions of the ModR/M byte 619 620 // These fields determine the allowable values for the ModR/M fields, which 621 // depend on operand and address widths. 622 EABase eaRegBase; 623 Reg regBase; 624 625 // The Mod and R/M fields can encode a base for an effective address, or a 626 // register. These are separated into two fields here. 
627 EABase eaBase; 628 EADisplacement eaDisplacement; 629 // The reg field always encodes a register 630 Reg reg; 631 632 // SIB state 633 SIBIndex sibIndexBase; 634 SIBIndex sibIndex; 635 uint8_t sibScale; 636 SIBBase sibBase; 637 638 // Embedded rounding control. 639 uint8_t RC; 640 641 ArrayRef<OperandSpecifier> operands; 642 }; 643 644 } // namespace X86Disassembler 645 } // namespace llvm 646 647 #endif 648