//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
//  MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets.  The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is.  The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decoding.  The ModRMDecision's type is an entry
//    from ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if
//    the ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.).  The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read.  For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance).  Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself.  The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
//   invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
//   table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
//   factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
//   responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Debug-only trace helper: prints the current source line number followed by
// the message.  Note that the expansion already ends in a ';', so a use
// written as `debug("...");` produces an extra empty statement.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to.  Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // How the ModR/M byte selects among the candidate instructions; decode()
  // compares this against the MODRM_* decision types.
  uint8_t modrm_type;
  // Index into modRMTable of the first candidate instruction ID; decode()
  // adds an offset derived from the ModR/M byte for the split/full types.
  uint16_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One entry per possible value of the opcode byte.
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes).  Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes.  Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
117 struct ContextDecision { 118 OpcodeDecision opcodeDecisions[IC_max]; 119 }; 120 121 #include "X86GenDisassemblerTables.inc" 122 123 static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124 uint8_t opcode, uint8_t modRM) { 125 const struct ModRMDecision *dec; 126 127 switch (type) { 128 case ONEBYTE: 129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130 break; 131 case TWOBYTE: 132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133 break; 134 case THREEBYTE_38: 135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136 break; 137 case THREEBYTE_3A: 138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139 break; 140 case XOP8_MAP: 141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142 break; 143 case XOP9_MAP: 144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145 break; 146 case XOPA_MAP: 147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148 break; 149 case THREEDNOW_MAP: 150 dec = 151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152 break; 153 case MAP5: 154 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 155 break; 156 case MAP6: 157 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158 break; 159 } 160 161 switch (dec->modrm_type) { 162 default: 163 llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 164 return 0; 165 case MODRM_ONEENTRY: 166 return modRMTable[dec->instructionIDs]; 167 case MODRM_SPLITRM: 168 if (modFromModRM(modRM) == 0x3) 169 return modRMTable[dec->instructionIDs + 1]; 170 return modRMTable[dec->instructionIDs]; 171 case MODRM_SPLITREG: 172 if (modFromModRM(modRM) == 0x3) 173 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 174 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 175 case MODRM_SPLITMISC: 176 if (modFromModRM(modRM) == 0x3) 177 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 178 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 179 case MODRM_FULL: 180 return modRMTable[dec->instructionIDs + modRM]; 181 } 182 } 183 184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 185 uint64_t offset = insn->readerCursor - insn->startLocation; 186 if (offset >= insn->bytes.size()) 187 return true; 188 byte = insn->bytes[offset]; 189 return false; 190 } 191 192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 193 auto r = insn->bytes; 194 uint64_t offset = insn->readerCursor - insn->startLocation; 195 if (offset + sizeof(T) > r.size()) 196 return true; 197 T ret = 0; 198 for (unsigned i = 0; i < sizeof(T); ++i) 199 ret |= (uint64_t)r[offset + i] << (i * 8); 200 ptr = ret; 201 insn->readerCursor += sizeof(T); 202 return false; 203 } 204 205 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 206 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 207 } 208 209 // Consumes all of an instruction's prefix bytes, and marks the 210 // instruction as having them. Also sets the instruction's default operand, 211 // address, and other relevant data sizes to report operands correctly. 212 // 213 // insn must not be empty. 
214 static int readPrefixes(struct InternalInstruction *insn) { 215 bool isPrefix = true; 216 uint8_t byte = 0; 217 uint8_t nextByte; 218 219 LLVM_DEBUG(dbgs() << "readPrefixes()"); 220 221 while (isPrefix) { 222 // If we fail reading prefixes, just stop here and let the opcode reader 223 // deal with it. 224 if (consume(insn, byte)) 225 break; 226 227 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 228 // break and let it be disassembled as a normal "instruction". 229 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 230 break; 231 232 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { 233 // If the byte is 0xf2 or 0xf3, and any of the following conditions are 234 // met: 235 // - it is followed by a LOCK (0xf0) prefix 236 // - it is followed by an xchg instruction 237 // then it should be disassembled as a xacquire/xrelease not repne/rep. 238 if (((nextByte == 0xf0) || 239 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 240 insn->xAcquireRelease = true; 241 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 242 break; 243 } 244 // Also if the byte is 0xf3, and the following condition is met: 245 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 246 // "mov mem, imm" (opcode 0xc6/0xc7) instructions. 247 // then it should be disassembled as an xrelease not rep. 
248 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 249 nextByte == 0xc6 || nextByte == 0xc7)) { 250 insn->xAcquireRelease = true; 251 break; 252 } 253 if (isREX(insn, nextByte)) { 254 uint8_t nnextByte; 255 // Go to REX prefix after the current one 256 if (consume(insn, nnextByte)) 257 return -1; 258 // We should be able to read next byte after REX prefix 259 if (peek(insn, nnextByte)) 260 return -1; 261 --insn->readerCursor; 262 } 263 } 264 265 switch (byte) { 266 case 0xf0: // LOCK 267 insn->hasLockPrefix = true; 268 break; 269 case 0xf2: // REPNE/REPNZ 270 case 0xf3: { // REP or REPE/REPZ 271 uint8_t nextByte; 272 if (peek(insn, nextByte)) 273 break; 274 // TODO: 275 // 1. There could be several 0x66 276 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 277 // it's not mandatory prefix 278 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 279 // 0x0f exactly after it to be mandatory prefix 280 if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) 281 // The last of 0xf2 /0xf3 is mandatory prefix 282 insn->mandatoryPrefix = byte; 283 insn->repeatPrefix = byte; 284 break; 285 } 286 case 0x2e: // CS segment override -OR- Branch not taken 287 insn->segmentOverride = SEG_OVERRIDE_CS; 288 break; 289 case 0x36: // SS segment override -OR- Branch taken 290 insn->segmentOverride = SEG_OVERRIDE_SS; 291 break; 292 case 0x3e: // DS segment override 293 insn->segmentOverride = SEG_OVERRIDE_DS; 294 break; 295 case 0x26: // ES segment override 296 insn->segmentOverride = SEG_OVERRIDE_ES; 297 break; 298 case 0x64: // FS segment override 299 insn->segmentOverride = SEG_OVERRIDE_FS; 300 break; 301 case 0x65: // GS segment override 302 insn->segmentOverride = SEG_OVERRIDE_GS; 303 break; 304 case 0x66: { // Operand-size override { 305 uint8_t nextByte; 306 insn->hasOpSize = true; 307 if (peek(insn, nextByte)) 308 break; 309 // 0x66 can't overwrite existing mandatory prefix and should be ignored 310 if (!insn->mandatoryPrefix && 
(nextByte == 0x0f || isREX(insn, nextByte))) 311 insn->mandatoryPrefix = byte; 312 break; 313 } 314 case 0x67: // Address-size override 315 insn->hasAdSize = true; 316 break; 317 default: // Not a prefix byte 318 isPrefix = false; 319 break; 320 } 321 322 if (isPrefix) 323 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); 324 } 325 326 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 327 328 if (byte == 0x62) { 329 uint8_t byte1, byte2; 330 if (consume(insn, byte1)) { 331 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix"); 332 return -1; 333 } 334 335 if (peek(insn, byte2)) { 336 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 337 return -1; 338 } 339 340 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && 341 ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) { 342 insn->vectorExtensionType = TYPE_EVEX; 343 } else { 344 --insn->readerCursor; // unconsume byte1 345 --insn->readerCursor; // unconsume byte 346 } 347 348 if (insn->vectorExtensionType == TYPE_EVEX) { 349 insn->vectorExtensionPrefix[0] = byte; 350 insn->vectorExtensionPrefix[1] = byte1; 351 if (consume(insn, insn->vectorExtensionPrefix[2])) { 352 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 353 return -1; 354 } 355 if (consume(insn, insn->vectorExtensionPrefix[3])) { 356 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix"); 357 return -1; 358 } 359 360 // We simulate the REX prefix for simplicity's sake 361 if (insn->mode == MODE_64BIT) { 362 insn->rexPrefix = 0x40 | 363 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | 364 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | 365 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | 366 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 367 } 368 369 LLVM_DEBUG( 370 dbgs() << format( 371 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 372 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 373 insn->vectorExtensionPrefix[2], 
insn->vectorExtensionPrefix[3])); 374 } 375 } else if (byte == 0xc4) { 376 uint8_t byte1; 377 if (peek(insn, byte1)) { 378 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 379 return -1; 380 } 381 382 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 383 insn->vectorExtensionType = TYPE_VEX_3B; 384 else 385 --insn->readerCursor; 386 387 if (insn->vectorExtensionType == TYPE_VEX_3B) { 388 insn->vectorExtensionPrefix[0] = byte; 389 consume(insn, insn->vectorExtensionPrefix[1]); 390 consume(insn, insn->vectorExtensionPrefix[2]); 391 392 // We simulate the REX prefix for simplicity's sake 393 394 if (insn->mode == MODE_64BIT) 395 insn->rexPrefix = 0x40 | 396 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | 397 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | 398 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | 399 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 400 401 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 402 insn->vectorExtensionPrefix[0], 403 insn->vectorExtensionPrefix[1], 404 insn->vectorExtensionPrefix[2])); 405 } 406 } else if (byte == 0xc5) { 407 uint8_t byte1; 408 if (peek(insn, byte1)) { 409 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 410 return -1; 411 } 412 413 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 414 insn->vectorExtensionType = TYPE_VEX_2B; 415 else 416 --insn->readerCursor; 417 418 if (insn->vectorExtensionType == TYPE_VEX_2B) { 419 insn->vectorExtensionPrefix[0] = byte; 420 consume(insn, insn->vectorExtensionPrefix[1]); 421 422 if (insn->mode == MODE_64BIT) 423 insn->rexPrefix = 424 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 425 426 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 427 default: 428 break; 429 case VEX_PREFIX_66: 430 insn->hasOpSize = true; 431 break; 432 } 433 434 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx", 435 insn->vectorExtensionPrefix[0], 436 insn->vectorExtensionPrefix[1])); 437 } 438 } else if 
(byte == 0x8f) { 439 uint8_t byte1; 440 if (peek(insn, byte1)) { 441 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP"); 442 return -1; 443 } 444 445 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction. 446 insn->vectorExtensionType = TYPE_XOP; 447 else 448 --insn->readerCursor; 449 450 if (insn->vectorExtensionType == TYPE_XOP) { 451 insn->vectorExtensionPrefix[0] = byte; 452 consume(insn, insn->vectorExtensionPrefix[1]); 453 consume(insn, insn->vectorExtensionPrefix[2]); 454 455 // We simulate the REX prefix for simplicity's sake 456 457 if (insn->mode == MODE_64BIT) 458 insn->rexPrefix = 0x40 | 459 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | 460 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | 461 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | 462 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 463 464 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 465 default: 466 break; 467 case VEX_PREFIX_66: 468 insn->hasOpSize = true; 469 break; 470 } 471 472 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 473 insn->vectorExtensionPrefix[0], 474 insn->vectorExtensionPrefix[1], 475 insn->vectorExtensionPrefix[2])); 476 } 477 } else if (isREX(insn, byte)) { 478 if (peek(insn, nextByte)) 479 return -1; 480 insn->rexPrefix = byte; 481 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); 482 } else 483 --insn->readerCursor; 484 485 if (insn->mode == MODE_16BIT) { 486 insn->registerSize = (insn->hasOpSize ? 4 : 2); 487 insn->addressSize = (insn->hasAdSize ? 4 : 2); 488 insn->displacementSize = (insn->hasAdSize ? 4 : 2); 489 insn->immediateSize = (insn->hasOpSize ? 4 : 2); 490 } else if (insn->mode == MODE_32BIT) { 491 insn->registerSize = (insn->hasOpSize ? 2 : 4); 492 insn->addressSize = (insn->hasAdSize ? 2 : 4); 493 insn->displacementSize = (insn->hasAdSize ? 2 : 4); 494 insn->immediateSize = (insn->hasOpSize ? 
2 : 4); 495 } else if (insn->mode == MODE_64BIT) { 496 insn->displacementSize = 4; 497 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 498 insn->registerSize = 8; 499 insn->addressSize = (insn->hasAdSize ? 4 : 8); 500 insn->immediateSize = 4; 501 insn->hasOpSize = false; 502 } else { 503 insn->registerSize = (insn->hasOpSize ? 2 : 4); 504 insn->addressSize = (insn->hasAdSize ? 4 : 8); 505 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 506 } 507 } 508 509 return 0; 510 } 511 512 // Consumes the SIB byte to determine addressing information. 513 static int readSIB(struct InternalInstruction *insn) { 514 SIBBase sibBaseBase = SIB_BASE_NONE; 515 uint8_t index, base; 516 517 LLVM_DEBUG(dbgs() << "readSIB()"); 518 switch (insn->addressSize) { 519 case 2: 520 default: 521 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode"); 522 case 4: 523 insn->sibIndexBase = SIB_INDEX_EAX; 524 sibBaseBase = SIB_BASE_EAX; 525 break; 526 case 8: 527 insn->sibIndexBase = SIB_INDEX_RAX; 528 sibBaseBase = SIB_BASE_RAX; 529 break; 530 } 531 532 if (consume(insn, insn->sib)) 533 return -1; 534 535 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); 536 537 if (index == 0x4) { 538 insn->sibIndex = SIB_INDEX_NONE; 539 } else { 540 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index); 541 } 542 543 insn->sibScale = 1 << scaleFromSIB(insn->sib); 544 545 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); 546 547 switch (base) { 548 case 0x5: 549 case 0xd: 550 switch (modFromModRM(insn->modRM)) { 551 case 0x0: 552 insn->eaDisplacement = EA_DISP_32; 553 insn->sibBase = SIB_BASE_NONE; 554 break; 555 case 0x1: 556 insn->eaDisplacement = EA_DISP_8; 557 insn->sibBase = (SIBBase)(sibBaseBase + base); 558 break; 559 case 0x2: 560 insn->eaDisplacement = EA_DISP_32; 561 insn->sibBase = (SIBBase)(sibBaseBase + base); 562 break; 563 default: 564 llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte"); 565 } 566 break; 567 default: 568 insn->sibBase = 
(SIBBase)(sibBaseBase + base); 569 break; 570 } 571 572 return 0; 573 } 574 575 static int readDisplacement(struct InternalInstruction *insn) { 576 int8_t d8; 577 int16_t d16; 578 int32_t d32; 579 LLVM_DEBUG(dbgs() << "readDisplacement()"); 580 581 insn->displacementOffset = insn->readerCursor - insn->startLocation; 582 switch (insn->eaDisplacement) { 583 case EA_DISP_NONE: 584 break; 585 case EA_DISP_8: 586 if (consume(insn, d8)) 587 return -1; 588 insn->displacement = d8; 589 break; 590 case EA_DISP_16: 591 if (consume(insn, d16)) 592 return -1; 593 insn->displacement = d16; 594 break; 595 case EA_DISP_32: 596 if (consume(insn, d32)) 597 return -1; 598 insn->displacement = d32; 599 break; 600 } 601 602 return 0; 603 } 604 605 // Consumes all addressing information (ModR/M byte, SIB byte, and displacement. 606 static int readModRM(struct InternalInstruction *insn) { 607 uint8_t mod, rm, reg, evexrm; 608 LLVM_DEBUG(dbgs() << "readModRM()"); 609 610 if (insn->consumedModRM) 611 return 0; 612 613 if (consume(insn, insn->modRM)) 614 return -1; 615 insn->consumedModRM = true; 616 617 mod = modFromModRM(insn->modRM); 618 rm = rmFromModRM(insn->modRM); 619 reg = regFromModRM(insn->modRM); 620 621 // This goes by insn->registerSize to pick the correct register, which messes 622 // up if we're using (say) XMM or 8-bit register operands. That gets fixed in 623 // fixupReg(). 
624 switch (insn->registerSize) { 625 case 2: 626 insn->regBase = MODRM_REG_AX; 627 insn->eaRegBase = EA_REG_AX; 628 break; 629 case 4: 630 insn->regBase = MODRM_REG_EAX; 631 insn->eaRegBase = EA_REG_EAX; 632 break; 633 case 8: 634 insn->regBase = MODRM_REG_RAX; 635 insn->eaRegBase = EA_REG_RAX; 636 break; 637 } 638 639 reg |= rFromREX(insn->rexPrefix) << 3; 640 rm |= bFromREX(insn->rexPrefix) << 3; 641 642 evexrm = 0; 643 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) { 644 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 645 evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; 646 } 647 648 insn->reg = (Reg)(insn->regBase + reg); 649 650 switch (insn->addressSize) { 651 case 2: { 652 EABase eaBaseBase = EA_BASE_BX_SI; 653 654 switch (mod) { 655 case 0x0: 656 if (rm == 0x6) { 657 insn->eaBase = EA_BASE_NONE; 658 insn->eaDisplacement = EA_DISP_16; 659 if (readDisplacement(insn)) 660 return -1; 661 } else { 662 insn->eaBase = (EABase)(eaBaseBase + rm); 663 insn->eaDisplacement = EA_DISP_NONE; 664 } 665 break; 666 case 0x1: 667 insn->eaBase = (EABase)(eaBaseBase + rm); 668 insn->eaDisplacement = EA_DISP_8; 669 insn->displacementSize = 1; 670 if (readDisplacement(insn)) 671 return -1; 672 break; 673 case 0x2: 674 insn->eaBase = (EABase)(eaBaseBase + rm); 675 insn->eaDisplacement = EA_DISP_16; 676 if (readDisplacement(insn)) 677 return -1; 678 break; 679 case 0x3: 680 insn->eaBase = (EABase)(insn->eaRegBase + rm); 681 if (readDisplacement(insn)) 682 return -1; 683 break; 684 } 685 break; 686 } 687 case 4: 688 case 8: { 689 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); 690 691 switch (mod) { 692 case 0x0: 693 insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this 694 // In determining whether RIP-relative mode is used (rm=5), 695 // or whether a SIB byte is present (rm=4), 696 // the extension bits (REX.b and EVEX.x) are ignored. 
697 switch (rm & 7) { 698 case 0x4: // SIB byte is present 699 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); 700 if (readSIB(insn) || readDisplacement(insn)) 701 return -1; 702 break; 703 case 0x5: // RIP-relative 704 insn->eaBase = EA_BASE_NONE; 705 insn->eaDisplacement = EA_DISP_32; 706 if (readDisplacement(insn)) 707 return -1; 708 break; 709 default: 710 insn->eaBase = (EABase)(eaBaseBase + rm); 711 break; 712 } 713 break; 714 case 0x1: 715 insn->displacementSize = 1; 716 LLVM_FALLTHROUGH; 717 case 0x2: 718 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); 719 switch (rm & 7) { 720 case 0x4: // SIB byte is present 721 insn->eaBase = EA_BASE_sib; 722 if (readSIB(insn) || readDisplacement(insn)) 723 return -1; 724 break; 725 default: 726 insn->eaBase = (EABase)(eaBaseBase + rm); 727 if (readDisplacement(insn)) 728 return -1; 729 break; 730 } 731 break; 732 case 0x3: 733 insn->eaDisplacement = EA_DISP_NONE; 734 insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm); 735 break; 736 } 737 break; 738 } 739 } // switch (insn->addressSize) 740 741 return 0; 742 } 743 744 #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \ 745 static uint16_t name(struct InternalInstruction *insn, OperandType type, \ 746 uint8_t index, uint8_t *valid) { \ 747 *valid = 1; \ 748 switch (type) { \ 749 default: \ 750 debug("Unhandled register type"); \ 751 *valid = 0; \ 752 return 0; \ 753 case TYPE_Rv: \ 754 return base + index; \ 755 case TYPE_R8: \ 756 index &= mask; \ 757 if (index > 0xf) \ 758 *valid = 0; \ 759 if (insn->rexPrefix && index >= 4 && index <= 7) { \ 760 return prefix##_SPL + (index - 4); \ 761 } else { \ 762 return prefix##_AL + index; \ 763 } \ 764 case TYPE_R16: \ 765 index &= mask; \ 766 if (index > 0xf) \ 767 *valid = 0; \ 768 return prefix##_AX + index; \ 769 case TYPE_R32: \ 770 index &= mask; \ 771 if (index > 0xf) \ 772 *valid = 0; \ 773 return prefix##_EAX + index; \ 774 case TYPE_R64: \ 775 index &= mask; \ 776 if (index > 
0xf) \ 777 *valid = 0; \ 778 return prefix##_RAX + index; \ 779 case TYPE_ZMM: \ 780 return prefix##_ZMM0 + index; \ 781 case TYPE_YMM: \ 782 return prefix##_YMM0 + index; \ 783 case TYPE_XMM: \ 784 return prefix##_XMM0 + index; \ 785 case TYPE_TMM: \ 786 if (index > 7) \ 787 *valid = 0; \ 788 return prefix##_TMM0 + index; \ 789 case TYPE_VK: \ 790 index &= 0xf; \ 791 if (index > 7) \ 792 *valid = 0; \ 793 return prefix##_K0 + index; \ 794 case TYPE_VK_PAIR: \ 795 if (index > 7) \ 796 *valid = 0; \ 797 return prefix##_K0_K1 + (index / 2); \ 798 case TYPE_MM64: \ 799 return prefix##_MM0 + (index & 0x7); \ 800 case TYPE_SEGMENTREG: \ 801 if ((index & 7) > 5) \ 802 *valid = 0; \ 803 return prefix##_ES + (index & 7); \ 804 case TYPE_DEBUGREG: \ 805 return prefix##_DR0 + index; \ 806 case TYPE_CONTROLREG: \ 807 return prefix##_CR0 + index; \ 808 case TYPE_MVSIBX: \ 809 return prefix##_XMM0 + index; \ 810 case TYPE_MVSIBY: \ 811 return prefix##_YMM0 + index; \ 812 case TYPE_MVSIBZ: \ 813 return prefix##_ZMM0 + index; \ 814 } \ 815 } 816 817 // Consult an operand type to determine the meaning of the reg or R/M field. If 818 // the operand is an XMM operand, for example, an operand would be XMM0 instead 819 // of AX, which readModRM() would otherwise misinterpret it as. 820 // 821 // @param insn - The instruction containing the operand. 822 // @param type - The operand type. 823 // @param index - The existing value of the field as reported by readModRM(). 824 // @param valid - The address of a uint8_t. The target is set to 1 if the 825 // field is valid for the register class; 0 if not. 826 // @return - The proper value. 827 GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f) 828 GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf) 829 830 // Consult an operand specifier to determine which of the fixup*Value functions 831 // to use in correcting readModRM()'ss interpretation. 832 // 833 // @param insn - See fixup*Value(). 
834 // @param op - The operand specifier. 835 // @return - 0 if fixup was successful; -1 if the register returned was 836 // invalid for its class. 837 static int fixupReg(struct InternalInstruction *insn, 838 const struct OperandSpecifier *op) { 839 uint8_t valid; 840 LLVM_DEBUG(dbgs() << "fixupReg()"); 841 842 switch ((OperandEncoding)op->encoding) { 843 default: 844 debug("Expected a REG or R/M encoding in fixupReg"); 845 return -1; 846 case ENCODING_VVVV: 847 insn->vvvv = 848 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 849 if (!valid) 850 return -1; 851 break; 852 case ENCODING_REG: 853 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 854 insn->reg - insn->regBase, &valid); 855 if (!valid) 856 return -1; 857 break; 858 case ENCODING_SIB: 859 CASE_ENCODING_RM: 860 if (insn->eaBase >= insn->eaRegBase) { 861 insn->eaBase = (EABase)fixupRMValue( 862 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 863 if (!valid) 864 return -1; 865 } 866 break; 867 } 868 869 return 0; 870 } 871 872 // Read the opcode (except the ModR/M byte in the case of extended or escape 873 // opcodes). 
874 static bool readOpcode(struct InternalInstruction *insn) { 875 uint8_t current; 876 LLVM_DEBUG(dbgs() << "readOpcode()"); 877 878 insn->opcodeType = ONEBYTE; 879 if (insn->vectorExtensionType == TYPE_EVEX) { 880 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 881 default: 882 LLVM_DEBUG( 883 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 884 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 885 return true; 886 case VEX_LOB_0F: 887 insn->opcodeType = TWOBYTE; 888 return consume(insn, insn->opcode); 889 case VEX_LOB_0F38: 890 insn->opcodeType = THREEBYTE_38; 891 return consume(insn, insn->opcode); 892 case VEX_LOB_0F3A: 893 insn->opcodeType = THREEBYTE_3A; 894 return consume(insn, insn->opcode); 895 case VEX_LOB_MAP5: 896 insn->opcodeType = MAP5; 897 return consume(insn, insn->opcode); 898 case VEX_LOB_MAP6: 899 insn->opcodeType = MAP6; 900 return consume(insn, insn->opcode); 901 } 902 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 903 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 904 default: 905 LLVM_DEBUG( 906 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 907 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 908 return true; 909 case VEX_LOB_0F: 910 insn->opcodeType = TWOBYTE; 911 return consume(insn, insn->opcode); 912 case VEX_LOB_0F38: 913 insn->opcodeType = THREEBYTE_38; 914 return consume(insn, insn->opcode); 915 case VEX_LOB_0F3A: 916 insn->opcodeType = THREEBYTE_3A; 917 return consume(insn, insn->opcode); 918 case VEX_LOB_MAP5: 919 insn->opcodeType = MAP5; 920 return consume(insn, insn->opcode); 921 case VEX_LOB_MAP6: 922 insn->opcodeType = MAP6; 923 return consume(insn, insn->opcode); 924 } 925 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 926 insn->opcodeType = TWOBYTE; 927 return consume(insn, insn->opcode); 928 } else if (insn->vectorExtensionType == TYPE_XOP) { 929 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 930 default: 931 LLVM_DEBUG( 932 dbgs() 
<< format("Unhandled m-mmmm field for instruction (0x%hhx)", 933 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 934 return true; 935 case XOP_MAP_SELECT_8: 936 insn->opcodeType = XOP8_MAP; 937 return consume(insn, insn->opcode); 938 case XOP_MAP_SELECT_9: 939 insn->opcodeType = XOP9_MAP; 940 return consume(insn, insn->opcode); 941 case XOP_MAP_SELECT_A: 942 insn->opcodeType = XOPA_MAP; 943 return consume(insn, insn->opcode); 944 } 945 } 946 947 if (consume(insn, current)) 948 return true; 949 950 if (current == 0x0f) { 951 LLVM_DEBUG( 952 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 953 if (consume(insn, current)) 954 return true; 955 956 if (current == 0x38) { 957 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 958 current)); 959 if (consume(insn, current)) 960 return true; 961 962 insn->opcodeType = THREEBYTE_38; 963 } else if (current == 0x3a) { 964 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 965 current)); 966 if (consume(insn, current)) 967 return true; 968 969 insn->opcodeType = THREEBYTE_3A; 970 } else if (current == 0x0f) { 971 LLVM_DEBUG( 972 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 973 974 // Consume operands before the opcode to comply with the 3DNow encoding 975 if (readModRM(insn)) 976 return true; 977 978 if (consume(insn, current)) 979 return true; 980 981 insn->opcodeType = THREEDNOW_MAP; 982 } else { 983 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix"); 984 insn->opcodeType = TWOBYTE; 985 } 986 } else if (insn->mandatoryPrefix) 987 // The opcode with mandatory prefix must start with opcode escape. 988 // If not it's legacy repeat prefix 989 insn->mandatoryPrefix = 0; 990 991 // At this point we have consumed the full opcode. 992 // Anything we consume from here on must be unconsumed. 993 insn->opcode = current; 994 995 return false; 996 } 997 998 // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 
999 static bool is16BitEquivalent(const char *orig, const char *equiv) { 1000 for (int i = 0;; i++) { 1001 if (orig[i] == '\0' && equiv[i] == '\0') 1002 return true; 1003 if (orig[i] == '\0' || equiv[i] == '\0') 1004 return false; 1005 if (orig[i] != equiv[i]) { 1006 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 1007 continue; 1008 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 1009 continue; 1010 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 1011 continue; 1012 return false; 1013 } 1014 } 1015 } 1016 1017 // Determine whether this instruction is a 64-bit instruction. 1018 static bool is64Bit(const char *name) { 1019 for (int i = 0;; ++i) { 1020 if (name[i] == '\0') 1021 return false; 1022 if (name[i] == '6' && name[i + 1] == '4') 1023 return true; 1024 } 1025 } 1026 1027 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1028 // for extended and escape opcodes, and using a supplied attribute mask. 1029 static int getInstructionIDWithAttrMask(uint16_t *instructionID, 1030 struct InternalInstruction *insn, 1031 uint16_t attrMask) { 1032 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]); 1033 const ContextDecision *decision; 1034 switch (insn->opcodeType) { 1035 case ONEBYTE: 1036 decision = &ONEBYTE_SYM; 1037 break; 1038 case TWOBYTE: 1039 decision = &TWOBYTE_SYM; 1040 break; 1041 case THREEBYTE_38: 1042 decision = &THREEBYTE38_SYM; 1043 break; 1044 case THREEBYTE_3A: 1045 decision = &THREEBYTE3A_SYM; 1046 break; 1047 case XOP8_MAP: 1048 decision = &XOP8_MAP_SYM; 1049 break; 1050 case XOP9_MAP: 1051 decision = &XOP9_MAP_SYM; 1052 break; 1053 case XOPA_MAP: 1054 decision = &XOPA_MAP_SYM; 1055 break; 1056 case THREEDNOW_MAP: 1057 decision = &THREEDNOW_MAP_SYM; 1058 break; 1059 case MAP5: 1060 decision = &MAP5_SYM; 1061 break; 1062 case MAP6: 1063 decision = &MAP6_SYM; 1064 break; 1065 } 1066 1067 if (decision->opcodeDecisions[insnCtx] 1068 .modRMDecisions[insn->opcode] 1069 
.modrm_type != MODRM_ONEENTRY) { 1070 if (readModRM(insn)) 1071 return -1; 1072 *instructionID = 1073 decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM); 1074 } else { 1075 *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0); 1076 } 1077 1078 return 0; 1079 } 1080 1081 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1082 // for extended and escape opcodes. Determines the attributes and context for 1083 // the instruction before doing so. 1084 static int getInstructionID(struct InternalInstruction *insn, 1085 const MCInstrInfo *mii) { 1086 uint16_t attrMask; 1087 uint16_t instructionID; 1088 1089 LLVM_DEBUG(dbgs() << "getID()"); 1090 1091 attrMask = ATTR_NONE; 1092 1093 if (insn->mode == MODE_64BIT) 1094 attrMask |= ATTR_64BIT; 1095 1096 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1097 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX; 1098 1099 if (insn->vectorExtensionType == TYPE_EVEX) { 1100 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { 1101 case VEX_PREFIX_66: 1102 attrMask |= ATTR_OPSIZE; 1103 break; 1104 case VEX_PREFIX_F3: 1105 attrMask |= ATTR_XS; 1106 break; 1107 case VEX_PREFIX_F2: 1108 attrMask |= ATTR_XD; 1109 break; 1110 } 1111 1112 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1113 attrMask |= ATTR_EVEXKZ; 1114 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1115 attrMask |= ATTR_EVEXB; 1116 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1117 attrMask |= ATTR_EVEXK; 1118 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) 1119 attrMask |= ATTR_VEXL; 1120 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1121 attrMask |= ATTR_EVEXL2; 1122 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 1123 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { 1124 case VEX_PREFIX_66: 1125 attrMask |= ATTR_OPSIZE; 1126 break; 1127 case VEX_PREFIX_F3: 1128 attrMask |= ATTR_XS; 1129 break; 1130 case VEX_PREFIX_F2: 1131 attrMask |= ATTR_XD; 1132 
break; 1133 } 1134 1135 if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) 1136 attrMask |= ATTR_VEXL; 1137 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 1138 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 1139 case VEX_PREFIX_66: 1140 attrMask |= ATTR_OPSIZE; 1141 if (insn->hasAdSize) 1142 attrMask |= ATTR_ADSIZE; 1143 break; 1144 case VEX_PREFIX_F3: 1145 attrMask |= ATTR_XS; 1146 break; 1147 case VEX_PREFIX_F2: 1148 attrMask |= ATTR_XD; 1149 break; 1150 } 1151 1152 if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) 1153 attrMask |= ATTR_VEXL; 1154 } else if (insn->vectorExtensionType == TYPE_XOP) { 1155 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 1156 case VEX_PREFIX_66: 1157 attrMask |= ATTR_OPSIZE; 1158 break; 1159 case VEX_PREFIX_F3: 1160 attrMask |= ATTR_XS; 1161 break; 1162 case VEX_PREFIX_F2: 1163 attrMask |= ATTR_XD; 1164 break; 1165 } 1166 1167 if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) 1168 attrMask |= ATTR_VEXL; 1169 } else { 1170 return -1; 1171 } 1172 } else if (!insn->mandatoryPrefix) { 1173 // If we don't have mandatory prefix we should use legacy prefixes here 1174 if (insn->hasOpSize && (insn->mode != MODE_16BIT)) 1175 attrMask |= ATTR_OPSIZE; 1176 if (insn->hasAdSize) 1177 attrMask |= ATTR_ADSIZE; 1178 if (insn->opcodeType == ONEBYTE) { 1179 if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) 1180 // Special support for PAUSE 1181 attrMask |= ATTR_XS; 1182 } else { 1183 if (insn->repeatPrefix == 0xf2) 1184 attrMask |= ATTR_XD; 1185 else if (insn->repeatPrefix == 0xf3) 1186 attrMask |= ATTR_XS; 1187 } 1188 } else { 1189 switch (insn->mandatoryPrefix) { 1190 case 0xf2: 1191 attrMask |= ATTR_XD; 1192 break; 1193 case 0xf3: 1194 attrMask |= ATTR_XS; 1195 break; 1196 case 0x66: 1197 if (insn->mode != MODE_16BIT) 1198 attrMask |= ATTR_OPSIZE; 1199 if (insn->hasAdSize) 1200 attrMask |= ATTR_ADSIZE; 1201 break; 1202 case 0x67: 1203 attrMask |= ATTR_ADSIZE; 1204 break; 1205 } 1206 } 1207 1208 if (insn->rexPrefix 
& 0x08) { 1209 attrMask |= ATTR_REXW; 1210 attrMask &= ~ATTR_ADSIZE; 1211 } 1212 1213 if (insn->mode == MODE_16BIT) { 1214 // JCXZ/JECXZ need special handling for 16-bit mode because the meaning 1215 // of the AdSize prefix is inverted w.r.t. 32-bit mode. 1216 if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3) 1217 attrMask ^= ATTR_ADSIZE; 1218 // If we're in 16-bit mode and this is one of the relative jumps and opsize 1219 // prefix isn't present, we need to force the opsize attribute since the 1220 // prefix is inverted relative to 32-bit mode. 1221 if (!insn->hasOpSize && insn->opcodeType == ONEBYTE && 1222 (insn->opcode == 0xE8 || insn->opcode == 0xE9)) 1223 attrMask |= ATTR_OPSIZE; 1224 1225 if (!insn->hasOpSize && insn->opcodeType == TWOBYTE && 1226 insn->opcode >= 0x80 && insn->opcode <= 0x8F) 1227 attrMask |= ATTR_OPSIZE; 1228 } 1229 1230 1231 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask)) 1232 return -1; 1233 1234 // The following clauses compensate for limitations of the tables. 1235 1236 if (insn->mode != MODE_64BIT && 1237 insn->vectorExtensionType != TYPE_NO_VEX_XOP) { 1238 // The tables can't distinquish between cases where the W-bit is used to 1239 // select register size and cases where its a required part of the opcode. 1240 if ((insn->vectorExtensionType == TYPE_EVEX && 1241 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || 1242 (insn->vectorExtensionType == TYPE_VEX_3B && 1243 wFromVEX3of3(insn->vectorExtensionPrefix[2])) || 1244 (insn->vectorExtensionType == TYPE_XOP && 1245 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { 1246 1247 uint16_t instructionIDWithREXW; 1248 if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn, 1249 attrMask | ATTR_REXW)) { 1250 insn->instructionID = instructionID; 1251 insn->spec = &INSTRUCTIONS_SYM[instructionID]; 1252 return 0; 1253 } 1254 1255 auto SpecName = mii->getName(instructionIDWithREXW); 1256 // If not a 64-bit instruction. Switch the opcode. 
1257 if (!is64Bit(SpecName.data())) { 1258 insn->instructionID = instructionIDWithREXW; 1259 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW]; 1260 return 0; 1261 } 1262 } 1263 } 1264 1265 // Absolute moves, umonitor, and movdir64b need special handling. 1266 // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are 1267 // inverted w.r.t. 1268 // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in 1269 // any position. 1270 if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || 1271 (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) || 1272 (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) { 1273 // Make sure we observed the prefixes in any position. 1274 if (insn->hasAdSize) 1275 attrMask |= ATTR_ADSIZE; 1276 if (insn->hasOpSize) 1277 attrMask |= ATTR_OPSIZE; 1278 1279 // In 16-bit, invert the attributes. 1280 if (insn->mode == MODE_16BIT) { 1281 attrMask ^= ATTR_ADSIZE; 1282 1283 // The OpSize attribute is only valid with the absolute moves. 1284 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) 1285 attrMask ^= ATTR_OPSIZE; 1286 } 1287 1288 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask)) 1289 return -1; 1290 1291 insn->instructionID = instructionID; 1292 insn->spec = &INSTRUCTIONS_SYM[instructionID]; 1293 return 0; 1294 } 1295 1296 if ((insn->mode == MODE_16BIT || insn->hasOpSize) && 1297 !(attrMask & ATTR_OPSIZE)) { 1298 // The instruction tables make no distinction between instructions that 1299 // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a 1300 // particular spot (i.e., many MMX operations). In general we're 1301 // conservative, but in the specific case where OpSize is present but not in 1302 // the right place we check if there's a 16-bit operation. 
1303 const struct InstructionSpecifier *spec; 1304 uint16_t instructionIDWithOpsize; 1305 llvm::StringRef specName, specWithOpSizeName; 1306 1307 spec = &INSTRUCTIONS_SYM[instructionID]; 1308 1309 if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn, 1310 attrMask | ATTR_OPSIZE)) { 1311 // ModRM required with OpSize but not present. Give up and return the 1312 // version without OpSize set. 1313 insn->instructionID = instructionID; 1314 insn->spec = spec; 1315 return 0; 1316 } 1317 1318 specName = mii->getName(instructionID); 1319 specWithOpSizeName = mii->getName(instructionIDWithOpsize); 1320 1321 if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && 1322 (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { 1323 insn->instructionID = instructionIDWithOpsize; 1324 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize]; 1325 } else { 1326 insn->instructionID = instructionID; 1327 insn->spec = spec; 1328 } 1329 return 0; 1330 } 1331 1332 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && 1333 insn->rexPrefix & 0x01) { 1334 // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode 1335 // as XCHG %r8, %eax. 
1336 const struct InstructionSpecifier *spec; 1337 uint16_t instructionIDWithNewOpcode; 1338 const struct InstructionSpecifier *specWithNewOpcode; 1339 1340 spec = &INSTRUCTIONS_SYM[instructionID]; 1341 1342 // Borrow opcode from one of the other XCHGar opcodes 1343 insn->opcode = 0x91; 1344 1345 if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn, 1346 attrMask)) { 1347 insn->opcode = 0x90; 1348 1349 insn->instructionID = instructionID; 1350 insn->spec = spec; 1351 return 0; 1352 } 1353 1354 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode]; 1355 1356 // Change back 1357 insn->opcode = 0x90; 1358 1359 insn->instructionID = instructionIDWithNewOpcode; 1360 insn->spec = specWithNewOpcode; 1361 1362 return 0; 1363 } 1364 1365 insn->instructionID = instructionID; 1366 insn->spec = &INSTRUCTIONS_SYM[insn->instructionID]; 1367 1368 return 0; 1369 } 1370 1371 // Read an operand from the opcode field of an instruction and interprets it 1372 // appropriately given the operand width. Handles AddRegFrm instructions. 1373 // 1374 // @param insn - the instruction whose opcode field is to be read. 1375 // @param size - The width (in bytes) of the register being specified. 1376 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1377 // RAX. 1378 // @return - 0 on success; nonzero otherwise. 
1379 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1380 LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1381 1382 if (size == 0) 1383 size = insn->registerSize; 1384 1385 switch (size) { 1386 case 1: 1387 insn->opcodeRegister = (Reg)( 1388 MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1389 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1390 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1391 insn->opcodeRegister = 1392 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1393 } 1394 1395 break; 1396 case 2: 1397 insn->opcodeRegister = (Reg)( 1398 MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1399 break; 1400 case 4: 1401 insn->opcodeRegister = 1402 (Reg)(MODRM_REG_EAX + 1403 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1404 break; 1405 case 8: 1406 insn->opcodeRegister = 1407 (Reg)(MODRM_REG_RAX + 1408 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1409 break; 1410 } 1411 1412 return 0; 1413 } 1414 1415 // Consume an immediate operand from an instruction, given the desired operand 1416 // size. 1417 // 1418 // @param insn - The instruction whose operand is to be read. 1419 // @param size - The width (in bytes) of the operand. 1420 // @return - 0 if the immediate was successfully consumed; nonzero 1421 // otherwise. 
1422 static int readImmediate(struct InternalInstruction *insn, uint8_t size) { 1423 uint8_t imm8; 1424 uint16_t imm16; 1425 uint32_t imm32; 1426 uint64_t imm64; 1427 1428 LLVM_DEBUG(dbgs() << "readImmediate()"); 1429 1430 assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates"); 1431 1432 insn->immediateSize = size; 1433 insn->immediateOffset = insn->readerCursor - insn->startLocation; 1434 1435 switch (size) { 1436 case 1: 1437 if (consume(insn, imm8)) 1438 return -1; 1439 insn->immediates[insn->numImmediatesConsumed] = imm8; 1440 break; 1441 case 2: 1442 if (consume(insn, imm16)) 1443 return -1; 1444 insn->immediates[insn->numImmediatesConsumed] = imm16; 1445 break; 1446 case 4: 1447 if (consume(insn, imm32)) 1448 return -1; 1449 insn->immediates[insn->numImmediatesConsumed] = imm32; 1450 break; 1451 case 8: 1452 if (consume(insn, imm64)) 1453 return -1; 1454 insn->immediates[insn->numImmediatesConsumed] = imm64; 1455 break; 1456 default: 1457 llvm_unreachable("invalid size"); 1458 } 1459 1460 insn->numImmediatesConsumed++; 1461 1462 return 0; 1463 } 1464 1465 // Consume vvvv from an instruction if it has a VEX prefix. 1466 static int readVVVV(struct InternalInstruction *insn) { 1467 LLVM_DEBUG(dbgs() << "readVVVV()"); 1468 1469 int vvvv; 1470 if (insn->vectorExtensionType == TYPE_EVEX) 1471 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | 1472 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); 1473 else if (insn->vectorExtensionType == TYPE_VEX_3B) 1474 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); 1475 else if (insn->vectorExtensionType == TYPE_VEX_2B) 1476 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); 1477 else if (insn->vectorExtensionType == TYPE_XOP) 1478 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); 1479 else 1480 return -1; 1481 1482 if (insn->mode != MODE_64BIT) 1483 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later. 
1484 1485 insn->vvvv = static_cast<Reg>(vvvv); 1486 return 0; 1487 } 1488 1489 // Read an mask register from the opcode field of an instruction. 1490 // 1491 // @param insn - The instruction whose opcode field is to be read. 1492 // @return - 0 on success; nonzero otherwise. 1493 static int readMaskRegister(struct InternalInstruction *insn) { 1494 LLVM_DEBUG(dbgs() << "readMaskRegister()"); 1495 1496 if (insn->vectorExtensionType != TYPE_EVEX) 1497 return -1; 1498 1499 insn->writemask = 1500 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); 1501 return 0; 1502 } 1503 1504 // Consults the specifier for an instruction and consumes all 1505 // operands for that instruction, interpreting them as it goes. 1506 static int readOperands(struct InternalInstruction *insn) { 1507 int hasVVVV, needVVVV; 1508 int sawRegImm = 0; 1509 1510 LLVM_DEBUG(dbgs() << "readOperands()"); 1511 1512 // If non-zero vvvv specified, make sure one of the operands uses it. 1513 hasVVVV = !readVVVV(insn); 1514 needVVVV = hasVVVV && (insn->vvvv != 0); 1515 1516 for (const auto &Op : x86OperandSets[insn->spec->operands]) { 1517 switch (Op.encoding) { 1518 case ENCODING_NONE: 1519 case ENCODING_SI: 1520 case ENCODING_DI: 1521 break; 1522 CASE_ENCODING_VSIB: 1523 // VSIB can use the V2 bit so check only the other bits. 1524 if (needVVVV) 1525 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); 1526 if (readModRM(insn)) 1527 return -1; 1528 1529 // Reject if SIB wasn't used. 1530 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1531 return -1; 1532 1533 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4. 1534 if (insn->sibIndex == SIB_INDEX_NONE) 1535 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4); 1536 1537 // If EVEX.v2 is set this is one of the 16-31 registers. 
1538 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 1539 v2FromEVEX4of4(insn->vectorExtensionPrefix[3])) 1540 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16); 1541 1542 // Adjust the index register to the correct size. 1543 switch ((OperandType)Op.type) { 1544 default: 1545 debug("Unhandled VSIB index type"); 1546 return -1; 1547 case TYPE_MVSIBX: 1548 insn->sibIndex = 1549 (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase)); 1550 break; 1551 case TYPE_MVSIBY: 1552 insn->sibIndex = 1553 (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase)); 1554 break; 1555 case TYPE_MVSIBZ: 1556 insn->sibIndex = 1557 (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase)); 1558 break; 1559 } 1560 1561 // Apply the AVX512 compressed displacement scaling factor. 1562 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1563 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); 1564 break; 1565 case ENCODING_SIB: 1566 // Reject if SIB wasn't used. 1567 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) 1568 return -1; 1569 if (readModRM(insn)) 1570 return -1; 1571 if (fixupReg(insn, &Op)) 1572 return -1; 1573 break; 1574 case ENCODING_REG: 1575 CASE_ENCODING_RM: 1576 if (readModRM(insn)) 1577 return -1; 1578 if (fixupReg(insn, &Op)) 1579 return -1; 1580 // Apply the AVX512 compressed displacement scaling factor. 1581 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) 1582 insn->displacement *= 1 << (Op.encoding - ENCODING_RM); 1583 break; 1584 case ENCODING_IB: 1585 if (sawRegImm) { 1586 // Saw a register immediate so don't read again and instead split the 1587 // previous immediate. FIXME: This is a hack. 
1588 insn->immediates[insn->numImmediatesConsumed] = 1589 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; 1590 ++insn->numImmediatesConsumed; 1591 break; 1592 } 1593 if (readImmediate(insn, 1)) 1594 return -1; 1595 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) 1596 sawRegImm = 1; 1597 break; 1598 case ENCODING_IW: 1599 if (readImmediate(insn, 2)) 1600 return -1; 1601 break; 1602 case ENCODING_ID: 1603 if (readImmediate(insn, 4)) 1604 return -1; 1605 break; 1606 case ENCODING_IO: 1607 if (readImmediate(insn, 8)) 1608 return -1; 1609 break; 1610 case ENCODING_Iv: 1611 if (readImmediate(insn, insn->immediateSize)) 1612 return -1; 1613 break; 1614 case ENCODING_Ia: 1615 if (readImmediate(insn, insn->addressSize)) 1616 return -1; 1617 break; 1618 case ENCODING_IRC: 1619 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | 1620 lFromEVEX4of4(insn->vectorExtensionPrefix[3]); 1621 break; 1622 case ENCODING_RB: 1623 if (readOpcodeRegister(insn, 1)) 1624 return -1; 1625 break; 1626 case ENCODING_RW: 1627 if (readOpcodeRegister(insn, 2)) 1628 return -1; 1629 break; 1630 case ENCODING_RD: 1631 if (readOpcodeRegister(insn, 4)) 1632 return -1; 1633 break; 1634 case ENCODING_RO: 1635 if (readOpcodeRegister(insn, 8)) 1636 return -1; 1637 break; 1638 case ENCODING_Rv: 1639 if (readOpcodeRegister(insn, 0)) 1640 return -1; 1641 break; 1642 case ENCODING_CC: 1643 insn->immediates[1] = insn->opcode & 0xf; 1644 break; 1645 case ENCODING_FP: 1646 break; 1647 case ENCODING_VVVV: 1648 needVVVV = 0; // Mark that we have found a VVVV operand. 
1649 if (!hasVVVV) 1650 return -1; 1651 if (insn->mode != MODE_64BIT) 1652 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7); 1653 if (fixupReg(insn, &Op)) 1654 return -1; 1655 break; 1656 case ENCODING_WRITEMASK: 1657 if (readMaskRegister(insn)) 1658 return -1; 1659 break; 1660 case ENCODING_DUP: 1661 break; 1662 default: 1663 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding."); 1664 return -1; 1665 } 1666 } 1667 1668 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail 1669 if (needVVVV) 1670 return -1; 1671 1672 return 0; 1673 } 1674 1675 namespace llvm { 1676 1677 // Fill-ins to make the compiler happy. These constants are never actually 1678 // assigned; they are just filler to make an automatically-generated switch 1679 // statement work. 1680 namespace X86 { 1681 enum { 1682 BX_SI = 500, 1683 BX_DI = 501, 1684 BP_SI = 502, 1685 BP_DI = 503, 1686 sib = 504, 1687 sib64 = 505 1688 }; 1689 } // namespace X86 1690 1691 } // namespace llvm 1692 1693 static bool translateInstruction(MCInst &target, 1694 InternalInstruction &source, 1695 const MCDisassembler *Dis); 1696 1697 namespace { 1698 1699 /// Generic disassembler for all X86 platforms. All each platform class should 1700 /// have to do is subclass the constructor, and provide a different 1701 /// disassemblerMode value. 
1702 class X86GenericDisassembler : public MCDisassembler { 1703 std::unique_ptr<const MCInstrInfo> MII; 1704 public: 1705 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 1706 std::unique_ptr<const MCInstrInfo> MII); 1707 public: 1708 DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 1709 ArrayRef<uint8_t> Bytes, uint64_t Address, 1710 raw_ostream &cStream) const override; 1711 1712 private: 1713 DisassemblerMode fMode; 1714 }; 1715 1716 } // namespace 1717 1718 X86GenericDisassembler::X86GenericDisassembler( 1719 const MCSubtargetInfo &STI, 1720 MCContext &Ctx, 1721 std::unique_ptr<const MCInstrInfo> MII) 1722 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 1723 const FeatureBitset &FB = STI.getFeatureBits(); 1724 if (FB[X86::Is16Bit]) { 1725 fMode = MODE_16BIT; 1726 return; 1727 } else if (FB[X86::Is32Bit]) { 1728 fMode = MODE_32BIT; 1729 return; 1730 } else if (FB[X86::Is64Bit]) { 1731 fMode = MODE_64BIT; 1732 return; 1733 } 1734 1735 llvm_unreachable("Invalid CPU mode"); 1736 } 1737 1738 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 1739 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1740 raw_ostream &CStream) const { 1741 CommentStream = &CStream; 1742 1743 InternalInstruction Insn; 1744 memset(&Insn, 0, sizeof(InternalInstruction)); 1745 Insn.bytes = Bytes; 1746 Insn.startLocation = Address; 1747 Insn.readerCursor = Address; 1748 Insn.mode = fMode; 1749 1750 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1751 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1752 readOperands(&Insn)) { 1753 Size = Insn.readerCursor - Address; 1754 return Fail; 1755 } 1756 1757 Insn.operands = x86OperandSets[Insn.spec->operands]; 1758 Insn.length = Insn.readerCursor - Insn.startLocation; 1759 Size = Insn.length; 1760 if (Size > 15) 1761 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1762 1763 bool Ret = translateInstruction(Instr, Insn, this); 1764 if 
(!Ret) { 1765 unsigned Flags = X86::IP_NO_PREFIX; 1766 if (Insn.hasAdSize) 1767 Flags |= X86::IP_HAS_AD_SIZE; 1768 if (!Insn.mandatoryPrefix) { 1769 if (Insn.hasOpSize) 1770 Flags |= X86::IP_HAS_OP_SIZE; 1771 if (Insn.repeatPrefix == 0xf2) 1772 Flags |= X86::IP_HAS_REPEAT_NE; 1773 else if (Insn.repeatPrefix == 0xf3 && 1774 // It should not be 'pause' f3 90 1775 Insn.opcode != 0x90) 1776 Flags |= X86::IP_HAS_REPEAT; 1777 if (Insn.hasLockPrefix) 1778 Flags |= X86::IP_HAS_LOCK; 1779 } 1780 Instr.setFlags(Flags); 1781 } 1782 return (!Ret) ? Success : Fail; 1783 } 1784 1785 // 1786 // Private code that translates from struct InternalInstructions to MCInsts. 1787 // 1788 1789 /// translateRegister - Translates an internal register to the appropriate LLVM 1790 /// register, and appends it as an operand to an MCInst. 1791 /// 1792 /// @param mcInst - The MCInst to append to. 1793 /// @param reg - The Reg to append. 1794 static void translateRegister(MCInst &mcInst, Reg reg) { 1795 #define ENTRY(x) X86::x, 1796 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 1797 #undef ENTRY 1798 1799 MCPhysReg llvmRegnum = llvmRegnums[reg]; 1800 mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 1801 } 1802 1803 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 1804 0, // SEG_OVERRIDE_NONE 1805 X86::CS, 1806 X86::SS, 1807 X86::DS, 1808 X86::ES, 1809 X86::FS, 1810 X86::GS 1811 }; 1812 1813 /// translateSrcIndex - Appends a source index operand to an MCInst. 1814 /// 1815 /// @param mcInst - The MCInst to append to. 1816 /// @param insn - The internal instruction. 1817 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 1818 unsigned baseRegNo; 1819 1820 if (insn.mode == MODE_64BIT) 1821 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 1822 else if (insn.mode == MODE_32BIT) 1823 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 1824 else { 1825 assert(insn.mode == MODE_16BIT); 1826 baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 1827 } 1828 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1829 mcInst.addOperand(baseReg); 1830 1831 MCOperand segmentReg; 1832 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1833 mcInst.addOperand(segmentReg); 1834 return false; 1835 } 1836 1837 /// translateDstIndex - Appends a destination index operand to an MCInst. 1838 /// 1839 /// @param mcInst - The MCInst to append to. 1840 /// @param insn - The internal instruction. 1841 1842 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 1843 unsigned baseRegNo; 1844 1845 if (insn.mode == MODE_64BIT) 1846 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 1847 else if (insn.mode == MODE_32BIT) 1848 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 1849 else { 1850 assert(insn.mode == MODE_16BIT); 1851 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 1852 } 1853 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1854 mcInst.addOperand(baseReg); 1855 return false; 1856 } 1857 1858 /// translateImmediate - Appends an immediate operand to an MCInst. 1859 /// 1860 /// @param mcInst - The MCInst to append to. 1861 /// @param immediate - The immediate value to append. 1862 /// @param operand - The operand, as stored in the descriptor table. 1863 /// @param insn - The internal instruction. 1864 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 1865 const OperandSpecifier &operand, 1866 InternalInstruction &insn, 1867 const MCDisassembler *Dis) { 1868 // Sign-extend the immediate if necessary. 
1869 1870 OperandType type = (OperandType)operand.type; 1871 1872 bool isBranch = false; 1873 uint64_t pcrel = 0; 1874 if (type == TYPE_REL) { 1875 isBranch = true; 1876 pcrel = insn.startLocation + insn.length; 1877 switch (operand.encoding) { 1878 default: 1879 break; 1880 case ENCODING_Iv: 1881 switch (insn.displacementSize) { 1882 default: 1883 break; 1884 case 1: 1885 if(immediate & 0x80) 1886 immediate |= ~(0xffull); 1887 break; 1888 case 2: 1889 if(immediate & 0x8000) 1890 immediate |= ~(0xffffull); 1891 break; 1892 case 4: 1893 if(immediate & 0x80000000) 1894 immediate |= ~(0xffffffffull); 1895 break; 1896 case 8: 1897 break; 1898 } 1899 break; 1900 case ENCODING_IB: 1901 if(immediate & 0x80) 1902 immediate |= ~(0xffull); 1903 break; 1904 case ENCODING_IW: 1905 if(immediate & 0x8000) 1906 immediate |= ~(0xffffull); 1907 break; 1908 case ENCODING_ID: 1909 if(immediate & 0x80000000) 1910 immediate |= ~(0xffffffffull); 1911 break; 1912 } 1913 } 1914 // By default sign-extend all X86 immediates based on their encoding. 1915 else if (type == TYPE_IMM) { 1916 switch (operand.encoding) { 1917 default: 1918 break; 1919 case ENCODING_IB: 1920 if(immediate & 0x80) 1921 immediate |= ~(0xffull); 1922 break; 1923 case ENCODING_IW: 1924 if(immediate & 0x8000) 1925 immediate |= ~(0xffffull); 1926 break; 1927 case ENCODING_ID: 1928 if(immediate & 0x80000000) 1929 immediate |= ~(0xffffffffull); 1930 break; 1931 case ENCODING_IO: 1932 break; 1933 } 1934 } 1935 1936 switch (type) { 1937 case TYPE_XMM: 1938 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 1939 return; 1940 case TYPE_YMM: 1941 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 1942 return; 1943 case TYPE_ZMM: 1944 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 1945 return; 1946 default: 1947 // operand is 64 bits wide. Do nothing. 
1948 break; 1949 } 1950 1951 if (!Dis->tryAddingSymbolicOperand( 1952 mcInst, immediate + pcrel, insn.startLocation, isBranch, 1953 insn.immediateOffset, insn.immediateSize, insn.length)) 1954 mcInst.addOperand(MCOperand::createImm(immediate)); 1955 1956 if (type == TYPE_MOFFS) { 1957 MCOperand segmentReg; 1958 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1959 mcInst.addOperand(segmentReg); 1960 } 1961 } 1962 1963 /// translateRMRegister - Translates a register stored in the R/M field of the 1964 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 1965 /// @param mcInst - The MCInst to append to. 1966 /// @param insn - The internal instruction to extract the R/M field 1967 /// from. 1968 /// @return - 0 on success; -1 otherwise 1969 static bool translateRMRegister(MCInst &mcInst, 1970 InternalInstruction &insn) { 1971 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 1972 debug("A R/M register operand may not have a SIB byte"); 1973 return true; 1974 } 1975 1976 switch (insn.eaBase) { 1977 default: 1978 debug("Unexpected EA base register"); 1979 return true; 1980 case EA_BASE_NONE: 1981 debug("EA_BASE_NONE for ModR/M base"); 1982 return true; 1983 #define ENTRY(x) case EA_BASE_##x: 1984 ALL_EA_BASES 1985 #undef ENTRY 1986 debug("A R/M register operand may not have a base; " 1987 "the operand must be a register."); 1988 return true; 1989 #define ENTRY(x) \ 1990 case EA_REG_##x: \ 1991 mcInst.addOperand(MCOperand::createReg(X86::x)); break; 1992 ALL_REGS 1993 #undef ENTRY 1994 } 1995 1996 return false; 1997 } 1998 1999 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 2000 /// fields of an internal instruction (and possibly its SIB byte) to a memory 2001 /// operand in LLVM's format, and appends it to an MCInst. 2002 /// 2003 /// @param mcInst - The MCInst to append to. 2004 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2005 /// from. 
2006 /// @param ForceSIB - The instruction must use SIB. 2007 /// @return - 0 on success; nonzero otherwise 2008 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 2009 const MCDisassembler *Dis, 2010 bool ForceSIB = false) { 2011 // Addresses in an MCInst are represented as five operands: 2012 // 1. basereg (register) The R/M base, or (if there is a SIB) the 2013 // SIB base 2014 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 2015 // scale amount 2016 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 2017 // the index (which is multiplied by the 2018 // scale amount) 2019 // 4. displacement (immediate) 0, or the displacement if there is one 2020 // 5. segmentreg (register) x86_registerNONE for now, but could be set 2021 // if we have segment overrides 2022 2023 MCOperand baseReg; 2024 MCOperand scaleAmount; 2025 MCOperand indexReg; 2026 MCOperand displacement; 2027 MCOperand segmentReg; 2028 uint64_t pcrel = 0; 2029 2030 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2031 if (insn.sibBase != SIB_BASE_NONE) { 2032 switch (insn.sibBase) { 2033 default: 2034 debug("Unexpected sibBase"); 2035 return true; 2036 #define ENTRY(x) \ 2037 case SIB_BASE_##x: \ 2038 baseReg = MCOperand::createReg(X86::x); break; 2039 ALL_SIB_BASES 2040 #undef ENTRY 2041 } 2042 } else { 2043 baseReg = MCOperand::createReg(X86::NoRegister); 2044 } 2045 2046 if (insn.sibIndex != SIB_INDEX_NONE) { 2047 switch (insn.sibIndex) { 2048 default: 2049 debug("Unexpected sibIndex"); 2050 return true; 2051 #define ENTRY(x) \ 2052 case SIB_INDEX_##x: \ 2053 indexReg = MCOperand::createReg(X86::x); break; 2054 EA_BASES_32BIT 2055 EA_BASES_64BIT 2056 REGS_XMM 2057 REGS_YMM 2058 REGS_ZMM 2059 #undef ENTRY 2060 } 2061 } else { 2062 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 2063 // but no index is used and modrm alone should have been enough. 2064 // -No base register in 32-bit mode. 
In 64-bit mode this is used to 2065 // avoid rip-relative addressing. 2066 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 2067 // base always requires a SIB byte. 2068 // -A scale other than 1 is used. 2069 if (!ForceSIB && 2070 (insn.sibScale != 1 || 2071 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 2072 (insn.sibBase != SIB_BASE_NONE && 2073 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 2074 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 2075 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : 2076 X86::RIZ); 2077 } else 2078 indexReg = MCOperand::createReg(X86::NoRegister); 2079 } 2080 2081 scaleAmount = MCOperand::createImm(insn.sibScale); 2082 } else { 2083 switch (insn.eaBase) { 2084 case EA_BASE_NONE: 2085 if (insn.eaDisplacement == EA_DISP_NONE) { 2086 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 2087 return true; 2088 } 2089 if (insn.mode == MODE_64BIT){ 2090 pcrel = insn.startLocation + insn.length; 2091 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 2092 insn.startLocation + 2093 insn.displacementOffset); 2094 // Section 2.2.1.6 2095 baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 2096 X86::RIP); 2097 } 2098 else 2099 baseReg = MCOperand::createReg(X86::NoRegister); 2100 2101 indexReg = MCOperand::createReg(X86::NoRegister); 2102 break; 2103 case EA_BASE_BX_SI: 2104 baseReg = MCOperand::createReg(X86::BX); 2105 indexReg = MCOperand::createReg(X86::SI); 2106 break; 2107 case EA_BASE_BX_DI: 2108 baseReg = MCOperand::createReg(X86::BX); 2109 indexReg = MCOperand::createReg(X86::DI); 2110 break; 2111 case EA_BASE_BP_SI: 2112 baseReg = MCOperand::createReg(X86::BP); 2113 indexReg = MCOperand::createReg(X86::SI); 2114 break; 2115 case EA_BASE_BP_DI: 2116 baseReg = MCOperand::createReg(X86::BP); 2117 indexReg = MCOperand::createReg(X86::DI); 2118 break; 2119 default: 2120 indexReg = MCOperand::createReg(X86::NoRegister); 2121 switch (insn.eaBase) { 2122 default: 2123 debug("Unexpected eaBase"); 2124 return true; 2125 // Here, we will use the fill-ins defined above. However, 2126 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 2127 // sib and sib64 were handled in the top-level if, so they're only 2128 // placeholders to keep the compiler happy. 2129 #define ENTRY(x) \ 2130 case EA_BASE_##x: \ 2131 baseReg = MCOperand::createReg(X86::x); break; 2132 ALL_EA_BASES 2133 #undef ENTRY 2134 #define ENTRY(x) case EA_REG_##x: 2135 ALL_REGS 2136 #undef ENTRY 2137 debug("A R/M memory operand may not be a register; " 2138 "the base field must be a base."); 2139 return true; 2140 } 2141 } 2142 2143 scaleAmount = MCOperand::createImm(1); 2144 } 2145 2146 displacement = MCOperand::createImm(insn.displacement); 2147 2148 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2149 2150 mcInst.addOperand(baseReg); 2151 mcInst.addOperand(scaleAmount); 2152 mcInst.addOperand(indexReg); 2153 2154 const uint8_t dispSize = 2155 (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 2156 2157 if (!Dis->tryAddingSymbolicOperand( 2158 mcInst, insn.displacement + pcrel, insn.startLocation, false, 2159 insn.displacementOffset, dispSize, insn.length)) 2160 mcInst.addOperand(displacement); 2161 mcInst.addOperand(segmentReg); 2162 return false; 2163 } 2164 2165 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 2166 /// byte of an instruction to LLVM form, and appends it to an MCInst. 2167 /// 2168 /// @param mcInst - The MCInst to append to. 2169 /// @param operand - The operand, as stored in the descriptor table. 2170 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2171 /// from. 2172 /// @return - 0 on success; nonzero otherwise 2173 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 2174 InternalInstruction &insn, const MCDisassembler *Dis) { 2175 switch (operand.type) { 2176 default: 2177 debug("Unexpected type for a R/M operand"); 2178 return true; 2179 case TYPE_R8: 2180 case TYPE_R16: 2181 case TYPE_R32: 2182 case TYPE_R64: 2183 case TYPE_Rv: 2184 case TYPE_MM64: 2185 case TYPE_XMM: 2186 case TYPE_YMM: 2187 case TYPE_ZMM: 2188 case TYPE_TMM: 2189 case TYPE_VK_PAIR: 2190 case TYPE_VK: 2191 case TYPE_DEBUGREG: 2192 case TYPE_CONTROLREG: 2193 case TYPE_BNDR: 2194 return translateRMRegister(mcInst, insn); 2195 case TYPE_M: 2196 case TYPE_MVSIBX: 2197 case TYPE_MVSIBY: 2198 case TYPE_MVSIBZ: 2199 return translateRMMemory(mcInst, insn, Dis); 2200 case TYPE_MSIB: 2201 return translateRMMemory(mcInst, insn, Dis, true); 2202 } 2203 } 2204 2205 /// translateFPRegister - Translates a stack position on the FPU stack to its 2206 /// LLVM form, and appends it to an MCInst. 2207 /// 2208 /// @param mcInst - The MCInst to append to. 2209 /// @param stackPos - The stack position to translate. 
2210 static void translateFPRegister(MCInst &mcInst, 2211 uint8_t stackPos) { 2212 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 2213 } 2214 2215 /// translateMaskRegister - Translates a 3-bit mask register number to 2216 /// LLVM form, and appends it to an MCInst. 2217 /// 2218 /// @param mcInst - The MCInst to append to. 2219 /// @param maskRegNum - Number of mask register from 0 to 7. 2220 /// @return - false on success; true otherwise. 2221 static bool translateMaskRegister(MCInst &mcInst, 2222 uint8_t maskRegNum) { 2223 if (maskRegNum >= 8) { 2224 debug("Invalid mask register number"); 2225 return true; 2226 } 2227 2228 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 2229 return false; 2230 } 2231 2232 /// translateOperand - Translates an operand stored in an internal instruction 2233 /// to LLVM's format and appends it to an MCInst. 2234 /// 2235 /// @param mcInst - The MCInst to append to. 2236 /// @param operand - The operand, as stored in the descriptor table. 2237 /// @param insn - The internal instruction. 2238 /// @return - false on success; true otherwise. 
2239 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 2240 InternalInstruction &insn, 2241 const MCDisassembler *Dis) { 2242 switch (operand.encoding) { 2243 default: 2244 debug("Unhandled operand encoding during translation"); 2245 return true; 2246 case ENCODING_REG: 2247 translateRegister(mcInst, insn.reg); 2248 return false; 2249 case ENCODING_WRITEMASK: 2250 return translateMaskRegister(mcInst, insn.writemask); 2251 case ENCODING_SIB: 2252 CASE_ENCODING_RM: 2253 CASE_ENCODING_VSIB: 2254 return translateRM(mcInst, operand, insn, Dis); 2255 case ENCODING_IB: 2256 case ENCODING_IW: 2257 case ENCODING_ID: 2258 case ENCODING_IO: 2259 case ENCODING_Iv: 2260 case ENCODING_Ia: 2261 translateImmediate(mcInst, 2262 insn.immediates[insn.numImmediatesTranslated++], 2263 operand, 2264 insn, 2265 Dis); 2266 return false; 2267 case ENCODING_IRC: 2268 mcInst.addOperand(MCOperand::createImm(insn.RC)); 2269 return false; 2270 case ENCODING_SI: 2271 return translateSrcIndex(mcInst, insn); 2272 case ENCODING_DI: 2273 return translateDstIndex(mcInst, insn); 2274 case ENCODING_RB: 2275 case ENCODING_RW: 2276 case ENCODING_RD: 2277 case ENCODING_RO: 2278 case ENCODING_Rv: 2279 translateRegister(mcInst, insn.opcodeRegister); 2280 return false; 2281 case ENCODING_CC: 2282 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2283 return false; 2284 case ENCODING_FP: 2285 translateFPRegister(mcInst, insn.modRM & 7); 2286 return false; 2287 case ENCODING_VVVV: 2288 translateRegister(mcInst, insn.vvvv); 2289 return false; 2290 case ENCODING_DUP: 2291 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 2292 insn, Dis); 2293 } 2294 } 2295 2296 /// translateInstruction - Translates an internal instruction and all its 2297 /// operands to an MCInst. 2298 /// 2299 /// @param mcInst - The MCInst to populate with the instruction's data. 2300 /// @param insn - The internal instruction. 2301 /// @return - false on success; true otherwise. 
2302 static bool translateInstruction(MCInst &mcInst, 2303 InternalInstruction &insn, 2304 const MCDisassembler *Dis) { 2305 if (!insn.spec) { 2306 debug("Instruction has no specification"); 2307 return true; 2308 } 2309 2310 mcInst.clear(); 2311 mcInst.setOpcode(insn.instructionID); 2312 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 2313 // prefix bytes should be disassembled as xrelease and xacquire then set the 2314 // opcode to those instead of the rep and repne opcodes. 2315 if (insn.xAcquireRelease) { 2316 if(mcInst.getOpcode() == X86::REP_PREFIX) 2317 mcInst.setOpcode(X86::XRELEASE_PREFIX); 2318 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 2319 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 2320 } 2321 2322 insn.numImmediatesTranslated = 0; 2323 2324 for (const auto &Op : insn.operands) { 2325 if (Op.encoding != ENCODING_NONE) { 2326 if (translateOperand(mcInst, Op, insn, Dis)) { 2327 return true; 2328 } 2329 } 2330 } 2331 2332 return false; 2333 } 2334 2335 static MCDisassembler *createX86Disassembler(const Target &T, 2336 const MCSubtargetInfo &STI, 2337 MCContext &Ctx) { 2338 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 2339 return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 2340 } 2341 2342 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { 2343 // Register the disassembler. 2344 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 2345 createX86Disassembler); 2346 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 2347 createX86Disassembler); 2348 } 2349