//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
//
//
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
// 64-bit X86 instruction sets. The main decode sequence for an assembly
// instruction in this disassembler is:
//
// 1. Read the prefix bytes and determine the attributes of the instruction.
//    These attributes, recorded in enum attributeBits
//    (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
//    provides a mapping from bitmasks to contexts, which are represented by
//    enum InstructionContext (ibid.).
//
// 2. Read the opcode, and determine what kind of opcode it is. The
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
//    (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
//
// 3. Depending on the opcode type, look in one of four ClassDecision structures
//    (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
//    OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to
//    get a ModRMDecision (ibid.).
//
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
//    ModR/M byte to complete decode. The ModRMDecision's type is an entry from
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
//    ModR/M byte is required and how to interpret it.
//
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
//    of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
//    meanings of its operands.
//
// 6. For each operand, its encoding is an entry from OperandEncoding
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
//    OperandType (ibid.). The encoding indicates how to read it from the
//    instruction; the type indicates how to interpret the value once it has
//    been read. For example, a register operand could be stored in the R/M
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
//    the main opcode. This is orthogonal to its meaning (a GPR or an XMM
//    register, for instance). Given this information, the operands can be
//    extracted and interpreted.
//
// 7. As the last step, the disassembler translates the instruction information
//    and operands into a format understandable by the client - in this case, an
//    MCInst for use by the MC infrastructure.
//
// The disassembler is broken broadly into two parts: the table emitter that
// emits the instruction decode tables discussed above during compilation, and
// the disassembler itself. The table emitter is documented in more detail in
// utils/TableGen/X86DisassemblerEmitter.h.
//
// X86Disassembler.cpp contains the code responsible for step 7, and for
// invoking the decoder to execute steps 1-6.
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
// table emitter and the disassembler.
// X86DisassemblerDecoder.h contains the public interface of the decoder,
// factored out into C for possible use by other projects.
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
// responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Streams s to dbgs(), prefixed with the current source line number, via
// LLVM_DEBUG. Note that the expansion already ends in a semicolon.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // How the ModR/M byte selects among the candidates (a MODRM_* value).
  uint8_t modrm_type;
  // Index into modRMTable of the first candidate instruction for this opcode.
  uint16_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One entry per possible opcode byte value.
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
117 struct ContextDecision { 118 OpcodeDecision opcodeDecisions[IC_max]; 119 }; 120 121 #include "X86GenDisassemblerTables.inc" 122 123 static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124 uint8_t opcode, uint8_t modRM) { 125 const struct ModRMDecision *dec; 126 127 switch (type) { 128 case ONEBYTE: 129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130 break; 131 case TWOBYTE: 132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133 break; 134 case THREEBYTE_38: 135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136 break; 137 case THREEBYTE_3A: 138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139 break; 140 case XOP8_MAP: 141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142 break; 143 case XOP9_MAP: 144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145 break; 146 case XOPA_MAP: 147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148 break; 149 case THREEDNOW_MAP: 150 dec = 151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152 break; 153 case MAP5: 154 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 155 break; 156 case MAP6: 157 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158 break; 159 } 160 161 switch (dec->modrm_type) { 162 default: 163 llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 164 return 0; 165 case MODRM_ONEENTRY: 166 return modRMTable[dec->instructionIDs]; 167 case MODRM_SPLITRM: 168 if (modFromModRM(modRM) == 0x3) 169 return modRMTable[dec->instructionIDs + 1]; 170 return modRMTable[dec->instructionIDs]; 171 case MODRM_SPLITREG: 172 if (modFromModRM(modRM) == 0x3) 173 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 174 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 175 case MODRM_SPLITMISC: 176 if (modFromModRM(modRM) == 0x3) 177 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 178 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 179 case MODRM_FULL: 180 return modRMTable[dec->instructionIDs + modRM]; 181 } 182 } 183 184 static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 185 uint64_t offset = insn->readerCursor - insn->startLocation; 186 if (offset >= insn->bytes.size()) 187 return true; 188 byte = insn->bytes[offset]; 189 return false; 190 } 191 192 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 193 auto r = insn->bytes; 194 uint64_t offset = insn->readerCursor - insn->startLocation; 195 if (offset + sizeof(T) > r.size()) 196 return true; 197 ptr = support::endian::read<T>(&r[offset], support::little); 198 insn->readerCursor += sizeof(T); 199 return false; 200 } 201 202 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 203 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 204 } 205 206 // Consumes all of an instruction's prefix bytes, and marks the 207 // instruction as having them. Also sets the instruction's default operand, 208 // address, and other relevant data sizes to report operands correctly. 209 // 210 // insn must not be empty. 
//
// @return - 0 on success; -1 if a byte needed to finish decoding a REX, VEX,
//           EVEX or XOP prefix could not be read.
static int readPrefixes(struct InternalInstruction *insn) {
  bool isPrefix = true;
  uint8_t byte = 0;
  uint8_t nextByte;

  LLVM_DEBUG(dbgs() << "readPrefixes()");

  while (isPrefix) {
    // If we fail reading prefixes, just stop here and let the opcode reader
    // deal with it.
    if (consume(insn, byte))
      break;

    // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
    // break and let it be disassembled as a normal "instruction".
    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
      break;

    if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
      // If the byte is 0xf2 or 0xf3, and any of the following conditions are
      // met:
      // - it is followed by a LOCK (0xf0) prefix
      // - it is followed by an xchg instruction
      // then it should be disassembled as a xacquire/xrelease not repne/rep.
      if (((nextByte == 0xf0) ||
           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
        insn->xAcquireRelease = true;
        if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
          break;
      }
      // Also if the byte is 0xf3, and the following condition is met:
      // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
      //   "mov mem, imm" (opcode 0xc6/0xc7) instructions.
      // then it should be disassembled as an xrelease not rep.
      if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
                           nextByte == 0xc6 || nextByte == 0xc7)) {
        insn->xAcquireRelease = true;
        break;
      }
      if (isREX(insn, nextByte)) {
        uint8_t nnextByte;
        // Go to REX prefix after the current one
        if (consume(insn, nnextByte))
          return -1;
        // We should be able to read next byte after REX prefix
        if (peek(insn, nnextByte))
          return -1;
        // Step back so the REX byte is read again by the next loop iteration.
        --insn->readerCursor;
      }
    }

    switch (byte) {
    case 0xf0: // LOCK
      insn->hasLockPrefix = true;
      break;
    case 0xf2:   // REPNE/REPNZ
    case 0xf3: { // REP or REPE/REPZ
      uint8_t nextByte;
      if (peek(insn, nextByte))
        break;
      // TODO:
      //  1. There could be several 0x66
      //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
      //     it's not mandatory prefix
      //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
      //     0x0f exactly after it to be mandatory prefix
      if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
        // The last of 0xf2 /0xf3 is mandatory prefix
        insn->mandatoryPrefix = byte;
      insn->repeatPrefix = byte;
      break;
    }
    case 0x2e: // CS segment override -OR- Branch not taken
      insn->segmentOverride = SEG_OVERRIDE_CS;
      break;
    case 0x36: // SS segment override -OR- Branch taken
      insn->segmentOverride = SEG_OVERRIDE_SS;
      break;
    case 0x3e: // DS segment override
      insn->segmentOverride = SEG_OVERRIDE_DS;
      break;
    case 0x26: // ES segment override
      insn->segmentOverride = SEG_OVERRIDE_ES;
      break;
    case 0x64: // FS segment override
      insn->segmentOverride = SEG_OVERRIDE_FS;
      break;
    case 0x65: // GS segment override
      insn->segmentOverride = SEG_OVERRIDE_GS;
      break;
    case 0x66: { // Operand-size override
      uint8_t nextByte;
      insn->hasOpSize = true;
      if (peek(insn, nextByte))
        break;
      // 0x66 can't overwrite existing mandatory prefix and should be ignored
      if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
        insn->mandatoryPrefix = byte;
      break;
    }
    case 0x67: // Address-size override
      insn->hasAdSize = true;
      break;
    default: // Not a prefix byte
      isPrefix = false;
      break;
    }

    if (isPrefix)
      LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
  }

  insn->vectorExtensionType = TYPE_NO_VEX_XOP;

  // `byte` is the last byte consumed by the loop above. Decide whether it
  // starts an EVEX, VEX, XOP or REX prefix; if it does not, un-consume it so
  // the opcode reader sees it.
  if (byte == 0x62) {
    uint8_t byte1, byte2;
    if (consume(insn, byte1)) {
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
      return -1;
    }

    if (peek(insn, byte2)) {
      LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
      return -1;
    }

    if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
        ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
      insn->vectorExtensionType = TYPE_EVEX;
    } else {
      --insn->readerCursor; // unconsume byte1
      --insn->readerCursor; // unconsume byte
    }

    if (insn->vectorExtensionType == TYPE_EVEX) {
      insn->vectorExtensionPrefix[0] = byte;
      insn->vectorExtensionPrefix[1] = byte1;
      if (consume(insn, insn->vectorExtensionPrefix[2])) {
        LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
        return -1;
      }
      if (consume(insn, insn->vectorExtensionPrefix[3])) {
        LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
        return -1;
      }

      // We simulate the REX prefix for simplicity's sake
      if (insn->mode == MODE_64BIT) {
        insn->rexPrefix = 0x40 |
                          (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
                          (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
                          (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
                          (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
      }

      LLVM_DEBUG(
          dbgs() << format(
              "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
              insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
              insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
    }
  } else if (byte == 0xc4) {
    uint8_t byte1;
    if (peek(insn, byte1)) {
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
      return -1;
    }

    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
      insn->vectorExtensionType = TYPE_VEX_3B;
    else
      --insn->readerCursor;

    if (insn->vectorExtensionType == TYPE_VEX_3B) {
      insn->vectorExtensionPrefix[0] = byte;
      // NOTE(review): the results of these consume() calls are not checked.
      consume(insn, insn->vectorExtensionPrefix[1]);
      consume(insn, insn->vectorExtensionPrefix[2]);

      // We simulate the REX prefix for simplicity's sake

      if (insn->mode == MODE_64BIT)
        insn->rexPrefix = 0x40 |
                          (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
                          (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
                          (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
                          (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);

      LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
                                  insn->vectorExtensionPrefix[0],
                                  insn->vectorExtensionPrefix[1],
                                  insn->vectorExtensionPrefix[2]));
    }
  } else if (byte == 0xc5) {
    uint8_t byte1;
    if (peek(insn, byte1)) {
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
      return -1;
    }

    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
      insn->vectorExtensionType = TYPE_VEX_2B;
    else
      --insn->readerCursor;

    if (insn->vectorExtensionType == TYPE_VEX_2B) {
      insn->vectorExtensionPrefix[0] = byte;
      consume(insn, insn->vectorExtensionPrefix[1]);

      if (insn->mode == MODE_64BIT)
        insn->rexPrefix =
            0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);

      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      default:
        break;
      case VEX_PREFIX_66:
        insn->hasOpSize = true;
        break;
      }

      LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
                                  insn->vectorExtensionPrefix[0],
                                  insn->vectorExtensionPrefix[1]));
    }
  } else if (byte == 0x8f) {
    uint8_t byte1;
    if (peek(insn, byte1)) {
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
      return -1;
    }

    if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
      insn->vectorExtensionType = TYPE_XOP;
    else
      --insn->readerCursor;

    if (insn->vectorExtensionType == TYPE_XOP) {
      insn->vectorExtensionPrefix[0] = byte;
      // NOTE(review): the results of these consume() calls are not checked.
      consume(insn, insn->vectorExtensionPrefix[1]);
      consume(insn, insn->vectorExtensionPrefix[2]);

      // We simulate the REX prefix for simplicity's sake

      if (insn->mode == MODE_64BIT)
        insn->rexPrefix = 0x40 |
                          (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
                          (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
                          (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
                          (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);

      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      default:
        break;
      case VEX_PREFIX_66:
        insn->hasOpSize = true;
        break;
      }

      LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
                                  insn->vectorExtensionPrefix[0],
                                  insn->vectorExtensionPrefix[1],
                                  insn->vectorExtensionPrefix[2]));
    }
  } else if (isREX(insn, byte)) {
    // A REX prefix must be followed by at least one more byte.
    if (peek(insn, nextByte))
      return -1;
    insn->rexPrefix = byte;
    LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
  } else
    --insn->readerCursor;

  // Derive the operand, address, displacement and immediate sizes from the
  // decoding mode and the size-override prefixes seen above.
  if (insn->mode == MODE_16BIT) {
    insn->registerSize = (insn->hasOpSize ? 4 : 2);
    insn->addressSize = (insn->hasAdSize ? 4 : 2);
    insn->displacementSize = (insn->hasAdSize ? 4 : 2);
    insn->immediateSize = (insn->hasOpSize ? 4 : 2);
  } else if (insn->mode == MODE_32BIT) {
    insn->registerSize = (insn->hasOpSize ? 2 : 4);
    insn->addressSize = (insn->hasAdSize ? 2 : 4);
    insn->displacementSize = (insn->hasAdSize ? 2 : 4);
    insn->immediateSize = (insn->hasOpSize ? 2 : 4);
  } else if (insn->mode == MODE_64BIT) {
    insn->displacementSize = 4;
    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
      // REX.W takes precedence over the operand-size override.
      insn->registerSize = 8;
      insn->addressSize = (insn->hasAdSize ? 4 : 8);
      insn->immediateSize = 4;
      insn->hasOpSize = false;
    } else {
      insn->registerSize = (insn->hasOpSize ? 2 : 4);
      insn->addressSize = (insn->hasAdSize ? 4 : 8);
      insn->immediateSize = (insn->hasOpSize ? 2 : 4);
    }
  }

  return 0;
}

// Consumes the SIB byte to determine addressing information.
//
// Sets insn->sib, sibIndexBase, sibIndex, sibScale and sibBase, and may set
// insn->eaDisplacement when the base field is 0x5 or 0xd.
//
// @return - 0 on success; -1 if the SIB byte could not be read.
static int readSIB(struct InternalInstruction *insn) {
  SIBBase sibBaseBase = SIB_BASE_NONE;
  uint8_t index, base;

  LLVM_DEBUG(dbgs() << "readSIB()");
  switch (insn->addressSize) {
  case 2:
  default:
    llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  case 4:
    insn->sibIndexBase = SIB_INDEX_EAX;
    sibBaseBase = SIB_BASE_EAX;
    break;
  case 8:
    insn->sibIndexBase = SIB_INDEX_RAX;
    sibBaseBase = SIB_BASE_RAX;
    break;
  }

  if (consume(insn, insn->sib))
    return -1;

  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);

  // Index 0b100 (with REX.X clear) means "no index register".
  if (index == 0x4) {
    insn->sibIndex = SIB_INDEX_NONE;
  } else {
    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  }

  insn->sibScale = 1 << scaleFromSIB(insn->sib);

  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);

  switch (base) {
  case 0x5:
  case 0xd:
    // For these base values the ModR/M mod field decides whether a base
    // register is present and how wide the displacement is.
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = SIB_BASE_NONE;
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    default:
      llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
    }
    break;
  default:
    insn->sibBase = (SIBBase)(sibBaseBase + base);
    break;
  }

  return 0;
}

// Consumes the displacement whose width is selected by insn->eaDisplacement,
// storing the sign-extended value in insn->displacement and the offset at
// which it starts (relative to the start of the instruction) in
// insn->displacementOffset.
//
// @return - 0 on success; -1 if the displacement bytes could not be read.
static int readDisplacement(struct InternalInstruction *insn) {
  int8_t d8;
  int16_t d16;
  int32_t d32;
  LLVM_DEBUG(dbgs() << "readDisplacement()");

  insn->displacementOffset = insn->readerCursor - insn->startLocation;
  switch (insn->eaDisplacement) {
  case EA_DISP_NONE:
    break;
  case EA_DISP_8:
    if (consume(insn, d8))
      return -1;
    insn->displacement = d8;
    break;
  case EA_DISP_16:
    if (consume(insn, d16))
      return -1;
    insn->displacement = d16;
    break;
  case EA_DISP_32:
    if (consume(insn, d32))
      return -1;
    insn->displacement = d32;
    break;
  }

  return 0;
}

// Consumes all addressing information (ModR/M byte, SIB byte, and
// displacement). Does nothing if the ModR/M byte was already consumed.
//
// @return - 0 on success; -1 if any of the required bytes could not be read.
static int readModRM(struct InternalInstruction *insn) {
  uint8_t mod, rm, reg, evexrm;
  LLVM_DEBUG(dbgs() << "readModRM()");

  if (insn->consumedModRM)
    return 0;

  if (consume(insn, insn->modRM))
    return -1;
  insn->consumedModRM = true;

  mod = modFromModRM(insn->modRM);
  rm = rmFromModRM(insn->modRM);
  reg = regFromModRM(insn->modRM);

  // This goes by insn->registerSize to pick the correct register, which messes
  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  // fixupReg().
  switch (insn->registerSize) {
  case 2:
    insn->regBase = MODRM_REG_AX;
    insn->eaRegBase = EA_REG_AX;
    break;
  case 4:
    insn->regBase = MODRM_REG_EAX;
    insn->eaRegBase = EA_REG_EAX;
    break;
  case 8:
    insn->regBase = MODRM_REG_RAX;
    insn->eaRegBase = EA_REG_RAX;
    break;
  }

  // Extend the 3-bit reg and r/m fields with the REX.R and REX.B bits.
  reg |= rFromREX(insn->rexPrefix) << 3;
  rm |= bFromREX(insn->rexPrefix) << 3;

  evexrm = 0;
  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
    evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  }

  insn->reg = (Reg)(insn->regBase + reg);

  switch (insn->addressSize) {
  case 2: {
    EABase eaBaseBase = EA_BASE_BX_SI;

    switch (mod) {
    case 0x0:
      if (rm == 0x6) {
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_16;
        if (readDisplacement(insn))
          return -1;
      } else {
        insn->eaBase = (EABase)(eaBaseBase + rm);
        insn->eaDisplacement = EA_DISP_NONE;
      }
      break;
    case 0x1:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_8;
      insn->displacementSize = 1;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x2:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_16;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x3:
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      if (readDisplacement(insn))
        return -1;
      break;
    }
    break;
  }
  case 4:
  case 8: {
    EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
      // In determining whether RIP-relative mode is used (rm=5),
      // or whether a SIB byte is present (rm=4),
      // the extension bits (REX.b and EVEX.x) are ignored.
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      case 0x5: // RIP-relative
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
      insn->displacementSize = 1;
      [[fallthrough]];
    case 0x2:
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        // NOTE(review): unlike the mod == 0 case above, this uses EA_BASE_sib
        // regardless of the address size - confirm this is intended.
        insn->eaBase = EA_BASE_sib;
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
      break;
    }
    break;
  }
  } // switch (insn->addressSize)

  return 0;
}

// Defines a static function `name` that maps a register index to the register
// enumerator appropriate for an operand type. `base` is the value added to the
// index for TYPE_Rv, `prefix` is the enumerator prefix (e.g. MODRM_REG or
// EA_REG), and `mask` is applied to the index for the general-purpose
// register types.
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
  static uint16_t name(struct InternalInstruction *insn, OperandType type, \
                       uint8_t index, uint8_t *valid) { \
    *valid = 1; \
    switch (type) { \
    default: \
      debug("Unhandled register type"); \
      *valid = 0; \
      return 0; \
    case TYPE_Rv: \
      return base + index; \
    case TYPE_R8: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      if (insn->rexPrefix && index >= 4 && index <= 7) { \
        return prefix##_SPL + (index - 4); \
      } else { \
        return prefix##_AL + index; \
      } \
    case TYPE_R16: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_AX + index; \
    case TYPE_R32: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_EAX + index; \
    case TYPE_R64: \
      index &= mask; \
      if (index > 0xf) \
        *valid = 0; \
      return prefix##_RAX + index; \
    case TYPE_ZMM: \
      return prefix##_ZMM0 + index; \
    case TYPE_YMM: \
      return prefix##_YMM0 + index; \
    case TYPE_XMM: \
      return prefix##_XMM0 + index; \
    case TYPE_TMM: \
      if (index > 7) \
        *valid = 0; \
      return prefix##_TMM0 + index; \
    case TYPE_VK: \
      index &= 0xf; \
      if (index > 7) \
        *valid = 0; \
      return prefix##_K0 + index; \
    case TYPE_VK_PAIR: \
      if (index > 7) \
        *valid = 0; \
      return prefix##_K0_K1 + (index / 2); \
    case TYPE_MM64: \
      return prefix##_MM0 + (index & 0x7); \
    case TYPE_SEGMENTREG: \
      if ((index & 7) > 5) \
        *valid = 0; \
      return prefix##_ES + (index & 7); \
    case TYPE_DEBUGREG: \
      return prefix##_DR0 + index; \
    case TYPE_CONTROLREG: \
      return prefix##_CR0 + index; \
    case TYPE_MVSIBX: \
      return prefix##_XMM0 + index; \
    case TYPE_MVSIBY: \
      return prefix##_YMM0 + index; \
    case TYPE_MVSIBZ: \
      return prefix##_ZMM0 + index; \
    } \
  }

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)

// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
//
// @param insn - See fixup*Value().
831 // @param op - The operand specifier. 832 // @return - 0 if fixup was successful; -1 if the register returned was 833 // invalid for its class. 834 static int fixupReg(struct InternalInstruction *insn, 835 const struct OperandSpecifier *op) { 836 uint8_t valid; 837 LLVM_DEBUG(dbgs() << "fixupReg()"); 838 839 switch ((OperandEncoding)op->encoding) { 840 default: 841 debug("Expected a REG or R/M encoding in fixupReg"); 842 return -1; 843 case ENCODING_VVVV: 844 insn->vvvv = 845 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 846 if (!valid) 847 return -1; 848 break; 849 case ENCODING_REG: 850 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 851 insn->reg - insn->regBase, &valid); 852 if (!valid) 853 return -1; 854 break; 855 case ENCODING_SIB: 856 CASE_ENCODING_RM: 857 if (insn->eaBase >= insn->eaRegBase) { 858 insn->eaBase = (EABase)fixupRMValue( 859 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 860 if (!valid) 861 return -1; 862 } 863 break; 864 } 865 866 return 0; 867 } 868 869 // Read the opcode (except the ModR/M byte in the case of extended or escape 870 // opcodes). 
871 static bool readOpcode(struct InternalInstruction *insn) { 872 uint8_t current; 873 LLVM_DEBUG(dbgs() << "readOpcode()"); 874 875 insn->opcodeType = ONEBYTE; 876 if (insn->vectorExtensionType == TYPE_EVEX) { 877 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 878 default: 879 LLVM_DEBUG( 880 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 881 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 882 return true; 883 case VEX_LOB_0F: 884 insn->opcodeType = TWOBYTE; 885 return consume(insn, insn->opcode); 886 case VEX_LOB_0F38: 887 insn->opcodeType = THREEBYTE_38; 888 return consume(insn, insn->opcode); 889 case VEX_LOB_0F3A: 890 insn->opcodeType = THREEBYTE_3A; 891 return consume(insn, insn->opcode); 892 case VEX_LOB_MAP5: 893 insn->opcodeType = MAP5; 894 return consume(insn, insn->opcode); 895 case VEX_LOB_MAP6: 896 insn->opcodeType = MAP6; 897 return consume(insn, insn->opcode); 898 } 899 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 900 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 901 default: 902 LLVM_DEBUG( 903 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 904 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 905 return true; 906 case VEX_LOB_0F: 907 insn->opcodeType = TWOBYTE; 908 return consume(insn, insn->opcode); 909 case VEX_LOB_0F38: 910 insn->opcodeType = THREEBYTE_38; 911 return consume(insn, insn->opcode); 912 case VEX_LOB_0F3A: 913 insn->opcodeType = THREEBYTE_3A; 914 return consume(insn, insn->opcode); 915 case VEX_LOB_MAP5: 916 insn->opcodeType = MAP5; 917 return consume(insn, insn->opcode); 918 case VEX_LOB_MAP6: 919 insn->opcodeType = MAP6; 920 return consume(insn, insn->opcode); 921 } 922 } else if (insn->vectorExtensionType == TYPE_VEX_2B) { 923 insn->opcodeType = TWOBYTE; 924 return consume(insn, insn->opcode); 925 } else if (insn->vectorExtensionType == TYPE_XOP) { 926 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 927 default: 928 LLVM_DEBUG( 929 dbgs() 
<< format("Unhandled m-mmmm field for instruction (0x%hhx)", 930 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 931 return true; 932 case XOP_MAP_SELECT_8: 933 insn->opcodeType = XOP8_MAP; 934 return consume(insn, insn->opcode); 935 case XOP_MAP_SELECT_9: 936 insn->opcodeType = XOP9_MAP; 937 return consume(insn, insn->opcode); 938 case XOP_MAP_SELECT_A: 939 insn->opcodeType = XOPA_MAP; 940 return consume(insn, insn->opcode); 941 } 942 } 943 944 if (consume(insn, current)) 945 return true; 946 947 if (current == 0x0f) { 948 LLVM_DEBUG( 949 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 950 if (consume(insn, current)) 951 return true; 952 953 if (current == 0x38) { 954 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 955 current)); 956 if (consume(insn, current)) 957 return true; 958 959 insn->opcodeType = THREEBYTE_38; 960 } else if (current == 0x3a) { 961 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 962 current)); 963 if (consume(insn, current)) 964 return true; 965 966 insn->opcodeType = THREEBYTE_3A; 967 } else if (current == 0x0f) { 968 LLVM_DEBUG( 969 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 970 971 // Consume operands before the opcode to comply with the 3DNow encoding 972 if (readModRM(insn)) 973 return true; 974 975 if (consume(insn, current)) 976 return true; 977 978 insn->opcodeType = THREEDNOW_MAP; 979 } else { 980 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix"); 981 insn->opcodeType = TWOBYTE; 982 } 983 } else if (insn->mandatoryPrefix) 984 // The opcode with mandatory prefix must start with opcode escape. 985 // If not it's legacy repeat prefix 986 insn->mandatoryPrefix = 0; 987 988 // At this point we have consumed the full opcode. 989 // Anything we consume from here on must be unconsumed. 990 insn->opcode = current; 991 992 return false; 993 } 994 995 // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  // Walk both names in lockstep; the loop is only left through a return.
  for (int i = 0;; i++) {
    // Both names end at the same index: every earlier character either matched
    // or was one of the accepted substitutions below.
    if (orig[i] == '\0' && equiv[i] == '\0')
      return true;
    // Exactly one of the names ended, so their lengths differ.
    if (orig[i] == '\0' || equiv[i] == '\0')
      return false;
    if (orig[i] != equiv[i]) {
      // Accepted substitutions. Each one is checked on a single character,
      // independently of its neighbours.
      // 'Q' or 'L' in orig may be 'W' in equiv.
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue;
      // '6' or '3' in orig may be '1' in equiv (presumably the leading digit
      // of 64/32 versus 16).
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue;
      // '4' or '2' in orig may be '6' in equiv (presumably the trailing digit
      // of 64/32 versus 16).
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue;
      // Any other difference: not an equivalent name.
      return false;
    }
  }
}

// Determine whether this instruction is a 64-bit instruction.
//
// The test is purely textual: it returns true as soon as the characters "64"
// appear next to each other anywhere in name, and false once the terminating
// NUL is reached without finding them.
static bool is64Bit(const char *name) {
  for (int i = 0;; ++i) {
    if (name[i] == '\0')
      return false;
    // name[i] is known to be non-NUL here, so reading name[i + 1] stays within
    // the string (at worst it reads the terminator).
    if (name[i] == '6' && name[i + 1] == '4')
      return true;
  }
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes, and using a supplied attribute mask.
//
// @param instructionID - Receives the decoded instruction ID on success.
// @param insn          - The instruction being decoded; its opcodeType and
//                        opcode select the table entry, and readModRM() may
//                        be called on it.
// @param attrMask      - Attribute bitmask used to index
//                        x86DisassemblerContexts.
// @return              - 0 on success; -1 if the ModR/M byte was needed but
//                        readModRM() failed.
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
                                        struct InternalInstruction *insn,
                                        uint16_t attrMask) {
  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  // Pick the decision table that corresponds to the opcode map.
  // NOTE(review): there is no default case, so 'decision' is only assigned for
  // the opcode types listed here - confirm these cover every OpcodeType.
  const ContextDecision *decision;
  switch (insn->opcodeType) {
  case ONEBYTE:
    decision = &ONEBYTE_SYM;
    break;
  case TWOBYTE:
    decision = &TWOBYTE_SYM;
    break;
  case THREEBYTE_38:
    decision = &THREEBYTE38_SYM;
    break;
  case THREEBYTE_3A:
    decision = &THREEBYTE3A_SYM;
    break;
  case XOP8_MAP:
    decision = &XOP8_MAP_SYM;
    break;
  case XOP9_MAP:
    decision = &XOP9_MAP_SYM;
    break;
  case XOPA_MAP:
    decision = &XOPA_MAP_SYM;
    break;
  case THREEDNOW_MAP:
    decision = &THREEDNOW_MAP_SYM;
    break;
  case MAP5:
    decision = &MAP5_SYM;
    break;
  case MAP6:
    decision = &MAP6_SYM;
    break;
  }

  // Anything other than MODRM_ONEENTRY means the ModR/M byte takes part in
  // selecting the instruction, so it has to be read before decoding.
  if (decision->opcodeDecisions[insnCtx]
          .modRMDecisions[insn->opcode]
          .modrm_type != MODRM_ONEENTRY) {
    if (readModRM(insn))
      return -1;
    *instructionID =
        decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  } else {
    *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  }

  return 0;
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// @param insn - The instruction being decoded. On success its instructionID
//               and spec fields are filled in.
// @param mii  - Used only to look up instruction names for the table
//               work-arounds below.
// @return     - 0 on success; -1 if the vector extension type is not one of
//               the handled kinds or if a required table lookup failed.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  // Step 1: derive the attribute mask from the prefixes. Exactly one of the
  // three top-level branches runs: VEX/EVEX/XOP encoded, legacy prefixes
  // without a mandatory prefix, or a mandatory prefix.
  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXB;
      if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXK;
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        // Only the two-byte VEX form also folds in the address-size prefix
        // here.
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      // A vector extension type that is none of EVEX, VEX (2/3 byte) or XOP.
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // REX.W (bit 3 of the REX prefix) sets ATTR_REXW and clears ATTR_ADSIZE.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    // Same forcing for the two-byte opcodes 0x80-0x8F.
    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }


  // Step 2: first table lookup with the mask computed so far.
  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        // The lookup with REX.W failed: keep the result of the first lookup.
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
      // Otherwise fall through to the remaining checks with the original ID.
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    // Redo the lookup with the adjusted mask; this result is final.
    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Take the OpSize variant only if its name is the 16-bit spelling of the
    // original and exactly one of "16-bit mode" / "OpSize prefix present"
    // holds (the XOR binds tighter than the &&).
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup failed: restore the real opcode and keep the original result.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  // No work-around applied: use the result of the first lookup.
  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}

// Reads an operand from the opcode field of an instruction and interprets it
// appropriately given the operand width. Handles AddRegFrm instructions.
//
// @param insn  - the instruction whose opcode field is to be read.
// @param size  - The width (in bytes) of the register being specified.
//                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
//                RAX. 0 means "use insn->registerSize".
// @return      - 0 on success; nonzero otherwise.
1376 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1377 LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1378 1379 if (size == 0) 1380 size = insn->registerSize; 1381 1382 switch (size) { 1383 case 1: 1384 insn->opcodeRegister = (Reg)( 1385 MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1386 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1387 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1388 insn->opcodeRegister = 1389 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1390 } 1391 1392 break; 1393 case 2: 1394 insn->opcodeRegister = (Reg)( 1395 MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1396 break; 1397 case 4: 1398 insn->opcodeRegister = 1399 (Reg)(MODRM_REG_EAX + 1400 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1401 break; 1402 case 8: 1403 insn->opcodeRegister = 1404 (Reg)(MODRM_REG_RAX + 1405 ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); 1406 break; 1407 } 1408 1409 return 0; 1410 } 1411 1412 // Consume an immediate operand from an instruction, given the desired operand 1413 // size. 1414 // 1415 // @param insn - The instruction whose operand is to be read. 1416 // @param size - The width (in bytes) of the operand. 1417 // @return - 0 if the immediate was successfully consumed; nonzero 1418 // otherwise. 
static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
  uint8_t imm8;
  uint16_t imm16;
  uint32_t imm32;
  uint64_t imm64;

  LLVM_DEBUG(dbgs() << "readImmediate()");

  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");

  // Record where the immediate lives (relative to the start of the
  // instruction) and how wide it is, before actually consuming it.
  insn->immediateSize = size;
  insn->immediateOffset = insn->readerCursor - insn->startLocation;

  // Consume exactly 'size' bytes and store the value in the next free slot.
  switch (size) {
  case 1:
    if (consume(insn, imm8))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm8;
    break;
  case 2:
    if (consume(insn, imm16))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm16;
    break;
  case 4:
    if (consume(insn, imm32))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm32;
    break;
  case 8:
    if (consume(insn, imm64))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm64;
    break;
  default:
    llvm_unreachable("invalid size");
  }

  insn->numImmediatesConsumed++;

  return 0;
}

// Consume vvvv from an instruction if it has a VEX prefix.
//
// Stores the extracted value in insn->vvvv.
//
// @param insn  - The instruction whose vector extension prefix is inspected.
// @return      - 0 on success; -1 if the instruction has no EVEX, VEX or XOP
//                prefix.
static int readVVVV(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readVVVV()");

  int vvvv;
  if (insn->vectorExtensionType == TYPE_EVEX)
    // EVEX contributes one extra bit (V2), placed at bit 4.
    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
  else if (insn->vectorExtensionType == TYPE_VEX_3B)
    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
  else if (insn->vectorExtensionType == TYPE_VEX_2B)
    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
  else if (insn->vectorExtensionType == TYPE_XOP)
    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
  else
    return -1;

  if (insn->mode != MODE_64BIT)
    vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.

  insn->vvvv = static_cast<Reg>(vvvv);
  return 0;
}

// Read a mask register number from the aaa field of the EVEX prefix and store
// it in insn->writemask.
//
// @param insn    - The instruction whose EVEX prefix is to be read.
// @return        - 0 on success; -1 if the instruction is not EVEX-encoded.
static int readMaskRegister(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readMaskRegister()");

  if (insn->vectorExtensionType != TYPE_EVEX)
    return -1;

  insn->writemask =
      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
  return 0;
}

// Consults the specifier for an instruction and consumes all
// operands for that instruction, interpreting them as it goes.
//
// @param insn  - The instruction being decoded; insn->spec must already be
//                set, since it selects the operand set that is iterated.
// @return      - 0 on success; -1 if an operand could not be read, has an
//                unknown encoding, or a non-zero vvvv was left unused.
static int readOperands(struct InternalInstruction *insn) {
  int hasVVVV, needVVVV;
  // Set once an 8-bit immediate that encodes an XMM/YMM register was read;
  // a later ENCODING_IB operand then reuses that byte instead of reading.
  int sawRegImm = 0;

  LLVM_DEBUG(dbgs() << "readOperands()");

  // If non-zero vvvv specified, make sure one of the operands uses it.
  hasVVVV = !readVVVV(insn);
  needVVVV = hasVVVV && (insn->vvvv != 0);

  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
    switch (Op.encoding) {
    case ENCODING_NONE:
    case ENCODING_SI:
    case ENCODING_DI:
      break;
    CASE_ENCODING_VSIB:
      // VSIB can use the V2 bit so check only the other bits.
      if (needVVVV)
        needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
      if (readModRM(insn))
        return -1;

      // Reject if SIB wasn't used.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;

      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
      if (insn->sibIndex == SIB_INDEX_NONE)
        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);

      // If EVEX.v2 is set this is one of the 16-31 registers.
      if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
          v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);

      // Adjust the index register to the correct size.
      switch ((OperandType)Op.type) {
      default:
        debug("Unhandled VSIB index type");
        return -1;
      case TYPE_MVSIBX:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBY:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBZ:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      }

      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
      break;
    case ENCODING_SIB:
      // Reject if SIB wasn't used.
      // NOTE(review): this check runs before readModRM() in this case, unlike
      // the VSIB case above - presumably eaBase is already populated by an
      // earlier readModRM() call; confirm.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_REG:
    CASE_ENCODING_RM:
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
      break;
    case ENCODING_IB:
      if (sawRegImm) {
        // Saw a register immediate so don't read again and instead split the
        // previous immediate. FIXME: This is a hack.
        insn->immediates[insn->numImmediatesConsumed] =
            insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
        ++insn->numImmediatesConsumed;
        break;
      }
      if (readImmediate(insn, 1))
        return -1;
      if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
        sawRegImm = 1;
      break;
    case ENCODING_IW:
      if (readImmediate(insn, 2))
        return -1;
      break;
    case ENCODING_ID:
      if (readImmediate(insn, 4))
        return -1;
      break;
    case ENCODING_IO:
      if (readImmediate(insn, 8))
        return -1;
      break;
    case ENCODING_Iv:
      if (readImmediate(insn, insn->immediateSize))
        return -1;
      break;
    case ENCODING_Ia:
      if (readImmediate(insn, insn->addressSize))
        return -1;
      break;
    case ENCODING_IRC:
      // Two-bit value built from the EVEX L2 (high) and L (low) bits.
      insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
                 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
      break;
    case ENCODING_RB:
      if (readOpcodeRegister(insn, 1))
        return -1;
      break;
    case ENCODING_RW:
      if (readOpcodeRegister(insn, 2))
        return -1;
      break;
    case ENCODING_RD:
      if (readOpcodeRegister(insn, 4))
        return -1;
      break;
    case ENCODING_RO:
      if (readOpcodeRegister(insn, 8))
        return -1;
      break;
    case ENCODING_Rv:
      if (readOpcodeRegister(insn, 0))
        return -1;
      break;
    case ENCODING_CC:
      // The low four opcode bits are stored in the second immediate slot.
      insn->immediates[1] = insn->opcode & 0xf;
      break;
    case ENCODING_FP:
      break;
    case ENCODING_VVVV:
      needVVVV = 0; // Mark that we have found a VVVV operand.
      if (!hasVVVV)
        return -1;
      // Outside 64-bit mode only the low three bits are kept (this is the
      // "bit 3 must be cleared later" step referred to in readVVVV).
      if (insn->mode != MODE_64BIT)
        insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_WRITEMASK:
      if (readMaskRegister(insn))
        return -1;
      break;
    case ENCODING_DUP:
      break;
    default:
      LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
      return -1;
    }
  }

  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
  if (needVVVV)
    return -1;

  return 0;
}

namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
} // namespace X86

} // namespace llvm

// Forward declaration; the definition is not part of this section of the file.
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);

namespace {

/// Generic disassembler for all X86 platforms. All a platform class should
/// have to do is subclass the constructor, and provide a different
/// disassemblerMode value.
1699 class X86GenericDisassembler : public MCDisassembler { 1700 std::unique_ptr<const MCInstrInfo> MII; 1701 public: 1702 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 1703 std::unique_ptr<const MCInstrInfo> MII); 1704 public: 1705 DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 1706 ArrayRef<uint8_t> Bytes, uint64_t Address, 1707 raw_ostream &cStream) const override; 1708 1709 private: 1710 DisassemblerMode fMode; 1711 }; 1712 1713 } // namespace 1714 1715 X86GenericDisassembler::X86GenericDisassembler( 1716 const MCSubtargetInfo &STI, 1717 MCContext &Ctx, 1718 std::unique_ptr<const MCInstrInfo> MII) 1719 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 1720 const FeatureBitset &FB = STI.getFeatureBits(); 1721 if (FB[X86::Is16Bit]) { 1722 fMode = MODE_16BIT; 1723 return; 1724 } else if (FB[X86::Is32Bit]) { 1725 fMode = MODE_32BIT; 1726 return; 1727 } else if (FB[X86::Is64Bit]) { 1728 fMode = MODE_64BIT; 1729 return; 1730 } 1731 1732 llvm_unreachable("Invalid CPU mode"); 1733 } 1734 1735 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 1736 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1737 raw_ostream &CStream) const { 1738 CommentStream = &CStream; 1739 1740 InternalInstruction Insn; 1741 memset(&Insn, 0, sizeof(InternalInstruction)); 1742 Insn.bytes = Bytes; 1743 Insn.startLocation = Address; 1744 Insn.readerCursor = Address; 1745 Insn.mode = fMode; 1746 1747 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1748 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1749 readOperands(&Insn)) { 1750 Size = Insn.readerCursor - Address; 1751 return Fail; 1752 } 1753 1754 Insn.operands = x86OperandSets[Insn.spec->operands]; 1755 Insn.length = Insn.readerCursor - Insn.startLocation; 1756 Size = Insn.length; 1757 if (Size > 15) 1758 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1759 1760 bool Ret = translateInstruction(Instr, Insn, this); 1761 if 
(!Ret) { 1762 unsigned Flags = X86::IP_NO_PREFIX; 1763 if (Insn.hasAdSize) 1764 Flags |= X86::IP_HAS_AD_SIZE; 1765 if (!Insn.mandatoryPrefix) { 1766 if (Insn.hasOpSize) 1767 Flags |= X86::IP_HAS_OP_SIZE; 1768 if (Insn.repeatPrefix == 0xf2) 1769 Flags |= X86::IP_HAS_REPEAT_NE; 1770 else if (Insn.repeatPrefix == 0xf3 && 1771 // It should not be 'pause' f3 90 1772 Insn.opcode != 0x90) 1773 Flags |= X86::IP_HAS_REPEAT; 1774 if (Insn.hasLockPrefix) 1775 Flags |= X86::IP_HAS_LOCK; 1776 } 1777 Instr.setFlags(Flags); 1778 } 1779 return (!Ret) ? Success : Fail; 1780 } 1781 1782 // 1783 // Private code that translates from struct InternalInstructions to MCInsts. 1784 // 1785 1786 /// translateRegister - Translates an internal register to the appropriate LLVM 1787 /// register, and appends it as an operand to an MCInst. 1788 /// 1789 /// @param mcInst - The MCInst to append to. 1790 /// @param reg - The Reg to append. 1791 static void translateRegister(MCInst &mcInst, Reg reg) { 1792 #define ENTRY(x) X86::x, 1793 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 1794 #undef ENTRY 1795 1796 MCPhysReg llvmRegnum = llvmRegnums[reg]; 1797 mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 1798 } 1799 1800 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 1801 0, // SEG_OVERRIDE_NONE 1802 X86::CS, 1803 X86::SS, 1804 X86::DS, 1805 X86::ES, 1806 X86::FS, 1807 X86::GS 1808 }; 1809 1810 /// translateSrcIndex - Appends a source index operand to an MCInst. 1811 /// 1812 /// @param mcInst - The MCInst to append to. 1813 /// @param insn - The internal instruction. 1814 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 1815 unsigned baseRegNo; 1816 1817 if (insn.mode == MODE_64BIT) 1818 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 1819 else if (insn.mode == MODE_32BIT) 1820 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 1821 else { 1822 assert(insn.mode == MODE_16BIT); 1823 baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 1824 } 1825 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1826 mcInst.addOperand(baseReg); 1827 1828 MCOperand segmentReg; 1829 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1830 mcInst.addOperand(segmentReg); 1831 return false; 1832 } 1833 1834 /// translateDstIndex - Appends a destination index operand to an MCInst. 1835 /// 1836 /// @param mcInst - The MCInst to append to. 1837 /// @param insn - The internal instruction. 1838 1839 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 1840 unsigned baseRegNo; 1841 1842 if (insn.mode == MODE_64BIT) 1843 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 1844 else if (insn.mode == MODE_32BIT) 1845 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 1846 else { 1847 assert(insn.mode == MODE_16BIT); 1848 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 1849 } 1850 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1851 mcInst.addOperand(baseReg); 1852 return false; 1853 } 1854 1855 /// translateImmediate - Appends an immediate operand to an MCInst. 1856 /// 1857 /// @param mcInst - The MCInst to append to. 1858 /// @param immediate - The immediate value to append. 1859 /// @param operand - The operand, as stored in the descriptor table. 1860 /// @param insn - The internal instruction. 1861 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 1862 const OperandSpecifier &operand, 1863 InternalInstruction &insn, 1864 const MCDisassembler *Dis) { 1865 // Sign-extend the immediate if necessary. 
1866 1867 OperandType type = (OperandType)operand.type; 1868 1869 bool isBranch = false; 1870 uint64_t pcrel = 0; 1871 if (type == TYPE_REL) { 1872 isBranch = true; 1873 pcrel = insn.startLocation + insn.length; 1874 switch (operand.encoding) { 1875 default: 1876 break; 1877 case ENCODING_Iv: 1878 switch (insn.displacementSize) { 1879 default: 1880 break; 1881 case 1: 1882 if(immediate & 0x80) 1883 immediate |= ~(0xffull); 1884 break; 1885 case 2: 1886 if(immediate & 0x8000) 1887 immediate |= ~(0xffffull); 1888 break; 1889 case 4: 1890 if(immediate & 0x80000000) 1891 immediate |= ~(0xffffffffull); 1892 break; 1893 case 8: 1894 break; 1895 } 1896 break; 1897 case ENCODING_IB: 1898 if(immediate & 0x80) 1899 immediate |= ~(0xffull); 1900 break; 1901 case ENCODING_IW: 1902 if(immediate & 0x8000) 1903 immediate |= ~(0xffffull); 1904 break; 1905 case ENCODING_ID: 1906 if(immediate & 0x80000000) 1907 immediate |= ~(0xffffffffull); 1908 break; 1909 } 1910 } 1911 // By default sign-extend all X86 immediates based on their encoding. 1912 else if (type == TYPE_IMM) { 1913 switch (operand.encoding) { 1914 default: 1915 break; 1916 case ENCODING_IB: 1917 if(immediate & 0x80) 1918 immediate |= ~(0xffull); 1919 break; 1920 case ENCODING_IW: 1921 if(immediate & 0x8000) 1922 immediate |= ~(0xffffull); 1923 break; 1924 case ENCODING_ID: 1925 if(immediate & 0x80000000) 1926 immediate |= ~(0xffffffffull); 1927 break; 1928 case ENCODING_IO: 1929 break; 1930 } 1931 } 1932 1933 switch (type) { 1934 case TYPE_XMM: 1935 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 1936 return; 1937 case TYPE_YMM: 1938 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 1939 return; 1940 case TYPE_ZMM: 1941 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 1942 return; 1943 default: 1944 // operand is 64 bits wide. Do nothing. 
1945 break; 1946 } 1947 1948 if (!Dis->tryAddingSymbolicOperand( 1949 mcInst, immediate + pcrel, insn.startLocation, isBranch, 1950 insn.immediateOffset, insn.immediateSize, insn.length)) 1951 mcInst.addOperand(MCOperand::createImm(immediate)); 1952 1953 if (type == TYPE_MOFFS) { 1954 MCOperand segmentReg; 1955 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1956 mcInst.addOperand(segmentReg); 1957 } 1958 } 1959 1960 /// translateRMRegister - Translates a register stored in the R/M field of the 1961 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 1962 /// @param mcInst - The MCInst to append to. 1963 /// @param insn - The internal instruction to extract the R/M field 1964 /// from. 1965 /// @return - 0 on success; -1 otherwise 1966 static bool translateRMRegister(MCInst &mcInst, 1967 InternalInstruction &insn) { 1968 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 1969 debug("A R/M register operand may not have a SIB byte"); 1970 return true; 1971 } 1972 1973 switch (insn.eaBase) { 1974 default: 1975 debug("Unexpected EA base register"); 1976 return true; 1977 case EA_BASE_NONE: 1978 debug("EA_BASE_NONE for ModR/M base"); 1979 return true; 1980 #define ENTRY(x) case EA_BASE_##x: 1981 ALL_EA_BASES 1982 #undef ENTRY 1983 debug("A R/M register operand may not have a base; " 1984 "the operand must be a register."); 1985 return true; 1986 #define ENTRY(x) \ 1987 case EA_REG_##x: \ 1988 mcInst.addOperand(MCOperand::createReg(X86::x)); break; 1989 ALL_REGS 1990 #undef ENTRY 1991 } 1992 1993 return false; 1994 } 1995 1996 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 1997 /// fields of an internal instruction (and possibly its SIB byte) to a memory 1998 /// operand in LLVM's format, and appends it to an MCInst. 1999 /// 2000 /// @param mcInst - The MCInst to append to. 2001 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2002 /// from. 
2003 /// @param ForceSIB - The instruction must use SIB. 2004 /// @return - 0 on success; nonzero otherwise 2005 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 2006 const MCDisassembler *Dis, 2007 bool ForceSIB = false) { 2008 // Addresses in an MCInst are represented as five operands: 2009 // 1. basereg (register) The R/M base, or (if there is a SIB) the 2010 // SIB base 2011 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 2012 // scale amount 2013 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 2014 // the index (which is multiplied by the 2015 // scale amount) 2016 // 4. displacement (immediate) 0, or the displacement if there is one 2017 // 5. segmentreg (register) x86_registerNONE for now, but could be set 2018 // if we have segment overrides 2019 2020 MCOperand baseReg; 2021 MCOperand scaleAmount; 2022 MCOperand indexReg; 2023 MCOperand displacement; 2024 MCOperand segmentReg; 2025 uint64_t pcrel = 0; 2026 2027 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2028 if (insn.sibBase != SIB_BASE_NONE) { 2029 switch (insn.sibBase) { 2030 default: 2031 debug("Unexpected sibBase"); 2032 return true; 2033 #define ENTRY(x) \ 2034 case SIB_BASE_##x: \ 2035 baseReg = MCOperand::createReg(X86::x); break; 2036 ALL_SIB_BASES 2037 #undef ENTRY 2038 } 2039 } else { 2040 baseReg = MCOperand::createReg(X86::NoRegister); 2041 } 2042 2043 if (insn.sibIndex != SIB_INDEX_NONE) { 2044 switch (insn.sibIndex) { 2045 default: 2046 debug("Unexpected sibIndex"); 2047 return true; 2048 #define ENTRY(x) \ 2049 case SIB_INDEX_##x: \ 2050 indexReg = MCOperand::createReg(X86::x); break; 2051 EA_BASES_32BIT 2052 EA_BASES_64BIT 2053 REGS_XMM 2054 REGS_YMM 2055 REGS_ZMM 2056 #undef ENTRY 2057 } 2058 } else { 2059 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 2060 // but no index is used and modrm alone should have been enough. 2061 // -No base register in 32-bit mode. 
In 64-bit mode this is used to 2062 // avoid rip-relative addressing. 2063 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 2064 // base always requires a SIB byte. 2065 // -A scale other than 1 is used. 2066 if (!ForceSIB && 2067 (insn.sibScale != 1 || 2068 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 2069 (insn.sibBase != SIB_BASE_NONE && 2070 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 2071 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 2072 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : 2073 X86::RIZ); 2074 } else 2075 indexReg = MCOperand::createReg(X86::NoRegister); 2076 } 2077 2078 scaleAmount = MCOperand::createImm(insn.sibScale); 2079 } else { 2080 switch (insn.eaBase) { 2081 case EA_BASE_NONE: 2082 if (insn.eaDisplacement == EA_DISP_NONE) { 2083 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 2084 return true; 2085 } 2086 if (insn.mode == MODE_64BIT){ 2087 pcrel = insn.startLocation + insn.length; 2088 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 2089 insn.startLocation + 2090 insn.displacementOffset); 2091 // Section 2.2.1.6 2092 baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 2093 X86::RIP); 2094 } 2095 else 2096 baseReg = MCOperand::createReg(X86::NoRegister); 2097 2098 indexReg = MCOperand::createReg(X86::NoRegister); 2099 break; 2100 case EA_BASE_BX_SI: 2101 baseReg = MCOperand::createReg(X86::BX); 2102 indexReg = MCOperand::createReg(X86::SI); 2103 break; 2104 case EA_BASE_BX_DI: 2105 baseReg = MCOperand::createReg(X86::BX); 2106 indexReg = MCOperand::createReg(X86::DI); 2107 break; 2108 case EA_BASE_BP_SI: 2109 baseReg = MCOperand::createReg(X86::BP); 2110 indexReg = MCOperand::createReg(X86::SI); 2111 break; 2112 case EA_BASE_BP_DI: 2113 baseReg = MCOperand::createReg(X86::BP); 2114 indexReg = MCOperand::createReg(X86::DI); 2115 break; 2116 default: 2117 indexReg = MCOperand::createReg(X86::NoRegister); 2118 switch (insn.eaBase) { 2119 default: 2120 debug("Unexpected eaBase"); 2121 return true; 2122 // Here, we will use the fill-ins defined above. However, 2123 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 2124 // sib and sib64 were handled in the top-level if, so they're only 2125 // placeholders to keep the compiler happy. 2126 #define ENTRY(x) \ 2127 case EA_BASE_##x: \ 2128 baseReg = MCOperand::createReg(X86::x); break; 2129 ALL_EA_BASES 2130 #undef ENTRY 2131 #define ENTRY(x) case EA_REG_##x: 2132 ALL_REGS 2133 #undef ENTRY 2134 debug("A R/M memory operand may not be a register; " 2135 "the base field must be a base."); 2136 return true; 2137 } 2138 } 2139 2140 scaleAmount = MCOperand::createImm(1); 2141 } 2142 2143 displacement = MCOperand::createImm(insn.displacement); 2144 2145 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2146 2147 mcInst.addOperand(baseReg); 2148 mcInst.addOperand(scaleAmount); 2149 mcInst.addOperand(indexReg); 2150 2151 const uint8_t dispSize = 2152 (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 2153 2154 if (!Dis->tryAddingSymbolicOperand( 2155 mcInst, insn.displacement + pcrel, insn.startLocation, false, 2156 insn.displacementOffset, dispSize, insn.length)) 2157 mcInst.addOperand(displacement); 2158 mcInst.addOperand(segmentReg); 2159 return false; 2160 } 2161 2162 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 2163 /// byte of an instruction to LLVM form, and appends it to an MCInst. 2164 /// 2165 /// @param mcInst - The MCInst to append to. 2166 /// @param operand - The operand, as stored in the descriptor table. 2167 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2168 /// from. 2169 /// @return - 0 on success; nonzero otherwise 2170 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 2171 InternalInstruction &insn, const MCDisassembler *Dis) { 2172 switch (operand.type) { 2173 default: 2174 debug("Unexpected type for a R/M operand"); 2175 return true; 2176 case TYPE_R8: 2177 case TYPE_R16: 2178 case TYPE_R32: 2179 case TYPE_R64: 2180 case TYPE_Rv: 2181 case TYPE_MM64: 2182 case TYPE_XMM: 2183 case TYPE_YMM: 2184 case TYPE_ZMM: 2185 case TYPE_TMM: 2186 case TYPE_VK_PAIR: 2187 case TYPE_VK: 2188 case TYPE_DEBUGREG: 2189 case TYPE_CONTROLREG: 2190 case TYPE_BNDR: 2191 return translateRMRegister(mcInst, insn); 2192 case TYPE_M: 2193 case TYPE_MVSIBX: 2194 case TYPE_MVSIBY: 2195 case TYPE_MVSIBZ: 2196 return translateRMMemory(mcInst, insn, Dis); 2197 case TYPE_MSIB: 2198 return translateRMMemory(mcInst, insn, Dis, true); 2199 } 2200 } 2201 2202 /// translateFPRegister - Translates a stack position on the FPU stack to its 2203 /// LLVM form, and appends it to an MCInst. 2204 /// 2205 /// @param mcInst - The MCInst to append to. 2206 /// @param stackPos - The stack position to translate. 
2207 static void translateFPRegister(MCInst &mcInst, 2208 uint8_t stackPos) { 2209 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 2210 } 2211 2212 /// translateMaskRegister - Translates a 3-bit mask register number to 2213 /// LLVM form, and appends it to an MCInst. 2214 /// 2215 /// @param mcInst - The MCInst to append to. 2216 /// @param maskRegNum - Number of mask register from 0 to 7. 2217 /// @return - false on success; true otherwise. 2218 static bool translateMaskRegister(MCInst &mcInst, 2219 uint8_t maskRegNum) { 2220 if (maskRegNum >= 8) { 2221 debug("Invalid mask register number"); 2222 return true; 2223 } 2224 2225 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 2226 return false; 2227 } 2228 2229 /// translateOperand - Translates an operand stored in an internal instruction 2230 /// to LLVM's format and appends it to an MCInst. 2231 /// 2232 /// @param mcInst - The MCInst to append to. 2233 /// @param operand - The operand, as stored in the descriptor table. 2234 /// @param insn - The internal instruction. 2235 /// @return - false on success; true otherwise. 
2236 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 2237 InternalInstruction &insn, 2238 const MCDisassembler *Dis) { 2239 switch (operand.encoding) { 2240 default: 2241 debug("Unhandled operand encoding during translation"); 2242 return true; 2243 case ENCODING_REG: 2244 translateRegister(mcInst, insn.reg); 2245 return false; 2246 case ENCODING_WRITEMASK: 2247 return translateMaskRegister(mcInst, insn.writemask); 2248 case ENCODING_SIB: 2249 CASE_ENCODING_RM: 2250 CASE_ENCODING_VSIB: 2251 return translateRM(mcInst, operand, insn, Dis); 2252 case ENCODING_IB: 2253 case ENCODING_IW: 2254 case ENCODING_ID: 2255 case ENCODING_IO: 2256 case ENCODING_Iv: 2257 case ENCODING_Ia: 2258 translateImmediate(mcInst, 2259 insn.immediates[insn.numImmediatesTranslated++], 2260 operand, 2261 insn, 2262 Dis); 2263 return false; 2264 case ENCODING_IRC: 2265 mcInst.addOperand(MCOperand::createImm(insn.RC)); 2266 return false; 2267 case ENCODING_SI: 2268 return translateSrcIndex(mcInst, insn); 2269 case ENCODING_DI: 2270 return translateDstIndex(mcInst, insn); 2271 case ENCODING_RB: 2272 case ENCODING_RW: 2273 case ENCODING_RD: 2274 case ENCODING_RO: 2275 case ENCODING_Rv: 2276 translateRegister(mcInst, insn.opcodeRegister); 2277 return false; 2278 case ENCODING_CC: 2279 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2280 return false; 2281 case ENCODING_FP: 2282 translateFPRegister(mcInst, insn.modRM & 7); 2283 return false; 2284 case ENCODING_VVVV: 2285 translateRegister(mcInst, insn.vvvv); 2286 return false; 2287 case ENCODING_DUP: 2288 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 2289 insn, Dis); 2290 } 2291 } 2292 2293 /// translateInstruction - Translates an internal instruction and all its 2294 /// operands to an MCInst. 2295 /// 2296 /// @param mcInst - The MCInst to populate with the instruction's data. 2297 /// @param insn - The internal instruction. 2298 /// @return - false on success; true otherwise. 
2299 static bool translateInstruction(MCInst &mcInst, 2300 InternalInstruction &insn, 2301 const MCDisassembler *Dis) { 2302 if (!insn.spec) { 2303 debug("Instruction has no specification"); 2304 return true; 2305 } 2306 2307 mcInst.clear(); 2308 mcInst.setOpcode(insn.instructionID); 2309 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 2310 // prefix bytes should be disassembled as xrelease and xacquire then set the 2311 // opcode to those instead of the rep and repne opcodes. 2312 if (insn.xAcquireRelease) { 2313 if(mcInst.getOpcode() == X86::REP_PREFIX) 2314 mcInst.setOpcode(X86::XRELEASE_PREFIX); 2315 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 2316 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 2317 } 2318 2319 insn.numImmediatesTranslated = 0; 2320 2321 for (const auto &Op : insn.operands) { 2322 if (Op.encoding != ENCODING_NONE) { 2323 if (translateOperand(mcInst, Op, insn, Dis)) { 2324 return true; 2325 } 2326 } 2327 } 2328 2329 return false; 2330 } 2331 2332 static MCDisassembler *createX86Disassembler(const Target &T, 2333 const MCSubtargetInfo &STI, 2334 MCContext &Ctx) { 2335 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 2336 return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 2337 } 2338 2339 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { 2340 // Register the disassembler. 2341 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 2342 createX86Disassembler); 2343 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 2344 createX86Disassembler); 2345 } 2346