1 //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is part of the X86 Disassembler. 10 // It contains code to translate the data produced by the decoder into 11 // MCInsts. 12 // 13 // 14 // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and 15 // 64-bit X86 instruction sets. The main decode sequence for an assembly 16 // instruction in this disassembler is: 17 // 18 // 1. Read the prefix bytes and determine the attributes of the instruction. 19 // These attributes, recorded in enum attributeBits 20 // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM 21 // provides a mapping from bitmasks to contexts, which are represented by 22 // enum InstructionContext (ibid.). 23 // 24 // 2. Read the opcode, and determine what kind of opcode it is. The 25 // disassembler distinguishes four kinds of opcodes, which are enumerated in 26 // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte 27 // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a 28 // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. 29 // 30 // 3. Depending on the opcode type, look in one of four ClassDecision structures 31 // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which 32 // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode, to get 33 // a ModRMDecision (ibid.). 34 // 35 // 4. Some instructions, such as escape opcodes or extended opcodes, or even 36 // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the 37 // ModR/M byte to complete decode. 
The ModRMDecision's type is an entry from 38 // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the 39 // ModR/M byte is required and how to interpret it. 40 // 41 // 5. After resolving the ModRMDecision, the disassembler has a unique ID 42 // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in 43 // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and 44 // meanings of its operands. 45 // 46 // 6. For each operand, its encoding is an entry from OperandEncoding 47 // (X86DisassemblerDecoderCommon.h) and its type is an entry from 48 // OperandType (ibid.). The encoding indicates how to read it from the 49 // instruction; the type indicates how to interpret the value once it has 50 // been read. For example, a register operand could be stored in the R/M 51 // field of the ModR/M byte, the REG field of the ModR/M byte, or added to 52 // the main opcode. This is orthogonal to its meaning (a GPR or an XMM 53 // register, for instance). Given this information, the operands can be 54 // extracted and interpreted. 55 // 56 // 7. As the last step, the disassembler translates the instruction information 57 // and operands into a format understandable by the client - in this case, an 58 // MCInst for use by the MC infrastructure. 59 // 60 // The disassembler is broken broadly into two parts: the table emitter that 61 // emits the instruction decode tables discussed above during compilation, and 62 // the disassembler itself. The table emitter is documented in more detail in 63 // utils/TableGen/X86DisassemblerEmitter.h. 64 // 65 // X86Disassembler.cpp contains the code responsible for step 7, and for 66 // invoking the decoder to execute steps 1-6. 67 // X86DisassemblerDecoderCommon.h contains the definitions needed by both the 68 // table emitter and the disassembler. 69 // X86DisassemblerDecoder.h contains the public interface of the decoder, 70 // factored out into C for possible use by other projects. 
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
// responsible for steps 1-6.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::X86Disassembler;

#define DEBUG_TYPE "x86-disassembler"

// Emits a debug-only message prefixed with the source line of the call site.
// Note that the expansion already ends in a semicolon.
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

// Specifies whether a ModR/M byte is needed and (if so) which
// instruction each possible value of the ModR/M byte corresponds to. Once
// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // How the ModR/M byte selects the instruction; decode() switches on this
  // value against the MODRM_* constants (MODRM_ONEENTRY, MODRM_SPLITRM, ...).
  uint8_t modrm_type;
  // Index of the first candidate in modRMTable; decode() adds a
  // ModR/M-derived offset to it according to modrm_type.
  uint16_t instructionIDs;
};

// Specifies which set of ModR/M->instruction tables to look at
// given a particular opcode.
struct OpcodeDecision {
  // One decision per opcode byte value; decode() indexes this by the opcode.
  ModRMDecision modRMDecisions[256];
};

// Specifies which opcode->instruction tables to look at given
// a particular context (set of attributes). Since there are many possible
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
// applies given a specific set of attributes. Hence there are only IC_max
// entries in this table, rather than 2^(ATTR_max).
117 struct ContextDecision { 118 OpcodeDecision opcodeDecisions[IC_max]; 119 }; 120 121 #include "X86GenDisassemblerTables.inc" 122 123 static InstrUID decode(OpcodeType type, InstructionContext insnContext, 124 uint8_t opcode, uint8_t modRM) { 125 const struct ModRMDecision *dec; 126 127 switch (type) { 128 case ONEBYTE: 129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 130 break; 131 case TWOBYTE: 132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 133 break; 134 case THREEBYTE_38: 135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 136 break; 137 case THREEBYTE_3A: 138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 139 break; 140 case XOP8_MAP: 141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 142 break; 143 case XOP9_MAP: 144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 145 break; 146 case XOPA_MAP: 147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 148 break; 149 case THREEDNOW_MAP: 150 dec = 151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 152 break; 153 case MAP4: 154 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 155 break; 156 case MAP5: 157 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 158 break; 159 case MAP6: 160 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 161 break; 162 case MAP7: 163 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; 164 break; 165 } 166 167 switch (dec->modrm_type) { 168 default: 169 llvm_unreachable("Corrupt table! 
Unknown modrm_type"); 170 return 0; 171 case MODRM_ONEENTRY: 172 return modRMTable[dec->instructionIDs]; 173 case MODRM_SPLITRM: 174 if (modFromModRM(modRM) == 0x3) 175 return modRMTable[dec->instructionIDs + 1]; 176 return modRMTable[dec->instructionIDs]; 177 case MODRM_SPLITREG: 178 if (modFromModRM(modRM) == 0x3) 179 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; 180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 181 case MODRM_SPLITMISC: 182 if (modFromModRM(modRM) == 0x3) 183 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; 184 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; 185 case MODRM_FULL: 186 return modRMTable[dec->instructionIDs + modRM]; 187 } 188 } 189 190 static bool peek(struct InternalInstruction *insn, uint8_t &byte) { 191 uint64_t offset = insn->readerCursor - insn->startLocation; 192 if (offset >= insn->bytes.size()) 193 return true; 194 byte = insn->bytes[offset]; 195 return false; 196 } 197 198 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { 199 auto r = insn->bytes; 200 uint64_t offset = insn->readerCursor - insn->startLocation; 201 if (offset + sizeof(T) > r.size()) 202 return true; 203 ptr = support::endian::read<T>(&r[offset], llvm::endianness::little); 204 insn->readerCursor += sizeof(T); 205 return false; 206 } 207 208 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { 209 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; 210 } 211 212 static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { 213 return insn->mode == MODE_64BIT && prefix == 0xd5; 214 } 215 216 // Consumes all of an instruction's prefix bytes, and marks the 217 // instruction as having them. Also sets the instruction's default operand, 218 // address, and other relevant data sizes to report operands correctly. 219 // 220 // insn must not be empty. 
221 static int readPrefixes(struct InternalInstruction *insn) { 222 bool isPrefix = true; 223 uint8_t byte = 0; 224 uint8_t nextByte; 225 226 LLVM_DEBUG(dbgs() << "readPrefixes()"); 227 228 while (isPrefix) { 229 // If we fail reading prefixes, just stop here and let the opcode reader 230 // deal with it. 231 if (consume(insn, byte)) 232 break; 233 234 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then 235 // break and let it be disassembled as a normal "instruction". 236 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK 237 break; 238 239 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) { 240 // If the byte is 0xf2 or 0xf3, and any of the following conditions are 241 // met: 242 // - it is followed by a LOCK (0xf0) prefix 243 // - it is followed by an xchg instruction 244 // then it should be disassembled as a xacquire/xrelease not repne/rep. 245 if (((nextByte == 0xf0) || 246 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { 247 insn->xAcquireRelease = true; 248 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support 249 break; 250 } 251 // Also if the byte is 0xf3, and the following condition is met: 252 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or 253 // "mov mem, imm" (opcode 0xc6/0xc7) instructions. 254 // then it should be disassembled as an xrelease not rep. 
255 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || 256 nextByte == 0xc6 || nextByte == 0xc7)) { 257 insn->xAcquireRelease = true; 258 break; 259 } 260 if (isREX(insn, nextByte)) { 261 uint8_t nnextByte; 262 // Go to REX prefix after the current one 263 if (consume(insn, nnextByte)) 264 return -1; 265 // We should be able to read next byte after REX prefix 266 if (peek(insn, nnextByte)) 267 return -1; 268 --insn->readerCursor; 269 } 270 } 271 272 switch (byte) { 273 case 0xf0: // LOCK 274 insn->hasLockPrefix = true; 275 break; 276 case 0xf2: // REPNE/REPNZ 277 case 0xf3: { // REP or REPE/REPZ 278 uint8_t nextByte; 279 if (peek(insn, nextByte)) 280 break; 281 // TODO: 282 // 1. There could be several 0x66 283 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then 284 // it's not mandatory prefix 285 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need 286 // 0x0f exactly after it to be mandatory prefix 287 // 4. if (nextByte == 0xd5) it's REX2 and we need 288 // 0x0f exactly after it to be mandatory prefix 289 if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f || 290 nextByte == 0x66) 291 // The last of 0xf2 /0xf3 is mandatory prefix 292 insn->mandatoryPrefix = byte; 293 insn->repeatPrefix = byte; 294 break; 295 } 296 case 0x2e: // CS segment override -OR- Branch not taken 297 insn->segmentOverride = SEG_OVERRIDE_CS; 298 break; 299 case 0x36: // SS segment override -OR- Branch taken 300 insn->segmentOverride = SEG_OVERRIDE_SS; 301 break; 302 case 0x3e: // DS segment override 303 insn->segmentOverride = SEG_OVERRIDE_DS; 304 break; 305 case 0x26: // ES segment override 306 insn->segmentOverride = SEG_OVERRIDE_ES; 307 break; 308 case 0x64: // FS segment override 309 insn->segmentOverride = SEG_OVERRIDE_FS; 310 break; 311 case 0x65: // GS segment override 312 insn->segmentOverride = SEG_OVERRIDE_GS; 313 break; 314 case 0x66: { // Operand-size override { 315 uint8_t nextByte; 316 insn->hasOpSize = true; 317 if 
(peek(insn, nextByte)) 318 break; 319 // 0x66 can't overwrite existing mandatory prefix and should be ignored 320 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) 321 insn->mandatoryPrefix = byte; 322 break; 323 } 324 case 0x67: // Address-size override 325 insn->hasAdSize = true; 326 break; 327 default: // Not a prefix byte 328 isPrefix = false; 329 break; 330 } 331 332 if (isPrefix) 333 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte)); 334 } 335 336 insn->vectorExtensionType = TYPE_NO_VEX_XOP; 337 338 if (byte == 0x62) { 339 uint8_t byte1, byte2; 340 if (consume(insn, byte1)) { 341 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix"); 342 return -1; 343 } 344 345 if (peek(insn, byte2)) { 346 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 347 return -1; 348 } 349 350 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) { 351 insn->vectorExtensionType = TYPE_EVEX; 352 } else { 353 --insn->readerCursor; // unconsume byte1 354 --insn->readerCursor; // unconsume byte 355 } 356 357 if (insn->vectorExtensionType == TYPE_EVEX) { 358 insn->vectorExtensionPrefix[0] = byte; 359 insn->vectorExtensionPrefix[1] = byte1; 360 if (consume(insn, insn->vectorExtensionPrefix[2])) { 361 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix"); 362 return -1; 363 } 364 if (consume(insn, insn->vectorExtensionPrefix[3])) { 365 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix"); 366 return -1; 367 } 368 369 if (insn->mode == MODE_64BIT) { 370 // We simulate the REX prefix for simplicity's sake 371 insn->rexPrefix = 0x40 | 372 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | 373 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | 374 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | 375 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); 376 377 // We simulate the REX2 prefix for simplicity's sake 378 insn->rex2ExtensionPrefix[1] = 379 (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) 
<< 6) | 380 (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) | 381 (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4); 382 } 383 384 LLVM_DEBUG( 385 dbgs() << format( 386 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", 387 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], 388 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3])); 389 } 390 } else if (byte == 0xc4) { 391 uint8_t byte1; 392 if (peek(insn, byte1)) { 393 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 394 return -1; 395 } 396 397 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 398 insn->vectorExtensionType = TYPE_VEX_3B; 399 else 400 --insn->readerCursor; 401 402 if (insn->vectorExtensionType == TYPE_VEX_3B) { 403 insn->vectorExtensionPrefix[0] = byte; 404 consume(insn, insn->vectorExtensionPrefix[1]); 405 consume(insn, insn->vectorExtensionPrefix[2]); 406 407 // We simulate the REX prefix for simplicity's sake 408 409 if (insn->mode == MODE_64BIT) 410 insn->rexPrefix = 0x40 | 411 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | 412 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | 413 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | 414 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); 415 416 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 417 insn->vectorExtensionPrefix[0], 418 insn->vectorExtensionPrefix[1], 419 insn->vectorExtensionPrefix[2])); 420 } 421 } else if (byte == 0xc5) { 422 uint8_t byte1; 423 if (peek(insn, byte1)) { 424 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX"); 425 return -1; 426 } 427 428 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) 429 insn->vectorExtensionType = TYPE_VEX_2B; 430 else 431 --insn->readerCursor; 432 433 if (insn->vectorExtensionType == TYPE_VEX_2B) { 434 insn->vectorExtensionPrefix[0] = byte; 435 consume(insn, insn->vectorExtensionPrefix[1]); 436 437 if (insn->mode == MODE_64BIT) 438 insn->rexPrefix = 439 0x40 | 
(rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); 440 441 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { 442 default: 443 break; 444 case VEX_PREFIX_66: 445 insn->hasOpSize = true; 446 break; 447 } 448 449 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx", 450 insn->vectorExtensionPrefix[0], 451 insn->vectorExtensionPrefix[1])); 452 } 453 } else if (byte == 0x8f) { 454 uint8_t byte1; 455 if (peek(insn, byte1)) { 456 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP"); 457 return -1; 458 } 459 460 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction. 461 insn->vectorExtensionType = TYPE_XOP; 462 else 463 --insn->readerCursor; 464 465 if (insn->vectorExtensionType == TYPE_XOP) { 466 insn->vectorExtensionPrefix[0] = byte; 467 consume(insn, insn->vectorExtensionPrefix[1]); 468 consume(insn, insn->vectorExtensionPrefix[2]); 469 470 // We simulate the REX prefix for simplicity's sake 471 472 if (insn->mode == MODE_64BIT) 473 insn->rexPrefix = 0x40 | 474 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | 475 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | 476 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | 477 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); 478 479 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { 480 default: 481 break; 482 case VEX_PREFIX_66: 483 insn->hasOpSize = true; 484 break; 485 } 486 487 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", 488 insn->vectorExtensionPrefix[0], 489 insn->vectorExtensionPrefix[1], 490 insn->vectorExtensionPrefix[2])); 491 } 492 } else if (isREX2(insn, byte)) { 493 uint8_t byte1; 494 if (peek(insn, byte1)) { 495 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2"); 496 return -1; 497 } 498 insn->rex2ExtensionPrefix[0] = byte; 499 consume(insn, insn->rex2ExtensionPrefix[1]); 500 501 // We simulate the REX prefix for simplicity's sake 502 insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) | 503 
(rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) | 504 (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) | 505 (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0); 506 LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", 507 insn->rex2ExtensionPrefix[0], 508 insn->rex2ExtensionPrefix[1])); 509 } else if (isREX(insn, byte)) { 510 if (peek(insn, nextByte)) 511 return -1; 512 insn->rexPrefix = byte; 513 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte)); 514 } else 515 --insn->readerCursor; 516 517 if (insn->mode == MODE_16BIT) { 518 insn->registerSize = (insn->hasOpSize ? 4 : 2); 519 insn->addressSize = (insn->hasAdSize ? 4 : 2); 520 insn->displacementSize = (insn->hasAdSize ? 4 : 2); 521 insn->immediateSize = (insn->hasOpSize ? 4 : 2); 522 } else if (insn->mode == MODE_32BIT) { 523 insn->registerSize = (insn->hasOpSize ? 2 : 4); 524 insn->addressSize = (insn->hasAdSize ? 2 : 4); 525 insn->displacementSize = (insn->hasAdSize ? 2 : 4); 526 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 527 } else if (insn->mode == MODE_64BIT) { 528 insn->displacementSize = 4; 529 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { 530 insn->registerSize = 8; 531 insn->addressSize = (insn->hasAdSize ? 4 : 8); 532 insn->immediateSize = 4; 533 insn->hasOpSize = false; 534 } else { 535 insn->registerSize = (insn->hasOpSize ? 2 : 4); 536 insn->addressSize = (insn->hasAdSize ? 4 : 8); 537 insn->immediateSize = (insn->hasOpSize ? 2 : 4); 538 } 539 } 540 541 return 0; 542 } 543 544 // Consumes the SIB byte to determine addressing information. 
// Sets insn->sib, sibIndex, sibScale and sibBase, and may set
// insn->eaDisplacement when the base field is 0x5/0xd.
//
// @param insn - The instruction whose SIB byte is to be read. Its addressSize
//               must be 4 or 8; any other value hits llvm_unreachable.
// @return     - 0 on success; -1 if the SIB byte could not be read.
static int readSIB(struct InternalInstruction *insn) {
  SIBBase sibBaseBase = SIB_BASE_NONE;
  uint8_t index, base;

  LLVM_DEBUG(dbgs() << "readSIB()");
  switch (insn->addressSize) {
  case 2:
  default:
    llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  case 4:
    insn->sibIndexBase = SIB_INDEX_EAX;
    sibBaseBase = SIB_BASE_EAX;
    break;
  case 8:
    insn->sibIndexBase = SIB_INDEX_RAX;
    sibBaseBase = SIB_BASE_RAX;
    break;
  }

  if (consume(insn, insn->sib))
    return -1;

  // Extend the 3-bit index field with the X bit of REX (bit 3) and the X4 bit
  // of REX2 (bit 4).
  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
          (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  // An extended index of exactly 4 (i.e. field == 4 with no extension bits
  // set) means there is no index register.
  if (index == 0x4) {
    insn->sibIndex = SIB_INDEX_NONE;
  } else {
    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  }

  insn->sibScale = 1 << scaleFromSIB(insn->sib);

  // Extend the 3-bit base field the same way, using the B bits.
  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
         (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  switch (base) {
  case 0x5:
  case 0xd:
    // For these base values the meaning depends on ModR/M.mod: with mod == 0
    // there is no base register and a 32-bit displacement follows.
    switch (modFromModRM(insn->modRM)) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = SIB_BASE_NONE;
      break;
    case 0x1:
      insn->eaDisplacement = EA_DISP_8;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    case 0x2:
      insn->eaDisplacement = EA_DISP_32;
      insn->sibBase = (SIBBase)(sibBaseBase + base);
      break;
    default:
      llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
    }
    break;
  default:
    insn->sibBase = (SIBBase)(sibBaseBase + base);
    break;
  }

  return 0;
}

// Reads the displacement selected by insn->eaDisplacement (none, 8, 16 or 32
// bits) as a signed value into insn->displacement, and records the offset of
// the displacement within the instruction in insn->displacementOffset.
//
// @param insn - The instruction whose displacement is to be read.
// @return     - 0 on success; -1 if the displacement bytes could not be read.
static int readDisplacement(struct InternalInstruction *insn) {
  int8_t d8;
  int16_t d16;
  int32_t d32;
  LLVM_DEBUG(dbgs() << "readDisplacement()");

  // Recorded even when there is no displacement to read.
  insn->displacementOffset = insn->readerCursor - insn->startLocation;
  switch (insn->eaDisplacement) {
  case EA_DISP_NONE:
    break;
  case EA_DISP_8:
    if (consume(insn, d8))
      return -1;
    insn->displacement = d8;
    break;
  case EA_DISP_16:
    if (consume(insn, d16))
      return -1;
    insn->displacement = d16;
    break;
  case EA_DISP_32:
    if (consume(insn, d32))
      return -1;
    insn->displacement = d32;
    break;
  }

  return 0;
}

// Consumes all addressing information (ModR/M byte, SIB byte, and
// displacement).
//
// Does nothing if the ModR/M byte was already consumed
// (insn->consumedModRM).
//
// @param insn - The instruction whose addressing bytes are to be read.
// @return     - 0 on success; -1 if any required byte could not be read.
static int readModRM(struct InternalInstruction *insn) {
  uint8_t mod, rm, reg;
  LLVM_DEBUG(dbgs() << "readModRM()");

  if (insn->consumedModRM)
    return 0;

  if (consume(insn, insn->modRM))
    return -1;
  insn->consumedModRM = true;

  mod = modFromModRM(insn->modRM);
  rm = rmFromModRM(insn->modRM);
  reg = regFromModRM(insn->modRM);

  // This goes by insn->registerSize to pick the correct register, which messes
  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  // fixupReg().
  switch (insn->registerSize) {
  case 2:
    insn->regBase = MODRM_REG_AX;
    insn->eaRegBase = EA_REG_AX;
    break;
  case 4:
    insn->regBase = MODRM_REG_EAX;
    insn->eaRegBase = EA_REG_EAX;
    break;
  case 8:
    insn->regBase = MODRM_REG_RAX;
    insn->eaRegBase = EA_REG_RAX;
    break;
  }

  // Extend reg and rm with the R/B bits of REX (bit 3) and the R4/B4 bits of
  // REX2 (bit 4).
  reg |= (rFromREX(insn->rexPrefix) << 3) |
         (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
  rm |= (bFromREX(insn->rexPrefix) << 3) |
        (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;

  insn->reg = (Reg)(insn->regBase + reg);

  switch (insn->addressSize) {
  case 2: {
    // 16-bit addressing: no SIB byte; rm selects a base from the table that
    // starts at EA_BASE_BX_SI.
    EABase eaBaseBase = EA_BASE_BX_SI;

    switch (mod) {
    case 0x0:
      if (rm == 0x6) {
        // mod == 0, rm == 6: no base register, 16-bit displacement only.
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_16;
        if (readDisplacement(insn))
          return -1;
      } else {
        insn->eaBase = (EABase)(eaBaseBase + rm);
        insn->eaDisplacement = EA_DISP_NONE;
      }
      break;
    case 0x1:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_8;
      insn->displacementSize = 1;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x2:
      insn->eaBase = (EABase)(eaBaseBase + rm);
      insn->eaDisplacement = EA_DISP_16;
      if (readDisplacement(insn))
        return -1;
      break;
    case 0x3:
      // Register operand. NOTE(review): eaDisplacement is not assigned on
      // this path, so readDisplacement() acts on whatever value it already
      // holds - presumably EA_DISP_NONE; confirm against the caller's
      // initialisation.
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      if (readDisplacement(insn))
        return -1;
      break;
    }
    break;
  }
  case 4:
  case 8: {
    EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

    switch (mod) {
    case 0x0:
      insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
      // In determining whether RIP-relative mode is used (rm=5),
      // or whether a SIB byte is present (rm=4),
      // the extension bits (REX.b and EVEX.x) are ignored.
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      case 0x5: // RIP-relative
        insn->eaBase = EA_BASE_NONE;
        insn->eaDisplacement = EA_DISP_32;
        if (readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        break;
      }
      break;
    case 0x1:
      insn->displacementSize = 1;
      [[fallthrough]];
    case 0x2:
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
      switch (rm & 7) {
      case 0x4: // SIB byte is present
        // NOTE(review): unlike the mod == 0 case above, this always uses
        // EA_BASE_sib, even when addressSize == 8 - confirm that consumers
        // treat EA_BASE_sib and EA_BASE_sib64 alike.
        insn->eaBase = EA_BASE_sib;
        if (readSIB(insn) || readDisplacement(insn))
          return -1;
        break;
      default:
        insn->eaBase = (EABase)(eaBaseBase + rm);
        if (readDisplacement(insn))
          return -1;
        break;
      }
      break;
    case 0x3:
      // Register operand: no memory access, no displacement.
      insn->eaDisplacement = EA_DISP_NONE;
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
      break;
    }
    break;
  }
  } // switch (insn->addressSize)

  return 0;
}

// Defines a function `name` that maps a register index to the enumerator of
// the register class implied by an operand type.
//
// name   - The name of the function to define.
// base   - Expression giving the first register for TYPE_Rv operands.
// prefix - Token pasted in front of each register enumerator
//          (e.g. prefix##_XMM0).
//
// The generated function sets *valid to 0 for an unhandled type or an index
// outside the range checked for the class, and to 1 otherwise. Comments are
// deliberately kept out of the macro body because of the line continuations.
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
        return prefix##_SPL + (index - 4);                                     \
      else                                                                     \
        return prefix##_AL + index;                                            \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      if (index > 15)                                                          \
        *valid = 0;                                                            \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }

// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)

// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
//
// @param insn - See fixup*Value().
// @param op   - The operand specifier.
// @return     - 0 if fixup was successful; -1 if the register returned was
//               invalid for its class.
861 static int fixupReg(struct InternalInstruction *insn, 862 const struct OperandSpecifier *op) { 863 uint8_t valid; 864 LLVM_DEBUG(dbgs() << "fixupReg()"); 865 866 switch ((OperandEncoding)op->encoding) { 867 default: 868 debug("Expected a REG or R/M encoding in fixupReg"); 869 return -1; 870 case ENCODING_VVVV: 871 insn->vvvv = 872 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); 873 if (!valid) 874 return -1; 875 break; 876 case ENCODING_REG: 877 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, 878 insn->reg - insn->regBase, &valid); 879 if (!valid) 880 return -1; 881 break; 882 CASE_ENCODING_RM: 883 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && 884 modFromModRM(insn->modRM) == 3) { 885 // EVEX_X can extend the register id to 32 for a non-GPR register that is 886 // encoded in RM. 887 // mode : MODE_64_BIT 888 // Only 8 vector registers are available in 32 bit mode 889 // mod : 3 890 // RM encodes a register 891 switch (op->type) { 892 case TYPE_Rv: 893 case TYPE_R8: 894 case TYPE_R16: 895 case TYPE_R32: 896 case TYPE_R64: 897 break; 898 default: 899 insn->eaBase = 900 (EABase)(insn->eaBase + 901 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4)); 902 break; 903 } 904 } 905 [[fallthrough]]; 906 case ENCODING_SIB: 907 if (insn->eaBase >= insn->eaRegBase) { 908 insn->eaBase = (EABase)fixupRMValue( 909 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); 910 if (!valid) 911 return -1; 912 } 913 break; 914 } 915 916 return 0; 917 } 918 919 // Read the opcode (except the ModR/M byte in the case of extended or escape 920 // opcodes). 
921 static bool readOpcode(struct InternalInstruction *insn) { 922 uint8_t current; 923 LLVM_DEBUG(dbgs() << "readOpcode()"); 924 925 insn->opcodeType = ONEBYTE; 926 if (insn->vectorExtensionType == TYPE_EVEX) { 927 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { 928 default: 929 LLVM_DEBUG( 930 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)", 931 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1]))); 932 return true; 933 case VEX_LOB_0F: 934 insn->opcodeType = TWOBYTE; 935 return consume(insn, insn->opcode); 936 case VEX_LOB_0F38: 937 insn->opcodeType = THREEBYTE_38; 938 return consume(insn, insn->opcode); 939 case VEX_LOB_0F3A: 940 insn->opcodeType = THREEBYTE_3A; 941 return consume(insn, insn->opcode); 942 case VEX_LOB_MAP4: 943 insn->opcodeType = MAP4; 944 return consume(insn, insn->opcode); 945 case VEX_LOB_MAP5: 946 insn->opcodeType = MAP5; 947 return consume(insn, insn->opcode); 948 case VEX_LOB_MAP6: 949 insn->opcodeType = MAP6; 950 return consume(insn, insn->opcode); 951 case VEX_LOB_MAP7: 952 insn->opcodeType = MAP7; 953 return consume(insn, insn->opcode); 954 } 955 } else if (insn->vectorExtensionType == TYPE_VEX_3B) { 956 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { 957 default: 958 LLVM_DEBUG( 959 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 960 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 961 return true; 962 case VEX_LOB_0F: 963 insn->opcodeType = TWOBYTE; 964 return consume(insn, insn->opcode); 965 case VEX_LOB_0F38: 966 insn->opcodeType = THREEBYTE_38; 967 return consume(insn, insn->opcode); 968 case VEX_LOB_0F3A: 969 insn->opcodeType = THREEBYTE_3A; 970 return consume(insn, insn->opcode); 971 case VEX_LOB_MAP5: 972 insn->opcodeType = MAP5; 973 return consume(insn, insn->opcode); 974 case VEX_LOB_MAP6: 975 insn->opcodeType = MAP6; 976 return consume(insn, insn->opcode); 977 case VEX_LOB_MAP7: 978 insn->opcodeType = MAP7; 979 return consume(insn, insn->opcode); 980 } 981 } else if 
(insn->vectorExtensionType == TYPE_VEX_2B) { 982 insn->opcodeType = TWOBYTE; 983 return consume(insn, insn->opcode); 984 } else if (insn->vectorExtensionType == TYPE_XOP) { 985 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { 986 default: 987 LLVM_DEBUG( 988 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)", 989 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]))); 990 return true; 991 case XOP_MAP_SELECT_8: 992 insn->opcodeType = XOP8_MAP; 993 return consume(insn, insn->opcode); 994 case XOP_MAP_SELECT_9: 995 insn->opcodeType = XOP9_MAP; 996 return consume(insn, insn->opcode); 997 case XOP_MAP_SELECT_A: 998 insn->opcodeType = XOPA_MAP; 999 return consume(insn, insn->opcode); 1000 } 1001 } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) { 1002 // m bit indicates opcode map 1 1003 insn->opcodeType = TWOBYTE; 1004 return consume(insn, insn->opcode); 1005 } 1006 1007 if (consume(insn, current)) 1008 return true; 1009 1010 if (current == 0x0f) { 1011 LLVM_DEBUG( 1012 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current)); 1013 if (consume(insn, current)) 1014 return true; 1015 1016 if (current == 0x38) { 1017 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1018 current)); 1019 if (consume(insn, current)) 1020 return true; 1021 1022 insn->opcodeType = THREEBYTE_38; 1023 } else if (current == 0x3a) { 1024 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)", 1025 current)); 1026 if (consume(insn, current)) 1027 return true; 1028 1029 insn->opcodeType = THREEBYTE_3A; 1030 } else if (current == 0x0f) { 1031 LLVM_DEBUG( 1032 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current)); 1033 1034 // Consume operands before the opcode to comply with the 3DNow encoding 1035 if (readModRM(insn)) 1036 return true; 1037 1038 if (consume(insn, current)) 1039 return true; 1040 1041 insn->opcodeType = THREEDNOW_MAP; 1042 } else { 1043 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape 
prefix"); 1044 insn->opcodeType = TWOBYTE; 1045 } 1046 } else if (insn->mandatoryPrefix) 1047 // The opcode with mandatory prefix must start with opcode escape. 1048 // If not it's legacy repeat prefix 1049 insn->mandatoryPrefix = 0; 1050 1051 // At this point we have consumed the full opcode. 1052 // Anything we consume from here on must be unconsumed. 1053 insn->opcode = current; 1054 1055 return false; 1056 } 1057 1058 // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit). 1059 static bool is16BitEquivalent(const char *orig, const char *equiv) { 1060 for (int i = 0;; i++) { 1061 if (orig[i] == '\0' && equiv[i] == '\0') 1062 return true; 1063 if (orig[i] == '\0' || equiv[i] == '\0') 1064 return false; 1065 if (orig[i] != equiv[i]) { 1066 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') 1067 continue; 1068 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') 1069 continue; 1070 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') 1071 continue; 1072 return false; 1073 } 1074 } 1075 } 1076 1077 // Determine whether this instruction is a 64-bit instruction. 1078 static bool is64Bit(const char *name) { 1079 for (int i = 0;; ++i) { 1080 if (name[i] == '\0') 1081 return false; 1082 if (name[i] == '6' && name[i + 1] == '4') 1083 return true; 1084 } 1085 } 1086 1087 // Determine the ID of an instruction, consuming the ModR/M byte as appropriate 1088 // for extended and escape opcodes, and using a supplied attribute mask. 
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
                                        struct InternalInstruction *insn,
                                        uint16_t attrMask) {
  // The attribute mask indexes the context table to yield the instruction
  // context used for every table lookup below.
  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  // Select the decision table that belongs to the instruction's opcode map.
  const ContextDecision *decision;
  switch (insn->opcodeType) {
  case ONEBYTE:
    decision = &ONEBYTE_SYM;
    break;
  case TWOBYTE:
    decision = &TWOBYTE_SYM;
    break;
  case THREEBYTE_38:
    decision = &THREEBYTE38_SYM;
    break;
  case THREEBYTE_3A:
    decision = &THREEBYTE3A_SYM;
    break;
  case XOP8_MAP:
    decision = &XOP8_MAP_SYM;
    break;
  case XOP9_MAP:
    decision = &XOP9_MAP_SYM;
    break;
  case XOPA_MAP:
    decision = &XOPA_MAP_SYM;
    break;
  case THREEDNOW_MAP:
    decision = &THREEDNOW_MAP_SYM;
    break;
  case MAP4:
    decision = &MAP4_SYM;
    break;
  case MAP5:
    decision = &MAP5_SYM;
    break;
  case MAP6:
    decision = &MAP6_SYM;
    break;
  case MAP7:
    decision = &MAP7_SYM;
    break;
  }

  // Any decision type other than MODRM_ONEENTRY needs the ModR/M byte to pick
  // the instruction, so it is read here and passed to decode(). Otherwise 0
  // is passed in its place.
  if (decision->opcodeDecisions[insnCtx]
          .modRMDecisions[insn->opcode]
          .modrm_type != MODRM_ONEENTRY) {
    if (readModRM(insn))
      return -1;
    *instructionID =
        decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  } else {
    *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  }

  return 0;
}

// Determine whether the instruction matches one of the MAP4 opcode patterns
// listed below (the CCMP/CTEST forms, going by the function name). Some of the
// patterns also depend on the reg field of the ModR/M byte.
// NOTE(review): insn->modRM is read directly; getInstructionID only calls this
// after readModRM() has succeeded - confirm the same holds for other callers.
static bool isCCMPOrCTEST(InternalInstruction *insn) {
  if (insn->opcodeType != MAP4)
    return false;
  if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)
    return true;
  // Clearing the low opcode bit makes each case match an even/odd opcode pair.
  switch (insn->opcode & 0xfe) {
  default:
    return false;
  case 0x38:
  case 0x3a:
  case 0x84:
    return true;
  case 0x80:
    return regFromModRM(insn->modRM) == 7;
  case 0xf6:
    return regFromModRM(insn->modRM) == 0;
  }
}

// Determine whether the EVEX NF bit applies to this instruction: the bit must
// be set in the fourth EVEX prefix byte, and the instruction must either be in
// MAP4 or be one of the THREEBYTE_38 opcodes listed below with no pp prefix.
static bool isNF(InternalInstruction *insn) {
  if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    return false;
  if (insn->opcodeType == MAP4)
    return true;
  // Below NF instructions are not in map4.
  if (insn->opcodeType == THREEBYTE_38 &&
      ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {
    switch (insn->opcode) {
    case 0xf2: // ANDN
    case 0xf3: // BLSI, BLSR, BLSMSK
    case 0xf5: // BZHI
    case 0xf7: // BEXTR
      return true;
    default:
      break;
    }
  }
  return false;
}

// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// @param insn - The instruction being decoded. On success its instructionID
//               and spec fields are set.
// @param mii  - Instruction info, used to look up instruction names for the
//               table-compensation checks below.
// @return     - 0 on success; -1 if the vector extension type is not
//               recognized or an instruction ID lookup fails.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // Every non-EVEX vector extension (including XOP) is tagged ATTR_VEX.
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXB;
      // readModRM() is called before isCCMPOrCTEST() because the latter
      // inspects the ModR/M byte.
      if (isNF(insn) && !readModRM(insn) &&
          !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
        attrMask |= ATTR_EVEXNF;
      // aaa is not used as an opmask in MAP4
      else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
               (insn->opcodeType != MAP4))
        attrMask |= ATTR_EVEXK;
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // A set REX.W bit (0x08) adds ATTR_REXW and drops ATTR_ADSIZE.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  // Absolute jump and pushp/popp need special handling
  if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
      (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
    attrMask |= ATTR_REX2;

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    // The same forcing is applied to two-byte opcodes 0x80 through 0x8F.
    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }


  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      // If the lookup with ATTR_REXW fails, keep the ID found without it.
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
      (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Switch to the OpSize variant only if it is the 16-bit twin by name and
    // exactly one of "16-bit mode" and "OpSize prefix present" holds.
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup failed: restore the real opcode and keep the original ID.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}

// Read an operand from the opcode field of an instruction and interprets it
// appropriately given the operand width. Handles AddRegFrm instructions.
//
// @param insn  - the instruction whose opcode field is to be read.
// @param size  - The width (in bytes) of the register being specified.
1493 // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means 1494 // RAX. 1495 // @return - 0 on success; nonzero otherwise. 1496 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { 1497 LLVM_DEBUG(dbgs() << "readOpcodeRegister()"); 1498 1499 if (size == 0) 1500 size = insn->registerSize; 1501 1502 auto setOpcodeRegister = [&](unsigned base) { 1503 insn->opcodeRegister = 1504 (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) | 1505 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) | 1506 (insn->opcode & 7))); 1507 }; 1508 1509 switch (size) { 1510 case 1: 1511 setOpcodeRegister(MODRM_REG_AL); 1512 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && 1513 insn->opcodeRegister < MODRM_REG_AL + 0x8) { 1514 insn->opcodeRegister = 1515 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); 1516 } 1517 1518 break; 1519 case 2: 1520 setOpcodeRegister(MODRM_REG_AX); 1521 break; 1522 case 4: 1523 setOpcodeRegister(MODRM_REG_EAX); 1524 break; 1525 case 8: 1526 setOpcodeRegister(MODRM_REG_RAX); 1527 break; 1528 } 1529 1530 return 0; 1531 } 1532 1533 // Consume an immediate operand from an instruction, given the desired operand 1534 // size. 1535 // 1536 // @param insn - The instruction whose operand is to be read. 1537 // @param size - The width (in bytes) of the operand. 1538 // @return - 0 if the immediate was successfully consumed; nonzero 1539 // otherwise. 
static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
  uint8_t imm8;
  uint16_t imm16;
  uint32_t imm32;
  uint64_t imm64;

  LLVM_DEBUG(dbgs() << "readImmediate()");

  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");

  // Record the size and the offset (relative to the start of the instruction)
  // of the immediate that is about to be read.
  insn->immediateSize = size;
  insn->immediateOffset = insn->readerCursor - insn->startLocation;

  // Read a value of the requested width and store it in the next free
  // immediates[] slot.
  switch (size) {
  case 1:
    if (consume(insn, imm8))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm8;
    break;
  case 2:
    if (consume(insn, imm16))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm16;
    break;
  case 4:
    if (consume(insn, imm32))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm32;
    break;
  case 8:
    if (consume(insn, imm64))
      return -1;
    insn->immediates[insn->numImmediatesConsumed] = imm64;
    break;
  default:
    llvm_unreachable("invalid size");
  }

  insn->numImmediatesConsumed++;

  return 0;
}

// Consume vvvv from an instruction if it has a VEX prefix.
//
// @param insn  - The instruction; on success insn->vvvv receives the value.
// @return      - 0 on success; -1 if the instruction has no EVEX, VEX or XOP
//                prefix.
static int readVVVV(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readVVVV()");

  int vvvv;
  // For EVEX the v2 bit of the fourth prefix byte supplies bit 4 of the value.
  if (insn->vectorExtensionType == TYPE_EVEX)
    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
  else if (insn->vectorExtensionType == TYPE_VEX_3B)
    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
  else if (insn->vectorExtensionType == TYPE_VEX_2B)
    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
  else if (insn->vectorExtensionType == TYPE_XOP)
    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
  else
    return -1;

  if (insn->mode != MODE_64BIT)
    vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.

  insn->vvvv = static_cast<Reg>(vvvv);
  return 0;
}

// Read a mask register from the EVEX prefix of an instruction (the aaa field
// of the fourth prefix byte).
//
// @param insn    - The instruction whose writemask is to be read.
// @return        - 0 on success; nonzero otherwise (the instruction is not
//                  EVEX-encoded).
static int readMaskRegister(struct InternalInstruction *insn) {
  LLVM_DEBUG(dbgs() << "readMaskRegister()");

  if (insn->vectorExtensionType != TYPE_EVEX)
    return -1;

  insn->writemask =
      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
  return 0;
}

// Consults the specifier for an instruction and consumes all
// operands for that instruction, interpreting them as it goes.
//
// @param insn  - The instruction whose operands are to be read; insn->spec
//                must already be set.
// @return      - 0 on success; -1 if an operand cannot be read or the
//                encoding is inconsistent with the operand list.
static int readOperands(struct InternalInstruction *insn) {
  int hasVVVV, needVVVV;
  int sawRegImm = 0;

  LLVM_DEBUG(dbgs() << "readOperands()");

  // If non-zero vvvv specified, make sure one of the operands uses it.
  hasVVVV = !readVVVV(insn);
  needVVVV = hasVVVV && (insn->vvvv != 0);

  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
    switch (Op.encoding) {
    case ENCODING_NONE:
    case ENCODING_SI:
    case ENCODING_DI:
      break;
    CASE_ENCODING_VSIB:
      // VSIB can use the V2 bit so check only the other bits.
      if (needVVVV)
        needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
      if (readModRM(insn))
        return -1;

      // Reject if SIB wasn't used.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;

      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
      if (insn->sibIndex == SIB_INDEX_NONE)
        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);

      // If EVEX.v2 is set this is one of the 16-31 registers.
      if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
          v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);

      // Adjust the index register to the correct size.
      switch ((OperandType)Op.type) {
      default:
        debug("Unhandled VSIB index type");
        return -1;
      case TYPE_MVSIBX:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBY:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      case TYPE_MVSIBZ:
        insn->sibIndex =
            (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
        break;
      }

      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
      break;
    case ENCODING_SIB:
      // Reject if SIB wasn't used.
      // NOTE(review): eaBase is inspected before the readModRM() call below;
      // presumably ModR/M has already been read while determining the
      // instruction ID - confirm.
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
        return -1;
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_REG:
    CASE_ENCODING_RM:
      if (readModRM(insn))
        return -1;
      if (fixupReg(insn, &Op))
        return -1;
      // Apply the AVX512 compressed displacement scaling factor.
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
        insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
      break;
    case ENCODING_IB:
      if (sawRegImm) {
        // Saw a register immediate so don't read again and instead split the
        // previous immediate. FIXME: This is a hack.
        insn->immediates[insn->numImmediatesConsumed] =
            insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
        ++insn->numImmediatesConsumed;
        break;
      }
      if (readImmediate(insn, 1))
        return -1;
      // An XMM/YMM-typed byte immediate is remembered so that a following IB
      // operand reuses this byte instead of reading another one.
      if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
        sawRegImm = 1;
      break;
    case ENCODING_IW:
      if (readImmediate(insn, 2))
        return -1;
      break;
    case ENCODING_ID:
      if (readImmediate(insn, 4))
        return -1;
      break;
    case ENCODING_IO:
      if (readImmediate(insn, 8))
        return -1;
      break;
    case ENCODING_Iv:
      if (readImmediate(insn, insn->immediateSize))
        return -1;
      break;
    case ENCODING_Ia:
      if (readImmediate(insn, insn->addressSize))
        return -1;
      break;
    case ENCODING_IRC:
      // RC is assembled from the EVEX L'L bits (L2 is the high bit).
      insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
                 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
      break;
    case ENCODING_RB:
      if (readOpcodeRegister(insn, 1))
        return -1;
      break;
    case ENCODING_RW:
      if (readOpcodeRegister(insn, 2))
        return -1;
      break;
    case ENCODING_RD:
      if (readOpcodeRegister(insn, 4))
        return -1;
      break;
    case ENCODING_RO:
      if (readOpcodeRegister(insn, 8))
        return -1;
      break;
    case ENCODING_Rv:
      if (readOpcodeRegister(insn, 0))
        return -1;
      break;
    case ENCODING_CF:
      insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);
      needVVVV = false; // oszc shares the same bits with VVVV
      break;
    case ENCODING_CC:
      // For CCMP/CTEST the condition code comes from the EVEX prefix;
      // otherwise it is the low nibble of the opcode.
      if (isCCMPOrCTEST(insn))
        insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);
      else
        insn->immediates[1] = insn->opcode & 0xf;
      break;
    case ENCODING_FP:
      break;
    case ENCODING_VVVV:
      needVVVV = 0; // Mark that we have found a VVVV operand.
      if (!hasVVVV)
        return -1;
      // Outside 64-bit mode only the low three bits are kept (readVVVV()
      // already cleared bit 4).
      if (insn->mode != MODE_64BIT)
        insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
      if (fixupReg(insn, &Op))
        return -1;
      break;
    case ENCODING_WRITEMASK:
      if (readMaskRegister(insn))
        return -1;
      break;
    case ENCODING_DUP:
      break;
    default:
      LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
      return -1;
    }
  }

  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
  if (needVVVV)
    return -1;

  return 0;
}

namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI = 501,
    BP_SI = 502,
    BP_DI = 503,
    sib   = 504,
    sib64 = 505
  };
} // namespace X86

} // namespace llvm

// Forward declaration; the function is defined later in this file and is
// called from X86GenericDisassembler::getInstruction().
static bool translateInstruction(MCInst &target,
                                InternalInstruction &source,
                                const MCDisassembler *Dis);

namespace {

/// Generic disassembler for all X86 platforms. All each platform class should
/// have to do is subclass the constructor, and provide a different
/// disassemblerMode value.
1827 class X86GenericDisassembler : public MCDisassembler { 1828 std::unique_ptr<const MCInstrInfo> MII; 1829 public: 1830 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 1831 std::unique_ptr<const MCInstrInfo> MII); 1832 public: 1833 DecodeStatus getInstruction(MCInst &instr, uint64_t &size, 1834 ArrayRef<uint8_t> Bytes, uint64_t Address, 1835 raw_ostream &cStream) const override; 1836 1837 private: 1838 DisassemblerMode fMode; 1839 }; 1840 1841 } // namespace 1842 1843 X86GenericDisassembler::X86GenericDisassembler( 1844 const MCSubtargetInfo &STI, 1845 MCContext &Ctx, 1846 std::unique_ptr<const MCInstrInfo> MII) 1847 : MCDisassembler(STI, Ctx), MII(std::move(MII)) { 1848 const FeatureBitset &FB = STI.getFeatureBits(); 1849 if (FB[X86::Is16Bit]) { 1850 fMode = MODE_16BIT; 1851 return; 1852 } else if (FB[X86::Is32Bit]) { 1853 fMode = MODE_32BIT; 1854 return; 1855 } else if (FB[X86::Is64Bit]) { 1856 fMode = MODE_64BIT; 1857 return; 1858 } 1859 1860 llvm_unreachable("Invalid CPU mode"); 1861 } 1862 1863 MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( 1864 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 1865 raw_ostream &CStream) const { 1866 CommentStream = &CStream; 1867 1868 InternalInstruction Insn; 1869 memset(&Insn, 0, sizeof(InternalInstruction)); 1870 Insn.bytes = Bytes; 1871 Insn.startLocation = Address; 1872 Insn.readerCursor = Address; 1873 Insn.mode = fMode; 1874 1875 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) || 1876 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 || 1877 readOperands(&Insn)) { 1878 Size = Insn.readerCursor - Address; 1879 return Fail; 1880 } 1881 1882 Insn.operands = x86OperandSets[Insn.spec->operands]; 1883 Insn.length = Insn.readerCursor - Insn.startLocation; 1884 Size = Insn.length; 1885 if (Size > 15) 1886 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit"); 1887 1888 bool Ret = translateInstruction(Instr, Insn, this); 1889 if 
(!Ret) { 1890 unsigned Flags = X86::IP_NO_PREFIX; 1891 if (Insn.hasAdSize) 1892 Flags |= X86::IP_HAS_AD_SIZE; 1893 if (!Insn.mandatoryPrefix) { 1894 if (Insn.hasOpSize) 1895 Flags |= X86::IP_HAS_OP_SIZE; 1896 if (Insn.repeatPrefix == 0xf2) 1897 Flags |= X86::IP_HAS_REPEAT_NE; 1898 else if (Insn.repeatPrefix == 0xf3 && 1899 // It should not be 'pause' f3 90 1900 Insn.opcode != 0x90) 1901 Flags |= X86::IP_HAS_REPEAT; 1902 if (Insn.hasLockPrefix) 1903 Flags |= X86::IP_HAS_LOCK; 1904 } 1905 Instr.setFlags(Flags); 1906 } 1907 return (!Ret) ? Success : Fail; 1908 } 1909 1910 // 1911 // Private code that translates from struct InternalInstructions to MCInsts. 1912 // 1913 1914 /// translateRegister - Translates an internal register to the appropriate LLVM 1915 /// register, and appends it as an operand to an MCInst. 1916 /// 1917 /// @param mcInst - The MCInst to append to. 1918 /// @param reg - The Reg to append. 1919 static void translateRegister(MCInst &mcInst, Reg reg) { 1920 #define ENTRY(x) X86::x, 1921 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; 1922 #undef ENTRY 1923 1924 MCPhysReg llvmRegnum = llvmRegnums[reg]; 1925 mcInst.addOperand(MCOperand::createReg(llvmRegnum)); 1926 } 1927 1928 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { 1929 0, // SEG_OVERRIDE_NONE 1930 X86::CS, 1931 X86::SS, 1932 X86::DS, 1933 X86::ES, 1934 X86::FS, 1935 X86::GS 1936 }; 1937 1938 /// translateSrcIndex - Appends a source index operand to an MCInst. 1939 /// 1940 /// @param mcInst - The MCInst to append to. 1941 /// @param insn - The internal instruction. 1942 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { 1943 unsigned baseRegNo; 1944 1945 if (insn.mode == MODE_64BIT) 1946 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; 1947 else if (insn.mode == MODE_32BIT) 1948 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; 1949 else { 1950 assert(insn.mode == MODE_16BIT); 1951 baseRegNo = insn.hasAdSize ? 
X86::ESI : X86::SI; 1952 } 1953 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1954 mcInst.addOperand(baseReg); 1955 1956 MCOperand segmentReg; 1957 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 1958 mcInst.addOperand(segmentReg); 1959 return false; 1960 } 1961 1962 /// translateDstIndex - Appends a destination index operand to an MCInst. 1963 /// 1964 /// @param mcInst - The MCInst to append to. 1965 /// @param insn - The internal instruction. 1966 1967 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { 1968 unsigned baseRegNo; 1969 1970 if (insn.mode == MODE_64BIT) 1971 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; 1972 else if (insn.mode == MODE_32BIT) 1973 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; 1974 else { 1975 assert(insn.mode == MODE_16BIT); 1976 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; 1977 } 1978 MCOperand baseReg = MCOperand::createReg(baseRegNo); 1979 mcInst.addOperand(baseReg); 1980 return false; 1981 } 1982 1983 /// translateImmediate - Appends an immediate operand to an MCInst. 1984 /// 1985 /// @param mcInst - The MCInst to append to. 1986 /// @param immediate - The immediate value to append. 1987 /// @param operand - The operand, as stored in the descriptor table. 1988 /// @param insn - The internal instruction. 1989 static void translateImmediate(MCInst &mcInst, uint64_t immediate, 1990 const OperandSpecifier &operand, 1991 InternalInstruction &insn, 1992 const MCDisassembler *Dis) { 1993 // Sign-extend the immediate if necessary. 
1994 1995 OperandType type = (OperandType)operand.type; 1996 1997 bool isBranch = false; 1998 uint64_t pcrel = 0; 1999 if (type == TYPE_REL) { 2000 isBranch = true; 2001 pcrel = insn.startLocation + insn.length; 2002 switch (operand.encoding) { 2003 default: 2004 break; 2005 case ENCODING_Iv: 2006 switch (insn.displacementSize) { 2007 default: 2008 break; 2009 case 1: 2010 if(immediate & 0x80) 2011 immediate |= ~(0xffull); 2012 break; 2013 case 2: 2014 if(immediate & 0x8000) 2015 immediate |= ~(0xffffull); 2016 break; 2017 case 4: 2018 if(immediate & 0x80000000) 2019 immediate |= ~(0xffffffffull); 2020 break; 2021 case 8: 2022 break; 2023 } 2024 break; 2025 case ENCODING_IB: 2026 if(immediate & 0x80) 2027 immediate |= ~(0xffull); 2028 break; 2029 case ENCODING_IW: 2030 if(immediate & 0x8000) 2031 immediate |= ~(0xffffull); 2032 break; 2033 case ENCODING_ID: 2034 if(immediate & 0x80000000) 2035 immediate |= ~(0xffffffffull); 2036 break; 2037 } 2038 } 2039 // By default sign-extend all X86 immediates based on their encoding. 2040 else if (type == TYPE_IMM) { 2041 switch (operand.encoding) { 2042 default: 2043 break; 2044 case ENCODING_IB: 2045 if(immediate & 0x80) 2046 immediate |= ~(0xffull); 2047 break; 2048 case ENCODING_IW: 2049 if(immediate & 0x8000) 2050 immediate |= ~(0xffffull); 2051 break; 2052 case ENCODING_ID: 2053 if(immediate & 0x80000000) 2054 immediate |= ~(0xffffffffull); 2055 break; 2056 case ENCODING_IO: 2057 break; 2058 } 2059 } 2060 2061 switch (type) { 2062 case TYPE_XMM: 2063 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); 2064 return; 2065 case TYPE_YMM: 2066 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); 2067 return; 2068 case TYPE_ZMM: 2069 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); 2070 return; 2071 default: 2072 // operand is 64 bits wide. Do nothing. 
2073 break; 2074 } 2075 2076 if (!Dis->tryAddingSymbolicOperand( 2077 mcInst, immediate + pcrel, insn.startLocation, isBranch, 2078 insn.immediateOffset, insn.immediateSize, insn.length)) 2079 mcInst.addOperand(MCOperand::createImm(immediate)); 2080 2081 if (type == TYPE_MOFFS) { 2082 MCOperand segmentReg; 2083 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2084 mcInst.addOperand(segmentReg); 2085 } 2086 } 2087 2088 /// translateRMRegister - Translates a register stored in the R/M field of the 2089 /// ModR/M byte to its LLVM equivalent and appends it to an MCInst. 2090 /// @param mcInst - The MCInst to append to. 2091 /// @param insn - The internal instruction to extract the R/M field 2092 /// from. 2093 /// @return - 0 on success; -1 otherwise 2094 static bool translateRMRegister(MCInst &mcInst, 2095 InternalInstruction &insn) { 2096 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2097 debug("A R/M register operand may not have a SIB byte"); 2098 return true; 2099 } 2100 2101 switch (insn.eaBase) { 2102 default: 2103 debug("Unexpected EA base register"); 2104 return true; 2105 case EA_BASE_NONE: 2106 debug("EA_BASE_NONE for ModR/M base"); 2107 return true; 2108 #define ENTRY(x) case EA_BASE_##x: 2109 ALL_EA_BASES 2110 #undef ENTRY 2111 debug("A R/M register operand may not have a base; " 2112 "the operand must be a register."); 2113 return true; 2114 #define ENTRY(x) \ 2115 case EA_REG_##x: \ 2116 mcInst.addOperand(MCOperand::createReg(X86::x)); break; 2117 ALL_REGS 2118 #undef ENTRY 2119 } 2120 2121 return false; 2122 } 2123 2124 /// translateRMMemory - Translates a memory operand stored in the Mod and R/M 2125 /// fields of an internal instruction (and possibly its SIB byte) to a memory 2126 /// operand in LLVM's format, and appends it to an MCInst. 2127 /// 2128 /// @param mcInst - The MCInst to append to. 2129 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2130 /// from. 
2131 /// @param ForceSIB - The instruction must use SIB. 2132 /// @return - 0 on success; nonzero otherwise 2133 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, 2134 const MCDisassembler *Dis, 2135 bool ForceSIB = false) { 2136 // Addresses in an MCInst are represented as five operands: 2137 // 1. basereg (register) The R/M base, or (if there is a SIB) the 2138 // SIB base 2139 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified 2140 // scale amount 2141 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) 2142 // the index (which is multiplied by the 2143 // scale amount) 2144 // 4. displacement (immediate) 0, or the displacement if there is one 2145 // 5. segmentreg (register) x86_registerNONE for now, but could be set 2146 // if we have segment overrides 2147 2148 MCOperand baseReg; 2149 MCOperand scaleAmount; 2150 MCOperand indexReg; 2151 MCOperand displacement; 2152 MCOperand segmentReg; 2153 uint64_t pcrel = 0; 2154 2155 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { 2156 if (insn.sibBase != SIB_BASE_NONE) { 2157 switch (insn.sibBase) { 2158 default: 2159 debug("Unexpected sibBase"); 2160 return true; 2161 #define ENTRY(x) \ 2162 case SIB_BASE_##x: \ 2163 baseReg = MCOperand::createReg(X86::x); break; 2164 ALL_SIB_BASES 2165 #undef ENTRY 2166 } 2167 } else { 2168 baseReg = MCOperand::createReg(X86::NoRegister); 2169 } 2170 2171 if (insn.sibIndex != SIB_INDEX_NONE) { 2172 switch (insn.sibIndex) { 2173 default: 2174 debug("Unexpected sibIndex"); 2175 return true; 2176 #define ENTRY(x) \ 2177 case SIB_INDEX_##x: \ 2178 indexReg = MCOperand::createReg(X86::x); break; 2179 EA_BASES_32BIT 2180 EA_BASES_64BIT 2181 REGS_XMM 2182 REGS_YMM 2183 REGS_ZMM 2184 #undef ENTRY 2185 } 2186 } else { 2187 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, 2188 // but no index is used and modrm alone should have been enough. 2189 // -No base register in 32-bit mode. 
In 64-bit mode this is used to 2190 // avoid rip-relative addressing. 2191 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a 2192 // base always requires a SIB byte. 2193 // -A scale other than 1 is used. 2194 if (!ForceSIB && 2195 (insn.sibScale != 1 || 2196 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || 2197 (insn.sibBase != SIB_BASE_NONE && 2198 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && 2199 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) { 2200 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : 2201 X86::RIZ); 2202 } else 2203 indexReg = MCOperand::createReg(X86::NoRegister); 2204 } 2205 2206 scaleAmount = MCOperand::createImm(insn.sibScale); 2207 } else { 2208 switch (insn.eaBase) { 2209 case EA_BASE_NONE: 2210 if (insn.eaDisplacement == EA_DISP_NONE) { 2211 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); 2212 return true; 2213 } 2214 if (insn.mode == MODE_64BIT){ 2215 pcrel = insn.startLocation + insn.length; 2216 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, 2217 insn.startLocation + 2218 insn.displacementOffset); 2219 // Section 2.2.1.6 2220 baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : 2221 X86::RIP); 2222 } 2223 else 2224 baseReg = MCOperand::createReg(X86::NoRegister); 2225 2226 indexReg = MCOperand::createReg(X86::NoRegister); 2227 break; 2228 case EA_BASE_BX_SI: 2229 baseReg = MCOperand::createReg(X86::BX); 2230 indexReg = MCOperand::createReg(X86::SI); 2231 break; 2232 case EA_BASE_BX_DI: 2233 baseReg = MCOperand::createReg(X86::BX); 2234 indexReg = MCOperand::createReg(X86::DI); 2235 break; 2236 case EA_BASE_BP_SI: 2237 baseReg = MCOperand::createReg(X86::BP); 2238 indexReg = MCOperand::createReg(X86::SI); 2239 break; 2240 case EA_BASE_BP_DI: 2241 baseReg = MCOperand::createReg(X86::BP); 2242 indexReg = MCOperand::createReg(X86::DI); 2243 break; 2244 default: 2245 indexReg = MCOperand::createReg(X86::NoRegister); 2246 switch (insn.eaBase) { 2247 default: 2248 debug("Unexpected eaBase"); 2249 return true; 2250 // Here, we will use the fill-ins defined above. However, 2251 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and 2252 // sib and sib64 were handled in the top-level if, so they're only 2253 // placeholders to keep the compiler happy. 2254 #define ENTRY(x) \ 2255 case EA_BASE_##x: \ 2256 baseReg = MCOperand::createReg(X86::x); break; 2257 ALL_EA_BASES 2258 #undef ENTRY 2259 #define ENTRY(x) case EA_REG_##x: 2260 ALL_REGS 2261 #undef ENTRY 2262 debug("A R/M memory operand may not be a register; " 2263 "the base field must be a base."); 2264 return true; 2265 } 2266 } 2267 2268 scaleAmount = MCOperand::createImm(1); 2269 } 2270 2271 displacement = MCOperand::createImm(insn.displacement); 2272 2273 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); 2274 2275 mcInst.addOperand(baseReg); 2276 mcInst.addOperand(scaleAmount); 2277 mcInst.addOperand(indexReg); 2278 2279 const uint8_t dispSize = 2280 (insn.eaDisplacement == EA_DISP_NONE) ? 
0 : insn.displacementSize; 2281 2282 if (!Dis->tryAddingSymbolicOperand( 2283 mcInst, insn.displacement + pcrel, insn.startLocation, false, 2284 insn.displacementOffset, dispSize, insn.length)) 2285 mcInst.addOperand(displacement); 2286 mcInst.addOperand(segmentReg); 2287 return false; 2288 } 2289 2290 /// translateRM - Translates an operand stored in the R/M (and possibly SIB) 2291 /// byte of an instruction to LLVM form, and appends it to an MCInst. 2292 /// 2293 /// @param mcInst - The MCInst to append to. 2294 /// @param operand - The operand, as stored in the descriptor table. 2295 /// @param insn - The instruction to extract Mod, R/M, and SIB fields 2296 /// from. 2297 /// @return - 0 on success; nonzero otherwise 2298 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, 2299 InternalInstruction &insn, const MCDisassembler *Dis) { 2300 switch (operand.type) { 2301 default: 2302 debug("Unexpected type for a R/M operand"); 2303 return true; 2304 case TYPE_R8: 2305 case TYPE_R16: 2306 case TYPE_R32: 2307 case TYPE_R64: 2308 case TYPE_Rv: 2309 case TYPE_MM64: 2310 case TYPE_XMM: 2311 case TYPE_YMM: 2312 case TYPE_ZMM: 2313 case TYPE_TMM: 2314 case TYPE_VK_PAIR: 2315 case TYPE_VK: 2316 case TYPE_DEBUGREG: 2317 case TYPE_CONTROLREG: 2318 case TYPE_BNDR: 2319 return translateRMRegister(mcInst, insn); 2320 case TYPE_M: 2321 case TYPE_MVSIBX: 2322 case TYPE_MVSIBY: 2323 case TYPE_MVSIBZ: 2324 return translateRMMemory(mcInst, insn, Dis); 2325 case TYPE_MSIB: 2326 return translateRMMemory(mcInst, insn, Dis, true); 2327 } 2328 } 2329 2330 /// translateFPRegister - Translates a stack position on the FPU stack to its 2331 /// LLVM form, and appends it to an MCInst. 2332 /// 2333 /// @param mcInst - The MCInst to append to. 2334 /// @param stackPos - The stack position to translate. 
2335 static void translateFPRegister(MCInst &mcInst, 2336 uint8_t stackPos) { 2337 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); 2338 } 2339 2340 /// translateMaskRegister - Translates a 3-bit mask register number to 2341 /// LLVM form, and appends it to an MCInst. 2342 /// 2343 /// @param mcInst - The MCInst to append to. 2344 /// @param maskRegNum - Number of mask register from 0 to 7. 2345 /// @return - false on success; true otherwise. 2346 static bool translateMaskRegister(MCInst &mcInst, 2347 uint8_t maskRegNum) { 2348 if (maskRegNum >= 8) { 2349 debug("Invalid mask register number"); 2350 return true; 2351 } 2352 2353 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); 2354 return false; 2355 } 2356 2357 /// translateOperand - Translates an operand stored in an internal instruction 2358 /// to LLVM's format and appends it to an MCInst. 2359 /// 2360 /// @param mcInst - The MCInst to append to. 2361 /// @param operand - The operand, as stored in the descriptor table. 2362 /// @param insn - The internal instruction. 2363 /// @return - false on success; true otherwise. 
2364 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, 2365 InternalInstruction &insn, 2366 const MCDisassembler *Dis) { 2367 switch (operand.encoding) { 2368 default: 2369 debug("Unhandled operand encoding during translation"); 2370 return true; 2371 case ENCODING_REG: 2372 translateRegister(mcInst, insn.reg); 2373 return false; 2374 case ENCODING_WRITEMASK: 2375 return translateMaskRegister(mcInst, insn.writemask); 2376 case ENCODING_SIB: 2377 CASE_ENCODING_RM: 2378 CASE_ENCODING_VSIB: 2379 return translateRM(mcInst, operand, insn, Dis); 2380 case ENCODING_IB: 2381 case ENCODING_IW: 2382 case ENCODING_ID: 2383 case ENCODING_IO: 2384 case ENCODING_Iv: 2385 case ENCODING_Ia: 2386 translateImmediate(mcInst, 2387 insn.immediates[insn.numImmediatesTranslated++], 2388 operand, 2389 insn, 2390 Dis); 2391 return false; 2392 case ENCODING_IRC: 2393 mcInst.addOperand(MCOperand::createImm(insn.RC)); 2394 return false; 2395 case ENCODING_SI: 2396 return translateSrcIndex(mcInst, insn); 2397 case ENCODING_DI: 2398 return translateDstIndex(mcInst, insn); 2399 case ENCODING_RB: 2400 case ENCODING_RW: 2401 case ENCODING_RD: 2402 case ENCODING_RO: 2403 case ENCODING_Rv: 2404 translateRegister(mcInst, insn.opcodeRegister); 2405 return false; 2406 case ENCODING_CF: 2407 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2408 return false; 2409 case ENCODING_CC: 2410 if (isCCMPOrCTEST(&insn)) 2411 mcInst.addOperand(MCOperand::createImm(insn.immediates[2])); 2412 else 2413 mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); 2414 return false; 2415 case ENCODING_FP: 2416 translateFPRegister(mcInst, insn.modRM & 7); 2417 return false; 2418 case ENCODING_VVVV: 2419 translateRegister(mcInst, insn.vvvv); 2420 return false; 2421 case ENCODING_DUP: 2422 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], 2423 insn, Dis); 2424 } 2425 } 2426 2427 /// translateInstruction - Translates an internal instruction and all its 2428 
/// operands to an MCInst. 2429 /// 2430 /// @param mcInst - The MCInst to populate with the instruction's data. 2431 /// @param insn - The internal instruction. 2432 /// @return - false on success; true otherwise. 2433 static bool translateInstruction(MCInst &mcInst, 2434 InternalInstruction &insn, 2435 const MCDisassembler *Dis) { 2436 if (!insn.spec) { 2437 debug("Instruction has no specification"); 2438 return true; 2439 } 2440 2441 mcInst.clear(); 2442 mcInst.setOpcode(insn.instructionID); 2443 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 2444 // prefix bytes should be disassembled as xrelease and xacquire then set the 2445 // opcode to those instead of the rep and repne opcodes. 2446 if (insn.xAcquireRelease) { 2447 if(mcInst.getOpcode() == X86::REP_PREFIX) 2448 mcInst.setOpcode(X86::XRELEASE_PREFIX); 2449 else if(mcInst.getOpcode() == X86::REPNE_PREFIX) 2450 mcInst.setOpcode(X86::XACQUIRE_PREFIX); 2451 } 2452 2453 insn.numImmediatesTranslated = 0; 2454 2455 for (const auto &Op : insn.operands) { 2456 if (Op.encoding != ENCODING_NONE) { 2457 if (translateOperand(mcInst, Op, insn, Dis)) { 2458 return true; 2459 } 2460 } 2461 } 2462 2463 return false; 2464 } 2465 2466 static MCDisassembler *createX86Disassembler(const Target &T, 2467 const MCSubtargetInfo &STI, 2468 MCContext &Ctx) { 2469 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); 2470 return new X86GenericDisassembler(STI, Ctx, std::move(MII)); 2471 } 2472 2473 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() { 2474 // Register the disassembler. 2475 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(), 2476 createX86Disassembler); 2477 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(), 2478 createX86Disassembler); 2479 } 2480