xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file is part of the X86 Disassembler.
100b57cec5SDimitry Andric // It contains code to translate the data produced by the decoder into
110b57cec5SDimitry Andric //  MCInsts.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
150b57cec5SDimitry Andric // 64-bit X86 instruction sets.  The main decode sequence for an assembly
160b57cec5SDimitry Andric // instruction in this disassembler is:
170b57cec5SDimitry Andric //
180b57cec5SDimitry Andric // 1. Read the prefix bytes and determine the attributes of the instruction.
190b57cec5SDimitry Andric //    These attributes, recorded in enum attributeBits
200b57cec5SDimitry Andric //    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
210b57cec5SDimitry Andric //    provides a mapping from bitmasks to contexts, which are represented by
220b57cec5SDimitry Andric //    enum InstructionContext (ibid.).
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric // 2. Read the opcode, and determine what kind of opcode it is.  The
250b57cec5SDimitry Andric //    disassembler distinguishes four kinds of opcodes, which are enumerated in
260b57cec5SDimitry Andric //    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
270b57cec5SDimitry Andric //    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
280b57cec5SDimitry Andric //    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
290b57cec5SDimitry Andric //
300b57cec5SDimitry Andric // 3. Depending on the opcode type, look in one of four ClassDecision structures
310b57cec5SDimitry Andric //    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
320b57cec5SDimitry Andric //    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode, to
330b57cec5SDimitry Andric //    get a ModRMDecision (ibid.).
340b57cec5SDimitry Andric //
350b57cec5SDimitry Andric // 4. Some instructions, such as escape opcodes or extended opcodes, or even
360b57cec5SDimitry Andric //    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
370b57cec5SDimitry Andric //    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
380b57cec5SDimitry Andric //    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
390b57cec5SDimitry Andric //    ModR/M byte is required and how to interpret it.
400b57cec5SDimitry Andric //
410b57cec5SDimitry Andric // 5. After resolving the ModRMDecision, the disassembler has a unique ID
420b57cec5SDimitry Andric //    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
430b57cec5SDimitry Andric //    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
440b57cec5SDimitry Andric //    meanings of its operands.
450b57cec5SDimitry Andric //
460b57cec5SDimitry Andric // 6. For each operand, its encoding is an entry from OperandEncoding
470b57cec5SDimitry Andric //    (X86DisassemblerDecoderCommon.h) and its type is an entry from
480b57cec5SDimitry Andric //    OperandType (ibid.).  The encoding indicates how to read it from the
490b57cec5SDimitry Andric //    instruction; the type indicates how to interpret the value once it has
500b57cec5SDimitry Andric //    been read.  For example, a register operand could be stored in the R/M
510b57cec5SDimitry Andric //    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
520b57cec5SDimitry Andric //    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
530b57cec5SDimitry Andric //    register, for instance).  Given this information, the operands can be
540b57cec5SDimitry Andric //    extracted and interpreted.
550b57cec5SDimitry Andric //
560b57cec5SDimitry Andric // 7. As the last step, the disassembler translates the instruction information
570b57cec5SDimitry Andric //    and operands into a format understandable by the client - in this case, an
580b57cec5SDimitry Andric //    MCInst for use by the MC infrastructure.
590b57cec5SDimitry Andric //
600b57cec5SDimitry Andric // The disassembler is broken broadly into two parts: the table emitter that
610b57cec5SDimitry Andric // emits the instruction decode tables discussed above during compilation, and
620b57cec5SDimitry Andric // the disassembler itself.  The table emitter is documented in more detail in
630b57cec5SDimitry Andric // utils/TableGen/X86DisassemblerEmitter.h.
640b57cec5SDimitry Andric //
650b57cec5SDimitry Andric // X86Disassembler.cpp contains the code responsible for step 7, and for
660b57cec5SDimitry Andric //   invoking the decoder to execute steps 1-6.
670b57cec5SDimitry Andric // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
680b57cec5SDimitry Andric //   table emitter and the disassembler.
690b57cec5SDimitry Andric // X86DisassemblerDecoder.h contains the public interface of the decoder,
700b57cec5SDimitry Andric //   factored out into C for possible use by other projects.
710b57cec5SDimitry Andric // X86DisassemblerDecoder.c contains the source code of the decoder, which is
720b57cec5SDimitry Andric //   responsible for steps 1-6.
730b57cec5SDimitry Andric //
740b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric #include "MCTargetDesc/X86BaseInfo.h"
770b57cec5SDimitry Andric #include "MCTargetDesc/X86MCTargetDesc.h"
780b57cec5SDimitry Andric #include "TargetInfo/X86TargetInfo.h"
790b57cec5SDimitry Andric #include "X86DisassemblerDecoder.h"
800b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
810b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
820b57cec5SDimitry Andric #include "llvm/MC/MCExpr.h"
830b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
840b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h"
850b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
86349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h"
870b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
88480093f4SDimitry Andric #include "llvm/Support/Format.h"
890b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric using namespace llvm;
920b57cec5SDimitry Andric using namespace llvm::X86Disassembler;
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric #define DEBUG_TYPE "x86-disassembler"
950b57cec5SDimitry Andric 
96480093f4SDimitry Andric #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97480093f4SDimitry Andric 
// Describes, for one opcode, whether a ModR/M byte is required and, if it is,
// which instruction each possible ModR/M value selects.  Resolving this
// decision narrows the decode down to a single instruction.
struct ModRMDecision {
  // Selector switched on by decode() (MODRM_ONEENTRY, MODRM_SPLITRM,
  // MODRM_SPLITREG, MODRM_SPLITMISC or MODRM_FULL).
  uint8_t modrm_type;
  // Index of this decision's first entry in modRMTable; decode() adds a
  // ModR/M-derived offset to it depending on modrm_type.
  uint16_t instructionIDs;
};
105480093f4SDimitry Andric 
106480093f4SDimitry Andric // Specifies which set of ModR/M->instruction tables to look at
107480093f4SDimitry Andric // given a particular opcode.
108480093f4SDimitry Andric struct OpcodeDecision {
109480093f4SDimitry Andric   ModRMDecision modRMDecisions[256];
110480093f4SDimitry Andric };
111480093f4SDimitry Andric 
112480093f4SDimitry Andric // Specifies which opcode->instruction tables to look at given
113480093f4SDimitry Andric // a particular context (set of attributes).  Since there are many possible
114480093f4SDimitry Andric // contexts, the decoder first uses CONTEXTS_SYM to determine which context
115480093f4SDimitry Andric // applies given a specific set of attributes.  Hence there are only IC_max
116480093f4SDimitry Andric // entries in this table, rather than 2^(ATTR_max).
117480093f4SDimitry Andric struct ContextDecision {
118480093f4SDimitry Andric   OpcodeDecision opcodeDecisions[IC_max];
119480093f4SDimitry Andric };
120480093f4SDimitry Andric 
121480093f4SDimitry Andric #include "X86GenDisassemblerTables.inc"
122480093f4SDimitry Andric 
decode(OpcodeType type,InstructionContext insnContext,uint8_t opcode,uint8_t modRM)123480093f4SDimitry Andric static InstrUID decode(OpcodeType type, InstructionContext insnContext,
124480093f4SDimitry Andric                        uint8_t opcode, uint8_t modRM) {
125480093f4SDimitry Andric   const struct ModRMDecision *dec;
126480093f4SDimitry Andric 
127480093f4SDimitry Andric   switch (type) {
128480093f4SDimitry Andric   case ONEBYTE:
129480093f4SDimitry Andric     dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130480093f4SDimitry Andric     break;
131480093f4SDimitry Andric   case TWOBYTE:
132480093f4SDimitry Andric     dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133480093f4SDimitry Andric     break;
134480093f4SDimitry Andric   case THREEBYTE_38:
135480093f4SDimitry Andric     dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136480093f4SDimitry Andric     break;
137480093f4SDimitry Andric   case THREEBYTE_3A:
138480093f4SDimitry Andric     dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139480093f4SDimitry Andric     break;
140480093f4SDimitry Andric   case XOP8_MAP:
141480093f4SDimitry Andric     dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142480093f4SDimitry Andric     break;
143480093f4SDimitry Andric   case XOP9_MAP:
144480093f4SDimitry Andric     dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145480093f4SDimitry Andric     break;
146480093f4SDimitry Andric   case XOPA_MAP:
147480093f4SDimitry Andric     dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148480093f4SDimitry Andric     break;
149480093f4SDimitry Andric   case THREEDNOW_MAP:
150480093f4SDimitry Andric     dec =
151480093f4SDimitry Andric         &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152480093f4SDimitry Andric     break;
1535f757f3fSDimitry Andric   case MAP4:
1545f757f3fSDimitry Andric     dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
1555f757f3fSDimitry Andric     break;
156349cc55cSDimitry Andric   case MAP5:
157349cc55cSDimitry Andric     dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158349cc55cSDimitry Andric     break;
159349cc55cSDimitry Andric   case MAP6:
160349cc55cSDimitry Andric     dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
161349cc55cSDimitry Andric     break;
1625f757f3fSDimitry Andric   case MAP7:
1635f757f3fSDimitry Andric     dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
1645f757f3fSDimitry Andric     break;
1650b57cec5SDimitry Andric   }
1660b57cec5SDimitry Andric 
167480093f4SDimitry Andric   switch (dec->modrm_type) {
168480093f4SDimitry Andric   default:
169480093f4SDimitry Andric     llvm_unreachable("Corrupt table!  Unknown modrm_type");
170480093f4SDimitry Andric     return 0;
171480093f4SDimitry Andric   case MODRM_ONEENTRY:
172480093f4SDimitry Andric     return modRMTable[dec->instructionIDs];
173480093f4SDimitry Andric   case MODRM_SPLITRM:
174480093f4SDimitry Andric     if (modFromModRM(modRM) == 0x3)
175480093f4SDimitry Andric       return modRMTable[dec->instructionIDs + 1];
176480093f4SDimitry Andric     return modRMTable[dec->instructionIDs];
177480093f4SDimitry Andric   case MODRM_SPLITREG:
178480093f4SDimitry Andric     if (modFromModRM(modRM) == 0x3)
179480093f4SDimitry Andric       return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
180480093f4SDimitry Andric     return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
181480093f4SDimitry Andric   case MODRM_SPLITMISC:
182480093f4SDimitry Andric     if (modFromModRM(modRM) == 0x3)
183480093f4SDimitry Andric       return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
184480093f4SDimitry Andric     return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
185480093f4SDimitry Andric   case MODRM_FULL:
186480093f4SDimitry Andric     return modRMTable[dec->instructionIDs + modRM];
187480093f4SDimitry Andric   }
1880b57cec5SDimitry Andric }
1890b57cec5SDimitry Andric 
peek(struct InternalInstruction * insn,uint8_t & byte)190480093f4SDimitry Andric static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
191480093f4SDimitry Andric   uint64_t offset = insn->readerCursor - insn->startLocation;
192480093f4SDimitry Andric   if (offset >= insn->bytes.size())
193480093f4SDimitry Andric     return true;
194480093f4SDimitry Andric   byte = insn->bytes[offset];
195480093f4SDimitry Andric   return false;
196480093f4SDimitry Andric }
197480093f4SDimitry Andric 
consume(InternalInstruction * insn,T & ptr)198480093f4SDimitry Andric template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
199480093f4SDimitry Andric   auto r = insn->bytes;
200480093f4SDimitry Andric   uint64_t offset = insn->readerCursor - insn->startLocation;
201480093f4SDimitry Andric   if (offset + sizeof(T) > r.size())
202480093f4SDimitry Andric     return true;
2035f757f3fSDimitry Andric   ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);
204480093f4SDimitry Andric   insn->readerCursor += sizeof(T);
205480093f4SDimitry Andric   return false;
206480093f4SDimitry Andric }
207480093f4SDimitry Andric 
isREX(struct InternalInstruction * insn,uint8_t prefix)208480093f4SDimitry Andric static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
209480093f4SDimitry Andric   return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
210480093f4SDimitry Andric }
211480093f4SDimitry Andric 
isREX2(struct InternalInstruction * insn,uint8_t prefix)2125f757f3fSDimitry Andric static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
2135f757f3fSDimitry Andric   return insn->mode == MODE_64BIT && prefix == 0xd5;
2145f757f3fSDimitry Andric }
2155f757f3fSDimitry Andric 
216480093f4SDimitry Andric // Consumes all of an instruction's prefix bytes, and marks the
217480093f4SDimitry Andric // instruction as having them.  Also sets the instruction's default operand,
218480093f4SDimitry Andric // address, and other relevant data sizes to report operands correctly.
219480093f4SDimitry Andric //
220480093f4SDimitry Andric // insn must not be empty.
readPrefixes(struct InternalInstruction * insn)221480093f4SDimitry Andric static int readPrefixes(struct InternalInstruction *insn) {
222480093f4SDimitry Andric   bool isPrefix = true;
223480093f4SDimitry Andric   uint8_t byte = 0;
224480093f4SDimitry Andric   uint8_t nextByte;
225480093f4SDimitry Andric 
226480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readPrefixes()");
227480093f4SDimitry Andric 
228480093f4SDimitry Andric   while (isPrefix) {
229480093f4SDimitry Andric     // If we fail reading prefixes, just stop here and let the opcode reader
230480093f4SDimitry Andric     // deal with it.
231480093f4SDimitry Andric     if (consume(insn, byte))
232480093f4SDimitry Andric       break;
233480093f4SDimitry Andric 
234480093f4SDimitry Andric     // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
235480093f4SDimitry Andric     // break and let it be disassembled as a normal "instruction".
236480093f4SDimitry Andric     if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
237480093f4SDimitry Andric       break;
238480093f4SDimitry Andric 
239480093f4SDimitry Andric     if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
240480093f4SDimitry Andric       // If the byte is 0xf2 or 0xf3, and any of the following conditions are
241480093f4SDimitry Andric       // met:
242480093f4SDimitry Andric       // - it is followed by a LOCK (0xf0) prefix
243480093f4SDimitry Andric       // - it is followed by an xchg instruction
244480093f4SDimitry Andric       // then it should be disassembled as a xacquire/xrelease not repne/rep.
245480093f4SDimitry Andric       if (((nextByte == 0xf0) ||
246480093f4SDimitry Andric            ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
247480093f4SDimitry Andric         insn->xAcquireRelease = true;
248480093f4SDimitry Andric         if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
249480093f4SDimitry Andric           break;
250480093f4SDimitry Andric       }
251480093f4SDimitry Andric       // Also if the byte is 0xf3, and the following condition is met:
252480093f4SDimitry Andric       // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
253480093f4SDimitry Andric       //                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
254480093f4SDimitry Andric       // then it should be disassembled as an xrelease not rep.
255480093f4SDimitry Andric       if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
256480093f4SDimitry Andric                            nextByte == 0xc6 || nextByte == 0xc7)) {
257480093f4SDimitry Andric         insn->xAcquireRelease = true;
258480093f4SDimitry Andric         break;
259480093f4SDimitry Andric       }
260480093f4SDimitry Andric       if (isREX(insn, nextByte)) {
261480093f4SDimitry Andric         uint8_t nnextByte;
262480093f4SDimitry Andric         // Go to REX prefix after the current one
263480093f4SDimitry Andric         if (consume(insn, nnextByte))
264480093f4SDimitry Andric           return -1;
265480093f4SDimitry Andric         // We should be able to read next byte after REX prefix
266480093f4SDimitry Andric         if (peek(insn, nnextByte))
267480093f4SDimitry Andric           return -1;
268480093f4SDimitry Andric         --insn->readerCursor;
269480093f4SDimitry Andric       }
270480093f4SDimitry Andric     }
271480093f4SDimitry Andric 
272480093f4SDimitry Andric     switch (byte) {
273480093f4SDimitry Andric     case 0xf0: // LOCK
274480093f4SDimitry Andric       insn->hasLockPrefix = true;
275480093f4SDimitry Andric       break;
276480093f4SDimitry Andric     case 0xf2: // REPNE/REPNZ
277480093f4SDimitry Andric     case 0xf3: { // REP or REPE/REPZ
278480093f4SDimitry Andric       uint8_t nextByte;
279480093f4SDimitry Andric       if (peek(insn, nextByte))
280480093f4SDimitry Andric         break;
281480093f4SDimitry Andric       // TODO:
282480093f4SDimitry Andric       //  1. There could be several 0x66
283480093f4SDimitry Andric       //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
284480093f4SDimitry Andric       //      it's not mandatory prefix
285480093f4SDimitry Andric       //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
286480093f4SDimitry Andric       //     0x0f exactly after it to be mandatory prefix
287*0fca6ea1SDimitry Andric       //  4. if (nextByte == 0xd5) it's REX2 and we need
288*0fca6ea1SDimitry Andric       //     0x0f exactly after it to be mandatory prefix
289*0fca6ea1SDimitry Andric       if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f ||
290*0fca6ea1SDimitry Andric           nextByte == 0x66)
291480093f4SDimitry Andric         // The last of 0xf2 /0xf3 is mandatory prefix
292480093f4SDimitry Andric         insn->mandatoryPrefix = byte;
293480093f4SDimitry Andric       insn->repeatPrefix = byte;
294480093f4SDimitry Andric       break;
295480093f4SDimitry Andric     }
296480093f4SDimitry Andric     case 0x2e: // CS segment override -OR- Branch not taken
297480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_CS;
298480093f4SDimitry Andric       break;
299480093f4SDimitry Andric     case 0x36: // SS segment override -OR- Branch taken
300480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_SS;
301480093f4SDimitry Andric       break;
302480093f4SDimitry Andric     case 0x3e: // DS segment override
303480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_DS;
304480093f4SDimitry Andric       break;
305480093f4SDimitry Andric     case 0x26: // ES segment override
306480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_ES;
307480093f4SDimitry Andric       break;
308480093f4SDimitry Andric     case 0x64: // FS segment override
309480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_FS;
310480093f4SDimitry Andric       break;
311480093f4SDimitry Andric     case 0x65: // GS segment override
312480093f4SDimitry Andric       insn->segmentOverride = SEG_OVERRIDE_GS;
313480093f4SDimitry Andric       break;
314480093f4SDimitry Andric     case 0x66: { // Operand-size override {
315480093f4SDimitry Andric       uint8_t nextByte;
316480093f4SDimitry Andric       insn->hasOpSize = true;
317480093f4SDimitry Andric       if (peek(insn, nextByte))
318480093f4SDimitry Andric         break;
319480093f4SDimitry Andric       // 0x66 can't overwrite existing mandatory prefix and should be ignored
320480093f4SDimitry Andric       if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
321480093f4SDimitry Andric         insn->mandatoryPrefix = byte;
322480093f4SDimitry Andric       break;
323480093f4SDimitry Andric     }
324480093f4SDimitry Andric     case 0x67: // Address-size override
325480093f4SDimitry Andric       insn->hasAdSize = true;
326480093f4SDimitry Andric       break;
327480093f4SDimitry Andric     default: // Not a prefix byte
328480093f4SDimitry Andric       isPrefix = false;
329480093f4SDimitry Andric       break;
330480093f4SDimitry Andric     }
331480093f4SDimitry Andric 
332480093f4SDimitry Andric     if (isPrefix)
333480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
334480093f4SDimitry Andric   }
335480093f4SDimitry Andric 
336480093f4SDimitry Andric   insn->vectorExtensionType = TYPE_NO_VEX_XOP;
337480093f4SDimitry Andric 
338480093f4SDimitry Andric   if (byte == 0x62) {
339480093f4SDimitry Andric     uint8_t byte1, byte2;
340480093f4SDimitry Andric     if (consume(insn, byte1)) {
341480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
342480093f4SDimitry Andric       return -1;
343480093f4SDimitry Andric     }
344480093f4SDimitry Andric 
345480093f4SDimitry Andric     if (peek(insn, byte2)) {
346480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
347480093f4SDimitry Andric       return -1;
348480093f4SDimitry Andric     }
349480093f4SDimitry Andric 
3505f757f3fSDimitry Andric     if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
351480093f4SDimitry Andric       insn->vectorExtensionType = TYPE_EVEX;
352480093f4SDimitry Andric     } else {
353480093f4SDimitry Andric       --insn->readerCursor; // unconsume byte1
354480093f4SDimitry Andric       --insn->readerCursor; // unconsume byte
355480093f4SDimitry Andric     }
356480093f4SDimitry Andric 
357480093f4SDimitry Andric     if (insn->vectorExtensionType == TYPE_EVEX) {
358480093f4SDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
359480093f4SDimitry Andric       insn->vectorExtensionPrefix[1] = byte1;
360480093f4SDimitry Andric       if (consume(insn, insn->vectorExtensionPrefix[2])) {
361480093f4SDimitry Andric         LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
362480093f4SDimitry Andric         return -1;
363480093f4SDimitry Andric       }
364480093f4SDimitry Andric       if (consume(insn, insn->vectorExtensionPrefix[3])) {
365480093f4SDimitry Andric         LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
366480093f4SDimitry Andric         return -1;
367480093f4SDimitry Andric       }
368480093f4SDimitry Andric 
369480093f4SDimitry Andric       if (insn->mode == MODE_64BIT) {
3705f757f3fSDimitry Andric         // We simulate the REX prefix for simplicity's sake
371480093f4SDimitry Andric         insn->rexPrefix = 0x40 |
372480093f4SDimitry Andric                           (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
373480093f4SDimitry Andric                           (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
374480093f4SDimitry Andric                           (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
375480093f4SDimitry Andric                           (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
3765f757f3fSDimitry Andric 
3775f757f3fSDimitry Andric         // We simulate the REX2 prefix for simplicity's sake
3785f757f3fSDimitry Andric         insn->rex2ExtensionPrefix[1] =
3795f757f3fSDimitry Andric             (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
3805f757f3fSDimitry Andric             (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
3815f757f3fSDimitry Andric             (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
382480093f4SDimitry Andric       }
383480093f4SDimitry Andric 
384480093f4SDimitry Andric       LLVM_DEBUG(
385480093f4SDimitry Andric           dbgs() << format(
386480093f4SDimitry Andric               "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
387480093f4SDimitry Andric               insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
388480093f4SDimitry Andric               insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
389480093f4SDimitry Andric     }
390480093f4SDimitry Andric   } else if (byte == 0xc4) {
391480093f4SDimitry Andric     uint8_t byte1;
392480093f4SDimitry Andric     if (peek(insn, byte1)) {
393480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
394480093f4SDimitry Andric       return -1;
395480093f4SDimitry Andric     }
396480093f4SDimitry Andric 
397480093f4SDimitry Andric     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
398480093f4SDimitry Andric       insn->vectorExtensionType = TYPE_VEX_3B;
399480093f4SDimitry Andric     else
400480093f4SDimitry Andric       --insn->readerCursor;
401480093f4SDimitry Andric 
402480093f4SDimitry Andric     if (insn->vectorExtensionType == TYPE_VEX_3B) {
403480093f4SDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
404480093f4SDimitry Andric       consume(insn, insn->vectorExtensionPrefix[1]);
405480093f4SDimitry Andric       consume(insn, insn->vectorExtensionPrefix[2]);
406480093f4SDimitry Andric 
407480093f4SDimitry Andric       // We simulate the REX prefix for simplicity's sake
408480093f4SDimitry Andric 
409480093f4SDimitry Andric       if (insn->mode == MODE_64BIT)
410480093f4SDimitry Andric         insn->rexPrefix = 0x40 |
411480093f4SDimitry Andric                           (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
412480093f4SDimitry Andric                           (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
413480093f4SDimitry Andric                           (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
414480093f4SDimitry Andric                           (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
415480093f4SDimitry Andric 
416480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
417480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[0],
418480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[1],
419480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[2]));
420480093f4SDimitry Andric     }
421480093f4SDimitry Andric   } else if (byte == 0xc5) {
422480093f4SDimitry Andric     uint8_t byte1;
423480093f4SDimitry Andric     if (peek(insn, byte1)) {
424480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
425480093f4SDimitry Andric       return -1;
426480093f4SDimitry Andric     }
427480093f4SDimitry Andric 
428480093f4SDimitry Andric     if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
429480093f4SDimitry Andric       insn->vectorExtensionType = TYPE_VEX_2B;
430480093f4SDimitry Andric     else
431480093f4SDimitry Andric       --insn->readerCursor;
432480093f4SDimitry Andric 
433480093f4SDimitry Andric     if (insn->vectorExtensionType == TYPE_VEX_2B) {
434480093f4SDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
435480093f4SDimitry Andric       consume(insn, insn->vectorExtensionPrefix[1]);
436480093f4SDimitry Andric 
437480093f4SDimitry Andric       if (insn->mode == MODE_64BIT)
438480093f4SDimitry Andric         insn->rexPrefix =
439480093f4SDimitry Andric             0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
440480093f4SDimitry Andric 
441480093f4SDimitry Andric       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
442480093f4SDimitry Andric       default:
443480093f4SDimitry Andric         break;
444480093f4SDimitry Andric       case VEX_PREFIX_66:
445480093f4SDimitry Andric         insn->hasOpSize = true;
446480093f4SDimitry Andric         break;
447480093f4SDimitry Andric       }
448480093f4SDimitry Andric 
449480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
450480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[0],
451480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[1]));
452480093f4SDimitry Andric     }
453480093f4SDimitry Andric   } else if (byte == 0x8f) {
454480093f4SDimitry Andric     uint8_t byte1;
455480093f4SDimitry Andric     if (peek(insn, byte1)) {
456480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
457480093f4SDimitry Andric       return -1;
458480093f4SDimitry Andric     }
459480093f4SDimitry Andric 
460480093f4SDimitry Andric     if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
461480093f4SDimitry Andric       insn->vectorExtensionType = TYPE_XOP;
462480093f4SDimitry Andric     else
463480093f4SDimitry Andric       --insn->readerCursor;
464480093f4SDimitry Andric 
465480093f4SDimitry Andric     if (insn->vectorExtensionType == TYPE_XOP) {
466480093f4SDimitry Andric       insn->vectorExtensionPrefix[0] = byte;
467480093f4SDimitry Andric       consume(insn, insn->vectorExtensionPrefix[1]);
468480093f4SDimitry Andric       consume(insn, insn->vectorExtensionPrefix[2]);
469480093f4SDimitry Andric 
470480093f4SDimitry Andric       // We simulate the REX prefix for simplicity's sake
471480093f4SDimitry Andric 
472480093f4SDimitry Andric       if (insn->mode == MODE_64BIT)
473480093f4SDimitry Andric         insn->rexPrefix = 0x40 |
474480093f4SDimitry Andric                           (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
475480093f4SDimitry Andric                           (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
476480093f4SDimitry Andric                           (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
477480093f4SDimitry Andric                           (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
478480093f4SDimitry Andric 
479480093f4SDimitry Andric       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
480480093f4SDimitry Andric       default:
481480093f4SDimitry Andric         break;
482480093f4SDimitry Andric       case VEX_PREFIX_66:
483480093f4SDimitry Andric         insn->hasOpSize = true;
484480093f4SDimitry Andric         break;
485480093f4SDimitry Andric       }
486480093f4SDimitry Andric 
487480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
488480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[0],
489480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[1],
490480093f4SDimitry Andric                                   insn->vectorExtensionPrefix[2]));
491480093f4SDimitry Andric     }
4925f757f3fSDimitry Andric   } else if (isREX2(insn, byte)) {
4935f757f3fSDimitry Andric     uint8_t byte1;
4945f757f3fSDimitry Andric     if (peek(insn, byte1)) {
4955f757f3fSDimitry Andric       LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
4965f757f3fSDimitry Andric       return -1;
4975f757f3fSDimitry Andric     }
4985f757f3fSDimitry Andric     insn->rex2ExtensionPrefix[0] = byte;
4995f757f3fSDimitry Andric     consume(insn, insn->rex2ExtensionPrefix[1]);
5005f757f3fSDimitry Andric 
5015f757f3fSDimitry Andric     // We simulate the REX prefix for simplicity's sake
5025f757f3fSDimitry Andric     insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
5035f757f3fSDimitry Andric                       (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
5045f757f3fSDimitry Andric                       (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
5055f757f3fSDimitry Andric                       (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
5065f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
5075f757f3fSDimitry Andric                                 insn->rex2ExtensionPrefix[0],
5085f757f3fSDimitry Andric                                 insn->rex2ExtensionPrefix[1]));
509480093f4SDimitry Andric   } else if (isREX(insn, byte)) {
510480093f4SDimitry Andric     if (peek(insn, nextByte))
511480093f4SDimitry Andric       return -1;
512480093f4SDimitry Andric     insn->rexPrefix = byte;
513480093f4SDimitry Andric     LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
514480093f4SDimitry Andric   } else
515480093f4SDimitry Andric     --insn->readerCursor;
516480093f4SDimitry Andric 
517480093f4SDimitry Andric   if (insn->mode == MODE_16BIT) {
518480093f4SDimitry Andric     insn->registerSize = (insn->hasOpSize ? 4 : 2);
519480093f4SDimitry Andric     insn->addressSize = (insn->hasAdSize ? 4 : 2);
520480093f4SDimitry Andric     insn->displacementSize = (insn->hasAdSize ? 4 : 2);
521480093f4SDimitry Andric     insn->immediateSize = (insn->hasOpSize ? 4 : 2);
522480093f4SDimitry Andric   } else if (insn->mode == MODE_32BIT) {
523480093f4SDimitry Andric     insn->registerSize = (insn->hasOpSize ? 2 : 4);
524480093f4SDimitry Andric     insn->addressSize = (insn->hasAdSize ? 2 : 4);
525480093f4SDimitry Andric     insn->displacementSize = (insn->hasAdSize ? 2 : 4);
526480093f4SDimitry Andric     insn->immediateSize = (insn->hasOpSize ? 2 : 4);
527480093f4SDimitry Andric   } else if (insn->mode == MODE_64BIT) {
52881ad6265SDimitry Andric     insn->displacementSize = 4;
529480093f4SDimitry Andric     if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
530480093f4SDimitry Andric       insn->registerSize = 8;
531480093f4SDimitry Andric       insn->addressSize = (insn->hasAdSize ? 4 : 8);
532480093f4SDimitry Andric       insn->immediateSize = 4;
533d409305fSDimitry Andric       insn->hasOpSize = false;
534480093f4SDimitry Andric     } else {
535480093f4SDimitry Andric       insn->registerSize = (insn->hasOpSize ? 2 : 4);
536480093f4SDimitry Andric       insn->addressSize = (insn->hasAdSize ? 4 : 8);
537480093f4SDimitry Andric       insn->immediateSize = (insn->hasOpSize ? 2 : 4);
538480093f4SDimitry Andric     }
539480093f4SDimitry Andric   }
540480093f4SDimitry Andric 
541480093f4SDimitry Andric   return 0;
542480093f4SDimitry Andric }
543480093f4SDimitry Andric 
544480093f4SDimitry Andric // Consumes the SIB byte to determine addressing information.
readSIB(struct InternalInstruction * insn)545480093f4SDimitry Andric static int readSIB(struct InternalInstruction *insn) {
546480093f4SDimitry Andric   SIBBase sibBaseBase = SIB_BASE_NONE;
547480093f4SDimitry Andric   uint8_t index, base;
548480093f4SDimitry Andric 
549480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readSIB()");
550480093f4SDimitry Andric   switch (insn->addressSize) {
551480093f4SDimitry Andric   case 2:
552480093f4SDimitry Andric   default:
553480093f4SDimitry Andric     llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
554480093f4SDimitry Andric   case 4:
555480093f4SDimitry Andric     insn->sibIndexBase = SIB_INDEX_EAX;
556480093f4SDimitry Andric     sibBaseBase = SIB_BASE_EAX;
557480093f4SDimitry Andric     break;
558480093f4SDimitry Andric   case 8:
559480093f4SDimitry Andric     insn->sibIndexBase = SIB_INDEX_RAX;
560480093f4SDimitry Andric     sibBaseBase = SIB_BASE_RAX;
561480093f4SDimitry Andric     break;
562480093f4SDimitry Andric   }
563480093f4SDimitry Andric 
564480093f4SDimitry Andric   if (consume(insn, insn->sib))
565480093f4SDimitry Andric     return -1;
566480093f4SDimitry Andric 
5675f757f3fSDimitry Andric   index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
5685f757f3fSDimitry Andric           (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
569480093f4SDimitry Andric 
570480093f4SDimitry Andric   if (index == 0x4) {
571480093f4SDimitry Andric     insn->sibIndex = SIB_INDEX_NONE;
572480093f4SDimitry Andric   } else {
573480093f4SDimitry Andric     insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
574480093f4SDimitry Andric   }
575480093f4SDimitry Andric 
576480093f4SDimitry Andric   insn->sibScale = 1 << scaleFromSIB(insn->sib);
577480093f4SDimitry Andric 
5785f757f3fSDimitry Andric   base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
5795f757f3fSDimitry Andric          (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
580480093f4SDimitry Andric 
581480093f4SDimitry Andric   switch (base) {
582480093f4SDimitry Andric   case 0x5:
583480093f4SDimitry Andric   case 0xd:
584480093f4SDimitry Andric     switch (modFromModRM(insn->modRM)) {
585480093f4SDimitry Andric     case 0x0:
586480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_32;
587480093f4SDimitry Andric       insn->sibBase = SIB_BASE_NONE;
588480093f4SDimitry Andric       break;
589480093f4SDimitry Andric     case 0x1:
590480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_8;
591480093f4SDimitry Andric       insn->sibBase = (SIBBase)(sibBaseBase + base);
592480093f4SDimitry Andric       break;
593480093f4SDimitry Andric     case 0x2:
594480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_32;
595480093f4SDimitry Andric       insn->sibBase = (SIBBase)(sibBaseBase + base);
596480093f4SDimitry Andric       break;
597480093f4SDimitry Andric     default:
598480093f4SDimitry Andric       llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
599480093f4SDimitry Andric     }
600480093f4SDimitry Andric     break;
601480093f4SDimitry Andric   default:
602480093f4SDimitry Andric     insn->sibBase = (SIBBase)(sibBaseBase + base);
603480093f4SDimitry Andric     break;
604480093f4SDimitry Andric   }
605480093f4SDimitry Andric 
606480093f4SDimitry Andric   return 0;
607480093f4SDimitry Andric }
608480093f4SDimitry Andric 
readDisplacement(struct InternalInstruction * insn)609480093f4SDimitry Andric static int readDisplacement(struct InternalInstruction *insn) {
610480093f4SDimitry Andric   int8_t d8;
611480093f4SDimitry Andric   int16_t d16;
612480093f4SDimitry Andric   int32_t d32;
613480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readDisplacement()");
614480093f4SDimitry Andric 
615480093f4SDimitry Andric   insn->displacementOffset = insn->readerCursor - insn->startLocation;
616480093f4SDimitry Andric   switch (insn->eaDisplacement) {
617480093f4SDimitry Andric   case EA_DISP_NONE:
618480093f4SDimitry Andric     break;
619480093f4SDimitry Andric   case EA_DISP_8:
620480093f4SDimitry Andric     if (consume(insn, d8))
621480093f4SDimitry Andric       return -1;
622480093f4SDimitry Andric     insn->displacement = d8;
623480093f4SDimitry Andric     break;
624480093f4SDimitry Andric   case EA_DISP_16:
625480093f4SDimitry Andric     if (consume(insn, d16))
626480093f4SDimitry Andric       return -1;
627480093f4SDimitry Andric     insn->displacement = d16;
628480093f4SDimitry Andric     break;
629480093f4SDimitry Andric   case EA_DISP_32:
630480093f4SDimitry Andric     if (consume(insn, d32))
631480093f4SDimitry Andric       return -1;
632480093f4SDimitry Andric     insn->displacement = d32;
633480093f4SDimitry Andric     break;
634480093f4SDimitry Andric   }
635480093f4SDimitry Andric 
636480093f4SDimitry Andric   return 0;
637480093f4SDimitry Andric }
638480093f4SDimitry Andric 
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
readModRM(struct InternalInstruction * insn)640480093f4SDimitry Andric static int readModRM(struct InternalInstruction *insn) {
6415f757f3fSDimitry Andric   uint8_t mod, rm, reg;
642480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readModRM()");
643480093f4SDimitry Andric 
644480093f4SDimitry Andric   if (insn->consumedModRM)
645480093f4SDimitry Andric     return 0;
646480093f4SDimitry Andric 
647480093f4SDimitry Andric   if (consume(insn, insn->modRM))
648480093f4SDimitry Andric     return -1;
649480093f4SDimitry Andric   insn->consumedModRM = true;
650480093f4SDimitry Andric 
651480093f4SDimitry Andric   mod = modFromModRM(insn->modRM);
652480093f4SDimitry Andric   rm = rmFromModRM(insn->modRM);
653480093f4SDimitry Andric   reg = regFromModRM(insn->modRM);
654480093f4SDimitry Andric 
655480093f4SDimitry Andric   // This goes by insn->registerSize to pick the correct register, which messes
656480093f4SDimitry Andric   // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
657480093f4SDimitry Andric   // fixupReg().
658480093f4SDimitry Andric   switch (insn->registerSize) {
659480093f4SDimitry Andric   case 2:
660480093f4SDimitry Andric     insn->regBase = MODRM_REG_AX;
661480093f4SDimitry Andric     insn->eaRegBase = EA_REG_AX;
662480093f4SDimitry Andric     break;
663480093f4SDimitry Andric   case 4:
664480093f4SDimitry Andric     insn->regBase = MODRM_REG_EAX;
665480093f4SDimitry Andric     insn->eaRegBase = EA_REG_EAX;
666480093f4SDimitry Andric     break;
667480093f4SDimitry Andric   case 8:
668480093f4SDimitry Andric     insn->regBase = MODRM_REG_RAX;
669480093f4SDimitry Andric     insn->eaRegBase = EA_REG_RAX;
670480093f4SDimitry Andric     break;
671480093f4SDimitry Andric   }
672480093f4SDimitry Andric 
6735f757f3fSDimitry Andric   reg |= (rFromREX(insn->rexPrefix) << 3) |
6745f757f3fSDimitry Andric          (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
6755f757f3fSDimitry Andric   rm |= (bFromREX(insn->rexPrefix) << 3) |
6765f757f3fSDimitry Andric         (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
677480093f4SDimitry Andric 
6785f757f3fSDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
679480093f4SDimitry Andric     reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
680480093f4SDimitry Andric 
681480093f4SDimitry Andric   insn->reg = (Reg)(insn->regBase + reg);
682480093f4SDimitry Andric 
683480093f4SDimitry Andric   switch (insn->addressSize) {
684480093f4SDimitry Andric   case 2: {
685480093f4SDimitry Andric     EABase eaBaseBase = EA_BASE_BX_SI;
686480093f4SDimitry Andric 
687480093f4SDimitry Andric     switch (mod) {
688480093f4SDimitry Andric     case 0x0:
689480093f4SDimitry Andric       if (rm == 0x6) {
690480093f4SDimitry Andric         insn->eaBase = EA_BASE_NONE;
691480093f4SDimitry Andric         insn->eaDisplacement = EA_DISP_16;
692480093f4SDimitry Andric         if (readDisplacement(insn))
693480093f4SDimitry Andric           return -1;
694480093f4SDimitry Andric       } else {
695480093f4SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
696480093f4SDimitry Andric         insn->eaDisplacement = EA_DISP_NONE;
697480093f4SDimitry Andric       }
698480093f4SDimitry Andric       break;
699480093f4SDimitry Andric     case 0x1:
700480093f4SDimitry Andric       insn->eaBase = (EABase)(eaBaseBase + rm);
701480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_8;
702480093f4SDimitry Andric       insn->displacementSize = 1;
703480093f4SDimitry Andric       if (readDisplacement(insn))
704480093f4SDimitry Andric         return -1;
705480093f4SDimitry Andric       break;
706480093f4SDimitry Andric     case 0x2:
707480093f4SDimitry Andric       insn->eaBase = (EABase)(eaBaseBase + rm);
708480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_16;
709480093f4SDimitry Andric       if (readDisplacement(insn))
710480093f4SDimitry Andric         return -1;
711480093f4SDimitry Andric       break;
712480093f4SDimitry Andric     case 0x3:
713480093f4SDimitry Andric       insn->eaBase = (EABase)(insn->eaRegBase + rm);
714480093f4SDimitry Andric       if (readDisplacement(insn))
715480093f4SDimitry Andric         return -1;
716480093f4SDimitry Andric       break;
717480093f4SDimitry Andric     }
718480093f4SDimitry Andric     break;
719480093f4SDimitry Andric   }
720480093f4SDimitry Andric   case 4:
721480093f4SDimitry Andric   case 8: {
722480093f4SDimitry Andric     EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
723480093f4SDimitry Andric 
724480093f4SDimitry Andric     switch (mod) {
725480093f4SDimitry Andric     case 0x0:
726480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
727480093f4SDimitry Andric       // In determining whether RIP-relative mode is used (rm=5),
728480093f4SDimitry Andric       // or whether a SIB byte is present (rm=4),
729480093f4SDimitry Andric       // the extension bits (REX.b and EVEX.x) are ignored.
730480093f4SDimitry Andric       switch (rm & 7) {
731480093f4SDimitry Andric       case 0x4: // SIB byte is present
732480093f4SDimitry Andric         insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
733480093f4SDimitry Andric         if (readSIB(insn) || readDisplacement(insn))
734480093f4SDimitry Andric           return -1;
735480093f4SDimitry Andric         break;
736480093f4SDimitry Andric       case 0x5: // RIP-relative
737480093f4SDimitry Andric         insn->eaBase = EA_BASE_NONE;
738480093f4SDimitry Andric         insn->eaDisplacement = EA_DISP_32;
739480093f4SDimitry Andric         if (readDisplacement(insn))
740480093f4SDimitry Andric           return -1;
741480093f4SDimitry Andric         break;
742480093f4SDimitry Andric       default:
743480093f4SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
744480093f4SDimitry Andric         break;
745480093f4SDimitry Andric       }
746480093f4SDimitry Andric       break;
747480093f4SDimitry Andric     case 0x1:
748480093f4SDimitry Andric       insn->displacementSize = 1;
749bdd1243dSDimitry Andric       [[fallthrough]];
750480093f4SDimitry Andric     case 0x2:
751480093f4SDimitry Andric       insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
752480093f4SDimitry Andric       switch (rm & 7) {
753480093f4SDimitry Andric       case 0x4: // SIB byte is present
754480093f4SDimitry Andric         insn->eaBase = EA_BASE_sib;
755480093f4SDimitry Andric         if (readSIB(insn) || readDisplacement(insn))
756480093f4SDimitry Andric           return -1;
757480093f4SDimitry Andric         break;
758480093f4SDimitry Andric       default:
759480093f4SDimitry Andric         insn->eaBase = (EABase)(eaBaseBase + rm);
760480093f4SDimitry Andric         if (readDisplacement(insn))
761480093f4SDimitry Andric           return -1;
762480093f4SDimitry Andric         break;
763480093f4SDimitry Andric       }
764480093f4SDimitry Andric       break;
765480093f4SDimitry Andric     case 0x3:
766480093f4SDimitry Andric       insn->eaDisplacement = EA_DISP_NONE;
7675f757f3fSDimitry Andric       insn->eaBase = (EABase)(insn->eaRegBase + rm);
768480093f4SDimitry Andric       break;
769480093f4SDimitry Andric     }
770480093f4SDimitry Andric     break;
771480093f4SDimitry Andric   }
772480093f4SDimitry Andric   } // switch (insn->addressSize)
773480093f4SDimitry Andric 
774480093f4SDimitry Andric   return 0;
775480093f4SDimitry Andric }
776480093f4SDimitry Andric 
7775f757f3fSDimitry Andric #define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
778480093f4SDimitry Andric   static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
779480093f4SDimitry Andric                        uint8_t index, uint8_t *valid) {                        \
780480093f4SDimitry Andric     *valid = 1;                                                                \
781480093f4SDimitry Andric     switch (type) {                                                            \
782480093f4SDimitry Andric     default:                                                                   \
783480093f4SDimitry Andric       debug("Unhandled register type");                                        \
784480093f4SDimitry Andric       *valid = 0;                                                              \
785480093f4SDimitry Andric       return 0;                                                                \
786480093f4SDimitry Andric     case TYPE_Rv:                                                              \
787480093f4SDimitry Andric       return base + index;                                                     \
788480093f4SDimitry Andric     case TYPE_R8:                                                              \
7895f757f3fSDimitry Andric       if (insn->rexPrefix && index >= 4 && index <= 7)                         \
790480093f4SDimitry Andric         return prefix##_SPL + (index - 4);                                     \
7915f757f3fSDimitry Andric       else                                                                     \
792480093f4SDimitry Andric         return prefix##_AL + index;                                            \
793480093f4SDimitry Andric     case TYPE_R16:                                                             \
794480093f4SDimitry Andric       return prefix##_AX + index;                                              \
795480093f4SDimitry Andric     case TYPE_R32:                                                             \
796480093f4SDimitry Andric       return prefix##_EAX + index;                                             \
797480093f4SDimitry Andric     case TYPE_R64:                                                             \
798480093f4SDimitry Andric       return prefix##_RAX + index;                                             \
799480093f4SDimitry Andric     case TYPE_ZMM:                                                             \
800480093f4SDimitry Andric       return prefix##_ZMM0 + index;                                            \
801480093f4SDimitry Andric     case TYPE_YMM:                                                             \
802480093f4SDimitry Andric       return prefix##_YMM0 + index;                                            \
803480093f4SDimitry Andric     case TYPE_XMM:                                                             \
804480093f4SDimitry Andric       return prefix##_XMM0 + index;                                            \
8055ffd83dbSDimitry Andric     case TYPE_TMM:                                                             \
8065ffd83dbSDimitry Andric       if (index > 7)                                                           \
8075ffd83dbSDimitry Andric         *valid = 0;                                                            \
8085ffd83dbSDimitry Andric       return prefix##_TMM0 + index;                                            \
809480093f4SDimitry Andric     case TYPE_VK:                                                              \
810480093f4SDimitry Andric       index &= 0xf;                                                            \
811480093f4SDimitry Andric       if (index > 7)                                                           \
812480093f4SDimitry Andric         *valid = 0;                                                            \
813480093f4SDimitry Andric       return prefix##_K0 + index;                                              \
814480093f4SDimitry Andric     case TYPE_VK_PAIR:                                                         \
815480093f4SDimitry Andric       if (index > 7)                                                           \
816480093f4SDimitry Andric         *valid = 0;                                                            \
817480093f4SDimitry Andric       return prefix##_K0_K1 + (index / 2);                                     \
818480093f4SDimitry Andric     case TYPE_MM64:                                                            \
819480093f4SDimitry Andric       return prefix##_MM0 + (index & 0x7);                                     \
820480093f4SDimitry Andric     case TYPE_SEGMENTREG:                                                      \
821480093f4SDimitry Andric       if ((index & 7) > 5)                                                     \
822480093f4SDimitry Andric         *valid = 0;                                                            \
823480093f4SDimitry Andric       return prefix##_ES + (index & 7);                                        \
824480093f4SDimitry Andric     case TYPE_DEBUGREG:                                                        \
825*0fca6ea1SDimitry Andric       if (index > 15)                                                          \
826*0fca6ea1SDimitry Andric         *valid = 0;                                                            \
827480093f4SDimitry Andric       return prefix##_DR0 + index;                                             \
828480093f4SDimitry Andric     case TYPE_CONTROLREG:                                                      \
829*0fca6ea1SDimitry Andric       if (index > 15)                                                          \
830*0fca6ea1SDimitry Andric         *valid = 0;                                                            \
831480093f4SDimitry Andric       return prefix##_CR0 + index;                                             \
832480093f4SDimitry Andric     case TYPE_MVSIBX:                                                          \
833480093f4SDimitry Andric       return prefix##_XMM0 + index;                                            \
834480093f4SDimitry Andric     case TYPE_MVSIBY:                                                          \
835480093f4SDimitry Andric       return prefix##_YMM0 + index;                                            \
836480093f4SDimitry Andric     case TYPE_MVSIBZ:                                                          \
837480093f4SDimitry Andric       return prefix##_ZMM0 + index;                                            \
838480093f4SDimitry Andric     }                                                                          \
839480093f4SDimitry Andric   }
840480093f4SDimitry Andric 
841480093f4SDimitry Andric // Consult an operand type to determine the meaning of the reg or R/M field. If
842480093f4SDimitry Andric // the operand is an XMM operand, for example, an operand would be XMM0 instead
843480093f4SDimitry Andric // of AX, which readModRM() would otherwise misinterpret it as.
844480093f4SDimitry Andric //
845480093f4SDimitry Andric // @param insn  - The instruction containing the operand.
846480093f4SDimitry Andric // @param type  - The operand type.
847480093f4SDimitry Andric // @param index - The existing value of the field as reported by readModRM().
848480093f4SDimitry Andric // @param valid - The address of a uint8_t.  The target is set to 1 if the
849480093f4SDimitry Andric //                field is valid for the register class; 0 if not.
850480093f4SDimitry Andric // @return      - The proper value.
8515f757f3fSDimitry Andric GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
8525f757f3fSDimitry Andric GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
853480093f4SDimitry Andric 
// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
//
// @param insn  - See fixup*Value().
// @param op    - The operand specifier.
// @return      - 0 if fixup was successful; -1 if the register returned was
//                invalid for its class.
fixupReg(struct InternalInstruction * insn,const struct OperandSpecifier * op)861480093f4SDimitry Andric static int fixupReg(struct InternalInstruction *insn,
862480093f4SDimitry Andric                     const struct OperandSpecifier *op) {
863480093f4SDimitry Andric   uint8_t valid;
864480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "fixupReg()");
865480093f4SDimitry Andric 
866480093f4SDimitry Andric   switch ((OperandEncoding)op->encoding) {
867480093f4SDimitry Andric   default:
868480093f4SDimitry Andric     debug("Expected a REG or R/M encoding in fixupReg");
869480093f4SDimitry Andric     return -1;
870480093f4SDimitry Andric   case ENCODING_VVVV:
871480093f4SDimitry Andric     insn->vvvv =
872480093f4SDimitry Andric         (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
873480093f4SDimitry Andric     if (!valid)
874480093f4SDimitry Andric       return -1;
875480093f4SDimitry Andric     break;
876480093f4SDimitry Andric   case ENCODING_REG:
877480093f4SDimitry Andric     insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
878480093f4SDimitry Andric                                    insn->reg - insn->regBase, &valid);
879480093f4SDimitry Andric     if (!valid)
880480093f4SDimitry Andric       return -1;
881480093f4SDimitry Andric     break;
882480093f4SDimitry Andric   CASE_ENCODING_RM:
8835f757f3fSDimitry Andric     if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
8845f757f3fSDimitry Andric         modFromModRM(insn->modRM) == 3) {
8855f757f3fSDimitry Andric       // EVEX_X can extend the register id to 32 for a non-GPR register that is
8865f757f3fSDimitry Andric       // encoded in RM.
8875f757f3fSDimitry Andric       // mode : MODE_64_BIT
8885f757f3fSDimitry Andric       //  Only 8 vector registers are available in 32 bit mode
8895f757f3fSDimitry Andric       // mod : 3
8905f757f3fSDimitry Andric       //  RM encodes a register
8915f757f3fSDimitry Andric       switch (op->type) {
8925f757f3fSDimitry Andric       case TYPE_Rv:
8935f757f3fSDimitry Andric       case TYPE_R8:
8945f757f3fSDimitry Andric       case TYPE_R16:
8955f757f3fSDimitry Andric       case TYPE_R32:
8965f757f3fSDimitry Andric       case TYPE_R64:
8975f757f3fSDimitry Andric         break;
8985f757f3fSDimitry Andric       default:
8995f757f3fSDimitry Andric         insn->eaBase =
9005f757f3fSDimitry Andric             (EABase)(insn->eaBase +
9015f757f3fSDimitry Andric                      (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
9025f757f3fSDimitry Andric         break;
9035f757f3fSDimitry Andric       }
9045f757f3fSDimitry Andric     }
9055f757f3fSDimitry Andric     [[fallthrough]];
9065f757f3fSDimitry Andric   case ENCODING_SIB:
907480093f4SDimitry Andric     if (insn->eaBase >= insn->eaRegBase) {
908480093f4SDimitry Andric       insn->eaBase = (EABase)fixupRMValue(
909480093f4SDimitry Andric           insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
910480093f4SDimitry Andric       if (!valid)
911480093f4SDimitry Andric         return -1;
912480093f4SDimitry Andric     }
913480093f4SDimitry Andric     break;
914480093f4SDimitry Andric   }
915480093f4SDimitry Andric 
916480093f4SDimitry Andric   return 0;
917480093f4SDimitry Andric }
918480093f4SDimitry Andric 
919480093f4SDimitry Andric // Read the opcode (except the ModR/M byte in the case of extended or escape
920480093f4SDimitry Andric // opcodes).
readOpcode(struct InternalInstruction * insn)921480093f4SDimitry Andric static bool readOpcode(struct InternalInstruction *insn) {
922480093f4SDimitry Andric   uint8_t current;
923480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readOpcode()");
924480093f4SDimitry Andric 
925480093f4SDimitry Andric   insn->opcodeType = ONEBYTE;
926480093f4SDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX) {
927349cc55cSDimitry Andric     switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
928480093f4SDimitry Andric     default:
929480093f4SDimitry Andric       LLVM_DEBUG(
930349cc55cSDimitry Andric           dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
931349cc55cSDimitry Andric                            mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
932480093f4SDimitry Andric       return true;
933480093f4SDimitry Andric     case VEX_LOB_0F:
934480093f4SDimitry Andric       insn->opcodeType = TWOBYTE;
935480093f4SDimitry Andric       return consume(insn, insn->opcode);
936480093f4SDimitry Andric     case VEX_LOB_0F38:
937480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_38;
938480093f4SDimitry Andric       return consume(insn, insn->opcode);
939480093f4SDimitry Andric     case VEX_LOB_0F3A:
940480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_3A;
941480093f4SDimitry Andric       return consume(insn, insn->opcode);
9425f757f3fSDimitry Andric     case VEX_LOB_MAP4:
9435f757f3fSDimitry Andric       insn->opcodeType = MAP4;
9445f757f3fSDimitry Andric       return consume(insn, insn->opcode);
945349cc55cSDimitry Andric     case VEX_LOB_MAP5:
946349cc55cSDimitry Andric       insn->opcodeType = MAP5;
947349cc55cSDimitry Andric       return consume(insn, insn->opcode);
948349cc55cSDimitry Andric     case VEX_LOB_MAP6:
949349cc55cSDimitry Andric       insn->opcodeType = MAP6;
950349cc55cSDimitry Andric       return consume(insn, insn->opcode);
951*0fca6ea1SDimitry Andric     case VEX_LOB_MAP7:
952*0fca6ea1SDimitry Andric       insn->opcodeType = MAP7;
953*0fca6ea1SDimitry Andric       return consume(insn, insn->opcode);
954480093f4SDimitry Andric     }
955480093f4SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
956480093f4SDimitry Andric     switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
957480093f4SDimitry Andric     default:
958480093f4SDimitry Andric       LLVM_DEBUG(
959480093f4SDimitry Andric           dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
960480093f4SDimitry Andric                            mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
961480093f4SDimitry Andric       return true;
962480093f4SDimitry Andric     case VEX_LOB_0F:
963480093f4SDimitry Andric       insn->opcodeType = TWOBYTE;
964480093f4SDimitry Andric       return consume(insn, insn->opcode);
965480093f4SDimitry Andric     case VEX_LOB_0F38:
966480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_38;
967480093f4SDimitry Andric       return consume(insn, insn->opcode);
968480093f4SDimitry Andric     case VEX_LOB_0F3A:
969480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_3A;
970480093f4SDimitry Andric       return consume(insn, insn->opcode);
971349cc55cSDimitry Andric     case VEX_LOB_MAP5:
972349cc55cSDimitry Andric       insn->opcodeType = MAP5;
973349cc55cSDimitry Andric       return consume(insn, insn->opcode);
974349cc55cSDimitry Andric     case VEX_LOB_MAP6:
975349cc55cSDimitry Andric       insn->opcodeType = MAP6;
976349cc55cSDimitry Andric       return consume(insn, insn->opcode);
9775f757f3fSDimitry Andric     case VEX_LOB_MAP7:
9785f757f3fSDimitry Andric       insn->opcodeType = MAP7;
9795f757f3fSDimitry Andric       return consume(insn, insn->opcode);
980480093f4SDimitry Andric     }
981480093f4SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
982480093f4SDimitry Andric     insn->opcodeType = TWOBYTE;
983480093f4SDimitry Andric     return consume(insn, insn->opcode);
984480093f4SDimitry Andric   } else if (insn->vectorExtensionType == TYPE_XOP) {
985480093f4SDimitry Andric     switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
986480093f4SDimitry Andric     default:
987480093f4SDimitry Andric       LLVM_DEBUG(
988480093f4SDimitry Andric           dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
989480093f4SDimitry Andric                            mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
990480093f4SDimitry Andric       return true;
991480093f4SDimitry Andric     case XOP_MAP_SELECT_8:
992480093f4SDimitry Andric       insn->opcodeType = XOP8_MAP;
993480093f4SDimitry Andric       return consume(insn, insn->opcode);
994480093f4SDimitry Andric     case XOP_MAP_SELECT_9:
995480093f4SDimitry Andric       insn->opcodeType = XOP9_MAP;
996480093f4SDimitry Andric       return consume(insn, insn->opcode);
997480093f4SDimitry Andric     case XOP_MAP_SELECT_A:
998480093f4SDimitry Andric       insn->opcodeType = XOPA_MAP;
999480093f4SDimitry Andric       return consume(insn, insn->opcode);
1000480093f4SDimitry Andric     }
10015f757f3fSDimitry Andric   } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
10025f757f3fSDimitry Andric     // m bit indicates opcode map 1
10035f757f3fSDimitry Andric     insn->opcodeType = TWOBYTE;
10045f757f3fSDimitry Andric     return consume(insn, insn->opcode);
1005480093f4SDimitry Andric   }
1006480093f4SDimitry Andric 
1007480093f4SDimitry Andric   if (consume(insn, current))
1008480093f4SDimitry Andric     return true;
1009480093f4SDimitry Andric 
1010480093f4SDimitry Andric   if (current == 0x0f) {
1011480093f4SDimitry Andric     LLVM_DEBUG(
1012480093f4SDimitry Andric         dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1013480093f4SDimitry Andric     if (consume(insn, current))
1014480093f4SDimitry Andric       return true;
1015480093f4SDimitry Andric 
1016480093f4SDimitry Andric     if (current == 0x38) {
1017480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1018480093f4SDimitry Andric                                   current));
1019480093f4SDimitry Andric       if (consume(insn, current))
1020480093f4SDimitry Andric         return true;
1021480093f4SDimitry Andric 
1022480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_38;
1023480093f4SDimitry Andric     } else if (current == 0x3a) {
1024480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1025480093f4SDimitry Andric                                   current));
1026480093f4SDimitry Andric       if (consume(insn, current))
1027480093f4SDimitry Andric         return true;
1028480093f4SDimitry Andric 
1029480093f4SDimitry Andric       insn->opcodeType = THREEBYTE_3A;
1030480093f4SDimitry Andric     } else if (current == 0x0f) {
1031480093f4SDimitry Andric       LLVM_DEBUG(
1032480093f4SDimitry Andric           dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1033480093f4SDimitry Andric 
1034480093f4SDimitry Andric       // Consume operands before the opcode to comply with the 3DNow encoding
1035480093f4SDimitry Andric       if (readModRM(insn))
1036480093f4SDimitry Andric         return true;
1037480093f4SDimitry Andric 
1038480093f4SDimitry Andric       if (consume(insn, current))
1039480093f4SDimitry Andric         return true;
1040480093f4SDimitry Andric 
1041480093f4SDimitry Andric       insn->opcodeType = THREEDNOW_MAP;
1042480093f4SDimitry Andric     } else {
1043480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1044480093f4SDimitry Andric       insn->opcodeType = TWOBYTE;
1045480093f4SDimitry Andric     }
1046480093f4SDimitry Andric   } else if (insn->mandatoryPrefix)
1047480093f4SDimitry Andric     // The opcode with mandatory prefix must start with opcode escape.
1048480093f4SDimitry Andric     // If not it's legacy repeat prefix
1049480093f4SDimitry Andric     insn->mandatoryPrefix = 0;
1050480093f4SDimitry Andric 
1051480093f4SDimitry Andric   // At this point we have consumed the full opcode.
1052480093f4SDimitry Andric   // Anything we consume from here on must be unconsumed.
1053480093f4SDimitry Andric   insn->opcode = current;
1054480093f4SDimitry Andric 
1055480093f4SDimitry Andric   return false;
1056480093f4SDimitry Andric }
1057480093f4SDimitry Andric 
// Decide whether the name equiv is the 16-bit spelling of the name orig.
//
// The two names are walked in lock step and must have the same length. At each
// position the characters must either be equal or be one of the accepted
// substitutions:
//   'Q' or 'L' in orig  ->  'W' in equiv
//   '6' or '3' in orig  ->  '1' in equiv
//   '4' or '2' in orig  ->  '6' in equiv
//
// @param orig  - NUL-terminated name of the 32-bit or 64-bit instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if every position matches or is an accepted substitution.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  while (*wide != '\0' && *narrow != '\0') {
    const char w = *wide++;
    const char n = *narrow++;
    if (w == n)
      continue;

    const bool sizeSuffix = n == 'W' && (w == 'Q' || w == 'L');
    const bool firstDigit = n == '1' && (w == '6' || w == '3');
    const bool secondDigit = n == '6' && (w == '4' || w == '2');
    if (!(sizeSuffix || firstDigit || secondDigit))
      return false;
  }

  // Equivalent only if both names ran out at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1076480093f4SDimitry Andric 
// Report whether an instruction name marks a 64-bit instruction, i.e. whether
// the characters "64" appear back to back anywhere in the name.
//
// @param name - NUL-terminated instruction name.
// @return     - true if "64" occurs in name; false otherwise.
static bool is64Bit(const char *name) {
  for (const char *cursor = name; *cursor != '\0'; ++cursor) {
    // cursor[1] is only inspected after cursor[0] matched '6', so the read
    // never goes past the terminating NUL.
    if (cursor[0] == '6' && cursor[1] == '4')
      return true;
  }
  return false;
}
1086480093f4SDimitry Andric 
1087480093f4SDimitry Andric // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1088480093f4SDimitry Andric // for extended and escape opcodes, and using a supplied attribute mask.
getInstructionIDWithAttrMask(uint16_t * instructionID,struct InternalInstruction * insn,uint16_t attrMask)1089480093f4SDimitry Andric static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1090480093f4SDimitry Andric                                         struct InternalInstruction *insn,
1091480093f4SDimitry Andric                                         uint16_t attrMask) {
1092480093f4SDimitry Andric   auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1093480093f4SDimitry Andric   const ContextDecision *decision;
1094480093f4SDimitry Andric   switch (insn->opcodeType) {
1095480093f4SDimitry Andric   case ONEBYTE:
1096480093f4SDimitry Andric     decision = &ONEBYTE_SYM;
1097480093f4SDimitry Andric     break;
1098480093f4SDimitry Andric   case TWOBYTE:
1099480093f4SDimitry Andric     decision = &TWOBYTE_SYM;
1100480093f4SDimitry Andric     break;
1101480093f4SDimitry Andric   case THREEBYTE_38:
1102480093f4SDimitry Andric     decision = &THREEBYTE38_SYM;
1103480093f4SDimitry Andric     break;
1104480093f4SDimitry Andric   case THREEBYTE_3A:
1105480093f4SDimitry Andric     decision = &THREEBYTE3A_SYM;
1106480093f4SDimitry Andric     break;
1107480093f4SDimitry Andric   case XOP8_MAP:
1108480093f4SDimitry Andric     decision = &XOP8_MAP_SYM;
1109480093f4SDimitry Andric     break;
1110480093f4SDimitry Andric   case XOP9_MAP:
1111480093f4SDimitry Andric     decision = &XOP9_MAP_SYM;
1112480093f4SDimitry Andric     break;
1113480093f4SDimitry Andric   case XOPA_MAP:
1114480093f4SDimitry Andric     decision = &XOPA_MAP_SYM;
1115480093f4SDimitry Andric     break;
1116480093f4SDimitry Andric   case THREEDNOW_MAP:
1117480093f4SDimitry Andric     decision = &THREEDNOW_MAP_SYM;
1118480093f4SDimitry Andric     break;
11195f757f3fSDimitry Andric   case MAP4:
11205f757f3fSDimitry Andric     decision = &MAP4_SYM;
11215f757f3fSDimitry Andric     break;
1122349cc55cSDimitry Andric   case MAP5:
1123349cc55cSDimitry Andric     decision = &MAP5_SYM;
1124349cc55cSDimitry Andric     break;
1125349cc55cSDimitry Andric   case MAP6:
1126349cc55cSDimitry Andric     decision = &MAP6_SYM;
1127349cc55cSDimitry Andric     break;
11285f757f3fSDimitry Andric   case MAP7:
11295f757f3fSDimitry Andric     decision = &MAP7_SYM;
11305f757f3fSDimitry Andric     break;
1131480093f4SDimitry Andric   }
1132480093f4SDimitry Andric 
1133480093f4SDimitry Andric   if (decision->opcodeDecisions[insnCtx]
1134480093f4SDimitry Andric           .modRMDecisions[insn->opcode]
1135480093f4SDimitry Andric           .modrm_type != MODRM_ONEENTRY) {
1136480093f4SDimitry Andric     if (readModRM(insn))
1137480093f4SDimitry Andric       return -1;
1138480093f4SDimitry Andric     *instructionID =
1139480093f4SDimitry Andric         decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1140480093f4SDimitry Andric   } else {
1141480093f4SDimitry Andric     *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1142480093f4SDimitry Andric   }
1143480093f4SDimitry Andric 
1144480093f4SDimitry Andric   return 0;
1145480093f4SDimitry Andric }
1146480093f4SDimitry Andric 
isCCMPOrCTEST(InternalInstruction * insn)1147*0fca6ea1SDimitry Andric static bool isCCMPOrCTEST(InternalInstruction *insn) {
1148*0fca6ea1SDimitry Andric   if (insn->opcodeType != MAP4)
1149*0fca6ea1SDimitry Andric     return false;
1150*0fca6ea1SDimitry Andric   if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)
1151*0fca6ea1SDimitry Andric     return true;
1152*0fca6ea1SDimitry Andric   switch (insn->opcode & 0xfe) {
1153*0fca6ea1SDimitry Andric   default:
1154*0fca6ea1SDimitry Andric     return false;
1155*0fca6ea1SDimitry Andric   case 0x38:
1156*0fca6ea1SDimitry Andric   case 0x3a:
1157*0fca6ea1SDimitry Andric   case 0x84:
1158*0fca6ea1SDimitry Andric     return true;
1159*0fca6ea1SDimitry Andric   case 0x80:
1160*0fca6ea1SDimitry Andric     return regFromModRM(insn->modRM) == 7;
1161*0fca6ea1SDimitry Andric   case 0xf6:
1162*0fca6ea1SDimitry Andric     return regFromModRM(insn->modRM) == 0;
1163*0fca6ea1SDimitry Andric   }
1164*0fca6ea1SDimitry Andric }
1165*0fca6ea1SDimitry Andric 
isNF(InternalInstruction * insn)1166*0fca6ea1SDimitry Andric static bool isNF(InternalInstruction *insn) {
1167*0fca6ea1SDimitry Andric   if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1168*0fca6ea1SDimitry Andric     return false;
1169*0fca6ea1SDimitry Andric   if (insn->opcodeType == MAP4)
1170*0fca6ea1SDimitry Andric     return true;
1171*0fca6ea1SDimitry Andric   // Below NF instructions are not in map4.
1172*0fca6ea1SDimitry Andric   if (insn->opcodeType == THREEBYTE_38 &&
1173*0fca6ea1SDimitry Andric       ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {
1174*0fca6ea1SDimitry Andric     switch (insn->opcode) {
1175*0fca6ea1SDimitry Andric     case 0xf2: // ANDN
1176*0fca6ea1SDimitry Andric     case 0xf3: // BLSI, BLSR, BLSMSK
1177*0fca6ea1SDimitry Andric     case 0xf5: // BZHI
1178*0fca6ea1SDimitry Andric     case 0xf7: // BEXTR
1179*0fca6ea1SDimitry Andric       return true;
1180*0fca6ea1SDimitry Andric     default:
1181*0fca6ea1SDimitry Andric       break;
1182*0fca6ea1SDimitry Andric     }
1183*0fca6ea1SDimitry Andric   }
1184*0fca6ea1SDimitry Andric   return false;
1185*0fca6ea1SDimitry Andric }
1186*0fca6ea1SDimitry Andric 
1187480093f4SDimitry Andric // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1188480093f4SDimitry Andric // for extended and escape opcodes. Determines the attributes and context for
1189480093f4SDimitry Andric // the instruction before doing so.
getInstructionID(struct InternalInstruction * insn,const MCInstrInfo * mii)1190480093f4SDimitry Andric static int getInstructionID(struct InternalInstruction *insn,
1191480093f4SDimitry Andric                             const MCInstrInfo *mii) {
1192480093f4SDimitry Andric   uint16_t attrMask;
1193480093f4SDimitry Andric   uint16_t instructionID;
1194480093f4SDimitry Andric 
1195480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "getID()");
1196480093f4SDimitry Andric 
1197480093f4SDimitry Andric   attrMask = ATTR_NONE;
1198480093f4SDimitry Andric 
1199480093f4SDimitry Andric   if (insn->mode == MODE_64BIT)
1200480093f4SDimitry Andric     attrMask |= ATTR_64BIT;
1201480093f4SDimitry Andric 
1202480093f4SDimitry Andric   if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1203480093f4SDimitry Andric     attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1204480093f4SDimitry Andric 
1205480093f4SDimitry Andric     if (insn->vectorExtensionType == TYPE_EVEX) {
1206480093f4SDimitry Andric       switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1207480093f4SDimitry Andric       case VEX_PREFIX_66:
1208480093f4SDimitry Andric         attrMask |= ATTR_OPSIZE;
1209480093f4SDimitry Andric         break;
1210480093f4SDimitry Andric       case VEX_PREFIX_F3:
1211480093f4SDimitry Andric         attrMask |= ATTR_XS;
1212480093f4SDimitry Andric         break;
1213480093f4SDimitry Andric       case VEX_PREFIX_F2:
1214480093f4SDimitry Andric         attrMask |= ATTR_XD;
1215480093f4SDimitry Andric         break;
1216480093f4SDimitry Andric       }
1217480093f4SDimitry Andric 
1218480093f4SDimitry Andric       if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1219480093f4SDimitry Andric         attrMask |= ATTR_EVEXKZ;
1220480093f4SDimitry Andric       if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1221480093f4SDimitry Andric         attrMask |= ATTR_EVEXB;
1222*0fca6ea1SDimitry Andric       if (isNF(insn) && !readModRM(insn) &&
1223*0fca6ea1SDimitry Andric           !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
1224647cbc5dSDimitry Andric         attrMask |= ATTR_EVEXNF;
1225*0fca6ea1SDimitry Andric       // aaa is not used a opmask in MAP4
1226*0fca6ea1SDimitry Andric       else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
1227*0fca6ea1SDimitry Andric                (insn->opcodeType != MAP4))
1228480093f4SDimitry Andric         attrMask |= ATTR_EVEXK;
1229480093f4SDimitry Andric       if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1230480093f4SDimitry Andric         attrMask |= ATTR_VEXL;
1231480093f4SDimitry Andric       if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1232480093f4SDimitry Andric         attrMask |= ATTR_EVEXL2;
1233480093f4SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1234480093f4SDimitry Andric       switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1235480093f4SDimitry Andric       case VEX_PREFIX_66:
1236480093f4SDimitry Andric         attrMask |= ATTR_OPSIZE;
1237480093f4SDimitry Andric         break;
1238480093f4SDimitry Andric       case VEX_PREFIX_F3:
1239480093f4SDimitry Andric         attrMask |= ATTR_XS;
1240480093f4SDimitry Andric         break;
1241480093f4SDimitry Andric       case VEX_PREFIX_F2:
1242480093f4SDimitry Andric         attrMask |= ATTR_XD;
1243480093f4SDimitry Andric         break;
1244480093f4SDimitry Andric       }
1245480093f4SDimitry Andric 
1246480093f4SDimitry Andric       if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1247480093f4SDimitry Andric         attrMask |= ATTR_VEXL;
1248480093f4SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1249480093f4SDimitry Andric       switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1250480093f4SDimitry Andric       case VEX_PREFIX_66:
1251480093f4SDimitry Andric         attrMask |= ATTR_OPSIZE;
1252fe6060f1SDimitry Andric         if (insn->hasAdSize)
1253fe6060f1SDimitry Andric           attrMask |= ATTR_ADSIZE;
1254480093f4SDimitry Andric         break;
1255480093f4SDimitry Andric       case VEX_PREFIX_F3:
1256480093f4SDimitry Andric         attrMask |= ATTR_XS;
1257480093f4SDimitry Andric         break;
1258480093f4SDimitry Andric       case VEX_PREFIX_F2:
1259480093f4SDimitry Andric         attrMask |= ATTR_XD;
1260480093f4SDimitry Andric         break;
1261480093f4SDimitry Andric       }
1262480093f4SDimitry Andric 
1263480093f4SDimitry Andric       if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1264480093f4SDimitry Andric         attrMask |= ATTR_VEXL;
1265480093f4SDimitry Andric     } else if (insn->vectorExtensionType == TYPE_XOP) {
1266480093f4SDimitry Andric       switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1267480093f4SDimitry Andric       case VEX_PREFIX_66:
1268480093f4SDimitry Andric         attrMask |= ATTR_OPSIZE;
1269480093f4SDimitry Andric         break;
1270480093f4SDimitry Andric       case VEX_PREFIX_F3:
1271480093f4SDimitry Andric         attrMask |= ATTR_XS;
1272480093f4SDimitry Andric         break;
1273480093f4SDimitry Andric       case VEX_PREFIX_F2:
1274480093f4SDimitry Andric         attrMask |= ATTR_XD;
1275480093f4SDimitry Andric         break;
1276480093f4SDimitry Andric       }
1277480093f4SDimitry Andric 
1278480093f4SDimitry Andric       if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1279480093f4SDimitry Andric         attrMask |= ATTR_VEXL;
1280480093f4SDimitry Andric     } else {
1281480093f4SDimitry Andric       return -1;
1282480093f4SDimitry Andric     }
1283480093f4SDimitry Andric   } else if (!insn->mandatoryPrefix) {
1284480093f4SDimitry Andric     // If we don't have mandatory prefix we should use legacy prefixes here
1285480093f4SDimitry Andric     if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1286480093f4SDimitry Andric       attrMask |= ATTR_OPSIZE;
1287480093f4SDimitry Andric     if (insn->hasAdSize)
1288480093f4SDimitry Andric       attrMask |= ATTR_ADSIZE;
1289480093f4SDimitry Andric     if (insn->opcodeType == ONEBYTE) {
1290480093f4SDimitry Andric       if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1291480093f4SDimitry Andric         // Special support for PAUSE
1292480093f4SDimitry Andric         attrMask |= ATTR_XS;
1293480093f4SDimitry Andric     } else {
1294480093f4SDimitry Andric       if (insn->repeatPrefix == 0xf2)
1295480093f4SDimitry Andric         attrMask |= ATTR_XD;
1296480093f4SDimitry Andric       else if (insn->repeatPrefix == 0xf3)
1297480093f4SDimitry Andric         attrMask |= ATTR_XS;
1298480093f4SDimitry Andric     }
1299480093f4SDimitry Andric   } else {
1300480093f4SDimitry Andric     switch (insn->mandatoryPrefix) {
1301480093f4SDimitry Andric     case 0xf2:
1302480093f4SDimitry Andric       attrMask |= ATTR_XD;
1303480093f4SDimitry Andric       break;
1304480093f4SDimitry Andric     case 0xf3:
1305480093f4SDimitry Andric       attrMask |= ATTR_XS;
1306480093f4SDimitry Andric       break;
1307480093f4SDimitry Andric     case 0x66:
1308480093f4SDimitry Andric       if (insn->mode != MODE_16BIT)
1309480093f4SDimitry Andric         attrMask |= ATTR_OPSIZE;
1310fe6060f1SDimitry Andric       if (insn->hasAdSize)
1311fe6060f1SDimitry Andric         attrMask |= ATTR_ADSIZE;
1312480093f4SDimitry Andric       break;
1313480093f4SDimitry Andric     case 0x67:
1314480093f4SDimitry Andric       attrMask |= ATTR_ADSIZE;
1315480093f4SDimitry Andric       break;
1316480093f4SDimitry Andric     }
1317480093f4SDimitry Andric   }
1318480093f4SDimitry Andric 
1319480093f4SDimitry Andric   if (insn->rexPrefix & 0x08) {
1320480093f4SDimitry Andric     attrMask |= ATTR_REXW;
1321480093f4SDimitry Andric     attrMask &= ~ATTR_ADSIZE;
1322480093f4SDimitry Andric   }
1323480093f4SDimitry Andric 
13245f757f3fSDimitry Andric   // Absolute jump and pushp/popp need special handling
13255f757f3fSDimitry Andric   if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
13265f757f3fSDimitry Andric       (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
13275f757f3fSDimitry Andric     attrMask |= ATTR_REX2;
13285f757f3fSDimitry Andric 
1329480093f4SDimitry Andric   if (insn->mode == MODE_16BIT) {
1330480093f4SDimitry Andric     // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1331480093f4SDimitry Andric     // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1332480093f4SDimitry Andric     if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1333480093f4SDimitry Andric       attrMask ^= ATTR_ADSIZE;
1334480093f4SDimitry Andric     // If we're in 16-bit mode and this is one of the relative jumps and opsize
1335480093f4SDimitry Andric     // prefix isn't present, we need to force the opsize attribute since the
1336480093f4SDimitry Andric     // prefix is inverted relative to 32-bit mode.
1337480093f4SDimitry Andric     if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1338480093f4SDimitry Andric         (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1339480093f4SDimitry Andric       attrMask |= ATTR_OPSIZE;
1340480093f4SDimitry Andric 
1341480093f4SDimitry Andric     if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1342480093f4SDimitry Andric         insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1343480093f4SDimitry Andric       attrMask |= ATTR_OPSIZE;
1344480093f4SDimitry Andric   }
1345480093f4SDimitry Andric 
1346480093f4SDimitry Andric 
1347480093f4SDimitry Andric   if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1348480093f4SDimitry Andric     return -1;
1349480093f4SDimitry Andric 
1350480093f4SDimitry Andric   // The following clauses compensate for limitations of the tables.
1351480093f4SDimitry Andric 
1352480093f4SDimitry Andric   if (insn->mode != MODE_64BIT &&
1353480093f4SDimitry Andric       insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1354480093f4SDimitry Andric     // The tables can't distinquish between cases where the W-bit is used to
1355480093f4SDimitry Andric     // select register size and cases where its a required part of the opcode.
1356480093f4SDimitry Andric     if ((insn->vectorExtensionType == TYPE_EVEX &&
1357480093f4SDimitry Andric          wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1358480093f4SDimitry Andric         (insn->vectorExtensionType == TYPE_VEX_3B &&
1359480093f4SDimitry Andric          wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1360480093f4SDimitry Andric         (insn->vectorExtensionType == TYPE_XOP &&
1361480093f4SDimitry Andric          wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1362480093f4SDimitry Andric 
1363480093f4SDimitry Andric       uint16_t instructionIDWithREXW;
1364480093f4SDimitry Andric       if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1365480093f4SDimitry Andric                                        attrMask | ATTR_REXW)) {
1366480093f4SDimitry Andric         insn->instructionID = instructionID;
1367480093f4SDimitry Andric         insn->spec = &INSTRUCTIONS_SYM[instructionID];
1368480093f4SDimitry Andric         return 0;
1369480093f4SDimitry Andric       }
1370480093f4SDimitry Andric 
1371480093f4SDimitry Andric       auto SpecName = mii->getName(instructionIDWithREXW);
1372480093f4SDimitry Andric       // If not a 64-bit instruction. Switch the opcode.
1373480093f4SDimitry Andric       if (!is64Bit(SpecName.data())) {
1374480093f4SDimitry Andric         insn->instructionID = instructionIDWithREXW;
1375480093f4SDimitry Andric         insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1376480093f4SDimitry Andric         return 0;
1377480093f4SDimitry Andric       }
1378480093f4SDimitry Andric     }
1379480093f4SDimitry Andric   }
1380480093f4SDimitry Andric 
1381480093f4SDimitry Andric   // Absolute moves, umonitor, and movdir64b need special handling.
1382480093f4SDimitry Andric   // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1383480093f4SDimitry Andric   //  inverted w.r.t.
1384480093f4SDimitry Andric   // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1385480093f4SDimitry Andric   //  any position.
1386480093f4SDimitry Andric   if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1387480093f4SDimitry Andric       (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
13885f757f3fSDimitry Andric       (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
13895f757f3fSDimitry Andric       (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
1390480093f4SDimitry Andric     // Make sure we observed the prefixes in any position.
1391480093f4SDimitry Andric     if (insn->hasAdSize)
1392480093f4SDimitry Andric       attrMask |= ATTR_ADSIZE;
1393480093f4SDimitry Andric     if (insn->hasOpSize)
1394480093f4SDimitry Andric       attrMask |= ATTR_OPSIZE;
1395480093f4SDimitry Andric 
1396480093f4SDimitry Andric     // In 16-bit, invert the attributes.
1397480093f4SDimitry Andric     if (insn->mode == MODE_16BIT) {
1398480093f4SDimitry Andric       attrMask ^= ATTR_ADSIZE;
1399480093f4SDimitry Andric 
1400480093f4SDimitry Andric       // The OpSize attribute is only valid with the absolute moves.
1401480093f4SDimitry Andric       if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1402480093f4SDimitry Andric         attrMask ^= ATTR_OPSIZE;
1403480093f4SDimitry Andric     }
1404480093f4SDimitry Andric 
1405480093f4SDimitry Andric     if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1406480093f4SDimitry Andric       return -1;
1407480093f4SDimitry Andric 
1408480093f4SDimitry Andric     insn->instructionID = instructionID;
1409480093f4SDimitry Andric     insn->spec = &INSTRUCTIONS_SYM[instructionID];
1410480093f4SDimitry Andric     return 0;
1411480093f4SDimitry Andric   }
1412480093f4SDimitry Andric 
1413480093f4SDimitry Andric   if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1414480093f4SDimitry Andric       !(attrMask & ATTR_OPSIZE)) {
1415480093f4SDimitry Andric     // The instruction tables make no distinction between instructions that
1416480093f4SDimitry Andric     // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1417480093f4SDimitry Andric     // particular spot (i.e., many MMX operations). In general we're
1418480093f4SDimitry Andric     // conservative, but in the specific case where OpSize is present but not in
1419480093f4SDimitry Andric     // the right place we check if there's a 16-bit operation.
1420480093f4SDimitry Andric     const struct InstructionSpecifier *spec;
1421480093f4SDimitry Andric     uint16_t instructionIDWithOpsize;
1422480093f4SDimitry Andric     llvm::StringRef specName, specWithOpSizeName;
1423480093f4SDimitry Andric 
1424480093f4SDimitry Andric     spec = &INSTRUCTIONS_SYM[instructionID];
1425480093f4SDimitry Andric 
1426480093f4SDimitry Andric     if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1427480093f4SDimitry Andric                                      attrMask | ATTR_OPSIZE)) {
1428480093f4SDimitry Andric       // ModRM required with OpSize but not present. Give up and return the
1429480093f4SDimitry Andric       // version without OpSize set.
1430480093f4SDimitry Andric       insn->instructionID = instructionID;
1431480093f4SDimitry Andric       insn->spec = spec;
1432480093f4SDimitry Andric       return 0;
1433480093f4SDimitry Andric     }
1434480093f4SDimitry Andric 
1435480093f4SDimitry Andric     specName = mii->getName(instructionID);
1436480093f4SDimitry Andric     specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1437480093f4SDimitry Andric 
1438480093f4SDimitry Andric     if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1439480093f4SDimitry Andric         (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1440480093f4SDimitry Andric       insn->instructionID = instructionIDWithOpsize;
1441480093f4SDimitry Andric       insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1442480093f4SDimitry Andric     } else {
1443480093f4SDimitry Andric       insn->instructionID = instructionID;
1444480093f4SDimitry Andric       insn->spec = spec;
1445480093f4SDimitry Andric     }
1446480093f4SDimitry Andric     return 0;
1447480093f4SDimitry Andric   }
1448480093f4SDimitry Andric 
1449480093f4SDimitry Andric   if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1450480093f4SDimitry Andric       insn->rexPrefix & 0x01) {
1451480093f4SDimitry Andric     // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1452480093f4SDimitry Andric     // as XCHG %r8, %eax.
1453480093f4SDimitry Andric     const struct InstructionSpecifier *spec;
1454480093f4SDimitry Andric     uint16_t instructionIDWithNewOpcode;
1455480093f4SDimitry Andric     const struct InstructionSpecifier *specWithNewOpcode;
1456480093f4SDimitry Andric 
1457480093f4SDimitry Andric     spec = &INSTRUCTIONS_SYM[instructionID];
1458480093f4SDimitry Andric 
1459480093f4SDimitry Andric     // Borrow opcode from one of the other XCHGar opcodes
1460480093f4SDimitry Andric     insn->opcode = 0x91;
1461480093f4SDimitry Andric 
1462480093f4SDimitry Andric     if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1463480093f4SDimitry Andric                                      attrMask)) {
1464480093f4SDimitry Andric       insn->opcode = 0x90;
1465480093f4SDimitry Andric 
1466480093f4SDimitry Andric       insn->instructionID = instructionID;
1467480093f4SDimitry Andric       insn->spec = spec;
1468480093f4SDimitry Andric       return 0;
1469480093f4SDimitry Andric     }
1470480093f4SDimitry Andric 
1471480093f4SDimitry Andric     specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1472480093f4SDimitry Andric 
1473480093f4SDimitry Andric     // Change back
1474480093f4SDimitry Andric     insn->opcode = 0x90;
1475480093f4SDimitry Andric 
1476480093f4SDimitry Andric     insn->instructionID = instructionIDWithNewOpcode;
1477480093f4SDimitry Andric     insn->spec = specWithNewOpcode;
1478480093f4SDimitry Andric 
1479480093f4SDimitry Andric     return 0;
1480480093f4SDimitry Andric   }
1481480093f4SDimitry Andric 
1482480093f4SDimitry Andric   insn->instructionID = instructionID;
1483480093f4SDimitry Andric   insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1484480093f4SDimitry Andric 
1485480093f4SDimitry Andric   return 0;
1486480093f4SDimitry Andric }
1487480093f4SDimitry Andric 
1488480093f4SDimitry Andric // Read an operand from the opcode field of an instruction and interprets it
1489480093f4SDimitry Andric // appropriately given the operand width. Handles AddRegFrm instructions.
1490480093f4SDimitry Andric //
1491480093f4SDimitry Andric // @param insn  - the instruction whose opcode field is to be read.
1492480093f4SDimitry Andric // @param size  - The width (in bytes) of the register being specified.
1493480093f4SDimitry Andric //                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1494480093f4SDimitry Andric //                RAX.
1495480093f4SDimitry Andric // @return      - 0 on success; nonzero otherwise.
readOpcodeRegister(struct InternalInstruction * insn,uint8_t size)1496480093f4SDimitry Andric static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1497480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1498480093f4SDimitry Andric 
1499480093f4SDimitry Andric   if (size == 0)
1500480093f4SDimitry Andric     size = insn->registerSize;
1501480093f4SDimitry Andric 
15025f757f3fSDimitry Andric   auto setOpcodeRegister = [&](unsigned base) {
15035f757f3fSDimitry Andric     insn->opcodeRegister =
15045f757f3fSDimitry Andric         (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
15055f757f3fSDimitry Andric                       (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
15065f757f3fSDimitry Andric                       (insn->opcode & 7)));
15075f757f3fSDimitry Andric   };
15085f757f3fSDimitry Andric 
1509480093f4SDimitry Andric   switch (size) {
1510480093f4SDimitry Andric   case 1:
15115f757f3fSDimitry Andric     setOpcodeRegister(MODRM_REG_AL);
1512480093f4SDimitry Andric     if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1513480093f4SDimitry Andric         insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1514480093f4SDimitry Andric       insn->opcodeRegister =
1515480093f4SDimitry Andric           (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1516480093f4SDimitry Andric     }
1517480093f4SDimitry Andric 
1518480093f4SDimitry Andric     break;
1519480093f4SDimitry Andric   case 2:
15205f757f3fSDimitry Andric     setOpcodeRegister(MODRM_REG_AX);
1521480093f4SDimitry Andric     break;
1522480093f4SDimitry Andric   case 4:
15235f757f3fSDimitry Andric     setOpcodeRegister(MODRM_REG_EAX);
1524480093f4SDimitry Andric     break;
1525480093f4SDimitry Andric   case 8:
15265f757f3fSDimitry Andric     setOpcodeRegister(MODRM_REG_RAX);
1527480093f4SDimitry Andric     break;
1528480093f4SDimitry Andric   }
1529480093f4SDimitry Andric 
1530480093f4SDimitry Andric   return 0;
1531480093f4SDimitry Andric }
1532480093f4SDimitry Andric 
1533480093f4SDimitry Andric // Consume an immediate operand from an instruction, given the desired operand
1534480093f4SDimitry Andric // size.
1535480093f4SDimitry Andric //
1536480093f4SDimitry Andric // @param insn  - The instruction whose operand is to be read.
1537480093f4SDimitry Andric // @param size  - The width (in bytes) of the operand.
1538480093f4SDimitry Andric // @return      - 0 if the immediate was successfully consumed; nonzero
1539480093f4SDimitry Andric //                otherwise.
readImmediate(struct InternalInstruction * insn,uint8_t size)1540480093f4SDimitry Andric static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1541480093f4SDimitry Andric   uint8_t imm8;
1542480093f4SDimitry Andric   uint16_t imm16;
1543480093f4SDimitry Andric   uint32_t imm32;
1544480093f4SDimitry Andric   uint64_t imm64;
1545480093f4SDimitry Andric 
1546480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readImmediate()");
1547480093f4SDimitry Andric 
1548480093f4SDimitry Andric   assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1549480093f4SDimitry Andric 
1550480093f4SDimitry Andric   insn->immediateSize = size;
1551480093f4SDimitry Andric   insn->immediateOffset = insn->readerCursor - insn->startLocation;
1552480093f4SDimitry Andric 
1553480093f4SDimitry Andric   switch (size) {
1554480093f4SDimitry Andric   case 1:
1555480093f4SDimitry Andric     if (consume(insn, imm8))
1556480093f4SDimitry Andric       return -1;
1557480093f4SDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm8;
1558480093f4SDimitry Andric     break;
1559480093f4SDimitry Andric   case 2:
1560480093f4SDimitry Andric     if (consume(insn, imm16))
1561480093f4SDimitry Andric       return -1;
1562480093f4SDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm16;
1563480093f4SDimitry Andric     break;
1564480093f4SDimitry Andric   case 4:
1565480093f4SDimitry Andric     if (consume(insn, imm32))
1566480093f4SDimitry Andric       return -1;
1567480093f4SDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm32;
1568480093f4SDimitry Andric     break;
1569480093f4SDimitry Andric   case 8:
1570480093f4SDimitry Andric     if (consume(insn, imm64))
1571480093f4SDimitry Andric       return -1;
1572480093f4SDimitry Andric     insn->immediates[insn->numImmediatesConsumed] = imm64;
1573480093f4SDimitry Andric     break;
1574480093f4SDimitry Andric   default:
1575480093f4SDimitry Andric     llvm_unreachable("invalid size");
1576480093f4SDimitry Andric   }
1577480093f4SDimitry Andric 
1578480093f4SDimitry Andric   insn->numImmediatesConsumed++;
1579480093f4SDimitry Andric 
1580480093f4SDimitry Andric   return 0;
1581480093f4SDimitry Andric }
1582480093f4SDimitry Andric 
1583480093f4SDimitry Andric // Consume vvvv from an instruction if it has a VEX prefix.
readVVVV(struct InternalInstruction * insn)1584480093f4SDimitry Andric static int readVVVV(struct InternalInstruction *insn) {
1585480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readVVVV()");
1586480093f4SDimitry Andric 
1587480093f4SDimitry Andric   int vvvv;
1588480093f4SDimitry Andric   if (insn->vectorExtensionType == TYPE_EVEX)
1589480093f4SDimitry Andric     vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1590480093f4SDimitry Andric             vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1591480093f4SDimitry Andric   else if (insn->vectorExtensionType == TYPE_VEX_3B)
1592480093f4SDimitry Andric     vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1593480093f4SDimitry Andric   else if (insn->vectorExtensionType == TYPE_VEX_2B)
1594480093f4SDimitry Andric     vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1595480093f4SDimitry Andric   else if (insn->vectorExtensionType == TYPE_XOP)
1596480093f4SDimitry Andric     vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1597480093f4SDimitry Andric   else
1598480093f4SDimitry Andric     return -1;
1599480093f4SDimitry Andric 
1600480093f4SDimitry Andric   if (insn->mode != MODE_64BIT)
1601480093f4SDimitry Andric     vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1602480093f4SDimitry Andric 
1603480093f4SDimitry Andric   insn->vvvv = static_cast<Reg>(vvvv);
1604480093f4SDimitry Andric   return 0;
1605480093f4SDimitry Andric }
1606480093f4SDimitry Andric 
1607480093f4SDimitry Andric // Read an mask register from the opcode field of an instruction.
1608480093f4SDimitry Andric //
1609480093f4SDimitry Andric // @param insn    - The instruction whose opcode field is to be read.
1610480093f4SDimitry Andric // @return        - 0 on success; nonzero otherwise.
readMaskRegister(struct InternalInstruction * insn)1611480093f4SDimitry Andric static int readMaskRegister(struct InternalInstruction *insn) {
1612480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readMaskRegister()");
1613480093f4SDimitry Andric 
1614480093f4SDimitry Andric   if (insn->vectorExtensionType != TYPE_EVEX)
1615480093f4SDimitry Andric     return -1;
1616480093f4SDimitry Andric 
1617480093f4SDimitry Andric   insn->writemask =
1618480093f4SDimitry Andric       static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1619480093f4SDimitry Andric   return 0;
1620480093f4SDimitry Andric }
1621480093f4SDimitry Andric 
1622480093f4SDimitry Andric // Consults the specifier for an instruction and consumes all
1623480093f4SDimitry Andric // operands for that instruction, interpreting them as it goes.
readOperands(struct InternalInstruction * insn)1624480093f4SDimitry Andric static int readOperands(struct InternalInstruction *insn) {
1625480093f4SDimitry Andric   int hasVVVV, needVVVV;
1626480093f4SDimitry Andric   int sawRegImm = 0;
1627480093f4SDimitry Andric 
1628480093f4SDimitry Andric   LLVM_DEBUG(dbgs() << "readOperands()");
1629480093f4SDimitry Andric 
1630480093f4SDimitry Andric   // If non-zero vvvv specified, make sure one of the operands uses it.
1631480093f4SDimitry Andric   hasVVVV = !readVVVV(insn);
1632480093f4SDimitry Andric   needVVVV = hasVVVV && (insn->vvvv != 0);
1633480093f4SDimitry Andric 
1634480093f4SDimitry Andric   for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1635480093f4SDimitry Andric     switch (Op.encoding) {
1636480093f4SDimitry Andric     case ENCODING_NONE:
1637480093f4SDimitry Andric     case ENCODING_SI:
1638480093f4SDimitry Andric     case ENCODING_DI:
1639480093f4SDimitry Andric       break;
1640480093f4SDimitry Andric     CASE_ENCODING_VSIB:
1641480093f4SDimitry Andric       // VSIB can use the V2 bit so check only the other bits.
1642480093f4SDimitry Andric       if (needVVVV)
1643480093f4SDimitry Andric         needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1644480093f4SDimitry Andric       if (readModRM(insn))
1645480093f4SDimitry Andric         return -1;
1646480093f4SDimitry Andric 
1647480093f4SDimitry Andric       // Reject if SIB wasn't used.
1648480093f4SDimitry Andric       if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1649480093f4SDimitry Andric         return -1;
1650480093f4SDimitry Andric 
1651480093f4SDimitry Andric       // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1652480093f4SDimitry Andric       if (insn->sibIndex == SIB_INDEX_NONE)
1653480093f4SDimitry Andric         insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1654480093f4SDimitry Andric 
1655480093f4SDimitry Andric       // If EVEX.v2 is set this is one of the 16-31 registers.
1656480093f4SDimitry Andric       if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1657480093f4SDimitry Andric           v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1658480093f4SDimitry Andric         insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1659480093f4SDimitry Andric 
1660480093f4SDimitry Andric       // Adjust the index register to the correct size.
1661480093f4SDimitry Andric       switch ((OperandType)Op.type) {
1662480093f4SDimitry Andric       default:
1663480093f4SDimitry Andric         debug("Unhandled VSIB index type");
1664480093f4SDimitry Andric         return -1;
1665480093f4SDimitry Andric       case TYPE_MVSIBX:
1666480093f4SDimitry Andric         insn->sibIndex =
1667480093f4SDimitry Andric             (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1668480093f4SDimitry Andric         break;
1669480093f4SDimitry Andric       case TYPE_MVSIBY:
1670480093f4SDimitry Andric         insn->sibIndex =
1671480093f4SDimitry Andric             (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1672480093f4SDimitry Andric         break;
1673480093f4SDimitry Andric       case TYPE_MVSIBZ:
1674480093f4SDimitry Andric         insn->sibIndex =
1675480093f4SDimitry Andric             (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1676480093f4SDimitry Andric         break;
1677480093f4SDimitry Andric       }
1678480093f4SDimitry Andric 
1679480093f4SDimitry Andric       // Apply the AVX512 compressed displacement scaling factor.
1680480093f4SDimitry Andric       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1681480093f4SDimitry Andric         insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1682480093f4SDimitry Andric       break;
16835ffd83dbSDimitry Andric     case ENCODING_SIB:
16845ffd83dbSDimitry Andric       // Reject if SIB wasn't used.
16855ffd83dbSDimitry Andric       if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
16865ffd83dbSDimitry Andric         return -1;
16875ffd83dbSDimitry Andric       if (readModRM(insn))
16885ffd83dbSDimitry Andric         return -1;
16895ffd83dbSDimitry Andric       if (fixupReg(insn, &Op))
16905ffd83dbSDimitry Andric         return -1;
16915ffd83dbSDimitry Andric       break;
1692480093f4SDimitry Andric     case ENCODING_REG:
1693480093f4SDimitry Andric     CASE_ENCODING_RM:
1694480093f4SDimitry Andric       if (readModRM(insn))
1695480093f4SDimitry Andric         return -1;
1696480093f4SDimitry Andric       if (fixupReg(insn, &Op))
1697480093f4SDimitry Andric         return -1;
1698480093f4SDimitry Andric       // Apply the AVX512 compressed displacement scaling factor.
1699480093f4SDimitry Andric       if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1700480093f4SDimitry Andric         insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1701480093f4SDimitry Andric       break;
1702480093f4SDimitry Andric     case ENCODING_IB:
1703480093f4SDimitry Andric       if (sawRegImm) {
1704480093f4SDimitry Andric         // Saw a register immediate so don't read again and instead split the
1705480093f4SDimitry Andric         // previous immediate. FIXME: This is a hack.
1706480093f4SDimitry Andric         insn->immediates[insn->numImmediatesConsumed] =
1707480093f4SDimitry Andric             insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1708480093f4SDimitry Andric         ++insn->numImmediatesConsumed;
1709480093f4SDimitry Andric         break;
1710480093f4SDimitry Andric       }
1711480093f4SDimitry Andric       if (readImmediate(insn, 1))
1712480093f4SDimitry Andric         return -1;
1713480093f4SDimitry Andric       if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1714480093f4SDimitry Andric         sawRegImm = 1;
1715480093f4SDimitry Andric       break;
1716480093f4SDimitry Andric     case ENCODING_IW:
1717480093f4SDimitry Andric       if (readImmediate(insn, 2))
1718480093f4SDimitry Andric         return -1;
1719480093f4SDimitry Andric       break;
1720480093f4SDimitry Andric     case ENCODING_ID:
1721480093f4SDimitry Andric       if (readImmediate(insn, 4))
1722480093f4SDimitry Andric         return -1;
1723480093f4SDimitry Andric       break;
1724480093f4SDimitry Andric     case ENCODING_IO:
1725480093f4SDimitry Andric       if (readImmediate(insn, 8))
1726480093f4SDimitry Andric         return -1;
1727480093f4SDimitry Andric       break;
1728480093f4SDimitry Andric     case ENCODING_Iv:
1729480093f4SDimitry Andric       if (readImmediate(insn, insn->immediateSize))
1730480093f4SDimitry Andric         return -1;
1731480093f4SDimitry Andric       break;
1732480093f4SDimitry Andric     case ENCODING_Ia:
1733480093f4SDimitry Andric       if (readImmediate(insn, insn->addressSize))
1734480093f4SDimitry Andric         return -1;
1735480093f4SDimitry Andric       break;
1736480093f4SDimitry Andric     case ENCODING_IRC:
1737480093f4SDimitry Andric       insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1738480093f4SDimitry Andric                  lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1739480093f4SDimitry Andric       break;
1740480093f4SDimitry Andric     case ENCODING_RB:
1741480093f4SDimitry Andric       if (readOpcodeRegister(insn, 1))
1742480093f4SDimitry Andric         return -1;
1743480093f4SDimitry Andric       break;
1744480093f4SDimitry Andric     case ENCODING_RW:
1745480093f4SDimitry Andric       if (readOpcodeRegister(insn, 2))
1746480093f4SDimitry Andric         return -1;
1747480093f4SDimitry Andric       break;
1748480093f4SDimitry Andric     case ENCODING_RD:
1749480093f4SDimitry Andric       if (readOpcodeRegister(insn, 4))
1750480093f4SDimitry Andric         return -1;
1751480093f4SDimitry Andric       break;
1752480093f4SDimitry Andric     case ENCODING_RO:
1753480093f4SDimitry Andric       if (readOpcodeRegister(insn, 8))
1754480093f4SDimitry Andric         return -1;
1755480093f4SDimitry Andric       break;
1756480093f4SDimitry Andric     case ENCODING_Rv:
1757480093f4SDimitry Andric       if (readOpcodeRegister(insn, 0))
1758480093f4SDimitry Andric         return -1;
1759480093f4SDimitry Andric       break;
1760*0fca6ea1SDimitry Andric     case ENCODING_CF:
1761*0fca6ea1SDimitry Andric       insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);
1762*0fca6ea1SDimitry Andric       needVVVV = false; // oszc shares the same bits with VVVV
1763*0fca6ea1SDimitry Andric       break;
1764480093f4SDimitry Andric     case ENCODING_CC:
1765*0fca6ea1SDimitry Andric       if (isCCMPOrCTEST(insn))
1766*0fca6ea1SDimitry Andric         insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1767*0fca6ea1SDimitry Andric       else
1768480093f4SDimitry Andric         insn->immediates[1] = insn->opcode & 0xf;
1769480093f4SDimitry Andric       break;
1770480093f4SDimitry Andric     case ENCODING_FP:
1771480093f4SDimitry Andric       break;
1772480093f4SDimitry Andric     case ENCODING_VVVV:
1773480093f4SDimitry Andric       needVVVV = 0; // Mark that we have found a VVVV operand.
1774480093f4SDimitry Andric       if (!hasVVVV)
1775480093f4SDimitry Andric         return -1;
1776480093f4SDimitry Andric       if (insn->mode != MODE_64BIT)
1777480093f4SDimitry Andric         insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1778480093f4SDimitry Andric       if (fixupReg(insn, &Op))
1779480093f4SDimitry Andric         return -1;
1780480093f4SDimitry Andric       break;
1781480093f4SDimitry Andric     case ENCODING_WRITEMASK:
1782480093f4SDimitry Andric       if (readMaskRegister(insn))
1783480093f4SDimitry Andric         return -1;
1784480093f4SDimitry Andric       break;
1785480093f4SDimitry Andric     case ENCODING_DUP:
1786480093f4SDimitry Andric       break;
1787480093f4SDimitry Andric     default:
1788480093f4SDimitry Andric       LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1789480093f4SDimitry Andric       return -1;
1790480093f4SDimitry Andric     }
1791480093f4SDimitry Andric   }
1792480093f4SDimitry Andric 
1793480093f4SDimitry Andric   // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1794480093f4SDimitry Andric   if (needVVVV)
1795480093f4SDimitry Andric     return -1;
1796480093f4SDimitry Andric 
1797480093f4SDimitry Andric   return 0;
1798480093f4SDimitry Andric }
17990b57cec5SDimitry Andric 
namespace llvm {

// Placeholder register numbers. They are never actually assigned to an
// operand; they exist only so that an automatically-generated switch
// statement compiles.
namespace X86 {
enum { BX_SI = 500, BX_DI = 501, BP_SI = 502, BP_DI = 503, sib = 504, sib64 = 505 };
} // namespace X86

} // namespace llvm
18170b57cec5SDimitry Andric 
18180b57cec5SDimitry Andric static bool translateInstruction(MCInst &target,
18190b57cec5SDimitry Andric                                 InternalInstruction &source,
18200b57cec5SDimitry Andric                                 const MCDisassembler *Dis);
18210b57cec5SDimitry Andric 
18220b57cec5SDimitry Andric namespace {
18230b57cec5SDimitry Andric 
18240b57cec5SDimitry Andric /// Generic disassembler for all X86 platforms. All each platform class should
18250b57cec5SDimitry Andric /// have to do is subclass the constructor, and provide a different
18260b57cec5SDimitry Andric /// disassemblerMode value.
18270b57cec5SDimitry Andric class X86GenericDisassembler : public MCDisassembler {
18280b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MII;
18290b57cec5SDimitry Andric public:
18300b57cec5SDimitry Andric   X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
18310b57cec5SDimitry Andric                          std::unique_ptr<const MCInstrInfo> MII);
18320b57cec5SDimitry Andric public:
18330b57cec5SDimitry Andric   DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
18340b57cec5SDimitry Andric                               ArrayRef<uint8_t> Bytes, uint64_t Address,
18350b57cec5SDimitry Andric                               raw_ostream &cStream) const override;
18360b57cec5SDimitry Andric 
18370b57cec5SDimitry Andric private:
18380b57cec5SDimitry Andric   DisassemblerMode              fMode;
18390b57cec5SDimitry Andric };
18400b57cec5SDimitry Andric 
1841e8d8bef9SDimitry Andric } // namespace
18420b57cec5SDimitry Andric 
X86GenericDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MII)18430b57cec5SDimitry Andric X86GenericDisassembler::X86GenericDisassembler(
18440b57cec5SDimitry Andric                                          const MCSubtargetInfo &STI,
18450b57cec5SDimitry Andric                                          MCContext &Ctx,
18460b57cec5SDimitry Andric                                          std::unique_ptr<const MCInstrInfo> MII)
18470b57cec5SDimitry Andric   : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
18480b57cec5SDimitry Andric   const FeatureBitset &FB = STI.getFeatureBits();
184981ad6265SDimitry Andric   if (FB[X86::Is16Bit]) {
18500b57cec5SDimitry Andric     fMode = MODE_16BIT;
18510b57cec5SDimitry Andric     return;
185281ad6265SDimitry Andric   } else if (FB[X86::Is32Bit]) {
18530b57cec5SDimitry Andric     fMode = MODE_32BIT;
18540b57cec5SDimitry Andric     return;
185581ad6265SDimitry Andric   } else if (FB[X86::Is64Bit]) {
18560b57cec5SDimitry Andric     fMode = MODE_64BIT;
18570b57cec5SDimitry Andric     return;
18580b57cec5SDimitry Andric   }
18590b57cec5SDimitry Andric 
18600b57cec5SDimitry Andric   llvm_unreachable("Invalid CPU mode");
18610b57cec5SDimitry Andric }
18620b57cec5SDimitry Andric 
getInstruction(MCInst & Instr,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address,raw_ostream & CStream) const18630b57cec5SDimitry Andric MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
18640b57cec5SDimitry Andric     MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
1865480093f4SDimitry Andric     raw_ostream &CStream) const {
18660b57cec5SDimitry Andric   CommentStream = &CStream;
18670b57cec5SDimitry Andric 
1868480093f4SDimitry Andric   InternalInstruction Insn;
1869480093f4SDimitry Andric   memset(&Insn, 0, sizeof(InternalInstruction));
1870480093f4SDimitry Andric   Insn.bytes = Bytes;
1871480093f4SDimitry Andric   Insn.startLocation = Address;
1872480093f4SDimitry Andric   Insn.readerCursor = Address;
1873480093f4SDimitry Andric   Insn.mode = fMode;
18740b57cec5SDimitry Andric 
1875480093f4SDimitry Andric   if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
1876480093f4SDimitry Andric       getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
1877480093f4SDimitry Andric       readOperands(&Insn)) {
1878480093f4SDimitry Andric     Size = Insn.readerCursor - Address;
18790b57cec5SDimitry Andric     return Fail;
1880480093f4SDimitry Andric   }
1881480093f4SDimitry Andric 
1882480093f4SDimitry Andric   Insn.operands = x86OperandSets[Insn.spec->operands];
1883480093f4SDimitry Andric   Insn.length = Insn.readerCursor - Insn.startLocation;
1884480093f4SDimitry Andric   Size = Insn.length;
1885480093f4SDimitry Andric   if (Size > 15)
1886480093f4SDimitry Andric     LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1887480093f4SDimitry Andric 
1888480093f4SDimitry Andric   bool Ret = translateInstruction(Instr, Insn, this);
18890b57cec5SDimitry Andric   if (!Ret) {
18900b57cec5SDimitry Andric     unsigned Flags = X86::IP_NO_PREFIX;
1891480093f4SDimitry Andric     if (Insn.hasAdSize)
18920b57cec5SDimitry Andric       Flags |= X86::IP_HAS_AD_SIZE;
1893480093f4SDimitry Andric     if (!Insn.mandatoryPrefix) {
1894480093f4SDimitry Andric       if (Insn.hasOpSize)
18950b57cec5SDimitry Andric         Flags |= X86::IP_HAS_OP_SIZE;
1896480093f4SDimitry Andric       if (Insn.repeatPrefix == 0xf2)
18970b57cec5SDimitry Andric         Flags |= X86::IP_HAS_REPEAT_NE;
1898480093f4SDimitry Andric       else if (Insn.repeatPrefix == 0xf3 &&
18990b57cec5SDimitry Andric                // It should not be 'pause' f3 90
1900480093f4SDimitry Andric                Insn.opcode != 0x90)
19010b57cec5SDimitry Andric         Flags |= X86::IP_HAS_REPEAT;
1902480093f4SDimitry Andric       if (Insn.hasLockPrefix)
19030b57cec5SDimitry Andric         Flags |= X86::IP_HAS_LOCK;
19040b57cec5SDimitry Andric     }
19050b57cec5SDimitry Andric     Instr.setFlags(Flags);
19060b57cec5SDimitry Andric   }
19070b57cec5SDimitry Andric   return (!Ret) ? Success : Fail;
19080b57cec5SDimitry Andric }
19090b57cec5SDimitry Andric 
19100b57cec5SDimitry Andric //
19110b57cec5SDimitry Andric // Private code that translates from struct InternalInstructions to MCInsts.
19120b57cec5SDimitry Andric //
19130b57cec5SDimitry Andric 
19140b57cec5SDimitry Andric /// translateRegister - Translates an internal register to the appropriate LLVM
19150b57cec5SDimitry Andric ///   register, and appends it as an operand to an MCInst.
19160b57cec5SDimitry Andric ///
19170b57cec5SDimitry Andric /// @param mcInst     - The MCInst to append to.
19180b57cec5SDimitry Andric /// @param reg        - The Reg to append.
translateRegister(MCInst & mcInst,Reg reg)19190b57cec5SDimitry Andric static void translateRegister(MCInst &mcInst, Reg reg) {
19200b57cec5SDimitry Andric #define ENTRY(x) X86::x,
19210b57cec5SDimitry Andric   static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
19220b57cec5SDimitry Andric #undef ENTRY
19230b57cec5SDimitry Andric 
19240b57cec5SDimitry Andric   MCPhysReg llvmRegnum = llvmRegnums[reg];
19250b57cec5SDimitry Andric   mcInst.addOperand(MCOperand::createReg(llvmRegnum));
19260b57cec5SDimitry Andric }
19270b57cec5SDimitry Andric 
19280b57cec5SDimitry Andric static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
19290b57cec5SDimitry Andric   0,        // SEG_OVERRIDE_NONE
19300b57cec5SDimitry Andric   X86::CS,
19310b57cec5SDimitry Andric   X86::SS,
19320b57cec5SDimitry Andric   X86::DS,
19330b57cec5SDimitry Andric   X86::ES,
19340b57cec5SDimitry Andric   X86::FS,
19350b57cec5SDimitry Andric   X86::GS
19360b57cec5SDimitry Andric };
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric /// translateSrcIndex   - Appends a source index operand to an MCInst.
19390b57cec5SDimitry Andric ///
19400b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
19410b57cec5SDimitry Andric /// @param insn         - The internal instruction.
translateSrcIndex(MCInst & mcInst,InternalInstruction & insn)19420b57cec5SDimitry Andric static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
19430b57cec5SDimitry Andric   unsigned baseRegNo;
19440b57cec5SDimitry Andric 
19450b57cec5SDimitry Andric   if (insn.mode == MODE_64BIT)
19460b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
19470b57cec5SDimitry Andric   else if (insn.mode == MODE_32BIT)
19480b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
19490b57cec5SDimitry Andric   else {
19500b57cec5SDimitry Andric     assert(insn.mode == MODE_16BIT);
19510b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
19520b57cec5SDimitry Andric   }
19530b57cec5SDimitry Andric   MCOperand baseReg = MCOperand::createReg(baseRegNo);
19540b57cec5SDimitry Andric   mcInst.addOperand(baseReg);
19550b57cec5SDimitry Andric 
19560b57cec5SDimitry Andric   MCOperand segmentReg;
19570b57cec5SDimitry Andric   segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
19580b57cec5SDimitry Andric   mcInst.addOperand(segmentReg);
19590b57cec5SDimitry Andric   return false;
19600b57cec5SDimitry Andric }
19610b57cec5SDimitry Andric 
19620b57cec5SDimitry Andric /// translateDstIndex   - Appends a destination index operand to an MCInst.
19630b57cec5SDimitry Andric ///
19640b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
19650b57cec5SDimitry Andric /// @param insn         - The internal instruction.
19660b57cec5SDimitry Andric 
translateDstIndex(MCInst & mcInst,InternalInstruction & insn)19670b57cec5SDimitry Andric static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
19680b57cec5SDimitry Andric   unsigned baseRegNo;
19690b57cec5SDimitry Andric 
19700b57cec5SDimitry Andric   if (insn.mode == MODE_64BIT)
19710b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
19720b57cec5SDimitry Andric   else if (insn.mode == MODE_32BIT)
19730b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
19740b57cec5SDimitry Andric   else {
19750b57cec5SDimitry Andric     assert(insn.mode == MODE_16BIT);
19760b57cec5SDimitry Andric     baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
19770b57cec5SDimitry Andric   }
19780b57cec5SDimitry Andric   MCOperand baseReg = MCOperand::createReg(baseRegNo);
19790b57cec5SDimitry Andric   mcInst.addOperand(baseReg);
19800b57cec5SDimitry Andric   return false;
19810b57cec5SDimitry Andric }
19820b57cec5SDimitry Andric 
19830b57cec5SDimitry Andric /// translateImmediate  - Appends an immediate operand to an MCInst.
19840b57cec5SDimitry Andric ///
19850b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
19860b57cec5SDimitry Andric /// @param immediate    - The immediate value to append.
19870b57cec5SDimitry Andric /// @param operand      - The operand, as stored in the descriptor table.
19880b57cec5SDimitry Andric /// @param insn         - The internal instruction.
translateImmediate(MCInst & mcInst,uint64_t immediate,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)19890b57cec5SDimitry Andric static void translateImmediate(MCInst &mcInst, uint64_t immediate,
19900b57cec5SDimitry Andric                                const OperandSpecifier &operand,
19910b57cec5SDimitry Andric                                InternalInstruction &insn,
19920b57cec5SDimitry Andric                                const MCDisassembler *Dis) {
19930b57cec5SDimitry Andric   // Sign-extend the immediate if necessary.
19940b57cec5SDimitry Andric 
19950b57cec5SDimitry Andric   OperandType type = (OperandType)operand.type;
19960b57cec5SDimitry Andric 
19970b57cec5SDimitry Andric   bool isBranch = false;
19980b57cec5SDimitry Andric   uint64_t pcrel = 0;
19990b57cec5SDimitry Andric   if (type == TYPE_REL) {
20000b57cec5SDimitry Andric     isBranch = true;
200181ad6265SDimitry Andric     pcrel = insn.startLocation + insn.length;
20020b57cec5SDimitry Andric     switch (operand.encoding) {
20030b57cec5SDimitry Andric     default:
20040b57cec5SDimitry Andric       break;
20050b57cec5SDimitry Andric     case ENCODING_Iv:
20060b57cec5SDimitry Andric       switch (insn.displacementSize) {
20070b57cec5SDimitry Andric       default:
20080b57cec5SDimitry Andric         break;
20090b57cec5SDimitry Andric       case 1:
20100b57cec5SDimitry Andric         if(immediate & 0x80)
20110b57cec5SDimitry Andric           immediate |= ~(0xffull);
20120b57cec5SDimitry Andric         break;
20130b57cec5SDimitry Andric       case 2:
20140b57cec5SDimitry Andric         if(immediate & 0x8000)
20150b57cec5SDimitry Andric           immediate |= ~(0xffffull);
20160b57cec5SDimitry Andric         break;
20170b57cec5SDimitry Andric       case 4:
20180b57cec5SDimitry Andric         if(immediate & 0x80000000)
20190b57cec5SDimitry Andric           immediate |= ~(0xffffffffull);
20200b57cec5SDimitry Andric         break;
20210b57cec5SDimitry Andric       case 8:
20220b57cec5SDimitry Andric         break;
20230b57cec5SDimitry Andric       }
20240b57cec5SDimitry Andric       break;
20250b57cec5SDimitry Andric     case ENCODING_IB:
20260b57cec5SDimitry Andric       if(immediate & 0x80)
20270b57cec5SDimitry Andric         immediate |= ~(0xffull);
20280b57cec5SDimitry Andric       break;
20290b57cec5SDimitry Andric     case ENCODING_IW:
20300b57cec5SDimitry Andric       if(immediate & 0x8000)
20310b57cec5SDimitry Andric         immediate |= ~(0xffffull);
20320b57cec5SDimitry Andric       break;
20330b57cec5SDimitry Andric     case ENCODING_ID:
20340b57cec5SDimitry Andric       if(immediate & 0x80000000)
20350b57cec5SDimitry Andric         immediate |= ~(0xffffffffull);
20360b57cec5SDimitry Andric       break;
20370b57cec5SDimitry Andric     }
20380b57cec5SDimitry Andric   }
20390b57cec5SDimitry Andric   // By default sign-extend all X86 immediates based on their encoding.
20400b57cec5SDimitry Andric   else if (type == TYPE_IMM) {
20410b57cec5SDimitry Andric     switch (operand.encoding) {
20420b57cec5SDimitry Andric     default:
20430b57cec5SDimitry Andric       break;
20440b57cec5SDimitry Andric     case ENCODING_IB:
20450b57cec5SDimitry Andric       if(immediate & 0x80)
20460b57cec5SDimitry Andric         immediate |= ~(0xffull);
20470b57cec5SDimitry Andric       break;
20480b57cec5SDimitry Andric     case ENCODING_IW:
20490b57cec5SDimitry Andric       if(immediate & 0x8000)
20500b57cec5SDimitry Andric         immediate |= ~(0xffffull);
20510b57cec5SDimitry Andric       break;
20520b57cec5SDimitry Andric     case ENCODING_ID:
20530b57cec5SDimitry Andric       if(immediate & 0x80000000)
20540b57cec5SDimitry Andric         immediate |= ~(0xffffffffull);
20550b57cec5SDimitry Andric       break;
20560b57cec5SDimitry Andric     case ENCODING_IO:
20570b57cec5SDimitry Andric       break;
20580b57cec5SDimitry Andric     }
20590b57cec5SDimitry Andric   }
20600b57cec5SDimitry Andric 
20610b57cec5SDimitry Andric   switch (type) {
20620b57cec5SDimitry Andric   case TYPE_XMM:
20630b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
20640b57cec5SDimitry Andric     return;
20650b57cec5SDimitry Andric   case TYPE_YMM:
20660b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
20670b57cec5SDimitry Andric     return;
20680b57cec5SDimitry Andric   case TYPE_ZMM:
20690b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
20700b57cec5SDimitry Andric     return;
20710b57cec5SDimitry Andric   default:
20720b57cec5SDimitry Andric     // operand is 64 bits wide.  Do nothing.
20730b57cec5SDimitry Andric     break;
20740b57cec5SDimitry Andric   }
20750b57cec5SDimitry Andric 
207681ad6265SDimitry Andric   if (!Dis->tryAddingSymbolicOperand(
207781ad6265SDimitry Andric           mcInst, immediate + pcrel, insn.startLocation, isBranch,
207881ad6265SDimitry Andric           insn.immediateOffset, insn.immediateSize, insn.length))
20790b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createImm(immediate));
20800b57cec5SDimitry Andric 
20810b57cec5SDimitry Andric   if (type == TYPE_MOFFS) {
20820b57cec5SDimitry Andric     MCOperand segmentReg;
20830b57cec5SDimitry Andric     segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
20840b57cec5SDimitry Andric     mcInst.addOperand(segmentReg);
20850b57cec5SDimitry Andric   }
20860b57cec5SDimitry Andric }
20870b57cec5SDimitry Andric 
20880b57cec5SDimitry Andric /// translateRMRegister - Translates a register stored in the R/M field of the
20890b57cec5SDimitry Andric ///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
20900b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
20910b57cec5SDimitry Andric /// @param insn         - The internal instruction to extract the R/M field
20920b57cec5SDimitry Andric ///                       from.
20930b57cec5SDimitry Andric /// @return             - 0 on success; -1 otherwise
translateRMRegister(MCInst & mcInst,InternalInstruction & insn)20940b57cec5SDimitry Andric static bool translateRMRegister(MCInst &mcInst,
20950b57cec5SDimitry Andric                                 InternalInstruction &insn) {
20960b57cec5SDimitry Andric   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
20970b57cec5SDimitry Andric     debug("A R/M register operand may not have a SIB byte");
20980b57cec5SDimitry Andric     return true;
20990b57cec5SDimitry Andric   }
21000b57cec5SDimitry Andric 
21010b57cec5SDimitry Andric   switch (insn.eaBase) {
21020b57cec5SDimitry Andric   default:
21030b57cec5SDimitry Andric     debug("Unexpected EA base register");
21040b57cec5SDimitry Andric     return true;
21050b57cec5SDimitry Andric   case EA_BASE_NONE:
21060b57cec5SDimitry Andric     debug("EA_BASE_NONE for ModR/M base");
21070b57cec5SDimitry Andric     return true;
21080b57cec5SDimitry Andric #define ENTRY(x) case EA_BASE_##x:
21090b57cec5SDimitry Andric   ALL_EA_BASES
21100b57cec5SDimitry Andric #undef ENTRY
21110b57cec5SDimitry Andric     debug("A R/M register operand may not have a base; "
21120b57cec5SDimitry Andric           "the operand must be a register.");
21130b57cec5SDimitry Andric     return true;
21140b57cec5SDimitry Andric #define ENTRY(x)                                                      \
21150b57cec5SDimitry Andric   case EA_REG_##x:                                                    \
21160b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createReg(X86::x)); break;
21170b57cec5SDimitry Andric   ALL_REGS
21180b57cec5SDimitry Andric #undef ENTRY
21190b57cec5SDimitry Andric   }
21200b57cec5SDimitry Andric 
21210b57cec5SDimitry Andric   return false;
21220b57cec5SDimitry Andric }
21230b57cec5SDimitry Andric 
21240b57cec5SDimitry Andric /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
21250b57cec5SDimitry Andric ///   fields of an internal instruction (and possibly its SIB byte) to a memory
21260b57cec5SDimitry Andric ///   operand in LLVM's format, and appends it to an MCInst.
21270b57cec5SDimitry Andric ///
21280b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
21290b57cec5SDimitry Andric /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
21300b57cec5SDimitry Andric ///                       from.
21315ffd83dbSDimitry Andric /// @param ForceSIB     - The instruction must use SIB.
21320b57cec5SDimitry Andric /// @return             - 0 on success; nonzero otherwise
translateRMMemory(MCInst & mcInst,InternalInstruction & insn,const MCDisassembler * Dis,bool ForceSIB=false)21330b57cec5SDimitry Andric static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
21345ffd83dbSDimitry Andric                               const MCDisassembler *Dis,
21355ffd83dbSDimitry Andric                               bool ForceSIB = false) {
21360b57cec5SDimitry Andric   // Addresses in an MCInst are represented as five operands:
21370b57cec5SDimitry Andric   //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
21380b57cec5SDimitry Andric   //                                SIB base
21390b57cec5SDimitry Andric   //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
21400b57cec5SDimitry Andric   //                                scale amount
21410b57cec5SDimitry Andric   //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
21420b57cec5SDimitry Andric   //                                the index (which is multiplied by the
21430b57cec5SDimitry Andric   //                                scale amount)
21440b57cec5SDimitry Andric   //   4. displacement  (immediate) 0, or the displacement if there is one
21450b57cec5SDimitry Andric   //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
21460b57cec5SDimitry Andric   //                                if we have segment overrides
21470b57cec5SDimitry Andric 
21480b57cec5SDimitry Andric   MCOperand baseReg;
21490b57cec5SDimitry Andric   MCOperand scaleAmount;
21500b57cec5SDimitry Andric   MCOperand indexReg;
21510b57cec5SDimitry Andric   MCOperand displacement;
21520b57cec5SDimitry Andric   MCOperand segmentReg;
21530b57cec5SDimitry Andric   uint64_t pcrel = 0;
21540b57cec5SDimitry Andric 
21550b57cec5SDimitry Andric   if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
21560b57cec5SDimitry Andric     if (insn.sibBase != SIB_BASE_NONE) {
21570b57cec5SDimitry Andric       switch (insn.sibBase) {
21580b57cec5SDimitry Andric       default:
21590b57cec5SDimitry Andric         debug("Unexpected sibBase");
21600b57cec5SDimitry Andric         return true;
21610b57cec5SDimitry Andric #define ENTRY(x)                                          \
21620b57cec5SDimitry Andric       case SIB_BASE_##x:                                  \
21630b57cec5SDimitry Andric         baseReg = MCOperand::createReg(X86::x); break;
21640b57cec5SDimitry Andric       ALL_SIB_BASES
21650b57cec5SDimitry Andric #undef ENTRY
21660b57cec5SDimitry Andric       }
21670b57cec5SDimitry Andric     } else {
21680b57cec5SDimitry Andric       baseReg = MCOperand::createReg(X86::NoRegister);
21690b57cec5SDimitry Andric     }
21700b57cec5SDimitry Andric 
21710b57cec5SDimitry Andric     if (insn.sibIndex != SIB_INDEX_NONE) {
21720b57cec5SDimitry Andric       switch (insn.sibIndex) {
21730b57cec5SDimitry Andric       default:
21740b57cec5SDimitry Andric         debug("Unexpected sibIndex");
21750b57cec5SDimitry Andric         return true;
21760b57cec5SDimitry Andric #define ENTRY(x)                                          \
21770b57cec5SDimitry Andric       case SIB_INDEX_##x:                                 \
21780b57cec5SDimitry Andric         indexReg = MCOperand::createReg(X86::x); break;
21790b57cec5SDimitry Andric       EA_BASES_32BIT
21800b57cec5SDimitry Andric       EA_BASES_64BIT
21810b57cec5SDimitry Andric       REGS_XMM
21820b57cec5SDimitry Andric       REGS_YMM
21830b57cec5SDimitry Andric       REGS_ZMM
21840b57cec5SDimitry Andric #undef ENTRY
21850b57cec5SDimitry Andric       }
21860b57cec5SDimitry Andric     } else {
21870b57cec5SDimitry Andric       // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
21880b57cec5SDimitry Andric       // but no index is used and modrm alone should have been enough.
21890b57cec5SDimitry Andric       // -No base register in 32-bit mode. In 64-bit mode this is used to
21900b57cec5SDimitry Andric       //  avoid rip-relative addressing.
21910b57cec5SDimitry Andric       // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
21920b57cec5SDimitry Andric       //  base always requires a SIB byte.
21930b57cec5SDimitry Andric       // -A scale other than 1 is used.
21945ffd83dbSDimitry Andric       if (!ForceSIB &&
21955ffd83dbSDimitry Andric           (insn.sibScale != 1 ||
21960b57cec5SDimitry Andric            (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
21970b57cec5SDimitry Andric            (insn.sibBase != SIB_BASE_NONE &&
21980b57cec5SDimitry Andric             insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
21995ffd83dbSDimitry Andric             insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
22000b57cec5SDimitry Andric         indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
22010b57cec5SDimitry Andric                                                                 X86::RIZ);
22020b57cec5SDimitry Andric       } else
22030b57cec5SDimitry Andric         indexReg = MCOperand::createReg(X86::NoRegister);
22040b57cec5SDimitry Andric     }
22050b57cec5SDimitry Andric 
22060b57cec5SDimitry Andric     scaleAmount = MCOperand::createImm(insn.sibScale);
22070b57cec5SDimitry Andric   } else {
22080b57cec5SDimitry Andric     switch (insn.eaBase) {
22090b57cec5SDimitry Andric     case EA_BASE_NONE:
22100b57cec5SDimitry Andric       if (insn.eaDisplacement == EA_DISP_NONE) {
22110b57cec5SDimitry Andric         debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
22120b57cec5SDimitry Andric         return true;
22130b57cec5SDimitry Andric       }
22140b57cec5SDimitry Andric       if (insn.mode == MODE_64BIT){
221581ad6265SDimitry Andric         pcrel = insn.startLocation + insn.length;
221681ad6265SDimitry Andric         Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
221781ad6265SDimitry Andric                                              insn.startLocation +
221881ad6265SDimitry Andric                                                  insn.displacementOffset);
22190b57cec5SDimitry Andric         // Section 2.2.1.6
22200b57cec5SDimitry Andric         baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
22210b57cec5SDimitry Andric                                                                X86::RIP);
22220b57cec5SDimitry Andric       }
22230b57cec5SDimitry Andric       else
22240b57cec5SDimitry Andric         baseReg = MCOperand::createReg(X86::NoRegister);
22250b57cec5SDimitry Andric 
22260b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::NoRegister);
22270b57cec5SDimitry Andric       break;
22280b57cec5SDimitry Andric     case EA_BASE_BX_SI:
22290b57cec5SDimitry Andric       baseReg = MCOperand::createReg(X86::BX);
22300b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::SI);
22310b57cec5SDimitry Andric       break;
22320b57cec5SDimitry Andric     case EA_BASE_BX_DI:
22330b57cec5SDimitry Andric       baseReg = MCOperand::createReg(X86::BX);
22340b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::DI);
22350b57cec5SDimitry Andric       break;
22360b57cec5SDimitry Andric     case EA_BASE_BP_SI:
22370b57cec5SDimitry Andric       baseReg = MCOperand::createReg(X86::BP);
22380b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::SI);
22390b57cec5SDimitry Andric       break;
22400b57cec5SDimitry Andric     case EA_BASE_BP_DI:
22410b57cec5SDimitry Andric       baseReg = MCOperand::createReg(X86::BP);
22420b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::DI);
22430b57cec5SDimitry Andric       break;
22440b57cec5SDimitry Andric     default:
22450b57cec5SDimitry Andric       indexReg = MCOperand::createReg(X86::NoRegister);
22460b57cec5SDimitry Andric       switch (insn.eaBase) {
22470b57cec5SDimitry Andric       default:
22480b57cec5SDimitry Andric         debug("Unexpected eaBase");
22490b57cec5SDimitry Andric         return true;
22500b57cec5SDimitry Andric         // Here, we will use the fill-ins defined above.  However,
22510b57cec5SDimitry Andric         //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
22520b57cec5SDimitry Andric         //   sib and sib64 were handled in the top-level if, so they're only
22530b57cec5SDimitry Andric         //   placeholders to keep the compiler happy.
22540b57cec5SDimitry Andric #define ENTRY(x)                                        \
22550b57cec5SDimitry Andric       case EA_BASE_##x:                                 \
22560b57cec5SDimitry Andric         baseReg = MCOperand::createReg(X86::x); break;
22570b57cec5SDimitry Andric       ALL_EA_BASES
22580b57cec5SDimitry Andric #undef ENTRY
22590b57cec5SDimitry Andric #define ENTRY(x) case EA_REG_##x:
22600b57cec5SDimitry Andric       ALL_REGS
22610b57cec5SDimitry Andric #undef ENTRY
22620b57cec5SDimitry Andric         debug("A R/M memory operand may not be a register; "
22630b57cec5SDimitry Andric               "the base field must be a base.");
22640b57cec5SDimitry Andric         return true;
22650b57cec5SDimitry Andric       }
22660b57cec5SDimitry Andric     }
22670b57cec5SDimitry Andric 
22680b57cec5SDimitry Andric     scaleAmount = MCOperand::createImm(1);
22690b57cec5SDimitry Andric   }
22700b57cec5SDimitry Andric 
22710b57cec5SDimitry Andric   displacement = MCOperand::createImm(insn.displacement);
22720b57cec5SDimitry Andric 
22730b57cec5SDimitry Andric   segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
22740b57cec5SDimitry Andric 
22750b57cec5SDimitry Andric   mcInst.addOperand(baseReg);
22760b57cec5SDimitry Andric   mcInst.addOperand(scaleAmount);
22770b57cec5SDimitry Andric   mcInst.addOperand(indexReg);
227881ad6265SDimitry Andric 
227981ad6265SDimitry Andric   const uint8_t dispSize =
228081ad6265SDimitry Andric       (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
228181ad6265SDimitry Andric 
228281ad6265SDimitry Andric   if (!Dis->tryAddingSymbolicOperand(
228381ad6265SDimitry Andric           mcInst, insn.displacement + pcrel, insn.startLocation, false,
228481ad6265SDimitry Andric           insn.displacementOffset, dispSize, insn.length))
22850b57cec5SDimitry Andric     mcInst.addOperand(displacement);
22860b57cec5SDimitry Andric   mcInst.addOperand(segmentReg);
22870b57cec5SDimitry Andric   return false;
22880b57cec5SDimitry Andric }
22890b57cec5SDimitry Andric 
22900b57cec5SDimitry Andric /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
22910b57cec5SDimitry Andric ///   byte of an instruction to LLVM form, and appends it to an MCInst.
22920b57cec5SDimitry Andric ///
22930b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
22940b57cec5SDimitry Andric /// @param operand      - The operand, as stored in the descriptor table.
22950b57cec5SDimitry Andric /// @param insn         - The instruction to extract Mod, R/M, and SIB fields
22960b57cec5SDimitry Andric ///                       from.
22970b57cec5SDimitry Andric /// @return             - 0 on success; nonzero otherwise
translateRM(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)22980b57cec5SDimitry Andric static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
22990b57cec5SDimitry Andric                         InternalInstruction &insn, const MCDisassembler *Dis) {
23000b57cec5SDimitry Andric   switch (operand.type) {
23010b57cec5SDimitry Andric   default:
23020b57cec5SDimitry Andric     debug("Unexpected type for a R/M operand");
23030b57cec5SDimitry Andric     return true;
23040b57cec5SDimitry Andric   case TYPE_R8:
23050b57cec5SDimitry Andric   case TYPE_R16:
23060b57cec5SDimitry Andric   case TYPE_R32:
23070b57cec5SDimitry Andric   case TYPE_R64:
23080b57cec5SDimitry Andric   case TYPE_Rv:
23090b57cec5SDimitry Andric   case TYPE_MM64:
23100b57cec5SDimitry Andric   case TYPE_XMM:
23110b57cec5SDimitry Andric   case TYPE_YMM:
23120b57cec5SDimitry Andric   case TYPE_ZMM:
23135ffd83dbSDimitry Andric   case TYPE_TMM:
23140b57cec5SDimitry Andric   case TYPE_VK_PAIR:
23150b57cec5SDimitry Andric   case TYPE_VK:
23160b57cec5SDimitry Andric   case TYPE_DEBUGREG:
23170b57cec5SDimitry Andric   case TYPE_CONTROLREG:
23180b57cec5SDimitry Andric   case TYPE_BNDR:
23190b57cec5SDimitry Andric     return translateRMRegister(mcInst, insn);
23200b57cec5SDimitry Andric   case TYPE_M:
23210b57cec5SDimitry Andric   case TYPE_MVSIBX:
23220b57cec5SDimitry Andric   case TYPE_MVSIBY:
23230b57cec5SDimitry Andric   case TYPE_MVSIBZ:
23240b57cec5SDimitry Andric     return translateRMMemory(mcInst, insn, Dis);
23255ffd83dbSDimitry Andric   case TYPE_MSIB:
23265ffd83dbSDimitry Andric     return translateRMMemory(mcInst, insn, Dis, true);
23270b57cec5SDimitry Andric   }
23280b57cec5SDimitry Andric }
23290b57cec5SDimitry Andric 
23300b57cec5SDimitry Andric /// translateFPRegister - Translates a stack position on the FPU stack to its
23310b57cec5SDimitry Andric ///   LLVM form, and appends it to an MCInst.
23320b57cec5SDimitry Andric ///
23330b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
23340b57cec5SDimitry Andric /// @param stackPos     - The stack position to translate.
translateFPRegister(MCInst & mcInst,uint8_t stackPos)23350b57cec5SDimitry Andric static void translateFPRegister(MCInst &mcInst,
23360b57cec5SDimitry Andric                                 uint8_t stackPos) {
23370b57cec5SDimitry Andric   mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
23380b57cec5SDimitry Andric }
23390b57cec5SDimitry Andric 
23400b57cec5SDimitry Andric /// translateMaskRegister - Translates a 3-bit mask register number to
23410b57cec5SDimitry Andric ///   LLVM form, and appends it to an MCInst.
23420b57cec5SDimitry Andric ///
23430b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
23440b57cec5SDimitry Andric /// @param maskRegNum   - Number of mask register from 0 to 7.
23450b57cec5SDimitry Andric /// @return             - false on success; true otherwise.
translateMaskRegister(MCInst & mcInst,uint8_t maskRegNum)23460b57cec5SDimitry Andric static bool translateMaskRegister(MCInst &mcInst,
23470b57cec5SDimitry Andric                                 uint8_t maskRegNum) {
23480b57cec5SDimitry Andric   if (maskRegNum >= 8) {
23490b57cec5SDimitry Andric     debug("Invalid mask register number");
23500b57cec5SDimitry Andric     return true;
23510b57cec5SDimitry Andric   }
23520b57cec5SDimitry Andric 
23530b57cec5SDimitry Andric   mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
23540b57cec5SDimitry Andric   return false;
23550b57cec5SDimitry Andric }
23560b57cec5SDimitry Andric 
23570b57cec5SDimitry Andric /// translateOperand - Translates an operand stored in an internal instruction
23580b57cec5SDimitry Andric ///   to LLVM's format and appends it to an MCInst.
23590b57cec5SDimitry Andric ///
23600b57cec5SDimitry Andric /// @param mcInst       - The MCInst to append to.
23610b57cec5SDimitry Andric /// @param operand      - The operand, as stored in the descriptor table.
23620b57cec5SDimitry Andric /// @param insn         - The internal instruction.
23630b57cec5SDimitry Andric /// @return             - false on success; true otherwise.
translateOperand(MCInst & mcInst,const OperandSpecifier & operand,InternalInstruction & insn,const MCDisassembler * Dis)23640b57cec5SDimitry Andric static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
23650b57cec5SDimitry Andric                              InternalInstruction &insn,
23660b57cec5SDimitry Andric                              const MCDisassembler *Dis) {
23670b57cec5SDimitry Andric   switch (operand.encoding) {
23680b57cec5SDimitry Andric   default:
23690b57cec5SDimitry Andric     debug("Unhandled operand encoding during translation");
23700b57cec5SDimitry Andric     return true;
23710b57cec5SDimitry Andric   case ENCODING_REG:
23720b57cec5SDimitry Andric     translateRegister(mcInst, insn.reg);
23730b57cec5SDimitry Andric     return false;
23740b57cec5SDimitry Andric   case ENCODING_WRITEMASK:
23750b57cec5SDimitry Andric     return translateMaskRegister(mcInst, insn.writemask);
23765ffd83dbSDimitry Andric   case ENCODING_SIB:
23770b57cec5SDimitry Andric   CASE_ENCODING_RM:
23780b57cec5SDimitry Andric   CASE_ENCODING_VSIB:
23790b57cec5SDimitry Andric     return translateRM(mcInst, operand, insn, Dis);
23800b57cec5SDimitry Andric   case ENCODING_IB:
23810b57cec5SDimitry Andric   case ENCODING_IW:
23820b57cec5SDimitry Andric   case ENCODING_ID:
23830b57cec5SDimitry Andric   case ENCODING_IO:
23840b57cec5SDimitry Andric   case ENCODING_Iv:
23850b57cec5SDimitry Andric   case ENCODING_Ia:
23860b57cec5SDimitry Andric     translateImmediate(mcInst,
23870b57cec5SDimitry Andric                        insn.immediates[insn.numImmediatesTranslated++],
23880b57cec5SDimitry Andric                        operand,
23890b57cec5SDimitry Andric                        insn,
23900b57cec5SDimitry Andric                        Dis);
23910b57cec5SDimitry Andric     return false;
23920b57cec5SDimitry Andric   case ENCODING_IRC:
23930b57cec5SDimitry Andric     mcInst.addOperand(MCOperand::createImm(insn.RC));
23940b57cec5SDimitry Andric     return false;
23950b57cec5SDimitry Andric   case ENCODING_SI:
23960b57cec5SDimitry Andric     return translateSrcIndex(mcInst, insn);
23970b57cec5SDimitry Andric   case ENCODING_DI:
23980b57cec5SDimitry Andric     return translateDstIndex(mcInst, insn);
23990b57cec5SDimitry Andric   case ENCODING_RB:
24000b57cec5SDimitry Andric   case ENCODING_RW:
24010b57cec5SDimitry Andric   case ENCODING_RD:
24020b57cec5SDimitry Andric   case ENCODING_RO:
24030b57cec5SDimitry Andric   case ENCODING_Rv:
24040b57cec5SDimitry Andric     translateRegister(mcInst, insn.opcodeRegister);
24050b57cec5SDimitry Andric     return false;
2406*0fca6ea1SDimitry Andric   case ENCODING_CF:
2407*0fca6ea1SDimitry Andric     mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2408*0fca6ea1SDimitry Andric     return false;
24090b57cec5SDimitry Andric   case ENCODING_CC:
2410*0fca6ea1SDimitry Andric     if (isCCMPOrCTEST(&insn))
2411*0fca6ea1SDimitry Andric       mcInst.addOperand(MCOperand::createImm(insn.immediates[2]));
2412*0fca6ea1SDimitry Andric     else
24130b57cec5SDimitry Andric       mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
24140b57cec5SDimitry Andric     return false;
24150b57cec5SDimitry Andric   case ENCODING_FP:
24160b57cec5SDimitry Andric     translateFPRegister(mcInst, insn.modRM & 7);
24170b57cec5SDimitry Andric     return false;
24180b57cec5SDimitry Andric   case ENCODING_VVVV:
24190b57cec5SDimitry Andric     translateRegister(mcInst, insn.vvvv);
24200b57cec5SDimitry Andric     return false;
24210b57cec5SDimitry Andric   case ENCODING_DUP:
24220b57cec5SDimitry Andric     return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
24230b57cec5SDimitry Andric                             insn, Dis);
24240b57cec5SDimitry Andric   }
24250b57cec5SDimitry Andric }
24260b57cec5SDimitry Andric 
24270b57cec5SDimitry Andric /// translateInstruction - Translates an internal instruction and all its
24280b57cec5SDimitry Andric ///   operands to an MCInst.
24290b57cec5SDimitry Andric ///
24300b57cec5SDimitry Andric /// @param mcInst       - The MCInst to populate with the instruction's data.
24310b57cec5SDimitry Andric /// @param insn         - The internal instruction.
24320b57cec5SDimitry Andric /// @return             - false on success; true otherwise.
translateInstruction(MCInst & mcInst,InternalInstruction & insn,const MCDisassembler * Dis)24330b57cec5SDimitry Andric static bool translateInstruction(MCInst &mcInst,
24340b57cec5SDimitry Andric                                 InternalInstruction &insn,
24350b57cec5SDimitry Andric                                 const MCDisassembler *Dis) {
24360b57cec5SDimitry Andric   if (!insn.spec) {
24370b57cec5SDimitry Andric     debug("Instruction has no specification");
24380b57cec5SDimitry Andric     return true;
24390b57cec5SDimitry Andric   }
24400b57cec5SDimitry Andric 
24410b57cec5SDimitry Andric   mcInst.clear();
24420b57cec5SDimitry Andric   mcInst.setOpcode(insn.instructionID);
24430b57cec5SDimitry Andric   // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
24440b57cec5SDimitry Andric   // prefix bytes should be disassembled as xrelease and xacquire then set the
24450b57cec5SDimitry Andric   // opcode to those instead of the rep and repne opcodes.
24460b57cec5SDimitry Andric   if (insn.xAcquireRelease) {
24470b57cec5SDimitry Andric     if(mcInst.getOpcode() == X86::REP_PREFIX)
24480b57cec5SDimitry Andric       mcInst.setOpcode(X86::XRELEASE_PREFIX);
24490b57cec5SDimitry Andric     else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
24500b57cec5SDimitry Andric       mcInst.setOpcode(X86::XACQUIRE_PREFIX);
24510b57cec5SDimitry Andric   }
24520b57cec5SDimitry Andric 
24530b57cec5SDimitry Andric   insn.numImmediatesTranslated = 0;
24540b57cec5SDimitry Andric 
24550b57cec5SDimitry Andric   for (const auto &Op : insn.operands) {
24560b57cec5SDimitry Andric     if (Op.encoding != ENCODING_NONE) {
24570b57cec5SDimitry Andric       if (translateOperand(mcInst, Op, insn, Dis)) {
24580b57cec5SDimitry Andric         return true;
24590b57cec5SDimitry Andric       }
24600b57cec5SDimitry Andric     }
24610b57cec5SDimitry Andric   }
24620b57cec5SDimitry Andric 
24630b57cec5SDimitry Andric   return false;
24640b57cec5SDimitry Andric }
24650b57cec5SDimitry Andric 
createX86Disassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)24660b57cec5SDimitry Andric static MCDisassembler *createX86Disassembler(const Target &T,
24670b57cec5SDimitry Andric                                              const MCSubtargetInfo &STI,
24680b57cec5SDimitry Andric                                              MCContext &Ctx) {
24690b57cec5SDimitry Andric   std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
24700b57cec5SDimitry Andric   return new X86GenericDisassembler(STI, Ctx, std::move(MII));
24710b57cec5SDimitry Andric }
24720b57cec5SDimitry Andric 
LLVMInitializeX86Disassembler()2473480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
24740b57cec5SDimitry Andric   // Register the disassembler.
24750b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
24760b57cec5SDimitry Andric                                          createX86Disassembler);
24770b57cec5SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
24780b57cec5SDimitry Andric                                          createX86Disassembler);
24790b57cec5SDimitry Andric }
2480