10b57cec5SDimitry Andric //===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file is part of the X86 Disassembler Emitter. 100b57cec5SDimitry Andric // It contains the interface of the disassembler tables. 110b57cec5SDimitry Andric // Documentation for the disassembler emitter in general can be found in 120b57cec5SDimitry Andric // X86DisassemblerEmitter.h. 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H 170b57cec5SDimitry Andric #define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric #include "X86DisassemblerShared.h" 201fd87a68SDimitry Andric #include "llvm/Support/X86DisassemblerDecoderCommon.h" 210b57cec5SDimitry Andric #include <map> 221fd87a68SDimitry Andric #include <memory> 230b57cec5SDimitry Andric #include <vector> 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric namespace llvm { 261fd87a68SDimitry Andric class raw_ostream; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric namespace X86Disassembler { 290b57cec5SDimitry Andric 301fd87a68SDimitry Andric class ModRMFilter; 311fd87a68SDimitry Andric 320b57cec5SDimitry Andric /// DisassemblerTables - Encapsulates all the decode tables being generated by 330b57cec5SDimitry Andric /// the table emitter. Contains functions to populate the tables as well as 340b57cec5SDimitry Andric /// to emit them as hierarchical C structures suitable for consumption by the 350b57cec5SDimitry Andric /// runtime. 360b57cec5SDimitry Andric class DisassemblerTables { 370b57cec5SDimitry Andric private: 380b57cec5SDimitry Andric /// The decoder tables. There is one for each opcode type: 390b57cec5SDimitry Andric /// [0] one-byte opcodes 400b57cec5SDimitry Andric /// [1] two-byte opcodes of the form 0f __ 410b57cec5SDimitry Andric /// [2] three-byte opcodes of the form 0f 38 __ 420b57cec5SDimitry Andric /// [3] three-byte opcodes of the form 0f 3a __ 430b57cec5SDimitry Andric /// [4] XOP8 map opcode 440b57cec5SDimitry Andric /// [5] XOP9 map opcode 450b57cec5SDimitry Andric /// [6] XOPA map opcode 460b57cec5SDimitry Andric /// [7] 3dnow map opcode 475f757f3fSDimitry Andric /// [8] fixed length MAP4 opcode 485f757f3fSDimitry Andric /// [9] fixed length MAP5 opcode 495f757f3fSDimitry Andric /// [10] fixed length MAP6 opcode 505f757f3fSDimitry Andric /// [11] fixed length MAP7 opcode 515f757f3fSDimitry Andric std::unique_ptr<ContextDecision> Tables[12]; 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric // Table of ModRM encodings. 540b57cec5SDimitry Andric typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy; 550b57cec5SDimitry Andric mutable ModRMMapTy ModRMTable; 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric /// The instruction information table 580b57cec5SDimitry Andric std::vector<InstructionSpecifier> InstructionSpecifiers; 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric /// True if there are primary decode conflicts in the instruction set 610b57cec5SDimitry Andric bool HasConflicts; 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric /// emitModRMDecision - Emits a table of entries corresponding to a single 640b57cec5SDimitry Andric /// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M 650b57cec5SDimitry Andric /// decisions are printed as: 660b57cec5SDimitry Andric /// 670b57cec5SDimitry Andric /// { /* struct ModRMDecision */ 680b57cec5SDimitry Andric /// TYPE, 690b57cec5SDimitry Andric /// modRMTablennnn 700b57cec5SDimitry Andric /// } 710b57cec5SDimitry Andric /// 720b57cec5SDimitry Andric /// where nnnn is a unique ID for the corresponding table of IDs. 730b57cec5SDimitry Andric /// TYPE indicates whether the table has one entry that is the same 740b57cec5SDimitry Andric /// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one 750b57cec5SDimitry Andric /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. 760b57cec5SDimitry Andric /// nnnn is the number of a table for looking up these values. The tables 770b57cec5SDimitry Andric /// are written separately so that tables consisting entirely of zeros will 780b57cec5SDimitry Andric /// not be duplicated. (These all have the name modRMEmptyTable.) A table 790b57cec5SDimitry Andric /// is printed as: 800b57cec5SDimitry Andric /// 810b57cec5SDimitry Andric /// InstrUID modRMTablennnn[k] = { 820b57cec5SDimitry Andric /// nnnn, /* MNEMONIC */ 830b57cec5SDimitry Andric /// ... 840b57cec5SDimitry Andric /// nnnn /* MNEMONIC */ 850b57cec5SDimitry Andric /// }; 860b57cec5SDimitry Andric /// 870b57cec5SDimitry Andric /// @param o1 - The output stream to print the ID table to. 880b57cec5SDimitry Andric /// @param o2 - The output stream to print the decision structure to. 890b57cec5SDimitry Andric /// @param i1 - The indentation level to use with stream o1. 900b57cec5SDimitry Andric /// @param i2 - The indentation level to use with stream o2. 910b57cec5SDimitry Andric /// @param ModRMTableNum - next table number for adding to ModRMTable. 920b57cec5SDimitry Andric /// @param decision - The ModR/M decision to emit. This decision has 256 930b57cec5SDimitry Andric /// entries - emitModRMDecision decides how to compact it. 94*0fca6ea1SDimitry Andric void emitModRMDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, 95*0fca6ea1SDimitry Andric unsigned &i2, unsigned &ModRMTableNum, 960b57cec5SDimitry Andric ModRMDecision &decision) const; 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M 990b57cec5SDimitry Andric /// decisions. An OpcodeDecision is printed as: 1000b57cec5SDimitry Andric /// 1010b57cec5SDimitry Andric /// { /* struct OpcodeDecision */ 1020b57cec5SDimitry Andric /// /* 0x00 */ 1030b57cec5SDimitry Andric /// { /* struct ModRMDecision */ 1040b57cec5SDimitry Andric /// ... 1050b57cec5SDimitry Andric /// } 1060b57cec5SDimitry Andric /// ... 1070b57cec5SDimitry Andric /// } 1080b57cec5SDimitry Andric /// 1090b57cec5SDimitry Andric /// where the ModRMDecision structure is printed as described in the 1100b57cec5SDimitry Andric /// documentation for emitModRMDecision(). emitOpcodeDecision() passes on a 1110b57cec5SDimitry Andric /// stream and indent level for the UID tables generated by 1120b57cec5SDimitry Andric /// emitModRMDecision(), but does not use them itself. 1130b57cec5SDimitry Andric /// 1140b57cec5SDimitry Andric /// @param o1 - The output stream to print the ID tables generated by 1150b57cec5SDimitry Andric /// emitModRMDecision() to. 1160b57cec5SDimitry Andric /// @param o2 - The output stream for the decision structure itself. 1170b57cec5SDimitry Andric /// @param i1 - The indent level to use with stream o1. 1180b57cec5SDimitry Andric /// @param i2 - The indent level to use with stream o2. 1190b57cec5SDimitry Andric /// @param ModRMTableNum - next table number for adding to ModRMTable. 1200b57cec5SDimitry Andric /// @param decision - The OpcodeDecision to emit along with its subsidiary 1210b57cec5SDimitry Andric /// structures. 122*0fca6ea1SDimitry Andric void emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, 123*0fca6ea1SDimitry Andric unsigned &i2, unsigned &ModRMTableNum, 1240b57cec5SDimitry Andric OpcodeDecision &decision) const; 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// emitContextDecision - Emits a ContextDecision and all its subsidiary 1270b57cec5SDimitry Andric /// Opcode and ModRMDecisions. A ContextDecision is printed as: 1280b57cec5SDimitry Andric /// 1290b57cec5SDimitry Andric /// struct ContextDecision NAME = { 1300b57cec5SDimitry Andric /// { /* OpcodeDecisions */ 1310b57cec5SDimitry Andric /// /* IC */ 1320b57cec5SDimitry Andric /// { /* struct OpcodeDecision */ 1330b57cec5SDimitry Andric /// ... 1340b57cec5SDimitry Andric /// }, 1350b57cec5SDimitry Andric /// ... 1360b57cec5SDimitry Andric /// } 1370b57cec5SDimitry Andric /// } 1380b57cec5SDimitry Andric /// 1390b57cec5SDimitry Andric /// NAME is the name of the ContextDecision (typically one of the four names 1400b57cec5SDimitry Andric /// ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM from 1410b57cec5SDimitry Andric /// X86DisassemblerDecoderCommon.h). 1420b57cec5SDimitry Andric /// IC is one of the contexts in InstructionContext. There is an opcode 1430b57cec5SDimitry Andric /// decision for each possible context. 1440b57cec5SDimitry Andric /// The OpcodeDecision structures are printed as described in the 1450b57cec5SDimitry Andric /// documentation for emitOpcodeDecision. 1460b57cec5SDimitry Andric /// 1470b57cec5SDimitry Andric /// @param o1 - The output stream to print the ID tables generated by 1480b57cec5SDimitry Andric /// emitModRMDecision() to. 1490b57cec5SDimitry Andric /// @param o2 - The output stream to print the decision structure to. 1500b57cec5SDimitry Andric /// @param i1 - The indent level to use with stream o1. 1510b57cec5SDimitry Andric /// @param i2 - The indent level to use with stream o2. 1520b57cec5SDimitry Andric /// @param ModRMTableNum - next table number for adding to ModRMTable. 1530b57cec5SDimitry Andric /// @param decision - The ContextDecision to emit along with its subsidiary 1540b57cec5SDimitry Andric /// structures. 1550b57cec5SDimitry Andric /// @param name - The name for the ContextDecision. 156*0fca6ea1SDimitry Andric void emitContextDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1, 157*0fca6ea1SDimitry Andric unsigned &i2, unsigned &ModRMTableNum, 1580b57cec5SDimitry Andric ContextDecision &decision, const char *name) const; 1590b57cec5SDimitry Andric 1600b57cec5SDimitry Andric /// emitInstructionInfo - Prints the instruction specifier table, which has 1610b57cec5SDimitry Andric /// one entry for each instruction, and contains name and operand 1620b57cec5SDimitry Andric /// information. This table is printed as: 1630b57cec5SDimitry Andric /// 1640b57cec5SDimitry Andric /// struct InstructionSpecifier CONTEXTS_SYM[k] = { 1650b57cec5SDimitry Andric /// { 1660b57cec5SDimitry Andric /// /* nnnn */ 1670b57cec5SDimitry Andric /// "MNEMONIC", 1680b57cec5SDimitry Andric /// 0xnn, 1690b57cec5SDimitry Andric /// { 1700b57cec5SDimitry Andric /// { 1710b57cec5SDimitry Andric /// ENCODING, 1720b57cec5SDimitry Andric /// TYPE 1730b57cec5SDimitry Andric /// }, 1740b57cec5SDimitry Andric /// ... 1750b57cec5SDimitry Andric /// } 1760b57cec5SDimitry Andric /// }, 1770b57cec5SDimitry Andric /// }; 1780b57cec5SDimitry Andric /// 1790b57cec5SDimitry Andric /// k is the total number of instructions. 1800b57cec5SDimitry Andric /// nnnn is the ID of the current instruction (0-based). This table 1810b57cec5SDimitry Andric /// includes entries for non-instructions like PHINODE. 1820b57cec5SDimitry Andric /// 0xnn is the lowest possible opcode for the current instruction, used for 1830b57cec5SDimitry Andric /// AddRegFrm instructions to compute the operand's value. 1840b57cec5SDimitry Andric /// ENCODING and TYPE describe the encoding and type for a single operand. 1850b57cec5SDimitry Andric /// 1860b57cec5SDimitry Andric /// @param o - The output stream to which the instruction table should be 1870b57cec5SDimitry Andric /// written. 1880b57cec5SDimitry Andric /// @param i - The indent level for use with the stream. 1890b57cec5SDimitry Andric void emitInstructionInfo(raw_ostream &o, unsigned &i) const; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric /// emitContextTable - Prints the table that is used to translate from an 1920b57cec5SDimitry Andric /// instruction attribute mask to an instruction context. This table is 1930b57cec5SDimitry Andric /// printed as: 1940b57cec5SDimitry Andric /// 1950b57cec5SDimitry Andric /// InstructionContext CONTEXTS_STR[256] = { 1960b57cec5SDimitry Andric /// IC, /* 0x00 */ 1970b57cec5SDimitry Andric /// ... 1980b57cec5SDimitry Andric /// }; 1990b57cec5SDimitry Andric /// 2000b57cec5SDimitry Andric /// IC is the context corresponding to the mask 0x00, and there are 256 2010b57cec5SDimitry Andric /// possible masks. 2020b57cec5SDimitry Andric /// 203*0fca6ea1SDimitry Andric /// @param o - The output stream to which the context table should be 204*0fca6ea1SDimitry Andric /// written. 2050b57cec5SDimitry Andric /// @param i - The indent level for use with the stream. 2060b57cec5SDimitry Andric void emitContextTable(raw_ostream &o, uint32_t &i) const; 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric /// emitContextDecisions - Prints all four ContextDecision structures using 2090b57cec5SDimitry Andric /// emitContextDecision(). 2100b57cec5SDimitry Andric /// 2110b57cec5SDimitry Andric /// @param o1 - The output stream to print the ID tables generated by 2120b57cec5SDimitry Andric /// emitModRMDecision() to. 2130b57cec5SDimitry Andric /// @param o2 - The output stream to print the decision structures to. 2140b57cec5SDimitry Andric /// @param i1 - The indent level to use with stream o1. 2150b57cec5SDimitry Andric /// @param i2 - The indent level to use with stream o2. 2160b57cec5SDimitry Andric /// @param ModRMTableNum - next table number for adding to ModRMTable. 217*0fca6ea1SDimitry Andric void emitContextDecisions(raw_ostream &o1, raw_ostream &o2, unsigned &i1, 218*0fca6ea1SDimitry Andric unsigned &i2, unsigned &ModRMTableNum) const; 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a 2210b57cec5SDimitry Andric /// ModRMDecision to refer to a particular instruction ID. 2220b57cec5SDimitry Andric /// 2230b57cec5SDimitry Andric /// @param decision - The ModRMDecision to populate. 2240b57cec5SDimitry Andric /// @param filter - The filter to use in deciding which entries to populate. 2250b57cec5SDimitry Andric /// @param uid - The unique ID to set matching entries to. 2260b57cec5SDimitry Andric /// @param opcode - The opcode of the instruction, for error reporting. 227*0fca6ea1SDimitry Andric void setTableFields(ModRMDecision &decision, const ModRMFilter &filter, 228*0fca6ea1SDimitry Andric InstrUID uid, uint8_t opcode); 229*0fca6ea1SDimitry Andric 2300b57cec5SDimitry Andric public: 2310b57cec5SDimitry Andric /// Constructor - Allocates space for the class decisions and clears them. 2320b57cec5SDimitry Andric DisassemblerTables(); 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric ~DisassemblerTables(); 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric /// emit - Emits the instruction table, context table, and class decisions. 2370b57cec5SDimitry Andric /// 2380b57cec5SDimitry Andric /// @param o - The output stream to print the tables to. 2390b57cec5SDimitry Andric void emit(raw_ostream &o) const; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric /// setTableFields - Uses the opcode type, instruction context, opcode, and a 2420b57cec5SDimitry Andric /// ModRMFilter as criteria to set a particular set of entries in the 2430b57cec5SDimitry Andric /// decode tables to point to a specific uid. 2440b57cec5SDimitry Andric /// 2450b57cec5SDimitry Andric /// @param type - The opcode type (ONEBYTE, TWOBYTE, etc.) 2460b57cec5SDimitry Andric /// @param insnContext - The context to use (IC, IC_64BIT, etc.) 2470b57cec5SDimitry Andric /// @param opcode - The last byte of the opcode (not counting any escape 2480b57cec5SDimitry Andric /// or extended opcodes). 249*0fca6ea1SDimitry Andric /// @param filter - The ModRMFilter that decides which ModR/M byte 250*0fca6ea1SDimitry Andric /// values 2510b57cec5SDimitry Andric /// correspond to the desired instruction. 2520b57cec5SDimitry Andric /// @param uid - The unique ID of the instruction. 2530b57cec5SDimitry Andric /// @param is32bit - Instructon is only 32-bit 2540b57cec5SDimitry Andric /// @param noPrefix - Instruction record has no prefix. 2550b57cec5SDimitry Andric /// @param ignoresVEX_L - Instruction ignores VEX.L 2560b57cec5SDimitry Andric /// @param ignoresVEX_W - Instruction ignores VEX.W 2570b57cec5SDimitry Andric /// @param AddrSize - Instructions address size 16/32/64. 0 is unspecified 258*0fca6ea1SDimitry Andric void setTableFields(OpcodeType type, InstructionContext insnContext, 259*0fca6ea1SDimitry Andric uint8_t opcode, const ModRMFilter &filter, InstrUID uid, 260*0fca6ea1SDimitry Andric bool is32bit, bool noPrefix, bool ignoresVEX_L, 261*0fca6ea1SDimitry Andric bool ignoresVEX_W, unsigned AddrSize); 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric /// specForUID - Returns the instruction specifier for a given unique 2640b57cec5SDimitry Andric /// instruction ID. Used when resolving collisions. 2650b57cec5SDimitry Andric /// 2660b57cec5SDimitry Andric /// @param uid - The unique ID of the instruction. 2670b57cec5SDimitry Andric /// @return - A reference to the instruction specifier. specForUID(InstrUID uid)2680b57cec5SDimitry Andric InstructionSpecifier &specForUID(InstrUID uid) { 2690b57cec5SDimitry Andric if (uid >= InstructionSpecifiers.size()) 2700b57cec5SDimitry Andric InstructionSpecifiers.resize(uid + 1); 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric return InstructionSpecifiers[uid]; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric // hasConflicts - Reports whether there were primary decode conflicts 2760b57cec5SDimitry Andric // from any instructions added to the tables. 2770b57cec5SDimitry Andric // @return - true if there were; false otherwise. 2780b57cec5SDimitry Andric hasConflicts()279*0fca6ea1SDimitry Andric bool hasConflicts() { return HasConflicts; } 2800b57cec5SDimitry Andric }; 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric } // namespace X86Disassembler 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric } // namespace llvm 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric #endif 287