xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/X86DisassemblerTables.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- X86DisassemblerTables.h - Disassembler tables ------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file is part of the X86 Disassembler Emitter.
100b57cec5SDimitry Andric // It contains the interface of the disassembler tables.
110b57cec5SDimitry Andric // Documentation for the disassembler emitter in general can be found in
120b57cec5SDimitry Andric //  X86DisassemblerEmitter.h.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #ifndef LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H
170b57cec5SDimitry Andric #define LLVM_UTILS_TABLEGEN_X86DISASSEMBLERTABLES_H
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #include "X86DisassemblerShared.h"
201fd87a68SDimitry Andric #include "llvm/Support/X86DisassemblerDecoderCommon.h"
210b57cec5SDimitry Andric #include <map>
221fd87a68SDimitry Andric #include <memory>
230b57cec5SDimitry Andric #include <vector>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace llvm {
261fd87a68SDimitry Andric class raw_ostream;
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric namespace X86Disassembler {
290b57cec5SDimitry Andric 
301fd87a68SDimitry Andric class ModRMFilter;
311fd87a68SDimitry Andric 
320b57cec5SDimitry Andric /// DisassemblerTables - Encapsulates all the decode tables being generated by
330b57cec5SDimitry Andric ///   the table emitter.  Contains functions to populate the tables as well as
340b57cec5SDimitry Andric ///   to emit them as hierarchical C structures suitable for consumption by the
350b57cec5SDimitry Andric ///   runtime.
360b57cec5SDimitry Andric class DisassemblerTables {
370b57cec5SDimitry Andric private:
380b57cec5SDimitry Andric   /// The decoder tables.  There is one for each opcode type:
390b57cec5SDimitry Andric   /// [0] one-byte opcodes
400b57cec5SDimitry Andric   /// [1] two-byte opcodes of the form 0f __
410b57cec5SDimitry Andric   /// [2] three-byte opcodes of the form 0f 38 __
420b57cec5SDimitry Andric   /// [3] three-byte opcodes of the form 0f 3a __
430b57cec5SDimitry Andric   /// [4] XOP8 map opcode
440b57cec5SDimitry Andric   /// [5] XOP9 map opcode
450b57cec5SDimitry Andric   /// [6] XOPA map opcode
460b57cec5SDimitry Andric   /// [7] 3dnow map opcode
475f757f3fSDimitry Andric   /// [8] fixed length MAP4 opcode
485f757f3fSDimitry Andric   /// [9] fixed length MAP5 opcode
495f757f3fSDimitry Andric   /// [10] fixed length MAP6 opcode
505f757f3fSDimitry Andric   /// [11] fixed length MAP7 opcode
515f757f3fSDimitry Andric   std::unique_ptr<ContextDecision> Tables[12];
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric   // Table of ModRM encodings.
540b57cec5SDimitry Andric   typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy;
550b57cec5SDimitry Andric   mutable ModRMMapTy ModRMTable;
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   /// The instruction information table
580b57cec5SDimitry Andric   std::vector<InstructionSpecifier> InstructionSpecifiers;
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric   /// True if there are primary decode conflicts in the instruction set
610b57cec5SDimitry Andric   bool HasConflicts;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric   /// emitModRMDecision - Emits a table of entries corresponding to a single
640b57cec5SDimitry Andric   ///   ModR/M decision.  Compacts the ModR/M decision if possible.  ModR/M
650b57cec5SDimitry Andric   ///   decisions are printed as:
660b57cec5SDimitry Andric   ///
670b57cec5SDimitry Andric   ///   { /* struct ModRMDecision */
680b57cec5SDimitry Andric   ///     TYPE,
690b57cec5SDimitry Andric   ///     modRMTablennnn
700b57cec5SDimitry Andric   ///   }
710b57cec5SDimitry Andric   ///
720b57cec5SDimitry Andric   ///   where nnnn is a unique ID for the corresponding table of IDs.
730b57cec5SDimitry Andric   ///   TYPE indicates whether the table has one entry that is the same
740b57cec5SDimitry Andric   ///   regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one
750b57cec5SDimitry Andric   ///   for bytes 0xc0-0xff -, or 256 entries, one for each possible byte.
760b57cec5SDimitry Andric   ///   nnnn is the number of a table for looking up these values.  The tables
770b57cec5SDimitry Andric   ///   are written separately so that tables consisting entirely of zeros will
780b57cec5SDimitry Andric   ///   not be duplicated.  (These all have the name modRMEmptyTable.)  A table
790b57cec5SDimitry Andric   ///   is printed as:
800b57cec5SDimitry Andric   ///
810b57cec5SDimitry Andric   ///   InstrUID modRMTablennnn[k] = {
820b57cec5SDimitry Andric   ///     nnnn, /* MNEMONIC */
830b57cec5SDimitry Andric   ///     ...
840b57cec5SDimitry Andric   ///     nnnn /* MNEMONIC */
850b57cec5SDimitry Andric   ///   };
860b57cec5SDimitry Andric   ///
870b57cec5SDimitry Andric   /// @param o1       - The output stream to print the ID table to.
880b57cec5SDimitry Andric   /// @param o2       - The output stream to print the decision structure to.
890b57cec5SDimitry Andric   /// @param i1       - The indentation level to use with stream o1.
900b57cec5SDimitry Andric   /// @param i2       - The indentation level to use with stream o2.
910b57cec5SDimitry Andric   /// @param ModRMTableNum - next table number for adding to ModRMTable.
920b57cec5SDimitry Andric   /// @param decision - The ModR/M decision to emit.  This decision has 256
930b57cec5SDimitry Andric   ///                   entries - emitModRMDecision decides how to compact it.
94*0fca6ea1SDimitry Andric   void emitModRMDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1,
95*0fca6ea1SDimitry Andric                          unsigned &i2, unsigned &ModRMTableNum,
960b57cec5SDimitry Andric                          ModRMDecision &decision) const;
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M
990b57cec5SDimitry Andric   ///   decisions.  An OpcodeDecision is printed as:
1000b57cec5SDimitry Andric   ///
1010b57cec5SDimitry Andric   ///   { /* struct OpcodeDecision */
1020b57cec5SDimitry Andric   ///     /* 0x00 */
1030b57cec5SDimitry Andric   ///     { /* struct ModRMDecision */
1040b57cec5SDimitry Andric   ///       ...
1050b57cec5SDimitry Andric   ///     }
1060b57cec5SDimitry Andric   ///     ...
1070b57cec5SDimitry Andric   ///   }
1080b57cec5SDimitry Andric   ///
1090b57cec5SDimitry Andric   ///   where the ModRMDecision structure is printed as described in the
1100b57cec5SDimitry Andric   ///   documentation for emitModRMDecision().  emitOpcodeDecision() passes on a
1110b57cec5SDimitry Andric   ///   stream and indent level for the UID tables generated by
1120b57cec5SDimitry Andric   ///   emitModRMDecision(), but does not use them itself.
1130b57cec5SDimitry Andric   ///
1140b57cec5SDimitry Andric   /// @param o1       - The output stream to print the ID tables generated by
1150b57cec5SDimitry Andric   ///                   emitModRMDecision() to.
1160b57cec5SDimitry Andric   /// @param o2       - The output stream for the decision structure itself.
1170b57cec5SDimitry Andric   /// @param i1       - The indent level to use with stream o1.
1180b57cec5SDimitry Andric   /// @param i2       - The indent level to use with stream o2.
1190b57cec5SDimitry Andric   /// @param ModRMTableNum - next table number for adding to ModRMTable.
1200b57cec5SDimitry Andric   /// @param decision - The OpcodeDecision to emit along with its subsidiary
1210b57cec5SDimitry Andric   ///                    structures.
122*0fca6ea1SDimitry Andric   void emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1,
123*0fca6ea1SDimitry Andric                           unsigned &i2, unsigned &ModRMTableNum,
1240b57cec5SDimitry Andric                           OpcodeDecision &decision) const;
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric   /// emitContextDecision - Emits a ContextDecision and all its subsidiary
1270b57cec5SDimitry Andric   ///   Opcode and ModRMDecisions.  A ContextDecision is printed as:
1280b57cec5SDimitry Andric   ///
1290b57cec5SDimitry Andric   ///   struct ContextDecision NAME = {
1300b57cec5SDimitry Andric   ///     { /* OpcodeDecisions */
1310b57cec5SDimitry Andric   ///       /* IC */
1320b57cec5SDimitry Andric   ///       { /* struct OpcodeDecision */
1330b57cec5SDimitry Andric   ///         ...
1340b57cec5SDimitry Andric   ///       },
1350b57cec5SDimitry Andric   ///       ...
1360b57cec5SDimitry Andric   ///     }
1370b57cec5SDimitry Andric   ///   }
1380b57cec5SDimitry Andric   ///
1390b57cec5SDimitry Andric   ///   NAME is the name of the ContextDecision (typically one of the four names
1400b57cec5SDimitry Andric   ///   ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM from
1410b57cec5SDimitry Andric   ///   X86DisassemblerDecoderCommon.h).
1420b57cec5SDimitry Andric   ///   IC is one of the contexts in InstructionContext.  There is an opcode
1430b57cec5SDimitry Andric   ///   decision for each possible context.
1440b57cec5SDimitry Andric   ///   The OpcodeDecision structures are printed as described in the
1450b57cec5SDimitry Andric   ///   documentation for emitOpcodeDecision.
1460b57cec5SDimitry Andric   ///
1470b57cec5SDimitry Andric   /// @param o1       - The output stream to print the ID tables generated by
1480b57cec5SDimitry Andric   ///                   emitModRMDecision() to.
1490b57cec5SDimitry Andric   /// @param o2       - The output stream to print the decision structure to.
1500b57cec5SDimitry Andric   /// @param i1       - The indent level to use with stream o1.
1510b57cec5SDimitry Andric   /// @param i2       - The indent level to use with stream o2.
1520b57cec5SDimitry Andric   /// @param ModRMTableNum - next table number for adding to ModRMTable.
1530b57cec5SDimitry Andric   /// @param decision - The ContextDecision to emit along with its subsidiary
1540b57cec5SDimitry Andric   ///                   structures.
1550b57cec5SDimitry Andric   /// @param name     - The name for the ContextDecision.
156*0fca6ea1SDimitry Andric   void emitContextDecision(raw_ostream &o1, raw_ostream &o2, unsigned &i1,
157*0fca6ea1SDimitry Andric                            unsigned &i2, unsigned &ModRMTableNum,
1580b57cec5SDimitry Andric                            ContextDecision &decision, const char *name) const;
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   /// emitInstructionInfo - Prints the instruction specifier table, which has
1610b57cec5SDimitry Andric   ///   one entry for each instruction, and contains name and operand
1620b57cec5SDimitry Andric   ///   information.  This table is printed as:
1630b57cec5SDimitry Andric   ///
1640b57cec5SDimitry Andric   ///   struct InstructionSpecifier CONTEXTS_SYM[k] = {
1650b57cec5SDimitry Andric   ///     {
1660b57cec5SDimitry Andric   ///       /* nnnn */
1670b57cec5SDimitry Andric   ///       "MNEMONIC",
1680b57cec5SDimitry Andric   ///       0xnn,
1690b57cec5SDimitry Andric   ///       {
1700b57cec5SDimitry Andric   ///         {
1710b57cec5SDimitry Andric   ///           ENCODING,
1720b57cec5SDimitry Andric   ///           TYPE
1730b57cec5SDimitry Andric   ///         },
1740b57cec5SDimitry Andric   ///         ...
1750b57cec5SDimitry Andric   ///       }
1760b57cec5SDimitry Andric   ///     },
1770b57cec5SDimitry Andric   ///   };
1780b57cec5SDimitry Andric   ///
1790b57cec5SDimitry Andric   ///   k is the total number of instructions.
1800b57cec5SDimitry Andric   ///   nnnn is the ID of the current instruction (0-based).  This table
1810b57cec5SDimitry Andric   ///   includes entries for non-instructions like PHINODE.
1820b57cec5SDimitry Andric   ///   0xnn is the lowest possible opcode for the current instruction, used for
1830b57cec5SDimitry Andric   ///   AddRegFrm instructions to compute the operand's value.
1840b57cec5SDimitry Andric   ///   ENCODING and TYPE describe the encoding and type for a single operand.
1850b57cec5SDimitry Andric   ///
1860b57cec5SDimitry Andric   /// @param o  - The output stream to which the instruction table should be
1870b57cec5SDimitry Andric   ///             written.
1880b57cec5SDimitry Andric   /// @param i  - The indent level for use with the stream.
1890b57cec5SDimitry Andric   void emitInstructionInfo(raw_ostream &o, unsigned &i) const;
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric   /// emitContextTable - Prints the table that is used to translate from an
1920b57cec5SDimitry Andric   ///   instruction attribute mask to an instruction context.  This table is
1930b57cec5SDimitry Andric   ///   printed as:
1940b57cec5SDimitry Andric   ///
1950b57cec5SDimitry Andric   ///   InstructionContext CONTEXTS_STR[256] = {
1960b57cec5SDimitry Andric   ///     IC, /* 0x00 */
1970b57cec5SDimitry Andric   ///     ...
1980b57cec5SDimitry Andric   ///   };
1990b57cec5SDimitry Andric   ///
2000b57cec5SDimitry Andric   ///   IC is the context corresponding to the mask 0x00, and there are 256
2010b57cec5SDimitry Andric   ///   possible masks.
2020b57cec5SDimitry Andric   ///
203*0fca6ea1SDimitry Andric   /// @param o  - The output stream to which the context table should be
204*0fca6ea1SDimitry Andric   /// written.
2050b57cec5SDimitry Andric   /// @param i  - The indent level for use with the stream.
2060b57cec5SDimitry Andric   void emitContextTable(raw_ostream &o, uint32_t &i) const;
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   /// emitContextDecisions - Prints all four ContextDecision structures using
2090b57cec5SDimitry Andric   ///   emitContextDecision().
2100b57cec5SDimitry Andric   ///
2110b57cec5SDimitry Andric   /// @param o1 - The output stream to print the ID tables generated by
2120b57cec5SDimitry Andric   ///             emitModRMDecision() to.
2130b57cec5SDimitry Andric   /// @param o2 - The output stream to print the decision structures to.
2140b57cec5SDimitry Andric   /// @param i1 - The indent level to use with stream o1.
2150b57cec5SDimitry Andric   /// @param i2 - The indent level to use with stream o2.
2160b57cec5SDimitry Andric   /// @param ModRMTableNum - next table number for adding to ModRMTable.
217*0fca6ea1SDimitry Andric   void emitContextDecisions(raw_ostream &o1, raw_ostream &o2, unsigned &i1,
218*0fca6ea1SDimitry Andric                             unsigned &i2, unsigned &ModRMTableNum) const;
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric   /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a
2210b57cec5SDimitry Andric   ///   ModRMDecision to refer to a particular instruction ID.
2220b57cec5SDimitry Andric   ///
2230b57cec5SDimitry Andric   /// @param decision - The ModRMDecision to populate.
2240b57cec5SDimitry Andric   /// @param filter   - The filter to use in deciding which entries to populate.
2250b57cec5SDimitry Andric   /// @param uid      - The unique ID to set matching entries to.
2260b57cec5SDimitry Andric   /// @param opcode   - The opcode of the instruction, for error reporting.
227*0fca6ea1SDimitry Andric   void setTableFields(ModRMDecision &decision, const ModRMFilter &filter,
228*0fca6ea1SDimitry Andric                       InstrUID uid, uint8_t opcode);
229*0fca6ea1SDimitry Andric 
2300b57cec5SDimitry Andric public:
2310b57cec5SDimitry Andric   /// Constructor - Allocates space for the class decisions and clears them.
2320b57cec5SDimitry Andric   DisassemblerTables();
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   ~DisassemblerTables();
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric   /// emit - Emits the instruction table, context table, and class decisions.
2370b57cec5SDimitry Andric   ///
2380b57cec5SDimitry Andric   /// @param o  - The output stream to print the tables to.
2390b57cec5SDimitry Andric   void emit(raw_ostream &o) const;
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   /// setTableFields - Uses the opcode type, instruction context, opcode, and a
2420b57cec5SDimitry Andric   ///   ModRMFilter as criteria to set a particular set of entries in the
2430b57cec5SDimitry Andric   ///   decode tables to point to a specific uid.
2440b57cec5SDimitry Andric   ///
2450b57cec5SDimitry Andric   /// @param type         - The opcode type (ONEBYTE, TWOBYTE, etc.)
2460b57cec5SDimitry Andric   /// @param insnContext  - The context to use (IC, IC_64BIT, etc.)
2470b57cec5SDimitry Andric   /// @param opcode       - The last byte of the opcode (not counting any escape
2480b57cec5SDimitry Andric   ///                       or extended opcodes).
249*0fca6ea1SDimitry Andric   /// @param filter       - The ModRMFilter that decides which ModR/M byte
250*0fca6ea1SDimitry Andric   /// values
2510b57cec5SDimitry Andric   ///                       correspond to the desired instruction.
2520b57cec5SDimitry Andric   /// @param uid          - The unique ID of the instruction.
2530b57cec5SDimitry Andric   /// @param is32bit      - Instructon is only 32-bit
2540b57cec5SDimitry Andric   /// @param noPrefix     - Instruction record has no prefix.
2550b57cec5SDimitry Andric   /// @param ignoresVEX_L - Instruction ignores VEX.L
2560b57cec5SDimitry Andric   /// @param ignoresVEX_W - Instruction ignores VEX.W
2570b57cec5SDimitry Andric   /// @param AddrSize     - Instructions address size 16/32/64. 0 is unspecified
258*0fca6ea1SDimitry Andric   void setTableFields(OpcodeType type, InstructionContext insnContext,
259*0fca6ea1SDimitry Andric                       uint8_t opcode, const ModRMFilter &filter, InstrUID uid,
260*0fca6ea1SDimitry Andric                       bool is32bit, bool noPrefix, bool ignoresVEX_L,
261*0fca6ea1SDimitry Andric                       bool ignoresVEX_W, unsigned AddrSize);
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric   /// specForUID - Returns the instruction specifier for a given unique
2640b57cec5SDimitry Andric   ///   instruction ID.  Used when resolving collisions.
2650b57cec5SDimitry Andric   ///
2660b57cec5SDimitry Andric   /// @param uid  - The unique ID of the instruction.
2670b57cec5SDimitry Andric   /// @return     - A reference to the instruction specifier.
specForUID(InstrUID uid)2680b57cec5SDimitry Andric   InstructionSpecifier &specForUID(InstrUID uid) {
2690b57cec5SDimitry Andric     if (uid >= InstructionSpecifiers.size())
2700b57cec5SDimitry Andric       InstructionSpecifiers.resize(uid + 1);
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric     return InstructionSpecifiers[uid];
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   // hasConflicts - Reports whether there were primary decode conflicts
2760b57cec5SDimitry Andric   //   from any instructions added to the tables.
2770b57cec5SDimitry Andric   // @return  - true if there were; false otherwise.
2780b57cec5SDimitry Andric 
hasConflicts()279*0fca6ea1SDimitry Andric   bool hasConflicts() { return HasConflicts; }
2800b57cec5SDimitry Andric };
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric } // namespace X86Disassembler
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric } // namespace llvm
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric #endif
287