1 //===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "CodeGenTarget.h" 10 #include "TableGenBackends.h" 11 #include "WebAssemblyDisassemblerEmitter.h" 12 #include "X86DisassemblerTables.h" 13 #include "X86RecognizableInstr.h" 14 #include "llvm/TableGen/Error.h" 15 #include "llvm/TableGen/Record.h" 16 #include "llvm/TableGen/TableGenBackend.h" 17 18 using namespace llvm; 19 using namespace llvm::X86Disassembler; 20 21 /// DisassemblerEmitter - Contains disassembler table emitters for various 22 /// architectures. 23 24 /// X86 Disassembler Emitter 25 /// 26 /// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR 27 /// THE END OF THIS COMMENT! 28 /// 29 /// The X86 disassembler emitter is part of the X86 Disassembler, which is 30 /// documented in lib/Target/X86/X86Disassembler.h. 31 /// 32 /// The emitter produces the tables that the disassembler uses to translate 33 /// instructions. The emitter generates the following tables: 34 /// 35 /// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to 36 /// instruction contexts. Although for each attribute there are cases where 37 /// that attribute determines decoding, in the majority of cases decoding is 38 /// the same whether or not an attribute is present. For example, a 64-bit 39 /// instruction with an OPSIZE prefix and an XS prefix decodes the same way in 40 /// all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix 41 /// may have effects on its execution, but does not change the instruction 42 /// returned.) This allows considerable space savings in other tables. 
/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM) contain the hierarchy that the
///   decoder traverses while decoding an instruction. At the lowest level of
///   this hierarchy are instruction UIDs, 16-bit integers that can be used to
///   uniquely identify the instruction and correspond exactly to its position
///   in the list of CodeGenInstructions for the target.
/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
///   each instruction and how to decode them.
///
/// During table generation, there may be conflicts between instructions that
/// occupy the same space in the decode tables. These conflicts are resolved as
/// follows in setTableFields() (X86DisassemblerTables.cpp):
///
/// - If the current context is the native context for one of the instructions
///   (that is, the attributes specified for it in the LLVM tables specify
///   precisely the current context), then it has priority.
/// - If the current context isn't native for either of the instructions, then
///   the higher-priority context wins (that is, the one that is more specific).
///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp).
/// - If the current context is native for both instructions, then the table
///   emitter reports a conflict and dies.
///
/// *** RESOLUTION FOR "Primary decode conflict"S
///
/// If two instructions collide, typically the solution is (in order of
/// likelihood):
///
/// (1) to filter out one of the instructions by editing filter()
///     (X86RecognizableInstr.cpp). This is the most common resolution, but
///     check the Intel manuals first to make sure that (2) and (3) are not the
///     problem.
/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
///     accurate. Sometimes they are not.
76 /// (3) to fix the tables to reflect the actual context (for example, required 77 /// prefixes), and possibly to add a new context by editing 78 /// include/llvm/Support/X86DisassemblerDecoderCommon.h. This is unlikely 79 /// to be the cause. 80 /// 81 /// DisassemblerEmitter.cpp contains the implementation for the emitter, 82 /// which simply pulls out instructions from the CodeGenTarget and pushes them 83 /// into X86DisassemblerTables. 84 /// X86DisassemblerTables.h contains the interface for the instruction tables, 85 /// which manage and emit the structures discussed above. 86 /// X86DisassemblerTables.cpp contains the implementation for the instruction 87 /// tables. 88 /// X86ModRMFilters.h contains filters that can be used to determine which 89 /// ModR/M values are valid for a particular instruction. These are used to 90 /// populate ModRMDecisions. 91 /// X86RecognizableInstr.h contains the interface for a single instruction, 92 /// which knows how to translate itself from a CodeGenInstruction and provide 93 /// the information necessary for integration into the tables. 94 /// X86RecognizableInstr.cpp contains the implementation for a single 95 /// instruction. 96 97 static void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { 98 CodeGenTarget Target(Records); 99 emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS); 100 101 // X86 uses a custom disassembler. 
102 if (Target.getName() == "X86") { 103 DisassemblerTables Tables; 104 105 ArrayRef<const CodeGenInstruction*> numberedInstructions = 106 Target.getInstructionsByEnumValue(); 107 108 for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i) 109 RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i); 110 111 if (Tables.hasConflicts()) { 112 PrintError(Target.getTargetRecord()->getLoc(), "Primary decode conflict"); 113 return; 114 } 115 116 Tables.emit(OS); 117 return; 118 } 119 120 // WebAssembly has variable length opcodes, so can't use EmitFixedLenDecoder 121 // below (which depends on a Size table-gen Record), and also uses a custom 122 // disassembler. 123 if (Target.getName() == "WebAssembly") { 124 emitWebAssemblyDisassemblerTables(OS, Target.getInstructionsByEnumValue()); 125 return; 126 } 127 128 std::string PredicateNamespace = std::string(Target.getName()); 129 if (PredicateNamespace == "Thumb") 130 PredicateNamespace = "ARM"; 131 EmitDecoder(Records, OS, PredicateNamespace); 132 } 133 134 static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler, 135 "Generate disassembler"); 136