//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Disassembler.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstring>
#include <memory>
#include <tuple>

using namespace llvm;

// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic
// disassembly is supported by passing a block of information in the DisInfo
// parameter and specifying the TagType and callback functions as described in
// the header llvm-c/Disassembler.h . The pointer to the block and the
// functions can all be passed as NULL. If successful, this returns a
// disassembler context. If not, it returns NULL.
43 // 44 LLVMDisasmContextRef 45 LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU, 46 const char *Features, void *DisInfo, int TagType, 47 LLVMOpInfoCallback GetOpInfo, 48 LLVMSymbolLookupCallback SymbolLookUp) { 49 // Get the target. 50 std::string Error; 51 const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); 52 if (!TheTarget) 53 return nullptr; 54 55 std::unique_ptr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); 56 if (!MRI) 57 return nullptr; 58 59 MCTargetOptions MCOptions; 60 // Get the assembler info needed to setup the MCContext. 61 std::unique_ptr<const MCAsmInfo> MAI( 62 TheTarget->createMCAsmInfo(*MRI, TT, MCOptions)); 63 if (!MAI) 64 return nullptr; 65 66 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 67 if (!MII) 68 return nullptr; 69 70 std::unique_ptr<const MCSubtargetInfo> STI( 71 TheTarget->createMCSubtargetInfo(TT, CPU, Features)); 72 if (!STI) 73 return nullptr; 74 75 // Set up the MCContext for creating symbols and MCExpr's. 76 std::unique_ptr<MCContext> Ctx( 77 new MCContext(Triple(TT), MAI.get(), MRI.get(), STI.get())); 78 if (!Ctx) 79 return nullptr; 80 81 // Set up disassembler. 82 std::unique_ptr<MCDisassembler> DisAsm( 83 TheTarget->createMCDisassembler(*STI, *Ctx)); 84 if (!DisAsm) 85 return nullptr; 86 87 std::unique_ptr<MCRelocationInfo> RelInfo( 88 TheTarget->createMCRelocationInfo(TT, *Ctx)); 89 if (!RelInfo) 90 return nullptr; 91 92 std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer( 93 TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx.get(), std::move(RelInfo))); 94 DisAsm->setSymbolizer(std::move(Symbolizer)); 95 96 // Set up the instruction printer. 
97 int AsmPrinterVariant = MAI->getAssemblerDialect(); 98 std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( 99 Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI)); 100 if (!IP) 101 return nullptr; 102 103 LLVMDisasmContext *DC = new LLVMDisasmContext( 104 TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, std::move(MAI), 105 std::move(MRI), std::move(STI), std::move(MII), std::move(Ctx), 106 std::move(DisAsm), std::move(IP)); 107 if (!DC) 108 return nullptr; 109 110 DC->setCPU(CPU); 111 return DC; 112 } 113 114 LLVMDisasmContextRef 115 LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType, 116 LLVMOpInfoCallback GetOpInfo, 117 LLVMSymbolLookupCallback SymbolLookUp) { 118 return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo, 119 SymbolLookUp); 120 } 121 122 LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo, 123 int TagType, LLVMOpInfoCallback GetOpInfo, 124 LLVMSymbolLookupCallback SymbolLookUp) { 125 return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo, 126 SymbolLookUp); 127 } 128 129 // 130 // LLVMDisasmDispose() disposes of the disassembler specified by the context. 131 // 132 void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ 133 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 134 delete DC; 135 } 136 137 /// Emits the comments that are stored in \p DC comment stream. 138 /// Each comment in the comment stream must end with a newline. 139 static void emitComments(LLVMDisasmContext *DC, 140 formatted_raw_ostream &FormattedOS) { 141 // Flush the stream before taking its content. 142 StringRef Comments = DC->CommentsToEmit.str(); 143 // Get the default information for printing a comment. 
144 const MCAsmInfo *MAI = DC->getAsmInfo(); 145 StringRef CommentBegin = MAI->getCommentString(); 146 unsigned CommentColumn = MAI->getCommentColumn(); 147 bool IsFirst = true; 148 while (!Comments.empty()) { 149 if (!IsFirst) 150 FormattedOS << '\n'; 151 // Emit a line of comments. 152 FormattedOS.PadToColumn(CommentColumn); 153 size_t Position = Comments.find('\n'); 154 FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position); 155 // Move after the newline character. 156 Comments = Comments.substr(Position+1); 157 IsFirst = false; 158 } 159 FormattedOS.flush(); 160 161 // Tell the comment stream that the vector changed underneath it. 162 DC->CommentsToEmit.clear(); 163 } 164 165 /// Gets latency information for \p Inst from the itinerary 166 /// scheduling model, based on \p DC information. 167 /// \return The maximum expected latency over all the operands or -1 168 /// if no information is available. 169 static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) { 170 const int NoInformationAvailable = -1; 171 172 // Check if we have a CPU to get the itinerary information. 173 if (DC->getCPU().empty()) 174 return NoInformationAvailable; 175 176 // Get itinerary information. 177 const MCSubtargetInfo *STI = DC->getSubtargetInfo(); 178 InstrItineraryData IID = STI->getInstrItineraryForCPU(DC->getCPU()); 179 // Get the scheduling class of the requested instruction. 180 const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); 181 unsigned SCClass = Desc.getSchedClass(); 182 183 unsigned Latency = 0; 184 185 for (unsigned Idx = 0, IdxEnd = Inst.getNumOperands(); Idx != IdxEnd; ++Idx) 186 if (std::optional<unsigned> OperCycle = IID.getOperandCycle(SCClass, Idx)) 187 Latency = std::max(Latency, *OperCycle); 188 189 return (int)Latency; 190 } 191 192 /// Gets latency information for \p Inst, based on \p DC information. 193 /// \return The maximum expected latency over all the definitions or -1 194 /// if no information is available. 
195 static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) { 196 // Try to compute scheduling information. 197 const MCSubtargetInfo *STI = DC->getSubtargetInfo(); 198 const MCSchedModel SCModel = STI->getSchedModel(); 199 const int NoInformationAvailable = -1; 200 201 // Check if we have a scheduling model for instructions. 202 if (!SCModel.hasInstrSchedModel()) 203 // Try to fall back to the itinerary model if the scheduling model doesn't 204 // have a scheduling table. Note the default does not have a table. 205 return getItineraryLatency(DC, Inst); 206 207 // Get the scheduling class of the requested instruction. 208 const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); 209 unsigned SCClass = Desc.getSchedClass(); 210 const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass); 211 // Resolving the variant SchedClass requires an MI to pass to 212 // SubTargetInfo::resolveSchedClass. 213 if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant()) 214 return NoInformationAvailable; 215 216 // Compute output latency. 217 int16_t Latency = 0; 218 for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; 219 DefIdx != DefEnd; ++DefIdx) { 220 // Lookup the definition's write latency in SubtargetInfo. 221 const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, 222 DefIdx); 223 Latency = std::max(Latency, WLEntry->Cycles); 224 } 225 226 return Latency; 227 } 228 229 /// Emits latency information in DC->CommentStream for \p Inst, based 230 /// on the information available in \p DC. 231 static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) { 232 int Latency = getLatency(DC, Inst); 233 234 // Report only interesting latencies. 235 if (Latency < 2) 236 return; 237 238 DC->CommentStream << "Latency: " << Latency << '\n'; 239 } 240 241 // 242 // LLVMDisasmInstruction() disassembles a single instruction using the 243 // disassembler context specified in the parameter DC. 
The bytes of the 244 // instruction are specified in the parameter Bytes, and contains at least 245 // BytesSize number of bytes. The instruction is at the address specified by 246 // the PC parameter. If a valid instruction can be disassembled its string is 247 // returned indirectly in OutString which whos size is specified in the 248 // parameter OutStringSize. This function returns the number of bytes in the 249 // instruction or zero if there was no valid instruction. If this function 250 // returns zero the caller will have to pick how many bytes they want to step 251 // over by printing a .byte, .long etc. to continue. 252 // 253 size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, 254 uint64_t BytesSize, uint64_t PC, char *OutString, 255 size_t OutStringSize){ 256 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 257 // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject. 258 ArrayRef<uint8_t> Data(Bytes, BytesSize); 259 260 uint64_t Size; 261 MCInst Inst; 262 const MCDisassembler *DisAsm = DC->getDisAsm(); 263 MCInstPrinter *IP = DC->getIP(); 264 MCDisassembler::DecodeStatus S; 265 SmallVector<char, 64> InsnStr; 266 raw_svector_ostream Annotations(InsnStr); 267 S = DisAsm->getInstruction(Inst, Size, Data, PC, Annotations); 268 switch (S) { 269 case MCDisassembler::Fail: 270 case MCDisassembler::SoftFail: 271 // FIXME: Do something different for soft failure modes? 
272 return 0; 273 274 case MCDisassembler::Success: { 275 StringRef AnnotationsStr = Annotations.str(); 276 277 SmallVector<char, 64> InsnStr; 278 raw_svector_ostream OS(InsnStr); 279 formatted_raw_ostream FormattedOS(OS); 280 IP->printInst(&Inst, PC, AnnotationsStr, *DC->getSubtargetInfo(), 281 FormattedOS); 282 283 if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency) 284 emitLatency(DC, Inst); 285 286 emitComments(DC, FormattedOS); 287 288 assert(OutStringSize != 0 && "Output buffer cannot be zero size"); 289 size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); 290 std::memcpy(OutString, InsnStr.data(), OutputSize); 291 OutString[OutputSize] = '\0'; // Terminate string. 292 293 return Size; 294 } 295 } 296 llvm_unreachable("Invalid DecodeStatus!"); 297 } 298 299 // 300 // LLVMSetDisasmOptions() sets the disassembler's options. It returns 1 if it 301 // can set all the Options and 0 otherwise. 302 // 303 int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ 304 if (Options & LLVMDisassembler_Option_UseMarkup){ 305 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 306 MCInstPrinter *IP = DC->getIP(); 307 IP->setUseMarkup(true); 308 DC->addOptions(LLVMDisassembler_Option_UseMarkup); 309 Options &= ~LLVMDisassembler_Option_UseMarkup; 310 } 311 if (Options & LLVMDisassembler_Option_PrintImmHex){ 312 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 313 MCInstPrinter *IP = DC->getIP(); 314 IP->setPrintImmHex(true); 315 DC->addOptions(LLVMDisassembler_Option_PrintImmHex); 316 Options &= ~LLVMDisassembler_Option_PrintImmHex; 317 } 318 if (Options & LLVMDisassembler_Option_AsmPrinterVariant){ 319 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 320 // Try to set up the new instruction printer. 
321 const MCAsmInfo *MAI = DC->getAsmInfo(); 322 const MCInstrInfo *MII = DC->getInstrInfo(); 323 const MCRegisterInfo *MRI = DC->getRegisterInfo(); 324 int AsmPrinterVariant = MAI->getAssemblerDialect(); 325 AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0; 326 MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter( 327 Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI); 328 if (IP) { 329 DC->setIP(IP); 330 DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant); 331 Options &= ~LLVMDisassembler_Option_AsmPrinterVariant; 332 } 333 } 334 if (Options & LLVMDisassembler_Option_SetInstrComments) { 335 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 336 MCInstPrinter *IP = DC->getIP(); 337 IP->setCommentStream(DC->CommentStream); 338 DC->addOptions(LLVMDisassembler_Option_SetInstrComments); 339 Options &= ~LLVMDisassembler_Option_SetInstrComments; 340 } 341 if (Options & LLVMDisassembler_Option_PrintLatency) { 342 LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); 343 DC->addOptions(LLVMDisassembler_Option_PrintLatency); 344 Options &= ~LLVMDisassembler_Option_PrintLatency; 345 } 346 return (Options == 0); 347 } 348