10b57cec5SDimitry Andric //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 100b57cec5SDimitry Andric #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 110b57cec5SDimitry Andric 125ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 135ffd83dbSDimitry Andric #include "llvm/BinaryFormat/XCOFF.h" 140b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 15*0fca6ea1SDimitry Andric #include "llvm/Support/Error.h" 160b57cec5SDimitry Andric #include <cstdint> 170b57cec5SDimitry Andric #include <memory> 185ffd83dbSDimitry Andric #include <vector> 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric namespace llvm { 210b57cec5SDimitry Andric 22bdd1243dSDimitry Andric struct XCOFFSymbolInfoTy { 23bdd1243dSDimitry Andric std::optional<XCOFF::StorageMappingClass> StorageMappingClass; 24bdd1243dSDimitry Andric std::optional<uint32_t> Index; 25bdd1243dSDimitry Andric bool IsLabel = false; 26bdd1243dSDimitry Andric bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; 275ffd83dbSDimitry Andric }; 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric struct SymbolInfoTy { 305ffd83dbSDimitry Andric uint64_t Addr; 315ffd83dbSDimitry Andric StringRef Name; 32bdd1243dSDimitry Andric // XCOFF uses XCOFFSymInfo. Other targets use Type. 33bdd1243dSDimitry Andric XCOFFSymbolInfoTy XCOFFSymInfo; 345ffd83dbSDimitry Andric uint8_t Type; 355f757f3fSDimitry Andric // Used by ELF to describe a mapping symbol that is usually not displayed. 365f757f3fSDimitry Andric bool IsMappingSymbol; 375ffd83dbSDimitry Andric 385ffd83dbSDimitry Andric private: 395ffd83dbSDimitry Andric bool IsXCOFF; 4081ad6265SDimitry Andric bool HasType; 415ffd83dbSDimitry Andric 425ffd83dbSDimitry Andric public: SymbolInfoTySymbolInfoTy435f757f3fSDimitry Andric SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, 445f757f3fSDimitry Andric StringRef Name, std::optional<uint32_t> Idx, bool Label) 45bdd1243dSDimitry Andric : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0), 465f757f3fSDimitry Andric IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} 4781ad6265SDimitry Andric SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, 485f757f3fSDimitry Andric bool IsMappingSymbol = false, bool IsXCOFF = false) AddrSymbolInfoTy495f757f3fSDimitry Andric : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), 505f757f3fSDimitry Andric IsXCOFF(IsXCOFF), HasType(true) {} isXCOFFSymbolInfoTy515ffd83dbSDimitry Andric bool isXCOFF() const { return IsXCOFF; } 525ffd83dbSDimitry Andric 535ffd83dbSDimitry Andric private: 545ffd83dbSDimitry Andric friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 5581ad6265SDimitry Andric assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && 5681ad6265SDimitry Andric "The value of IsXCOFF and HasType in P1 and P2 should be the same " 5781ad6265SDimitry Andric "respectively."); 5881ad6265SDimitry Andric 5981ad6265SDimitry Andric if (P1.IsXCOFF && P1.HasType) 6081ad6265SDimitry Andric return std::tie(P1.Addr, P1.Type, P1.Name) < 6181ad6265SDimitry Andric std::tie(P2.Addr, P2.Type, P2.Name); 6281ad6265SDimitry Andric 635ffd83dbSDimitry Andric if (P1.IsXCOFF) 645ffd83dbSDimitry Andric return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 655ffd83dbSDimitry Andric std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 665ffd83dbSDimitry Andric 675f757f3fSDimitry Andric // With the same address, place mapping symbols first. 685f757f3fSDimitry Andric bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; 695f757f3fSDimitry Andric return std::tie(P1.Addr, MS1, P1.Name, P1.Type) < 705f757f3fSDimitry Andric std::tie(P2.Addr, MS2, P2.Name, P2.Type); 715ffd83dbSDimitry Andric } 725ffd83dbSDimitry Andric }; 735ffd83dbSDimitry Andric 745ffd83dbSDimitry Andric using SectionSymbolsTy = std::vector<SymbolInfoTy>; 755ffd83dbSDimitry Andric 760b57cec5SDimitry Andric template <typename T> class ArrayRef; 770b57cec5SDimitry Andric class MCContext; 780b57cec5SDimitry Andric class MCInst; 790b57cec5SDimitry Andric class MCSubtargetInfo; 800b57cec5SDimitry Andric class raw_ostream; 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric /// Superclass for all disassemblers. Consumes a memory region and provides an 830b57cec5SDimitry Andric /// array of assembly instructions. 840b57cec5SDimitry Andric class MCDisassembler { 850b57cec5SDimitry Andric public: 860b57cec5SDimitry Andric /// Ternary decode status. Most backends will just use Fail and 870b57cec5SDimitry Andric /// Success, however some have a concept of an instruction with 880b57cec5SDimitry Andric /// understandable semantics but which is architecturally 890b57cec5SDimitry Andric /// incorrect. An example of this is ARM UNPREDICTABLE instructions 900b57cec5SDimitry Andric /// which are disassemblable but cause undefined behaviour. 910b57cec5SDimitry Andric /// 920b57cec5SDimitry Andric /// Because it makes sense to disassemble these instructions, there 930b57cec5SDimitry Andric /// is a "soft fail" failure mode that indicates the MCInst& is 940b57cec5SDimitry Andric /// valid but architecturally incorrect. 950b57cec5SDimitry Andric /// 960b57cec5SDimitry Andric /// The enum numbers are deliberately chosen such that reduction 970b57cec5SDimitry Andric /// from Success->SoftFail ->Fail can be done with a simple 980b57cec5SDimitry Andric /// bitwise-AND: 990b57cec5SDimitry Andric /// 1000b57cec5SDimitry Andric /// LEFT & TOP = | Success Unpredictable Fail 1010b57cec5SDimitry Andric /// --------------+----------------------------------- 1020b57cec5SDimitry Andric /// Success | Success Unpredictable Fail 1030b57cec5SDimitry Andric /// Unpredictable | Unpredictable Unpredictable Fail 1040b57cec5SDimitry Andric /// Fail | Fail Fail Fail 1050b57cec5SDimitry Andric /// 1060b57cec5SDimitry Andric /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 1070b57cec5SDimitry Andric /// Success, SoftFail, Fail respectively. 1080b57cec5SDimitry Andric enum DecodeStatus { 1090b57cec5SDimitry Andric Fail = 0, 1100b57cec5SDimitry Andric SoftFail = 1, 1110b57cec5SDimitry Andric Success = 3 1120b57cec5SDimitry Andric }; 1130b57cec5SDimitry Andric MCDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx)1140b57cec5SDimitry Andric MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 1150b57cec5SDimitry Andric : Ctx(Ctx), STI(STI) {} 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric virtual ~MCDisassembler(); 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric /// Returns the disassembly of a single instruction. 1200b57cec5SDimitry Andric /// 1210b57cec5SDimitry Andric /// \param Instr - An MCInst to populate with the contents of the 1220b57cec5SDimitry Andric /// instruction. 1230b57cec5SDimitry Andric /// \param Size - A value to populate with the size of the instruction, or 1240b57cec5SDimitry Andric /// the number of bytes consumed while attempting to decode 1250b57cec5SDimitry Andric /// an invalid instruction. 1260b57cec5SDimitry Andric /// \param Address - The address, in the memory space of region, of the first 1270b57cec5SDimitry Andric /// byte of the instruction. 1280b57cec5SDimitry Andric /// \param Bytes - A reference to the actual bytes of the instruction. 1290b57cec5SDimitry Andric /// \param CStream - The stream to print comments and annotations on. 1300b57cec5SDimitry Andric /// \return - MCDisassembler::Success if the instruction is valid, 1310b57cec5SDimitry Andric /// MCDisassembler::SoftFail if the instruction was 1320b57cec5SDimitry Andric /// disassemblable but invalid, 1330b57cec5SDimitry Andric /// MCDisassembler::Fail if the instruction was invalid. 1340b57cec5SDimitry Andric virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 1350b57cec5SDimitry Andric ArrayRef<uint8_t> Bytes, uint64_t Address, 1360b57cec5SDimitry Andric raw_ostream &CStream) const = 0; 1370b57cec5SDimitry Andric 1385ffd83dbSDimitry Andric /// Used to perform separate target specific disassembly for a particular 1395ffd83dbSDimitry Andric /// symbol. May parse any prelude that precedes instructions after the 1405ffd83dbSDimitry Andric /// start of a symbol, or the entire symbol. 1415ffd83dbSDimitry Andric /// This is used for example by WebAssembly to decode preludes. 1420b57cec5SDimitry Andric /// 143*0fca6ea1SDimitry Andric /// Base implementation returns false. So all targets by default decline to 144*0fca6ea1SDimitry Andric /// treat symbols separately. 1455ffd83dbSDimitry Andric /// 1465ffd83dbSDimitry Andric /// \param Symbol - The symbol. 1470b57cec5SDimitry Andric /// \param Size - The number of bytes consumed. 1480b57cec5SDimitry Andric /// \param Address - The address, in the memory space of region, of the first 1490b57cec5SDimitry Andric /// byte of the symbol. 1500b57cec5SDimitry Andric /// \param Bytes - A reference to the actual bytes at the symbol location. 151*0fca6ea1SDimitry Andric /// \return - True if this symbol triggered some target specific 152*0fca6ea1SDimitry Andric /// disassembly for this symbol. Size must be set with the 153*0fca6ea1SDimitry Andric /// number of bytes consumed. 154*0fca6ea1SDimitry Andric /// - Error if this symbol triggered some target specific 155*0fca6ea1SDimitry Andric /// disassembly for this symbol, but an error was found with 156*0fca6ea1SDimitry Andric /// it. Size must be set with the number of bytes consumed. 157*0fca6ea1SDimitry Andric /// - False if the target doesn't want to handle the symbol 158*0fca6ea1SDimitry Andric /// separately. The value of Size is ignored in this case, 159*0fca6ea1SDimitry Andric /// and Err must not be set. 160*0fca6ea1SDimitry Andric virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 161*0fca6ea1SDimitry Andric ArrayRef<uint8_t> Bytes, 162*0fca6ea1SDimitry Andric uint64_t Address) const; 1635ffd83dbSDimitry Andric // TODO: 1645ffd83dbSDimitry Andric // Implement similar hooks that can be used at other points during 1655ffd83dbSDimitry Andric // disassembly. Something along the following lines: 1665ffd83dbSDimitry Andric // - onBeforeInstructionDecode() 1675ffd83dbSDimitry Andric // - onAfterInstructionDecode() 1685ffd83dbSDimitry Andric // - onSymbolEnd() 1695ffd83dbSDimitry Andric // It should help move much of the target specific code from llvm-objdump to 1705ffd83dbSDimitry Andric // respective target disassemblers. 1710b57cec5SDimitry Andric 172972a253aSDimitry Andric /// Suggest a distance to skip in a buffer of data to find the next 173972a253aSDimitry Andric /// place to look for the start of an instruction. For example, if 174972a253aSDimitry Andric /// all instructions have a fixed alignment, this might advance to 175972a253aSDimitry Andric /// the next multiple of that alignment. 176972a253aSDimitry Andric /// 177972a253aSDimitry Andric /// If not overridden, the default is 1. 178972a253aSDimitry Andric /// 179972a253aSDimitry Andric /// \param Address - The address, in the memory space of region, of the 180972a253aSDimitry Andric /// starting point (typically the first byte of something 181972a253aSDimitry Andric /// that did not decode as a valid instruction at all). 182972a253aSDimitry Andric /// \param Bytes - A reference to the actual bytes at Address. May be 183972a253aSDimitry Andric /// needed in order to determine the width of an 184972a253aSDimitry Andric /// unrecognized instruction (e.g. in Thumb this is a simple 185972a253aSDimitry Andric /// consistent criterion that doesn't require knowing the 186972a253aSDimitry Andric /// specific instruction). The caller can pass as much data 187972a253aSDimitry Andric /// as they have available, and the function is required to 188972a253aSDimitry Andric /// make a reasonable default choice if not enough data is 189972a253aSDimitry Andric /// available to make a better one. 190972a253aSDimitry Andric /// \return - A number of bytes to skip. Must always be greater than 191972a253aSDimitry Andric /// zero. May be greater than the size of Bytes. 192972a253aSDimitry Andric virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, 193972a253aSDimitry Andric uint64_t Address) const; 194972a253aSDimitry Andric 1950b57cec5SDimitry Andric private: 1960b57cec5SDimitry Andric MCContext &Ctx; 1970b57cec5SDimitry Andric 1980b57cec5SDimitry Andric protected: 1990b57cec5SDimitry Andric // Subtarget information, for instruction decoding predicates if required. 2000b57cec5SDimitry Andric const MCSubtargetInfo &STI; 2010b57cec5SDimitry Andric std::unique_ptr<MCSymbolizer> Symbolizer; 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric public: 2040b57cec5SDimitry Andric // Helpers around MCSymbolizer 20581ad6265SDimitry Andric bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, 20681ad6265SDimitry Andric bool IsBranch, uint64_t Offset, uint64_t OpSize, 20781ad6265SDimitry Andric uint64_t InstSize) const; 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric /// Set \p Symzer as the current symbolizer. 2120b57cec5SDimitry Andric /// This takes ownership of \p Symzer, and deletes the previously set one. 2130b57cec5SDimitry Andric void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 2140b57cec5SDimitry Andric getContext()2150b57cec5SDimitry Andric MCContext& getContext() const { return Ctx; } 2160b57cec5SDimitry Andric getSubtargetInfo()2170b57cec5SDimitry Andric const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 2180b57cec5SDimitry Andric 219*0fca6ea1SDimitry Andric /// ELF-specific, set the ABI version from the object header. setABIVersion(unsigned Version)220*0fca6ea1SDimitry Andric virtual void setABIVersion(unsigned Version) {} 221*0fca6ea1SDimitry Andric 2220b57cec5SDimitry Andric // Marked mutable because we cache it inside the disassembler, rather than 2230b57cec5SDimitry Andric // having to pass it around as an argument through all the autogenerated code. 2240b57cec5SDimitry Andric mutable raw_ostream *CommentStream = nullptr; 2250b57cec5SDimitry Andric }; 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric } // end namespace llvm 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 230