1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/BinaryFormat/XCOFF.h" 14 #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 15 #include "llvm/Support/Error.h" 16 #include <cstdint> 17 #include <memory> 18 #include <vector> 19 20 namespace llvm { 21 22 struct XCOFFSymbolInfoTy { 23 std::optional<XCOFF::StorageMappingClass> StorageMappingClass; 24 std::optional<uint32_t> Index; 25 bool IsLabel = false; 26 bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; 27 }; 28 29 struct SymbolInfoTy { 30 uint64_t Addr; 31 StringRef Name; 32 // XCOFF uses XCOFFSymInfo. Other targets use Type. 33 XCOFFSymbolInfoTy XCOFFSymInfo; 34 uint8_t Type; 35 // Used by ELF to describe a mapping symbol that is usually not displayed. 36 bool IsMappingSymbol; 37 38 private: 39 bool IsXCOFF; 40 bool HasType; 41 42 public: SymbolInfoTySymbolInfoTy43 SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, 44 StringRef Name, std::optional<uint32_t> Idx, bool Label) 45 : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0), 46 IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} 47 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, 48 bool IsMappingSymbol = false, bool IsXCOFF = false) AddrSymbolInfoTy49 : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), 50 IsXCOFF(IsXCOFF), HasType(true) {} isXCOFFSymbolInfoTy51 bool isXCOFF() const { return IsXCOFF; } 52 53 private: 54 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 55 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && 56 "The value of IsXCOFF and HasType in P1 and P2 should be the same " 57 "respectively."); 58 59 if (P1.IsXCOFF && P1.HasType) 60 return std::tie(P1.Addr, P1.Type, P1.Name) < 61 std::tie(P2.Addr, P2.Type, P2.Name); 62 63 if (P1.IsXCOFF) 64 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 65 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 66 67 // With the same address, place mapping symbols first. 68 bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; 69 return std::tie(P1.Addr, MS1, P1.Name, P1.Type) < 70 std::tie(P2.Addr, MS2, P2.Name, P2.Type); 71 } 72 }; 73 74 using SectionSymbolsTy = std::vector<SymbolInfoTy>; 75 76 template <typename T> class ArrayRef; 77 class MCContext; 78 class MCInst; 79 class MCSubtargetInfo; 80 class raw_ostream; 81 82 /// Superclass for all disassemblers. Consumes a memory region and provides an 83 /// array of assembly instructions. 84 class MCDisassembler { 85 public: 86 /// Ternary decode status. Most backends will just use Fail and 87 /// Success, however some have a concept of an instruction with 88 /// understandable semantics but which is architecturally 89 /// incorrect. An example of this is ARM UNPREDICTABLE instructions 90 /// which are disassemblable but cause undefined behaviour. 91 /// 92 /// Because it makes sense to disassemble these instructions, there 93 /// is a "soft fail" failure mode that indicates the MCInst& is 94 /// valid but architecturally incorrect. 95 /// 96 /// The enum numbers are deliberately chosen such that reduction 97 /// from Success->SoftFail ->Fail can be done with a simple 98 /// bitwise-AND: 99 /// 100 /// LEFT & TOP = | Success Unpredictable Fail 101 /// --------------+----------------------------------- 102 /// Success | Success Unpredictable Fail 103 /// Unpredictable | Unpredictable Unpredictable Fail 104 /// Fail | Fail Fail Fail 105 /// 106 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 107 /// Success, SoftFail, Fail respectively. 108 enum DecodeStatus { 109 Fail = 0, 110 SoftFail = 1, 111 Success = 3 112 }; 113 MCDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx)114 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 115 : Ctx(Ctx), STI(STI) {} 116 117 virtual ~MCDisassembler(); 118 119 /// Returns the disassembly of a single instruction. 120 /// 121 /// \param Instr - An MCInst to populate with the contents of the 122 /// instruction. 123 /// \param Size - A value to populate with the size of the instruction, or 124 /// the number of bytes consumed while attempting to decode 125 /// an invalid instruction. 126 /// \param Address - The address, in the memory space of region, of the first 127 /// byte of the instruction. 128 /// \param Bytes - A reference to the actual bytes of the instruction. 129 /// \param CStream - The stream to print comments and annotations on. 130 /// \return - MCDisassembler::Success if the instruction is valid, 131 /// MCDisassembler::SoftFail if the instruction was 132 /// disassemblable but invalid, 133 /// MCDisassembler::Fail if the instruction was invalid. 134 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 135 ArrayRef<uint8_t> Bytes, uint64_t Address, 136 raw_ostream &CStream) const = 0; 137 138 /// Used to perform separate target specific disassembly for a particular 139 /// symbol. May parse any prelude that precedes instructions after the 140 /// start of a symbol, or the entire symbol. 141 /// This is used for example by WebAssembly to decode preludes. 142 /// 143 /// Base implementation returns false. So all targets by default decline to 144 /// treat symbols separately. 145 /// 146 /// \param Symbol - The symbol. 147 /// \param Size - The number of bytes consumed. 148 /// \param Address - The address, in the memory space of region, of the first 149 /// byte of the symbol. 150 /// \param Bytes - A reference to the actual bytes at the symbol location. 151 /// \return - True if this symbol triggered some target specific 152 /// disassembly for this symbol. Size must be set with the 153 /// number of bytes consumed. 154 /// - Error if this symbol triggered some target specific 155 /// disassembly for this symbol, but an error was found with 156 /// it. Size must be set with the number of bytes consumed. 157 /// - False if the target doesn't want to handle the symbol 158 /// separately. The value of Size is ignored in this case, 159 /// and Err must not be set. 160 virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 161 ArrayRef<uint8_t> Bytes, 162 uint64_t Address) const; 163 // TODO: 164 // Implement similar hooks that can be used at other points during 165 // disassembly. Something along the following lines: 166 // - onBeforeInstructionDecode() 167 // - onAfterInstructionDecode() 168 // - onSymbolEnd() 169 // It should help move much of the target specific code from llvm-objdump to 170 // respective target disassemblers. 171 172 /// Suggest a distance to skip in a buffer of data to find the next 173 /// place to look for the start of an instruction. For example, if 174 /// all instructions have a fixed alignment, this might advance to 175 /// the next multiple of that alignment. 176 /// 177 /// If not overridden, the default is 1. 178 /// 179 /// \param Address - The address, in the memory space of region, of the 180 /// starting point (typically the first byte of something 181 /// that did not decode as a valid instruction at all). 182 /// \param Bytes - A reference to the actual bytes at Address. May be 183 /// needed in order to determine the width of an 184 /// unrecognized instruction (e.g. in Thumb this is a simple 185 /// consistent criterion that doesn't require knowing the 186 /// specific instruction). The caller can pass as much data 187 /// as they have available, and the function is required to 188 /// make a reasonable default choice if not enough data is 189 /// available to make a better one. 190 /// \return - A number of bytes to skip. Must always be greater than 191 /// zero. May be greater than the size of Bytes. 192 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, 193 uint64_t Address) const; 194 195 private: 196 MCContext &Ctx; 197 198 protected: 199 // Subtarget information, for instruction decoding predicates if required. 200 const MCSubtargetInfo &STI; 201 std::unique_ptr<MCSymbolizer> Symbolizer; 202 203 public: 204 // Helpers around MCSymbolizer 205 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, 206 bool IsBranch, uint64_t Offset, uint64_t OpSize, 207 uint64_t InstSize) const; 208 209 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 210 211 /// Set \p Symzer as the current symbolizer. 212 /// This takes ownership of \p Symzer, and deletes the previously set one. 213 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 214 getContext()215 MCContext& getContext() const { return Ctx; } 216 getSubtargetInfo()217 const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 218 219 /// ELF-specific, set the ABI version from the object header. setABIVersion(unsigned Version)220 virtual void setABIVersion(unsigned Version) {} 221 222 // Marked mutable because we cache it inside the disassembler, rather than 223 // having to pass it around as an argument through all the autogenerated code. 224 mutable raw_ostream *CommentStream = nullptr; 225 }; 226 227 } // end namespace llvm 228 229 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 230