1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/BinaryFormat/XCOFF.h" 14 #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 15 #include <cstdint> 16 #include <memory> 17 #include <vector> 18 19 namespace llvm { 20 21 struct XCOFFSymbolInfoTy { 22 std::optional<XCOFF::StorageMappingClass> StorageMappingClass; 23 std::optional<uint32_t> Index; 24 bool IsLabel = false; 25 bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; 26 }; 27 28 struct SymbolInfoTy { 29 uint64_t Addr; 30 StringRef Name; 31 // XCOFF uses XCOFFSymInfo. Other targets use Type. 32 XCOFFSymbolInfoTy XCOFFSymInfo; 33 uint8_t Type; 34 // Used by ELF to describe a mapping symbol that is usually not displayed. 35 bool IsMappingSymbol; 36 37 private: 38 bool IsXCOFF; 39 bool HasType; 40 41 public: 42 SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, 43 StringRef Name, std::optional<uint32_t> Idx, bool Label) 44 : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0), 45 IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} 46 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, 47 bool IsMappingSymbol = false, bool IsXCOFF = false) 48 : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), 49 IsXCOFF(IsXCOFF), HasType(true) {} 50 bool isXCOFF() const { return IsXCOFF; } 51 52 private: 53 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 54 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && 55 "The value of IsXCOFF and HasType in P1 and P2 should be the same " 56 "respectively."); 57 58 if (P1.IsXCOFF && P1.HasType) 59 return std::tie(P1.Addr, P1.Type, P1.Name) < 60 std::tie(P2.Addr, P2.Type, P2.Name); 61 62 if (P1.IsXCOFF) 63 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 64 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 65 66 // With the same address, place mapping symbols first. 67 bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; 68 return std::tie(P1.Addr, MS1, P1.Name, P1.Type) < 69 std::tie(P2.Addr, MS2, P2.Name, P2.Type); 70 } 71 }; 72 73 using SectionSymbolsTy = std::vector<SymbolInfoTy>; 74 75 template <typename T> class ArrayRef; 76 class MCContext; 77 class MCInst; 78 class MCSubtargetInfo; 79 class raw_ostream; 80 81 /// Superclass for all disassemblers. Consumes a memory region and provides an 82 /// array of assembly instructions. 83 class MCDisassembler { 84 public: 85 /// Ternary decode status. Most backends will just use Fail and 86 /// Success, however some have a concept of an instruction with 87 /// understandable semantics but which is architecturally 88 /// incorrect. An example of this is ARM UNPREDICTABLE instructions 89 /// which are disassemblable but cause undefined behaviour. 90 /// 91 /// Because it makes sense to disassemble these instructions, there 92 /// is a "soft fail" failure mode that indicates the MCInst& is 93 /// valid but architecturally incorrect. 94 /// 95 /// The enum numbers are deliberately chosen such that reduction 96 /// from Success->SoftFail ->Fail can be done with a simple 97 /// bitwise-AND: 98 /// 99 /// LEFT & TOP = | Success Unpredictable Fail 100 /// --------------+----------------------------------- 101 /// Success | Success Unpredictable Fail 102 /// Unpredictable | Unpredictable Unpredictable Fail 103 /// Fail | Fail Fail Fail 104 /// 105 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 106 /// Success, SoftFail, Fail respectively. 107 enum DecodeStatus { 108 Fail = 0, 109 SoftFail = 1, 110 Success = 3 111 }; 112 113 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 114 : Ctx(Ctx), STI(STI) {} 115 116 virtual ~MCDisassembler(); 117 118 /// Returns the disassembly of a single instruction. 119 /// 120 /// \param Instr - An MCInst to populate with the contents of the 121 /// instruction. 122 /// \param Size - A value to populate with the size of the instruction, or 123 /// the number of bytes consumed while attempting to decode 124 /// an invalid instruction. 125 /// \param Address - The address, in the memory space of region, of the first 126 /// byte of the instruction. 127 /// \param Bytes - A reference to the actual bytes of the instruction. 128 /// \param CStream - The stream to print comments and annotations on. 129 /// \return - MCDisassembler::Success if the instruction is valid, 130 /// MCDisassembler::SoftFail if the instruction was 131 /// disassemblable but invalid, 132 /// MCDisassembler::Fail if the instruction was invalid. 133 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 134 ArrayRef<uint8_t> Bytes, uint64_t Address, 135 raw_ostream &CStream) const = 0; 136 137 /// Used to perform separate target specific disassembly for a particular 138 /// symbol. May parse any prelude that precedes instructions after the 139 /// start of a symbol, or the entire symbol. 140 /// This is used for example by WebAssembly to decode preludes. 141 /// 142 /// Base implementation returns std::nullopt. So all targets by default ignore 143 /// to treat symbols separately. 144 /// 145 /// \param Symbol - The symbol. 146 /// \param Size - The number of bytes consumed. 147 /// \param Address - The address, in the memory space of region, of the first 148 /// byte of the symbol. 149 /// \param Bytes - A reference to the actual bytes at the symbol location. 150 /// \param CStream - The stream to print comments and annotations on. 151 /// \return - MCDisassembler::Success if bytes are decoded 152 /// successfully. Size must hold the number of bytes that 153 /// were decoded. 154 /// - MCDisassembler::Fail if the bytes are invalid. Size 155 /// must hold the number of bytes that were decoded before 156 /// failing. The target must print nothing. This can be 157 /// done by buffering the output if needed. 158 /// - std::nullopt if the target doesn't want to handle the 159 /// symbol separately. Value of Size is ignored in this 160 /// case. 161 virtual std::optional<DecodeStatus> 162 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 163 uint64_t Address, raw_ostream &CStream) const; 164 // TODO: 165 // Implement similar hooks that can be used at other points during 166 // disassembly. Something along the following lines: 167 // - onBeforeInstructionDecode() 168 // - onAfterInstructionDecode() 169 // - onSymbolEnd() 170 // It should help move much of the target specific code from llvm-objdump to 171 // respective target disassemblers. 172 173 /// Suggest a distance to skip in a buffer of data to find the next 174 /// place to look for the start of an instruction. For example, if 175 /// all instructions have a fixed alignment, this might advance to 176 /// the next multiple of that alignment. 177 /// 178 /// If not overridden, the default is 1. 179 /// 180 /// \param Address - The address, in the memory space of region, of the 181 /// starting point (typically the first byte of something 182 /// that did not decode as a valid instruction at all). 183 /// \param Bytes - A reference to the actual bytes at Address. May be 184 /// needed in order to determine the width of an 185 /// unrecognized instruction (e.g. in Thumb this is a simple 186 /// consistent criterion that doesn't require knowing the 187 /// specific instruction). The caller can pass as much data 188 /// as they have available, and the function is required to 189 /// make a reasonable default choice if not enough data is 190 /// available to make a better one. 191 /// \return - A number of bytes to skip. Must always be greater than 192 /// zero. May be greater than the size of Bytes. 193 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, 194 uint64_t Address) const; 195 196 private: 197 MCContext &Ctx; 198 199 protected: 200 // Subtarget information, for instruction decoding predicates if required. 201 const MCSubtargetInfo &STI; 202 std::unique_ptr<MCSymbolizer> Symbolizer; 203 204 public: 205 // Helpers around MCSymbolizer 206 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, 207 bool IsBranch, uint64_t Offset, uint64_t OpSize, 208 uint64_t InstSize) const; 209 210 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 211 212 /// Set \p Symzer as the current symbolizer. 213 /// This takes ownership of \p Symzer, and deletes the previously set one. 214 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 215 216 MCContext& getContext() const { return Ctx; } 217 218 const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 219 220 // Marked mutable because we cache it inside the disassembler, rather than 221 // having to pass it around as an argument through all the autogenerated code. 222 mutable raw_ostream *CommentStream = nullptr; 223 }; 224 225 } // end namespace llvm 226 227 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 228