1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/BinaryFormat/XCOFF.h" 15 #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 16 #include <cstdint> 17 #include <memory> 18 #include <vector> 19 20 namespace llvm { 21 22 struct XCOFFSymbolInfo { 23 Optional<XCOFF::StorageMappingClass> StorageMappingClass; 24 Optional<uint32_t> Index; 25 bool IsLabel; 26 XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc, 27 Optional<uint32_t> Idx, bool Label) 28 : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {} 29 30 bool operator<(const XCOFFSymbolInfo &SymInfo) const; 31 }; 32 33 struct SymbolInfoTy { 34 uint64_t Addr; 35 StringRef Name; 36 union { 37 uint8_t Type; 38 XCOFFSymbolInfo XCOFFSymInfo; 39 }; 40 41 private: 42 bool IsXCOFF; 43 bool HasType; 44 45 public: 46 SymbolInfoTy(uint64_t Addr, StringRef Name, 47 Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx, 48 bool Label) 49 : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true), 50 HasType(false) {} 51 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, 52 bool IsXCOFF = false) 53 : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {} 54 bool isXCOFF() const { return IsXCOFF; } 55 56 private: 57 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 58 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && 59 "The value of IsXCOFF and HasType in P1 and P2 should be the same " 60 "respectively."); 61 62 if (P1.IsXCOFF && P1.HasType) 63 return std::tie(P1.Addr, P1.Type, P1.Name) < 64 std::tie(P2.Addr, P2.Type, P2.Name); 65 66 if (P1.IsXCOFF) 67 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 68 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 69 70 return std::tie(P1.Addr, P1.Name, P1.Type) < 71 std::tie(P2.Addr, P2.Name, P2.Type); 72 } 73 }; 74 75 using SectionSymbolsTy = std::vector<SymbolInfoTy>; 76 77 template <typename T> class ArrayRef; 78 class MCContext; 79 class MCInst; 80 class MCSubtargetInfo; 81 class raw_ostream; 82 83 /// Superclass for all disassemblers. Consumes a memory region and provides an 84 /// array of assembly instructions. 85 class MCDisassembler { 86 public: 87 /// Ternary decode status. Most backends will just use Fail and 88 /// Success, however some have a concept of an instruction with 89 /// understandable semantics but which is architecturally 90 /// incorrect. An example of this is ARM UNPREDICTABLE instructions 91 /// which are disassemblable but cause undefined behaviour. 92 /// 93 /// Because it makes sense to disassemble these instructions, there 94 /// is a "soft fail" failure mode that indicates the MCInst& is 95 /// valid but architecturally incorrect. 96 /// 97 /// The enum numbers are deliberately chosen such that reduction 98 /// from Success->SoftFail ->Fail can be done with a simple 99 /// bitwise-AND: 100 /// 101 /// LEFT & TOP = | Success Unpredictable Fail 102 /// --------------+----------------------------------- 103 /// Success | Success Unpredictable Fail 104 /// Unpredictable | Unpredictable Unpredictable Fail 105 /// Fail | Fail Fail Fail 106 /// 107 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 108 /// Success, SoftFail, Fail respectively. 109 enum DecodeStatus { 110 Fail = 0, 111 SoftFail = 1, 112 Success = 3 113 }; 114 115 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 116 : Ctx(Ctx), STI(STI) {} 117 118 virtual ~MCDisassembler(); 119 120 /// Returns the disassembly of a single instruction. 121 /// 122 /// \param Instr - An MCInst to populate with the contents of the 123 /// instruction. 124 /// \param Size - A value to populate with the size of the instruction, or 125 /// the number of bytes consumed while attempting to decode 126 /// an invalid instruction. 127 /// \param Address - The address, in the memory space of region, of the first 128 /// byte of the instruction. 129 /// \param Bytes - A reference to the actual bytes of the instruction. 130 /// \param CStream - The stream to print comments and annotations on. 131 /// \return - MCDisassembler::Success if the instruction is valid, 132 /// MCDisassembler::SoftFail if the instruction was 133 /// disassemblable but invalid, 134 /// MCDisassembler::Fail if the instruction was invalid. 135 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 136 ArrayRef<uint8_t> Bytes, uint64_t Address, 137 raw_ostream &CStream) const = 0; 138 139 /// Used to perform separate target specific disassembly for a particular 140 /// symbol. May parse any prelude that precedes instructions after the 141 /// start of a symbol, or the entire symbol. 142 /// This is used for example by WebAssembly to decode preludes. 143 /// 144 /// Base implementation returns None. So all targets by default ignore to 145 /// treat symbols separately. 146 /// 147 /// \param Symbol - The symbol. 148 /// \param Size - The number of bytes consumed. 149 /// \param Address - The address, in the memory space of region, of the first 150 /// byte of the symbol. 151 /// \param Bytes - A reference to the actual bytes at the symbol location. 152 /// \param CStream - The stream to print comments and annotations on. 153 /// \return - MCDisassembler::Success if bytes are decoded 154 /// successfully. Size must hold the number of bytes that 155 /// were decoded. 156 /// - MCDisassembler::Fail if the bytes are invalid. Size 157 /// must hold the number of bytes that were decoded before 158 /// failing. The target must print nothing. This can be 159 /// done by buffering the output if needed. 160 /// - None if the target doesn't want to handle the symbol 161 /// separately. Value of Size is ignored in this case. 162 virtual Optional<DecodeStatus> 163 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 164 uint64_t Address, raw_ostream &CStream) const; 165 // TODO: 166 // Implement similar hooks that can be used at other points during 167 // disassembly. Something along the following lines: 168 // - onBeforeInstructionDecode() 169 // - onAfterInstructionDecode() 170 // - onSymbolEnd() 171 // It should help move much of the target specific code from llvm-objdump to 172 // respective target disassemblers. 173 174 /// Suggest a distance to skip in a buffer of data to find the next 175 /// place to look for the start of an instruction. For example, if 176 /// all instructions have a fixed alignment, this might advance to 177 /// the next multiple of that alignment. 178 /// 179 /// If not overridden, the default is 1. 180 /// 181 /// \param Address - The address, in the memory space of region, of the 182 /// starting point (typically the first byte of something 183 /// that did not decode as a valid instruction at all). 184 /// \param Bytes - A reference to the actual bytes at Address. May be 185 /// needed in order to determine the width of an 186 /// unrecognized instruction (e.g. in Thumb this is a simple 187 /// consistent criterion that doesn't require knowing the 188 /// specific instruction). The caller can pass as much data 189 /// as they have available, and the function is required to 190 /// make a reasonable default choice if not enough data is 191 /// available to make a better one. 192 /// \return - A number of bytes to skip. Must always be greater than 193 /// zero. May be greater than the size of Bytes. 194 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, 195 uint64_t Address) const; 196 197 private: 198 MCContext &Ctx; 199 200 protected: 201 // Subtarget information, for instruction decoding predicates if required. 202 const MCSubtargetInfo &STI; 203 std::unique_ptr<MCSymbolizer> Symbolizer; 204 205 public: 206 // Helpers around MCSymbolizer 207 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, 208 bool IsBranch, uint64_t Offset, uint64_t OpSize, 209 uint64_t InstSize) const; 210 211 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 212 213 /// Set \p Symzer as the current symbolizer. 214 /// This takes ownership of \p Symzer, and deletes the previously set one. 215 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 216 217 MCContext& getContext() const { return Ctx; } 218 219 const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 220 221 // Marked mutable because we cache it inside the disassembler, rather than 222 // having to pass it around as an argument through all the autogenerated code. 223 mutable raw_ostream *CommentStream = nullptr; 224 }; 225 226 } // end namespace llvm 227 228 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 229