1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/BinaryFormat/XCOFF.h" 15 #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 16 #include <cstdint> 17 #include <memory> 18 #include <vector> 19 20 namespace llvm { 21 22 struct XCOFFSymbolInfo { 23 Optional<XCOFF::StorageMappingClass> StorageMappingClass; 24 Optional<uint32_t> Index; 25 bool IsLabel; 26 XCOFFSymbolInfo(Optional<XCOFF::StorageMappingClass> Smc, 27 Optional<uint32_t> Idx, bool Label) 28 : StorageMappingClass(Smc), Index(Idx), IsLabel(Label) {} 29 30 bool operator<(const XCOFFSymbolInfo &SymInfo) const; 31 }; 32 33 struct SymbolInfoTy { 34 uint64_t Addr; 35 StringRef Name; 36 union { 37 uint8_t Type; 38 XCOFFSymbolInfo XCOFFSymInfo; 39 }; 40 41 private: 42 bool IsXCOFF; 43 44 public: 45 SymbolInfoTy(uint64_t Addr, StringRef Name, 46 Optional<XCOFF::StorageMappingClass> Smc, Optional<uint32_t> Idx, 47 bool Label) 48 : Addr(Addr), Name(Name), XCOFFSymInfo(Smc, Idx, Label), IsXCOFF(true) {} 49 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type) 50 : Addr(Addr), Name(Name), Type(Type), IsXCOFF(false) {} 51 bool isXCOFF() const { return IsXCOFF; } 52 53 private: 54 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 55 assert(P1.IsXCOFF == P2.IsXCOFF && 56 "P1.IsXCOFF should be equal to P2.IsXCOFF."); 57 if (P1.IsXCOFF) 58 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 59 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 60 61 return std::tie(P1.Addr, P1.Name, P1.Type) < 62 std::tie(P2.Addr, P2.Name, P2.Type); 63 } 64 }; 65 66 using SectionSymbolsTy = std::vector<SymbolInfoTy>; 67 68 template <typename T> class ArrayRef; 69 class MCContext; 70 class MCInst; 71 class MCSubtargetInfo; 72 class raw_ostream; 73 74 /// Superclass for all disassemblers. Consumes a memory region and provides an 75 /// array of assembly instructions. 76 class MCDisassembler { 77 public: 78 /// Ternary decode status. Most backends will just use Fail and 79 /// Success, however some have a concept of an instruction with 80 /// understandable semantics but which is architecturally 81 /// incorrect. An example of this is ARM UNPREDICTABLE instructions 82 /// which are disassemblable but cause undefined behaviour. 83 /// 84 /// Because it makes sense to disassemble these instructions, there 85 /// is a "soft fail" failure mode that indicates the MCInst& is 86 /// valid but architecturally incorrect. 87 /// 88 /// The enum numbers are deliberately chosen such that reduction 89 /// from Success->SoftFail ->Fail can be done with a simple 90 /// bitwise-AND: 91 /// 92 /// LEFT & TOP = | Success Unpredictable Fail 93 /// --------------+----------------------------------- 94 /// Success | Success Unpredictable Fail 95 /// Unpredictable | Unpredictable Unpredictable Fail 96 /// Fail | Fail Fail Fail 97 /// 98 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 99 /// Success, SoftFail, Fail respectively. 100 enum DecodeStatus { 101 Fail = 0, 102 SoftFail = 1, 103 Success = 3 104 }; 105 106 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 107 : Ctx(Ctx), STI(STI) {} 108 109 virtual ~MCDisassembler(); 110 111 /// Returns the disassembly of a single instruction. 112 /// 113 /// \param Instr - An MCInst to populate with the contents of the 114 /// instruction. 115 /// \param Size - A value to populate with the size of the instruction, or 116 /// the number of bytes consumed while attempting to decode 117 /// an invalid instruction. 118 /// \param Address - The address, in the memory space of region, of the first 119 /// byte of the instruction. 120 /// \param Bytes - A reference to the actual bytes of the instruction. 121 /// \param CStream - The stream to print comments and annotations on. 122 /// \return - MCDisassembler::Success if the instruction is valid, 123 /// MCDisassembler::SoftFail if the instruction was 124 /// disassemblable but invalid, 125 /// MCDisassembler::Fail if the instruction was invalid. 126 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 127 ArrayRef<uint8_t> Bytes, uint64_t Address, 128 raw_ostream &CStream) const = 0; 129 130 /// Used to perform separate target specific disassembly for a particular 131 /// symbol. May parse any prelude that precedes instructions after the 132 /// start of a symbol, or the entire symbol. 133 /// This is used for example by WebAssembly to decode preludes. 134 /// 135 /// Base implementation returns None. So all targets by default ignore to 136 /// treat symbols separately. 137 /// 138 /// \param Symbol - The symbol. 139 /// \param Size - The number of bytes consumed. 140 /// \param Address - The address, in the memory space of region, of the first 141 /// byte of the symbol. 142 /// \param Bytes - A reference to the actual bytes at the symbol location. 143 /// \param CStream - The stream to print comments and annotations on. 144 /// \return - MCDisassembler::Success if bytes are decoded 145 /// successfully. Size must hold the number of bytes that 146 /// were decoded. 147 /// - MCDisassembler::Fail if the bytes are invalid. Size 148 /// must hold the number of bytes that were decoded before 149 /// failing. The target must print nothing. This can be 150 /// done by buffering the output if needed. 151 /// - None if the target doesn't want to handle the symbol 152 /// separately. Value of Size is ignored in this case. 153 virtual Optional<DecodeStatus> 154 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 155 uint64_t Address, raw_ostream &CStream) const; 156 // TODO: 157 // Implement similar hooks that can be used at other points during 158 // disassembly. Something along the following lines: 159 // - onBeforeInstructionDecode() 160 // - onAfterInstructionDecode() 161 // - onSymbolEnd() 162 // It should help move much of the target specific code from llvm-objdump to 163 // respective target disassemblers. 164 165 private: 166 MCContext &Ctx; 167 168 protected: 169 // Subtarget information, for instruction decoding predicates if required. 170 const MCSubtargetInfo &STI; 171 std::unique_ptr<MCSymbolizer> Symbolizer; 172 173 public: 174 // Helpers around MCSymbolizer 175 bool tryAddingSymbolicOperand(MCInst &Inst, 176 int64_t Value, 177 uint64_t Address, bool IsBranch, 178 uint64_t Offset, uint64_t InstSize) const; 179 180 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 181 182 /// Set \p Symzer as the current symbolizer. 183 /// This takes ownership of \p Symzer, and deletes the previously set one. 184 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 185 186 MCContext& getContext() const { return Ctx; } 187 188 const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 189 190 // Marked mutable because we cache it inside the disassembler, rather than 191 // having to pass it around as an argument through all the autogenerated code. 192 mutable raw_ostream *CommentStream = nullptr; 193 }; 194 195 } // end namespace llvm 196 197 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 198