xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11 
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <tuple>
#include <vector>
18 
19 namespace llvm {
20 
/// XCOFF-specific symbol details. SymbolInfoTy carries one of these in its
/// XCOFFSymInfo member and uses it as part of the sort key for XCOFF symbols
/// that were not created from a type byte.
///
/// Member order matters: SymbolInfoTy brace-initializes this struct
/// positionally as {StorageMappingClass, Index, IsLabel}.
struct XCOFFSymbolInfoTy {
  // Storage mapping class of the symbol; empty when none is recorded.
  std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
  // Optional index associated with the symbol.
  // NOTE(review): presumably the symbol-table index -- confirm against the
  // code that fills it in.
  std::optional<uint32_t> Index;
  // Whether the symbol is a label; false unless set explicitly.
  bool IsLabel = false;
  // Ordering between two XCOFF symbol descriptions. Only declared here; the
  // definition is not part of this header.
  bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
};
27 
28 struct SymbolInfoTy {
29   uint64_t Addr;
30   StringRef Name;
31   // XCOFF uses XCOFFSymInfo. Other targets use Type.
32   XCOFFSymbolInfoTy XCOFFSymInfo;
33   uint8_t Type;
34 
35 private:
36   bool IsXCOFF;
37   bool HasType;
38 
39 public:
40   SymbolInfoTy(uint64_t Addr, StringRef Name,
41                std::optional<XCOFF::StorageMappingClass> Smc,
42                std::optional<uint32_t> Idx, bool Label)
43       : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
44         IsXCOFF(true), HasType(false) {}
45   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
46                bool IsXCOFF = false)
47       : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
48   bool isXCOFF() const { return IsXCOFF; }
49 
50 private:
51   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
52     assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
53            "The value of IsXCOFF and HasType in P1 and P2 should be the same "
54            "respectively.");
55 
56     if (P1.IsXCOFF && P1.HasType)
57       return std::tie(P1.Addr, P1.Type, P1.Name) <
58              std::tie(P2.Addr, P2.Type, P2.Name);
59 
60     if (P1.IsXCOFF)
61       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
62              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
63 
64     return std::tie(P1.Addr, P1.Name, P1.Type) <
65            std::tie(P2.Addr, P2.Name, P2.Type);
66   }
67 };
68 
/// A list of symbols, stored as a std::vector of SymbolInfoTy.
/// NOTE(review): going by the name, presumably the symbols of one section --
/// confirm with the users of this alias.
using SectionSymbolsTy = std::vector<SymbolInfoTy>;

// Forward declarations. This header only names these types in function
// signatures and in reference/pointer members, so their full definitions are
// not required here.
template <typename T> class ArrayRef;
class MCContext;
class MCInst;
class MCSubtargetInfo;
class raw_ostream;
76 
/// Superclass for all disassemblers. Consumes a memory region and provides an
/// array of assembly instructions.
class MCDisassembler {
public:
  /// Ternary decode status. Most backends will just use Fail and
  /// Success, however some have a concept of an instruction with
  /// understandable semantics but which is architecturally
  /// incorrect. An example of this is ARM UNPREDICTABLE instructions
  /// which are disassemblable but cause undefined behaviour.
  ///
  /// Because it makes sense to disassemble these instructions, there
  /// is a "soft fail" failure mode (SoftFail) that indicates the MCInst& is
  /// valid but architecturally incorrect.
  ///
  /// The enum values are deliberately chosen such that reduction
  /// from Success -> SoftFail -> Fail can be done with a simple
  /// bitwise-AND:
  ///
  ///   LEFT & TOP =  | Success    SoftFail   Fail
  ///   --------------+----------------------------
  ///   Success       | Success    SoftFail   Fail
  ///   SoftFail      | SoftFail   SoftFail   Fail
  ///   Fail          | Fail       Fail       Fail
  ///
  /// This works because Success, SoftFail and Fail are encoded as 0b11, 0b01
  /// and 0b00 respectively.
  enum DecodeStatus {
    Fail = 0,
    SoftFail = 1,
    Success = 3
  };

  /// Binds the disassembler to \p STI and \p Ctx. Both are held by reference,
  /// so the referenced objects must outlive this disassembler.
  MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
    : Ctx(Ctx), STI(STI) {}

  /// Virtual destructor; declared here and defined outside this header.
  virtual ~MCDisassembler();

  /// Returns the disassembly of a single instruction.
  ///
  /// \param Instr    - An MCInst to populate with the contents of the
  ///                   instruction.
  /// \param Size     - A value to populate with the size of the instruction, or
  ///                   the number of bytes consumed while attempting to decode
  ///                   an invalid instruction.
  /// \param Bytes    - A reference to the actual bytes of the instruction.
  /// \param Address  - The address, in the memory space of region, of the first
  ///                   byte of the instruction.
  /// \param CStream  - The stream to print comments and annotations on.
  /// \return         - MCDisassembler::Success if the instruction is valid,
  ///                   MCDisassembler::SoftFail if the instruction was
  ///                                            disassemblable but invalid,
  ///                   MCDisassembler::Fail if the instruction was invalid.
  virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                                      ArrayRef<uint8_t> Bytes, uint64_t Address,
                                      raw_ostream &CStream) const = 0;

  /// Used to perform separate target specific disassembly for a particular
  /// symbol. May parse any prelude that precedes instructions after the
  /// start of a symbol, or the entire symbol.
  /// This is used for example by WebAssembly to decode preludes.
  ///
  /// The base implementation returns std::nullopt, so by default targets do
  /// not treat symbols separately.
  ///
  /// \param Symbol   - The symbol.
  /// \param Size     - The number of bytes consumed.
  /// \param Bytes    - A reference to the actual bytes at the symbol location.
  /// \param Address  - The address, in the memory space of region, of the first
  ///                   byte of the symbol.
  /// \param CStream  - The stream to print comments and annotations on.
  /// \return         - MCDisassembler::Success if bytes are decoded
  ///                   successfully. Size must hold the number of bytes that
  ///                   were decoded.
  ///                 - MCDisassembler::Fail if the bytes are invalid. Size
  ///                   must hold the number of bytes that were decoded before
  ///                   failing. The target must print nothing. This can be
  ///                   done by buffering the output if needed.
  ///                 - std::nullopt if the target doesn't want to handle the
  ///                   symbol separately. Value of Size is ignored in this
  ///                   case.
  virtual std::optional<DecodeStatus>
  onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
                uint64_t Address, raw_ostream &CStream) const;
  // TODO:
  // Implement similar hooks that can be used at other points during
  // disassembly. Something along the following lines:
  // - onBeforeInstructionDecode()
  // - onAfterInstructionDecode()
  // - onSymbolEnd()
  // It should help move much of the target specific code from llvm-objdump to
  // respective target disassemblers.

  /// Suggest a distance to skip in a buffer of data to find the next
  /// place to look for the start of an instruction. For example, if
  /// all instructions have a fixed alignment, this might advance to
  /// the next multiple of that alignment.
  ///
  /// If not overridden, the default is 1.
  ///
  /// \param Bytes    - A reference to the actual bytes at Address. May be
  ///                   needed in order to determine the width of an
  ///                   unrecognized instruction (e.g. in Thumb this is a simple
  ///                   consistent criterion that doesn't require knowing the
  ///                   specific instruction). The caller can pass as much data
  ///                   as they have available, and the function is required to
  ///                   make a reasonable default choice if not enough data is
  ///                   available to make a better one.
  /// \param Address  - The address, in the memory space of region, of the
  ///                   starting point (typically the first byte of something
  ///                   that did not decode as a valid instruction at all).
  /// \return         - A number of bytes to skip. Must always be greater than
  ///                   zero. May be greater than the size of Bytes.
  virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
                                      uint64_t Address) const;

private:
  // Context passed to the constructor; read it through getContext().
  MCContext &Ctx;

protected:
  // Subtarget information, for instruction decoding predicates if required.
  const MCSubtargetInfo &STI;
  // Symbolizer owned by this disassembler. Not set by the constructor, so it
  // starts out null; it is installed through setSymbolizer().
  std::unique_ptr<MCSymbolizer> Symbolizer;

public:
  // Helpers around MCSymbolizer. Both are only declared in this header.
  // NOTE(review): presumably they forward to Symbolizer when one is set --
  // confirm in the implementation file.
  bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
                                bool IsBranch, uint64_t Offset, uint64_t OpSize,
                                uint64_t InstSize) const;

  void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;

  /// Set \p Symzer as the current symbolizer.
  /// This takes ownership of \p Symzer, and deletes the previously set one.
  void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);

  /// Returns the MCContext this disassembler was constructed with.
  MCContext& getContext() const { return Ctx; }

  /// Returns the subtarget information this disassembler was constructed with.
  const MCSubtargetInfo& getSubtargetInfo() const { return STI; }

  // Marked mutable because we cache it inside the disassembler, rather than
  // having to pass it around as an argument through all the autogenerated code.
  // Null until a stream is assigned.
  mutable raw_ostream *CommentStream = nullptr;
};
220 
221 } // end namespace llvm
222 
223 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
224