xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
100b57cec5SDimitry Andric #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
110b57cec5SDimitry Andric 
125ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
135ffd83dbSDimitry Andric #include "llvm/BinaryFormat/XCOFF.h"
140b57cec5SDimitry Andric #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
15*0fca6ea1SDimitry Andric #include "llvm/Support/Error.h"
160b57cec5SDimitry Andric #include <cstdint>
170b57cec5SDimitry Andric #include <memory>
185ffd83dbSDimitry Andric #include <vector>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric namespace llvm {
210b57cec5SDimitry Andric 
22bdd1243dSDimitry Andric struct XCOFFSymbolInfoTy {
23bdd1243dSDimitry Andric   std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
24bdd1243dSDimitry Andric   std::optional<uint32_t> Index;
25bdd1243dSDimitry Andric   bool IsLabel = false;
26bdd1243dSDimitry Andric   bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
275ffd83dbSDimitry Andric };
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric struct SymbolInfoTy {
305ffd83dbSDimitry Andric   uint64_t Addr;
315ffd83dbSDimitry Andric   StringRef Name;
32bdd1243dSDimitry Andric   // XCOFF uses XCOFFSymInfo. Other targets use Type.
33bdd1243dSDimitry Andric   XCOFFSymbolInfoTy XCOFFSymInfo;
345ffd83dbSDimitry Andric   uint8_t Type;
355f757f3fSDimitry Andric   // Used by ELF to describe a mapping symbol that is usually not displayed.
365f757f3fSDimitry Andric   bool IsMappingSymbol;
375ffd83dbSDimitry Andric 
385ffd83dbSDimitry Andric private:
395ffd83dbSDimitry Andric   bool IsXCOFF;
4081ad6265SDimitry Andric   bool HasType;
415ffd83dbSDimitry Andric 
425ffd83dbSDimitry Andric public:
SymbolInfoTySymbolInfoTy435f757f3fSDimitry Andric   SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr,
445f757f3fSDimitry Andric                StringRef Name, std::optional<uint32_t> Idx, bool Label)
45bdd1243dSDimitry Andric       : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
465f757f3fSDimitry Andric         IsMappingSymbol(false), IsXCOFF(true), HasType(false) {}
4781ad6265SDimitry Andric   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
485f757f3fSDimitry Andric                bool IsMappingSymbol = false, bool IsXCOFF = false)
AddrSymbolInfoTy495f757f3fSDimitry Andric       : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol),
505f757f3fSDimitry Andric         IsXCOFF(IsXCOFF), HasType(true) {}
isXCOFFSymbolInfoTy515ffd83dbSDimitry Andric   bool isXCOFF() const { return IsXCOFF; }
525ffd83dbSDimitry Andric 
535ffd83dbSDimitry Andric private:
545ffd83dbSDimitry Andric   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
5581ad6265SDimitry Andric     assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
5681ad6265SDimitry Andric            "The value of IsXCOFF and HasType in P1 and P2 should be the same "
5781ad6265SDimitry Andric            "respectively.");
5881ad6265SDimitry Andric 
5981ad6265SDimitry Andric     if (P1.IsXCOFF && P1.HasType)
6081ad6265SDimitry Andric       return std::tie(P1.Addr, P1.Type, P1.Name) <
6181ad6265SDimitry Andric              std::tie(P2.Addr, P2.Type, P2.Name);
6281ad6265SDimitry Andric 
635ffd83dbSDimitry Andric     if (P1.IsXCOFF)
645ffd83dbSDimitry Andric       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
655ffd83dbSDimitry Andric              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
665ffd83dbSDimitry Andric 
675f757f3fSDimitry Andric     // With the same address, place mapping symbols first.
685f757f3fSDimitry Andric     bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol;
695f757f3fSDimitry Andric     return std::tie(P1.Addr, MS1, P1.Name, P1.Type) <
705f757f3fSDimitry Andric            std::tie(P2.Addr, MS2, P2.Name, P2.Type);
715ffd83dbSDimitry Andric   }
725ffd83dbSDimitry Andric };
735ffd83dbSDimitry Andric 
745ffd83dbSDimitry Andric using SectionSymbolsTy = std::vector<SymbolInfoTy>;
755ffd83dbSDimitry Andric 
760b57cec5SDimitry Andric template <typename T> class ArrayRef;
770b57cec5SDimitry Andric class MCContext;
780b57cec5SDimitry Andric class MCInst;
790b57cec5SDimitry Andric class MCSubtargetInfo;
800b57cec5SDimitry Andric class raw_ostream;
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric /// Superclass for all disassemblers. Consumes a memory region and provides an
830b57cec5SDimitry Andric /// array of assembly instructions.
840b57cec5SDimitry Andric class MCDisassembler {
850b57cec5SDimitry Andric public:
860b57cec5SDimitry Andric   /// Ternary decode status. Most backends will just use Fail and
870b57cec5SDimitry Andric   /// Success, however some have a concept of an instruction with
880b57cec5SDimitry Andric   /// understandable semantics but which is architecturally
890b57cec5SDimitry Andric   /// incorrect. An example of this is ARM UNPREDICTABLE instructions
900b57cec5SDimitry Andric   /// which are disassemblable but cause undefined behaviour.
910b57cec5SDimitry Andric   ///
920b57cec5SDimitry Andric   /// Because it makes sense to disassemble these instructions, there
930b57cec5SDimitry Andric   /// is a "soft fail" failure mode that indicates the MCInst& is
940b57cec5SDimitry Andric   /// valid but architecturally incorrect.
950b57cec5SDimitry Andric   ///
960b57cec5SDimitry Andric   /// The enum numbers are deliberately chosen such that reduction
970b57cec5SDimitry Andric   /// from Success->SoftFail ->Fail can be done with a simple
980b57cec5SDimitry Andric   /// bitwise-AND:
990b57cec5SDimitry Andric   ///
1000b57cec5SDimitry Andric   ///   LEFT & TOP =  | Success       Unpredictable   Fail
1010b57cec5SDimitry Andric   ///   --------------+-----------------------------------
1020b57cec5SDimitry Andric   ///   Success       | Success       Unpredictable   Fail
1030b57cec5SDimitry Andric   ///   Unpredictable | Unpredictable Unpredictable   Fail
1040b57cec5SDimitry Andric   ///   Fail          | Fail          Fail            Fail
1050b57cec5SDimitry Andric   ///
1060b57cec5SDimitry Andric   /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
1070b57cec5SDimitry Andric   /// Success, SoftFail, Fail respectively.
1080b57cec5SDimitry Andric   enum DecodeStatus {
1090b57cec5SDimitry Andric     Fail = 0,
1100b57cec5SDimitry Andric     SoftFail = 1,
1110b57cec5SDimitry Andric     Success = 3
1120b57cec5SDimitry Andric   };
1130b57cec5SDimitry Andric 
MCDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx)1140b57cec5SDimitry Andric   MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
1150b57cec5SDimitry Andric     : Ctx(Ctx), STI(STI) {}
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric   virtual ~MCDisassembler();
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   /// Returns the disassembly of a single instruction.
1200b57cec5SDimitry Andric   ///
1210b57cec5SDimitry Andric   /// \param Instr    - An MCInst to populate with the contents of the
1220b57cec5SDimitry Andric   ///                   instruction.
1230b57cec5SDimitry Andric   /// \param Size     - A value to populate with the size of the instruction, or
1240b57cec5SDimitry Andric   ///                   the number of bytes consumed while attempting to decode
1250b57cec5SDimitry Andric   ///                   an invalid instruction.
1260b57cec5SDimitry Andric   /// \param Address  - The address, in the memory space of region, of the first
1270b57cec5SDimitry Andric   ///                   byte of the instruction.
1280b57cec5SDimitry Andric   /// \param Bytes    - A reference to the actual bytes of the instruction.
1290b57cec5SDimitry Andric   /// \param CStream  - The stream to print comments and annotations on.
1300b57cec5SDimitry Andric   /// \return         - MCDisassembler::Success if the instruction is valid,
1310b57cec5SDimitry Andric   ///                   MCDisassembler::SoftFail if the instruction was
1320b57cec5SDimitry Andric   ///                                            disassemblable but invalid,
1330b57cec5SDimitry Andric   ///                   MCDisassembler::Fail if the instruction was invalid.
1340b57cec5SDimitry Andric   virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
1350b57cec5SDimitry Andric                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
1360b57cec5SDimitry Andric                                       raw_ostream &CStream) const = 0;
1370b57cec5SDimitry Andric 
1385ffd83dbSDimitry Andric   /// Used to perform separate target specific disassembly for a particular
1395ffd83dbSDimitry Andric   /// symbol. May parse any prelude that precedes instructions after the
1405ffd83dbSDimitry Andric   /// start of a symbol, or the entire symbol.
1415ffd83dbSDimitry Andric   /// This is used for example by WebAssembly to decode preludes.
1420b57cec5SDimitry Andric   ///
143*0fca6ea1SDimitry Andric   /// Base implementation returns false. So all targets by default decline to
144*0fca6ea1SDimitry Andric   /// treat symbols separately.
1455ffd83dbSDimitry Andric   ///
1465ffd83dbSDimitry Andric   /// \param Symbol   - The symbol.
1470b57cec5SDimitry Andric   /// \param Size     - The number of bytes consumed.
1480b57cec5SDimitry Andric   /// \param Address  - The address, in the memory space of region, of the first
1490b57cec5SDimitry Andric   ///                   byte of the symbol.
1500b57cec5SDimitry Andric   /// \param Bytes    - A reference to the actual bytes at the symbol location.
151*0fca6ea1SDimitry Andric   /// \return         - True if this symbol triggered some target specific
152*0fca6ea1SDimitry Andric   ///                   disassembly for this symbol. Size must be set with the
153*0fca6ea1SDimitry Andric   ///                   number of bytes consumed.
154*0fca6ea1SDimitry Andric   ///                 - Error if this symbol triggered some target specific
155*0fca6ea1SDimitry Andric   ///                   disassembly for this symbol, but an error was found with
156*0fca6ea1SDimitry Andric   ///                   it. Size must be set with the number of bytes consumed.
157*0fca6ea1SDimitry Andric   ///                 - False if the target doesn't want to handle the symbol
158*0fca6ea1SDimitry Andric   ///                   separately. The value of Size is ignored in this case,
159*0fca6ea1SDimitry Andric   ///                   and Err must not be set.
160*0fca6ea1SDimitry Andric   virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
161*0fca6ea1SDimitry Andric                                        ArrayRef<uint8_t> Bytes,
162*0fca6ea1SDimitry Andric                                        uint64_t Address) const;
1635ffd83dbSDimitry Andric   // TODO:
1645ffd83dbSDimitry Andric   // Implement similar hooks that can be used at other points during
1655ffd83dbSDimitry Andric   // disassembly. Something along the following lines:
1665ffd83dbSDimitry Andric   // - onBeforeInstructionDecode()
1675ffd83dbSDimitry Andric   // - onAfterInstructionDecode()
1685ffd83dbSDimitry Andric   // - onSymbolEnd()
1695ffd83dbSDimitry Andric   // It should help move much of the target specific code from llvm-objdump to
1705ffd83dbSDimitry Andric   // respective target disassemblers.
1710b57cec5SDimitry Andric 
172972a253aSDimitry Andric   /// Suggest a distance to skip in a buffer of data to find the next
173972a253aSDimitry Andric   /// place to look for the start of an instruction. For example, if
174972a253aSDimitry Andric   /// all instructions have a fixed alignment, this might advance to
175972a253aSDimitry Andric   /// the next multiple of that alignment.
176972a253aSDimitry Andric   ///
177972a253aSDimitry Andric   /// If not overridden, the default is 1.
178972a253aSDimitry Andric   ///
179972a253aSDimitry Andric   /// \param Address  - The address, in the memory space of region, of the
180972a253aSDimitry Andric   ///                   starting point (typically the first byte of something
181972a253aSDimitry Andric   ///                   that did not decode as a valid instruction at all).
182972a253aSDimitry Andric   /// \param Bytes    - A reference to the actual bytes at Address. May be
183972a253aSDimitry Andric   ///                   needed in order to determine the width of an
184972a253aSDimitry Andric   ///                   unrecognized instruction (e.g. in Thumb this is a simple
185972a253aSDimitry Andric   ///                   consistent criterion that doesn't require knowing the
186972a253aSDimitry Andric   ///                   specific instruction). The caller can pass as much data
187972a253aSDimitry Andric   ///                   as they have available, and the function is required to
188972a253aSDimitry Andric   ///                   make a reasonable default choice if not enough data is
189972a253aSDimitry Andric   ///                   available to make a better one.
190972a253aSDimitry Andric   /// \return         - A number of bytes to skip. Must always be greater than
191972a253aSDimitry Andric   ///                   zero. May be greater than the size of Bytes.
192972a253aSDimitry Andric   virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
193972a253aSDimitry Andric                                       uint64_t Address) const;
194972a253aSDimitry Andric 
1950b57cec5SDimitry Andric private:
1960b57cec5SDimitry Andric   MCContext &Ctx;
1970b57cec5SDimitry Andric 
1980b57cec5SDimitry Andric protected:
1990b57cec5SDimitry Andric   // Subtarget information, for instruction decoding predicates if required.
2000b57cec5SDimitry Andric   const MCSubtargetInfo &STI;
2010b57cec5SDimitry Andric   std::unique_ptr<MCSymbolizer> Symbolizer;
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric public:
2040b57cec5SDimitry Andric   // Helpers around MCSymbolizer
20581ad6265SDimitry Andric   bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
20681ad6265SDimitry Andric                                 bool IsBranch, uint64_t Offset, uint64_t OpSize,
20781ad6265SDimitry Andric                                 uint64_t InstSize) const;
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   /// Set \p Symzer as the current symbolizer.
2120b57cec5SDimitry Andric   /// This takes ownership of \p Symzer, and deletes the previously set one.
2130b57cec5SDimitry Andric   void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
2140b57cec5SDimitry Andric 
getContext()2150b57cec5SDimitry Andric   MCContext& getContext() const { return Ctx; }
2160b57cec5SDimitry Andric 
getSubtargetInfo()2170b57cec5SDimitry Andric   const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
2180b57cec5SDimitry Andric 
219*0fca6ea1SDimitry Andric   /// ELF-specific, set the ABI version from the object header.
setABIVersion(unsigned Version)220*0fca6ea1SDimitry Andric   virtual void setABIVersion(unsigned Version) {}
221*0fca6ea1SDimitry Andric 
2220b57cec5SDimitry Andric   // Marked mutable because we cache it inside the disassembler, rather than
2230b57cec5SDimitry Andric   // having to pass it around as an argument through all the autogenerated code.
2240b57cec5SDimitry Andric   mutable raw_ostream *CommentStream = nullptr;
2250b57cec5SDimitry Andric };
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric } // end namespace llvm
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
230