xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 59aa64914aeb1b2cccc0d4fc39ead2ee159a1e5b)
1  //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file is part of the WebAssembly Disassembler.
11  ///
12  /// It contains code to translate the data produced by the decoder into
13  /// MCInsts.
14  ///
15  //===----------------------------------------------------------------------===//
16  
17  #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
18  #include "TargetInfo/WebAssemblyTargetInfo.h"
19  #include "llvm/MC/MCContext.h"
20  #include "llvm/MC/MCDecoderOps.h"
21  #include "llvm/MC/MCDisassembler/MCDisassembler.h"
22  #include "llvm/MC/MCInst.h"
23  #include "llvm/MC/MCInstrInfo.h"
24  #include "llvm/MC/MCSubtargetInfo.h"
25  #include "llvm/MC/MCSymbol.h"
26  #include "llvm/MC/MCSymbolWasm.h"
27  #include "llvm/MC/TargetRegistry.h"
28  #include "llvm/Support/Casting.h"
29  #include "llvm/Support/Endian.h"
30  #include "llvm/Support/LEB128.h"
31  
32  using namespace llvm;
33  
34  #define DEBUG_TYPE "wasm-disassembler"
35  
36  using DecodeStatus = MCDisassembler::DecodeStatus;
37  
38  #include "WebAssemblyGenDisassemblerTables.inc"
39  
40  namespace {
41  static constexpr int WebAssemblyInstructionTableSize = 256;
42  
43  class WebAssemblyDisassembler final : public MCDisassembler {
44    std::unique_ptr<const MCInstrInfo> MCII;
45  
46    DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
47                                ArrayRef<uint8_t> Bytes, uint64_t Address,
48                                raw_ostream &CStream) const override;
49    std::optional<DecodeStatus>
50    onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
51                  uint64_t Address, raw_ostream &CStream) const override;
52  
53  public:
54    WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
55                            std::unique_ptr<const MCInstrInfo> MCII)
56        : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
57  };
58  } // end anonymous namespace
59  
60  static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
61                                                       const MCSubtargetInfo &STI,
62                                                       MCContext &Ctx) {
63    std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
64    return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
65  }
66  
67  extern "C" LLVM_EXTERNAL_VISIBILITY void
68  LLVMInitializeWebAssemblyDisassembler() {
69    // Register the disassembler for each target.
70    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
71                                           createWebAssemblyDisassembler);
72    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
73                                           createWebAssemblyDisassembler);
74  }
75  
76  static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
77    if (Size >= Bytes.size())
78      return -1;
79    auto V = Bytes[Size];
80    Size++;
81    return V;
82  }
83  
84  static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
85                      bool Signed) {
86    unsigned N = 0;
87    const char *Error = nullptr;
88    Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
89                                 Bytes.data() + Bytes.size(), &Error)
90                 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
91                                                      Bytes.data() + Bytes.size(),
92                                                      &Error));
93    if (Error)
94      return false;
95    Size += N;
96    return true;
97  }
98  
99  static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
100                                ArrayRef<uint8_t> Bytes, bool Signed) {
101    int64_t Val;
102    if (!nextLEB(Val, Bytes, Size, Signed))
103      return false;
104    MI.addOperand(MCOperand::createImm(Val));
105    return true;
106  }
107  
108  template <typename T>
109  bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
110    if (Size + sizeof(T) > Bytes.size())
111      return false;
112    T Val =
113        support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
114    Size += sizeof(T);
115    if (std::is_floating_point<T>::value) {
116      MI.addOperand(
117          MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
118    } else {
119      MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
120    }
121    return true;
122  }
123  
124  std::optional<MCDisassembler::DecodeStatus>
125  WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
126                                         ArrayRef<uint8_t> Bytes,
127                                         uint64_t Address,
128                                         raw_ostream &CStream) const {
129    Size = 0;
130    if (Address == 0) {
131      // Start of a code section: we're parsing only the function count.
132      int64_t FunctionCount;
133      if (!nextLEB(FunctionCount, Bytes, Size, false))
134        return std::nullopt;
135      outs() << "        # " << FunctionCount << " functions in section.";
136    } else {
137      // Parse the start of a single function.
138      int64_t BodySize, LocalEntryCount;
139      if (!nextLEB(BodySize, Bytes, Size, false) ||
140          !nextLEB(LocalEntryCount, Bytes, Size, false))
141        return std::nullopt;
142      if (LocalEntryCount) {
143        outs() << "        .local ";
144        for (int64_t I = 0; I < LocalEntryCount; I++) {
145          int64_t Count, Type;
146          if (!nextLEB(Count, Bytes, Size, false) ||
147              !nextLEB(Type, Bytes, Size, false))
148            return std::nullopt;
149          for (int64_t J = 0; J < Count; J++) {
150            if (I || J)
151              outs() << ", ";
152            outs() << WebAssembly::anyTypeToString(Type);
153          }
154        }
155      }
156    }
157    outs() << "\n";
158    return MCDisassembler::Success;
159  }
160  
161  MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
162      MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
163      raw_ostream &CS) const {
164    CommentStream = &CS;
165    Size = 0;
166    int Opc = nextByte(Bytes, Size);
167    if (Opc < 0)
168      return MCDisassembler::Fail;
169    const auto *WasmInst = &InstructionTable0[Opc];
170    // If this is a prefix byte, indirect to another table.
171    if (WasmInst->ET == ET_Prefix) {
172      WasmInst = nullptr;
173      // Linear search, so far only 2 entries.
174      for (auto PT = PrefixTable; PT->Table; PT++) {
175        if (PT->Prefix == Opc) {
176          WasmInst = PT->Table;
177          break;
178        }
179      }
180      if (!WasmInst)
181        return MCDisassembler::Fail;
182      int64_t PrefixedOpc;
183      if (!nextLEB(PrefixedOpc, Bytes, Size, false))
184        return MCDisassembler::Fail;
185      if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
186        return MCDisassembler::Fail;
187      WasmInst += PrefixedOpc;
188    }
189    if (WasmInst->ET == ET_Unused)
190      return MCDisassembler::Fail;
191    // At this point we must have a valid instruction to decode.
192    assert(WasmInst->ET == ET_Instruction);
193    MI.setOpcode(WasmInst->Opcode);
194    // Parse any operands.
195    for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
196      auto OT = OperandTable[WasmInst->OperandStart + OPI];
197      switch (OT) {
198      // ULEB operands:
199      case WebAssembly::OPERAND_BASIC_BLOCK:
200      case WebAssembly::OPERAND_LOCAL:
201      case WebAssembly::OPERAND_GLOBAL:
202      case WebAssembly::OPERAND_FUNCTION32:
203      case WebAssembly::OPERAND_TABLE:
204      case WebAssembly::OPERAND_OFFSET32:
205      case WebAssembly::OPERAND_OFFSET64:
206      case WebAssembly::OPERAND_P2ALIGN:
207      case WebAssembly::OPERAND_TYPEINDEX:
208      case WebAssembly::OPERAND_TAG:
209      case MCOI::OPERAND_IMMEDIATE: {
210        if (!parseLEBImmediate(MI, Size, Bytes, false))
211          return MCDisassembler::Fail;
212        break;
213      }
214      // SLEB operands:
215      case WebAssembly::OPERAND_I32IMM:
216      case WebAssembly::OPERAND_I64IMM: {
217        if (!parseLEBImmediate(MI, Size, Bytes, true))
218          return MCDisassembler::Fail;
219        break;
220      }
221      // block_type operands:
222      case WebAssembly::OPERAND_SIGNATURE: {
223        int64_t Val;
224        uint64_t PrevSize = Size;
225        if (!nextLEB(Val, Bytes, Size, true))
226          return MCDisassembler::Fail;
227        if (Val < 0) {
228          // Negative values are single septet value types or empty types
229          if (Size != PrevSize + 1) {
230            MI.addOperand(
231                MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
232          } else {
233            MI.addOperand(MCOperand::createImm(Val & 0x7f));
234          }
235        } else {
236          // We don't have access to the signature, so create a symbol without one
237          MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
238          auto *WasmSym = cast<MCSymbolWasm>(Sym);
239          WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
240          const MCExpr *Expr = MCSymbolRefExpr::create(
241              WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
242          MI.addOperand(MCOperand::createExpr(Expr));
243        }
244        break;
245      }
246      // FP operands.
247      case WebAssembly::OPERAND_F32IMM: {
248        if (!parseImmediate<float>(MI, Size, Bytes))
249          return MCDisassembler::Fail;
250        break;
251      }
252      case WebAssembly::OPERAND_F64IMM: {
253        if (!parseImmediate<double>(MI, Size, Bytes))
254          return MCDisassembler::Fail;
255        break;
256      }
257      // Vector lane operands (not LEB encoded).
258      case WebAssembly::OPERAND_VEC_I8IMM: {
259        if (!parseImmediate<uint8_t>(MI, Size, Bytes))
260          return MCDisassembler::Fail;
261        break;
262      }
263      case WebAssembly::OPERAND_VEC_I16IMM: {
264        if (!parseImmediate<uint16_t>(MI, Size, Bytes))
265          return MCDisassembler::Fail;
266        break;
267      }
268      case WebAssembly::OPERAND_VEC_I32IMM: {
269        if (!parseImmediate<uint32_t>(MI, Size, Bytes))
270          return MCDisassembler::Fail;
271        break;
272      }
273      case WebAssembly::OPERAND_VEC_I64IMM: {
274        if (!parseImmediate<uint64_t>(MI, Size, Bytes))
275          return MCDisassembler::Fail;
276        break;
277      }
278      case WebAssembly::OPERAND_BRLIST: {
279        int64_t TargetTableLen;
280        if (!nextLEB(TargetTableLen, Bytes, Size, false))
281          return MCDisassembler::Fail;
282        for (int64_t I = 0; I < TargetTableLen; I++) {
283          if (!parseLEBImmediate(MI, Size, Bytes, false))
284            return MCDisassembler::Fail;
285        }
286        // Default case.
287        if (!parseLEBImmediate(MI, Size, Bytes, false))
288          return MCDisassembler::Fail;
289        break;
290      }
291      case MCOI::OPERAND_REGISTER:
292        // The tablegen header currently does not have any register operands since
293        // we use only the stack (_S) instructions.
294        // If you hit this that probably means a bad instruction definition in
295        // tablegen.
296        llvm_unreachable("Register operand in WebAssemblyDisassembler");
297      default:
298        llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
299      }
300    }
301    return MCDisassembler::Success;
302  }
303