xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision dfe57951f0610c6de42190b32c7ed844a97ee593)
1  //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file is part of the WebAssembly Disassembler.
11  ///
12  /// It contains code to translate the data produced by the decoder into
13  /// MCInsts.
14  ///
15  //===----------------------------------------------------------------------===//
16  
17  #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
18  #include "TargetInfo/WebAssemblyTargetInfo.h"
19  #include "llvm/BinaryFormat/Wasm.h"
20  #include "llvm/MC/MCContext.h"
21  #include "llvm/MC/MCDecoderOps.h"
22  #include "llvm/MC/MCDisassembler/MCDisassembler.h"
23  #include "llvm/MC/MCInst.h"
24  #include "llvm/MC/MCInstrInfo.h"
25  #include "llvm/MC/MCSubtargetInfo.h"
26  #include "llvm/MC/MCSymbol.h"
27  #include "llvm/MC/MCSymbolWasm.h"
28  #include "llvm/MC/TargetRegistry.h"
29  #include "llvm/Support/Casting.h"
30  #include "llvm/Support/Endian.h"
31  #include "llvm/Support/LEB128.h"
32  
33  using namespace llvm;
34  
35  #define DEBUG_TYPE "wasm-disassembler"
36  
37  using DecodeStatus = MCDisassembler::DecodeStatus;
38  
39  #include "WebAssemblyGenDisassemblerTables.inc"
40  
41  namespace {
42  static constexpr int WebAssemblyInstructionTableSize = 256;
43  
44  class WebAssemblyDisassembler final : public MCDisassembler {
45    std::unique_ptr<const MCInstrInfo> MCII;
46  
47    DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
48                                ArrayRef<uint8_t> Bytes, uint64_t Address,
49                                raw_ostream &CStream) const override;
50  
51    Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
52                                 ArrayRef<uint8_t> Bytes,
53                                 uint64_t Address) const override;
54  
55  public:
56    WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
57                            std::unique_ptr<const MCInstrInfo> MCII)
58        : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
59  };
60  } // end anonymous namespace
61  
62  static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
63                                                       const MCSubtargetInfo &STI,
64                                                       MCContext &Ctx) {
65    std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
66    return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
67  }
68  
69  extern "C" LLVM_EXTERNAL_VISIBILITY void
70  LLVMInitializeWebAssemblyDisassembler() {
71    // Register the disassembler for each target.
72    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
73                                           createWebAssemblyDisassembler);
74    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
75                                           createWebAssemblyDisassembler);
76  }
77  
78  static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
79    if (Size >= Bytes.size())
80      return -1;
81    auto V = Bytes[Size];
82    Size++;
83    return V;
84  }
85  
86  static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
87                      bool Signed) {
88    unsigned N = 0;
89    const char *Error = nullptr;
90    Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
91                                 Bytes.data() + Bytes.size(), &Error)
92                 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
93                                                      Bytes.data() + Bytes.size(),
94                                                      &Error));
95    if (Error)
96      return false;
97    Size += N;
98    return true;
99  }
100  
101  static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
102                                ArrayRef<uint8_t> Bytes, bool Signed) {
103    int64_t Val;
104    if (!nextLEB(Val, Bytes, Size, Signed))
105      return false;
106    MI.addOperand(MCOperand::createImm(Val));
107    return true;
108  }
109  
110  template <typename T>
111  bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
112    if (Size + sizeof(T) > Bytes.size())
113      return false;
114    T Val =
115        support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
116    Size += sizeof(T);
117    if (std::is_floating_point<T>::value) {
118      MI.addOperand(
119          MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
120    } else {
121      MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
122    }
123    return true;
124  }
125  
126  Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
127                                                        uint64_t &Size,
128                                                        ArrayRef<uint8_t> Bytes,
129                                                        uint64_t Address) const {
130    Size = 0;
131    if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
132      // Start of a code section: we're parsing only the function count.
133      int64_t FunctionCount;
134      if (!nextLEB(FunctionCount, Bytes, Size, false))
135        return false;
136      outs() << "        # " << FunctionCount << " functions in section.";
137    } else {
138      // Parse the start of a single function.
139      int64_t BodySize, LocalEntryCount;
140      if (!nextLEB(BodySize, Bytes, Size, false) ||
141          !nextLEB(LocalEntryCount, Bytes, Size, false))
142        return false;
143      if (LocalEntryCount) {
144        outs() << "        .local ";
145        for (int64_t I = 0; I < LocalEntryCount; I++) {
146          int64_t Count, Type;
147          if (!nextLEB(Count, Bytes, Size, false) ||
148              !nextLEB(Type, Bytes, Size, false))
149            return false;
150          for (int64_t J = 0; J < Count; J++) {
151            if (I || J)
152              outs() << ", ";
153            outs() << WebAssembly::anyTypeToString(Type);
154          }
155        }
156      }
157    }
158    outs() << "\n";
159    return true;
160  }
161  
162  MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
163      MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
164      raw_ostream &CS) const {
165    CommentStream = &CS;
166    Size = 0;
167    int Opc = nextByte(Bytes, Size);
168    if (Opc < 0)
169      return MCDisassembler::Fail;
170    const auto *WasmInst = &InstructionTable0[Opc];
171    // If this is a prefix byte, indirect to another table.
172    if (WasmInst->ET == ET_Prefix) {
173      WasmInst = nullptr;
174      // Linear search, so far only 2 entries.
175      for (auto PT = PrefixTable; PT->Table; PT++) {
176        if (PT->Prefix == Opc) {
177          WasmInst = PT->Table;
178          break;
179        }
180      }
181      if (!WasmInst)
182        return MCDisassembler::Fail;
183      int64_t PrefixedOpc;
184      if (!nextLEB(PrefixedOpc, Bytes, Size, false))
185        return MCDisassembler::Fail;
186      if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
187        return MCDisassembler::Fail;
188      WasmInst += PrefixedOpc;
189    }
190    if (WasmInst->ET == ET_Unused)
191      return MCDisassembler::Fail;
192    // At this point we must have a valid instruction to decode.
193    assert(WasmInst->ET == ET_Instruction);
194    MI.setOpcode(WasmInst->Opcode);
195    // Parse any operands.
196    for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
197      auto OT = OperandTable[WasmInst->OperandStart + OPI];
198      switch (OT) {
199      // ULEB operands:
200      case WebAssembly::OPERAND_BASIC_BLOCK:
201      case WebAssembly::OPERAND_LOCAL:
202      case WebAssembly::OPERAND_GLOBAL:
203      case WebAssembly::OPERAND_FUNCTION32:
204      case WebAssembly::OPERAND_TABLE:
205      case WebAssembly::OPERAND_OFFSET32:
206      case WebAssembly::OPERAND_OFFSET64:
207      case WebAssembly::OPERAND_P2ALIGN:
208      case WebAssembly::OPERAND_TYPEINDEX:
209      case WebAssembly::OPERAND_TAG:
210      case MCOI::OPERAND_IMMEDIATE: {
211        if (!parseLEBImmediate(MI, Size, Bytes, false))
212          return MCDisassembler::Fail;
213        break;
214      }
215      // SLEB operands:
216      case WebAssembly::OPERAND_I32IMM:
217      case WebAssembly::OPERAND_I64IMM: {
218        if (!parseLEBImmediate(MI, Size, Bytes, true))
219          return MCDisassembler::Fail;
220        break;
221      }
222      // block_type operands:
223      case WebAssembly::OPERAND_SIGNATURE: {
224        int64_t Val;
225        uint64_t PrevSize = Size;
226        if (!nextLEB(Val, Bytes, Size, true))
227          return MCDisassembler::Fail;
228        if (Val < 0) {
229          // Negative values are single septet value types or empty types
230          if (Size != PrevSize + 1) {
231            MI.addOperand(
232                MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
233          } else {
234            MI.addOperand(MCOperand::createImm(Val & 0x7f));
235          }
236        } else {
237          // We don't have access to the signature, so create a symbol without one
238          MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
239          auto *WasmSym = cast<MCSymbolWasm>(Sym);
240          WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
241          const MCExpr *Expr = MCSymbolRefExpr::create(
242              WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
243          MI.addOperand(MCOperand::createExpr(Expr));
244        }
245        break;
246      }
247      // FP operands.
248      case WebAssembly::OPERAND_F32IMM: {
249        if (!parseImmediate<float>(MI, Size, Bytes))
250          return MCDisassembler::Fail;
251        break;
252      }
253      case WebAssembly::OPERAND_F64IMM: {
254        if (!parseImmediate<double>(MI, Size, Bytes))
255          return MCDisassembler::Fail;
256        break;
257      }
258      // Vector lane operands (not LEB encoded).
259      case WebAssembly::OPERAND_VEC_I8IMM: {
260        if (!parseImmediate<uint8_t>(MI, Size, Bytes))
261          return MCDisassembler::Fail;
262        break;
263      }
264      case WebAssembly::OPERAND_VEC_I16IMM: {
265        if (!parseImmediate<uint16_t>(MI, Size, Bytes))
266          return MCDisassembler::Fail;
267        break;
268      }
269      case WebAssembly::OPERAND_VEC_I32IMM: {
270        if (!parseImmediate<uint32_t>(MI, Size, Bytes))
271          return MCDisassembler::Fail;
272        break;
273      }
274      case WebAssembly::OPERAND_VEC_I64IMM: {
275        if (!parseImmediate<uint64_t>(MI, Size, Bytes))
276          return MCDisassembler::Fail;
277        break;
278      }
279      case WebAssembly::OPERAND_BRLIST: {
280        int64_t TargetTableLen;
281        if (!nextLEB(TargetTableLen, Bytes, Size, false))
282          return MCDisassembler::Fail;
283        for (int64_t I = 0; I < TargetTableLen; I++) {
284          if (!parseLEBImmediate(MI, Size, Bytes, false))
285            return MCDisassembler::Fail;
286        }
287        // Default case.
288        if (!parseLEBImmediate(MI, Size, Bytes, false))
289          return MCDisassembler::Fail;
290        break;
291      }
292      case MCOI::OPERAND_REGISTER:
293        // The tablegen header currently does not have any register operands since
294        // we use only the stack (_S) instructions.
295        // If you hit this that probably means a bad instruction definition in
296        // tablegen.
297        llvm_unreachable("Register operand in WebAssemblyDisassembler");
298      default:
299        llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
300      }
301    }
302    return MCDisassembler::Success;
303  }
304