xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 20777f79b896f3be37086fc7e2c6cbb8cdb865c5)
1  //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file is part of the WebAssembly Disassembler.
11  ///
12  /// It contains code to translate the data produced by the decoder into
13  /// MCInsts.
14  ///
15  //===----------------------------------------------------------------------===//
16  
17  #include "TargetInfo/WebAssemblyTargetInfo.h"
18  #include "Utils/WebAssemblyTypeUtilities.h"
19  #include "llvm/MC/MCContext.h"
20  #include "llvm/MC/MCDisassembler/MCDisassembler.h"
21  #include "llvm/MC/MCFixedLenDisassembler.h"
22  #include "llvm/MC/MCInst.h"
23  #include "llvm/MC/MCInstrInfo.h"
24  #include "llvm/MC/MCSubtargetInfo.h"
25  #include "llvm/MC/MCSymbol.h"
26  #include "llvm/MC/MCSymbolWasm.h"
27  #include "llvm/MC/TargetRegistry.h"
28  #include "llvm/Support/Endian.h"
29  #include "llvm/Support/LEB128.h"
30  
31  using namespace llvm;
32  
33  #define DEBUG_TYPE "wasm-disassembler"
34  
35  using DecodeStatus = MCDisassembler::DecodeStatus;
36  
37  #include "WebAssemblyGenDisassemblerTables.inc"
38  
39  namespace {
40  static constexpr int WebAssemblyInstructionTableSize = 256;
41  
42  class WebAssemblyDisassembler final : public MCDisassembler {
43    std::unique_ptr<const MCInstrInfo> MCII;
44  
45    DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
46                                ArrayRef<uint8_t> Bytes, uint64_t Address,
47                                raw_ostream &CStream) const override;
48    Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
49                                         ArrayRef<uint8_t> Bytes,
50                                         uint64_t Address,
51                                         raw_ostream &CStream) const override;
52  
53  public:
54    WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
55                            std::unique_ptr<const MCInstrInfo> MCII)
56        : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
57  };
58  } // end anonymous namespace
59  
60  static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
61                                                       const MCSubtargetInfo &STI,
62                                                       MCContext &Ctx) {
63    std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
64    return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
65  }
66  
67  extern "C" LLVM_EXTERNAL_VISIBILITY void
68  LLVMInitializeWebAssemblyDisassembler() {
69    // Register the disassembler for each target.
70    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
71                                           createWebAssemblyDisassembler);
72    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
73                                           createWebAssemblyDisassembler);
74  }
75  
76  static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
77    if (Size >= Bytes.size())
78      return -1;
79    auto V = Bytes[Size];
80    Size++;
81    return V;
82  }
83  
84  static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
85                      bool Signed) {
86    unsigned N = 0;
87    const char *Error = nullptr;
88    Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
89                                 Bytes.data() + Bytes.size(), &Error)
90                 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
91                                                      Bytes.data() + Bytes.size(),
92                                                      &Error));
93    if (Error)
94      return false;
95    Size += N;
96    return true;
97  }
98  
99  static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
100                                ArrayRef<uint8_t> Bytes, bool Signed) {
101    int64_t Val;
102    if (!nextLEB(Val, Bytes, Size, Signed))
103      return false;
104    MI.addOperand(MCOperand::createImm(Val));
105    return true;
106  }
107  
108  template <typename T>
109  bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
110    if (Size + sizeof(T) > Bytes.size())
111      return false;
112    T Val = support::endian::read<T, support::endianness::little, 1>(
113        Bytes.data() + Size);
114    Size += sizeof(T);
115    if (std::is_floating_point<T>::value) {
116      MI.addOperand(
117          MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
118    } else {
119      MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
120    }
121    return true;
122  }
123  
124  Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
125      SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
126      uint64_t Address, raw_ostream &CStream) const {
127    Size = 0;
128    if (Address == 0) {
129      // Start of a code section: we're parsing only the function count.
130      int64_t FunctionCount;
131      if (!nextLEB(FunctionCount, Bytes, Size, false))
132        return None;
133      outs() << "        # " << FunctionCount << " functions in section.";
134    } else {
135      // Parse the start of a single function.
136      int64_t BodySize, LocalEntryCount;
137      if (!nextLEB(BodySize, Bytes, Size, false) ||
138          !nextLEB(LocalEntryCount, Bytes, Size, false))
139        return None;
140      if (LocalEntryCount) {
141        outs() << "        .local ";
142        for (int64_t I = 0; I < LocalEntryCount; I++) {
143          int64_t Count, Type;
144          if (!nextLEB(Count, Bytes, Size, false) ||
145              !nextLEB(Type, Bytes, Size, false))
146            return None;
147          for (int64_t J = 0; J < Count; J++) {
148            if (I || J)
149              outs() << ", ";
150            outs() << WebAssembly::anyTypeToString(Type);
151          }
152        }
153      }
154    }
155    outs() << "\n";
156    return MCDisassembler::Success;
157  }
158  
159  MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
160      MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
161      raw_ostream &CS) const {
162    CommentStream = &CS;
163    Size = 0;
164    int Opc = nextByte(Bytes, Size);
165    if (Opc < 0)
166      return MCDisassembler::Fail;
167    const auto *WasmInst = &InstructionTable0[Opc];
168    // If this is a prefix byte, indirect to another table.
169    if (WasmInst->ET == ET_Prefix) {
170      WasmInst = nullptr;
171      // Linear search, so far only 2 entries.
172      for (auto PT = PrefixTable; PT->Table; PT++) {
173        if (PT->Prefix == Opc) {
174          WasmInst = PT->Table;
175          break;
176        }
177      }
178      if (!WasmInst)
179        return MCDisassembler::Fail;
180      int64_t PrefixedOpc;
181      if (!nextLEB(PrefixedOpc, Bytes, Size, false))
182        return MCDisassembler::Fail;
183      if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
184        return MCDisassembler::Fail;
185      WasmInst += PrefixedOpc;
186    }
187    if (WasmInst->ET == ET_Unused)
188      return MCDisassembler::Fail;
189    // At this point we must have a valid instruction to decode.
190    assert(WasmInst->ET == ET_Instruction);
191    MI.setOpcode(WasmInst->Opcode);
192    // Parse any operands.
193    for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
194      auto OT = OperandTable[WasmInst->OperandStart + OPI];
195      switch (OT) {
196      // ULEB operands:
197      case WebAssembly::OPERAND_BASIC_BLOCK:
198      case WebAssembly::OPERAND_LOCAL:
199      case WebAssembly::OPERAND_GLOBAL:
200      case WebAssembly::OPERAND_FUNCTION32:
201      case WebAssembly::OPERAND_TABLE:
202      case WebAssembly::OPERAND_OFFSET32:
203      case WebAssembly::OPERAND_OFFSET64:
204      case WebAssembly::OPERAND_P2ALIGN:
205      case WebAssembly::OPERAND_TYPEINDEX:
206      case WebAssembly::OPERAND_TAG:
207      case MCOI::OPERAND_IMMEDIATE: {
208        if (!parseLEBImmediate(MI, Size, Bytes, false))
209          return MCDisassembler::Fail;
210        break;
211      }
212      // SLEB operands:
213      case WebAssembly::OPERAND_I32IMM:
214      case WebAssembly::OPERAND_I64IMM: {
215        if (!parseLEBImmediate(MI, Size, Bytes, true))
216          return MCDisassembler::Fail;
217        break;
218      }
219      // block_type operands:
220      case WebAssembly::OPERAND_SIGNATURE: {
221        int64_t Val;
222        uint64_t PrevSize = Size;
223        if (!nextLEB(Val, Bytes, Size, true))
224          return MCDisassembler::Fail;
225        if (Val < 0) {
226          // Negative values are single septet value types or empty types
227          if (Size != PrevSize + 1) {
228            MI.addOperand(
229                MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
230          } else {
231            MI.addOperand(MCOperand::createImm(Val & 0x7f));
232          }
233        } else {
234          // We don't have access to the signature, so create a symbol without one
235          MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
236          auto *WasmSym = cast<MCSymbolWasm>(Sym);
237          WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
238          const MCExpr *Expr = MCSymbolRefExpr::create(
239              WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
240          MI.addOperand(MCOperand::createExpr(Expr));
241        }
242        break;
243      }
244      // FP operands.
245      case WebAssembly::OPERAND_F32IMM: {
246        if (!parseImmediate<float>(MI, Size, Bytes))
247          return MCDisassembler::Fail;
248        break;
249      }
250      case WebAssembly::OPERAND_F64IMM: {
251        if (!parseImmediate<double>(MI, Size, Bytes))
252          return MCDisassembler::Fail;
253        break;
254      }
255      // Vector lane operands (not LEB encoded).
256      case WebAssembly::OPERAND_VEC_I8IMM: {
257        if (!parseImmediate<uint8_t>(MI, Size, Bytes))
258          return MCDisassembler::Fail;
259        break;
260      }
261      case WebAssembly::OPERAND_VEC_I16IMM: {
262        if (!parseImmediate<uint16_t>(MI, Size, Bytes))
263          return MCDisassembler::Fail;
264        break;
265      }
266      case WebAssembly::OPERAND_VEC_I32IMM: {
267        if (!parseImmediate<uint32_t>(MI, Size, Bytes))
268          return MCDisassembler::Fail;
269        break;
270      }
271      case WebAssembly::OPERAND_VEC_I64IMM: {
272        if (!parseImmediate<uint64_t>(MI, Size, Bytes))
273          return MCDisassembler::Fail;
274        break;
275      }
276      case WebAssembly::OPERAND_BRLIST: {
277        int64_t TargetTableLen;
278        if (!nextLEB(TargetTableLen, Bytes, Size, false))
279          return MCDisassembler::Fail;
280        for (int64_t I = 0; I < TargetTableLen; I++) {
281          if (!parseLEBImmediate(MI, Size, Bytes, false))
282            return MCDisassembler::Fail;
283        }
284        // Default case.
285        if (!parseLEBImmediate(MI, Size, Bytes, false))
286          return MCDisassembler::Fail;
287        break;
288      }
289      case MCOI::OPERAND_REGISTER:
290        // The tablegen header currently does not have any register operands since
291        // we use only the stack (_S) instructions.
292        // If you hit this that probably means a bad instruction definition in
293        // tablegen.
294        llvm_unreachable("Register operand in WebAssemblyDisassembler");
295      default:
296        llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
297      }
298    }
299    return MCDisassembler::Success;
300  }
301