xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 0fcececbac9880b092aeb56a41a16f1ec8ac1ae6)
1  //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This file is part of the WebAssembly Disassembler.
11  ///
12  /// It contains code to translate the data produced by the decoder into
13  /// MCInsts.
14  ///
15  //===----------------------------------------------------------------------===//
16  
17  #include "TargetInfo/WebAssemblyTargetInfo.h"
18  #include "Utils/WebAssemblyTypeUtilities.h"
19  #include "llvm/MC/MCContext.h"
20  #include "llvm/MC/MCDecoderOps.h"
21  #include "llvm/MC/MCDisassembler/MCDisassembler.h"
22  #include "llvm/MC/MCInst.h"
23  #include "llvm/MC/MCInstrInfo.h"
24  #include "llvm/MC/MCSubtargetInfo.h"
25  #include "llvm/MC/MCSymbol.h"
26  #include "llvm/MC/MCSymbolWasm.h"
27  #include "llvm/MC/TargetRegistry.h"
28  #include "llvm/Support/Endian.h"
29  #include "llvm/Support/LEB128.h"
30  
31  using namespace llvm;
32  
33  #define DEBUG_TYPE "wasm-disassembler"
34  
35  using DecodeStatus = MCDisassembler::DecodeStatus;
36  
37  #include "WebAssemblyGenDisassemblerTables.inc"
38  
39  namespace {
40  static constexpr int WebAssemblyInstructionTableSize = 256;
41  
42  class WebAssemblyDisassembler final : public MCDisassembler {
43    std::unique_ptr<const MCInstrInfo> MCII;
44  
45    DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
46                                ArrayRef<uint8_t> Bytes, uint64_t Address,
47                                raw_ostream &CStream) const override;
48    std::optional<DecodeStatus>
49    onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
50                  uint64_t Address, raw_ostream &CStream) const override;
51  
52  public:
53    WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
54                            std::unique_ptr<const MCInstrInfo> MCII)
55        : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
56  };
57  } // end anonymous namespace
58  
59  static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
60                                                       const MCSubtargetInfo &STI,
61                                                       MCContext &Ctx) {
62    std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
63    return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
64  }
65  
66  extern "C" LLVM_EXTERNAL_VISIBILITY void
67  LLVMInitializeWebAssemblyDisassembler() {
68    // Register the disassembler for each target.
69    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
70                                           createWebAssemblyDisassembler);
71    TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
72                                           createWebAssemblyDisassembler);
73  }
74  
75  static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
76    if (Size >= Bytes.size())
77      return -1;
78    auto V = Bytes[Size];
79    Size++;
80    return V;
81  }
82  
83  static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
84                      bool Signed) {
85    unsigned N = 0;
86    const char *Error = nullptr;
87    Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
88                                 Bytes.data() + Bytes.size(), &Error)
89                 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
90                                                      Bytes.data() + Bytes.size(),
91                                                      &Error));
92    if (Error)
93      return false;
94    Size += N;
95    return true;
96  }
97  
98  static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
99                                ArrayRef<uint8_t> Bytes, bool Signed) {
100    int64_t Val;
101    if (!nextLEB(Val, Bytes, Size, Signed))
102      return false;
103    MI.addOperand(MCOperand::createImm(Val));
104    return true;
105  }
106  
107  template <typename T>
108  bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
109    if (Size + sizeof(T) > Bytes.size())
110      return false;
111    T Val = support::endian::read<T, support::endianness::little, 1>(
112        Bytes.data() + Size);
113    Size += sizeof(T);
114    if (std::is_floating_point<T>::value) {
115      MI.addOperand(
116          MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
117    } else {
118      MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
119    }
120    return true;
121  }
122  
123  std::optional<MCDisassembler::DecodeStatus>
124  WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
125                                         ArrayRef<uint8_t> Bytes,
126                                         uint64_t Address,
127                                         raw_ostream &CStream) const {
128    Size = 0;
129    if (Address == 0) {
130      // Start of a code section: we're parsing only the function count.
131      int64_t FunctionCount;
132      if (!nextLEB(FunctionCount, Bytes, Size, false))
133        return std::nullopt;
134      outs() << "        # " << FunctionCount << " functions in section.";
135    } else {
136      // Parse the start of a single function.
137      int64_t BodySize, LocalEntryCount;
138      if (!nextLEB(BodySize, Bytes, Size, false) ||
139          !nextLEB(LocalEntryCount, Bytes, Size, false))
140        return std::nullopt;
141      if (LocalEntryCount) {
142        outs() << "        .local ";
143        for (int64_t I = 0; I < LocalEntryCount; I++) {
144          int64_t Count, Type;
145          if (!nextLEB(Count, Bytes, Size, false) ||
146              !nextLEB(Type, Bytes, Size, false))
147            return std::nullopt;
148          for (int64_t J = 0; J < Count; J++) {
149            if (I || J)
150              outs() << ", ";
151            outs() << WebAssembly::anyTypeToString(Type);
152          }
153        }
154      }
155    }
156    outs() << "\n";
157    return MCDisassembler::Success;
158  }
159  
160  MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
161      MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
162      raw_ostream &CS) const {
163    CommentStream = &CS;
164    Size = 0;
165    int Opc = nextByte(Bytes, Size);
166    if (Opc < 0)
167      return MCDisassembler::Fail;
168    const auto *WasmInst = &InstructionTable0[Opc];
169    // If this is a prefix byte, indirect to another table.
170    if (WasmInst->ET == ET_Prefix) {
171      WasmInst = nullptr;
172      // Linear search, so far only 2 entries.
173      for (auto PT = PrefixTable; PT->Table; PT++) {
174        if (PT->Prefix == Opc) {
175          WasmInst = PT->Table;
176          break;
177        }
178      }
179      if (!WasmInst)
180        return MCDisassembler::Fail;
181      int64_t PrefixedOpc;
182      if (!nextLEB(PrefixedOpc, Bytes, Size, false))
183        return MCDisassembler::Fail;
184      if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
185        return MCDisassembler::Fail;
186      WasmInst += PrefixedOpc;
187    }
188    if (WasmInst->ET == ET_Unused)
189      return MCDisassembler::Fail;
190    // At this point we must have a valid instruction to decode.
191    assert(WasmInst->ET == ET_Instruction);
192    MI.setOpcode(WasmInst->Opcode);
193    // Parse any operands.
194    for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
195      auto OT = OperandTable[WasmInst->OperandStart + OPI];
196      switch (OT) {
197      // ULEB operands:
198      case WebAssembly::OPERAND_BASIC_BLOCK:
199      case WebAssembly::OPERAND_LOCAL:
200      case WebAssembly::OPERAND_GLOBAL:
201      case WebAssembly::OPERAND_FUNCTION32:
202      case WebAssembly::OPERAND_TABLE:
203      case WebAssembly::OPERAND_OFFSET32:
204      case WebAssembly::OPERAND_OFFSET64:
205      case WebAssembly::OPERAND_P2ALIGN:
206      case WebAssembly::OPERAND_TYPEINDEX:
207      case WebAssembly::OPERAND_TAG:
208      case MCOI::OPERAND_IMMEDIATE: {
209        if (!parseLEBImmediate(MI, Size, Bytes, false))
210          return MCDisassembler::Fail;
211        break;
212      }
213      // SLEB operands:
214      case WebAssembly::OPERAND_I32IMM:
215      case WebAssembly::OPERAND_I64IMM: {
216        if (!parseLEBImmediate(MI, Size, Bytes, true))
217          return MCDisassembler::Fail;
218        break;
219      }
220      // block_type operands:
221      case WebAssembly::OPERAND_SIGNATURE: {
222        int64_t Val;
223        uint64_t PrevSize = Size;
224        if (!nextLEB(Val, Bytes, Size, true))
225          return MCDisassembler::Fail;
226        if (Val < 0) {
227          // Negative values are single septet value types or empty types
228          if (Size != PrevSize + 1) {
229            MI.addOperand(
230                MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
231          } else {
232            MI.addOperand(MCOperand::createImm(Val & 0x7f));
233          }
234        } else {
235          // We don't have access to the signature, so create a symbol without one
236          MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
237          auto *WasmSym = cast<MCSymbolWasm>(Sym);
238          WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
239          const MCExpr *Expr = MCSymbolRefExpr::create(
240              WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
241          MI.addOperand(MCOperand::createExpr(Expr));
242        }
243        break;
244      }
245      // FP operands.
246      case WebAssembly::OPERAND_F32IMM: {
247        if (!parseImmediate<float>(MI, Size, Bytes))
248          return MCDisassembler::Fail;
249        break;
250      }
251      case WebAssembly::OPERAND_F64IMM: {
252        if (!parseImmediate<double>(MI, Size, Bytes))
253          return MCDisassembler::Fail;
254        break;
255      }
256      // Vector lane operands (not LEB encoded).
257      case WebAssembly::OPERAND_VEC_I8IMM: {
258        if (!parseImmediate<uint8_t>(MI, Size, Bytes))
259          return MCDisassembler::Fail;
260        break;
261      }
262      case WebAssembly::OPERAND_VEC_I16IMM: {
263        if (!parseImmediate<uint16_t>(MI, Size, Bytes))
264          return MCDisassembler::Fail;
265        break;
266      }
267      case WebAssembly::OPERAND_VEC_I32IMM: {
268        if (!parseImmediate<uint32_t>(MI, Size, Bytes))
269          return MCDisassembler::Fail;
270        break;
271      }
272      case WebAssembly::OPERAND_VEC_I64IMM: {
273        if (!parseImmediate<uint64_t>(MI, Size, Bytes))
274          return MCDisassembler::Fail;
275        break;
276      }
277      case WebAssembly::OPERAND_BRLIST: {
278        int64_t TargetTableLen;
279        if (!nextLEB(TargetTableLen, Bytes, Size, false))
280          return MCDisassembler::Fail;
281        for (int64_t I = 0; I < TargetTableLen; I++) {
282          if (!parseLEBImmediate(MI, Size, Bytes, false))
283            return MCDisassembler::Fail;
284        }
285        // Default case.
286        if (!parseLEBImmediate(MI, Size, Bytes, false))
287          return MCDisassembler::Fail;
288        break;
289      }
290      case MCOI::OPERAND_REGISTER:
291        // The tablegen header currently does not have any register operands since
292        // we use only the stack (_S) instructions.
293        // If you hit this that probably means a bad instruction definition in
294        // tablegen.
295        llvm_unreachable("Register operand in WebAssemblyDisassembler");
296      default:
297        llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
298      }
299    }
300    return MCDisassembler::Success;
301  }
302