xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Disassembler.
11 ///
12 /// It contains code to translate the data produced by the decoder into
13 /// MCInsts.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "MCTargetDesc/WebAssemblyMCAsmInfo.h"
18 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
19 #include "TargetInfo/WebAssemblyTargetInfo.h"
20 #include "llvm/BinaryFormat/Wasm.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCSymbol.h"
27 #include "llvm/MC/MCSymbolWasm.h"
28 #include "llvm/MC/TargetRegistry.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Compiler.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/LEB128.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "wasm-disassembler"
37 
38 using DecodeStatus = MCDisassembler::DecodeStatus;
39 
40 #include "WebAssemblyGenDisassemblerTables.inc"
41 
42 static constexpr int WebAssemblyInstructionTableSize = 256;
43 
44 namespace {
45 class WebAssemblyDisassembler final : public MCDisassembler {
46   std::unique_ptr<const MCInstrInfo> MCII;
47 
48   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
49                               ArrayRef<uint8_t> Bytes, uint64_t Address,
50                               raw_ostream &CStream) const override;
51 
52   Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
53                                ArrayRef<uint8_t> Bytes,
54                                uint64_t Address) const override;
55 
56 public:
57   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
58                           std::unique_ptr<const MCInstrInfo> MCII)
59       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
60 };
61 } // end anonymous namespace
62 
63 static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
64                                                      const MCSubtargetInfo &STI,
65                                                      MCContext &Ctx) {
66   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
67   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
68 }
69 
70 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
71 LLVMInitializeWebAssemblyDisassembler() {
72   // Register the disassembler for each target.
73   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
74                                          createWebAssemblyDisassembler);
75   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
76                                          createWebAssemblyDisassembler);
77 }
78 
79 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
80   if (Size >= Bytes.size())
81     return -1;
82   auto V = Bytes[Size];
83   Size++;
84   return V;
85 }
86 
87 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
88                     bool Signed) {
89   unsigned N = 0;
90   const char *Error = nullptr;
91   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
92                                Bytes.data() + Bytes.size(), &Error)
93                : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
94                                                     Bytes.data() + Bytes.size(),
95                                                     &Error));
96   if (Error)
97     return false;
98   Size += N;
99   return true;
100 }
101 
102 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
103                               ArrayRef<uint8_t> Bytes, bool Signed) {
104   int64_t Val;
105   if (!nextLEB(Val, Bytes, Size, Signed))
106     return false;
107   MI.addOperand(MCOperand::createImm(Val));
108   return true;
109 }
110 
111 template <typename T>
112 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
113   if (Size + sizeof(T) > Bytes.size())
114     return false;
115   T Val =
116       support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
117   Size += sizeof(T);
118   if (std::is_floating_point<T>::value) {
119     MI.addOperand(
120         MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
121   } else {
122     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
123   }
124   return true;
125 }
126 
127 Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
128                                                       uint64_t &Size,
129                                                       ArrayRef<uint8_t> Bytes,
130                                                       uint64_t Address) const {
131   Size = 0;
132   if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
133     // Start of a code section: we're parsing only the function count.
134     int64_t FunctionCount;
135     if (!nextLEB(FunctionCount, Bytes, Size, false))
136       return false;
137     outs() << "        # " << FunctionCount << " functions in section.";
138   } else {
139     // Parse the start of a single function.
140     int64_t BodySize, LocalEntryCount;
141     if (!nextLEB(BodySize, Bytes, Size, false) ||
142         !nextLEB(LocalEntryCount, Bytes, Size, false))
143       return false;
144     if (LocalEntryCount) {
145       outs() << "        .local ";
146       for (int64_t I = 0; I < LocalEntryCount; I++) {
147         int64_t Count, Type;
148         if (!nextLEB(Count, Bytes, Size, false) ||
149             !nextLEB(Type, Bytes, Size, false))
150           return false;
151         for (int64_t J = 0; J < Count; J++) {
152           if (I || J)
153             outs() << ", ";
154           outs() << WebAssembly::anyTypeToString(Type);
155         }
156       }
157     }
158   }
159   outs() << "\n";
160   return true;
161 }
162 
163 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
164     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
165     raw_ostream &CS) const {
166   CommentStream = &CS;
167   Size = 0;
168   int Opc = nextByte(Bytes, Size);
169   if (Opc < 0)
170     return MCDisassembler::Fail;
171   const auto *WasmInst = &InstructionTable0[Opc];
172   // If this is a prefix byte, indirect to another table.
173   if (WasmInst->ET == ET_Prefix) {
174     WasmInst = nullptr;
175     // Linear search, so far only 4 entries.
176     for (const auto &[Prefix, Table] : PrefixTable) {
177       if (Prefix == Opc) {
178         WasmInst = Table;
179         break;
180       }
181     }
182     if (!WasmInst)
183       return MCDisassembler::Fail;
184     int64_t PrefixedOpc;
185     if (!nextLEB(PrefixedOpc, Bytes, Size, false))
186       return MCDisassembler::Fail;
187     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
188       return MCDisassembler::Fail;
189     WasmInst += PrefixedOpc;
190   }
191   if (WasmInst->ET == ET_Unused)
192     return MCDisassembler::Fail;
193   // At this point we must have a valid instruction to decode.
194   assert(WasmInst->ET == ET_Instruction);
195   MI.setOpcode(WasmInst->Opcode);
196   // Parse any operands.
197   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
198     auto OT = OperandTable[WasmInst->OperandStart + OPI];
199     switch (OT) {
200     // ULEB operands:
201     case WebAssembly::OPERAND_BASIC_BLOCK:
202     case WebAssembly::OPERAND_LOCAL:
203     case WebAssembly::OPERAND_GLOBAL:
204     case WebAssembly::OPERAND_FUNCTION32:
205     case WebAssembly::OPERAND_TABLE:
206     case WebAssembly::OPERAND_OFFSET32:
207     case WebAssembly::OPERAND_OFFSET64:
208     case WebAssembly::OPERAND_P2ALIGN:
209     case WebAssembly::OPERAND_TYPEINDEX:
210     case WebAssembly::OPERAND_TAG:
211     case MCOI::OPERAND_IMMEDIATE: {
212       if (!parseLEBImmediate(MI, Size, Bytes, false))
213         return MCDisassembler::Fail;
214       break;
215     }
216     // SLEB operands:
217     case WebAssembly::OPERAND_I32IMM:
218     case WebAssembly::OPERAND_I64IMM: {
219       if (!parseLEBImmediate(MI, Size, Bytes, true))
220         return MCDisassembler::Fail;
221       break;
222     }
223     // block_type operands:
224     case WebAssembly::OPERAND_SIGNATURE: {
225       int64_t Val;
226       uint64_t PrevSize = Size;
227       if (!nextLEB(Val, Bytes, Size, true))
228         return MCDisassembler::Fail;
229       if (Val < 0) {
230         // Negative values are single septet value types or empty types
231         if (Size != PrevSize + 1) {
232           MI.addOperand(
233               MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
234         } else {
235           MI.addOperand(MCOperand::createImm(Val & 0x7f));
236         }
237       } else {
238         // We don't have access to the signature, so create a symbol without one
239         MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
240         auto *WasmSym = cast<MCSymbolWasm>(Sym);
241         WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
242         const MCExpr *Expr = MCSymbolRefExpr::create(
243             WasmSym, WebAssembly::S_TYPEINDEX, getContext());
244         MI.addOperand(MCOperand::createExpr(Expr));
245       }
246       break;
247     }
248     // FP operands.
249     case WebAssembly::OPERAND_F32IMM: {
250       if (!parseImmediate<float>(MI, Size, Bytes))
251         return MCDisassembler::Fail;
252       break;
253     }
254     case WebAssembly::OPERAND_F64IMM: {
255       if (!parseImmediate<double>(MI, Size, Bytes))
256         return MCDisassembler::Fail;
257       break;
258     }
259     // Vector lane operands (not LEB encoded).
260     case WebAssembly::OPERAND_VEC_I8IMM: {
261       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
262         return MCDisassembler::Fail;
263       break;
264     }
265     case WebAssembly::OPERAND_VEC_I16IMM: {
266       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
267         return MCDisassembler::Fail;
268       break;
269     }
270     case WebAssembly::OPERAND_VEC_I32IMM: {
271       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
272         return MCDisassembler::Fail;
273       break;
274     }
275     case WebAssembly::OPERAND_VEC_I64IMM: {
276       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
277         return MCDisassembler::Fail;
278       break;
279     }
280     case WebAssembly::OPERAND_BRLIST: {
281       int64_t TargetTableLen;
282       if (!nextLEB(TargetTableLen, Bytes, Size, false))
283         return MCDisassembler::Fail;
284       for (int64_t I = 0; I < TargetTableLen; I++) {
285         if (!parseLEBImmediate(MI, Size, Bytes, false))
286           return MCDisassembler::Fail;
287       }
288       // Default case.
289       if (!parseLEBImmediate(MI, Size, Bytes, false))
290         return MCDisassembler::Fail;
291       break;
292     }
293     case WebAssembly::OPERAND_CATCH_LIST: {
294       if (!parseLEBImmediate(MI, Size, Bytes, false))
295         return MCDisassembler::Fail;
296       int64_t NumCatches = MI.getOperand(MI.getNumOperands() - 1).getImm();
297       for (int64_t I = 0; I < NumCatches; I++) {
298         if (!parseImmediate<uint8_t>(MI, Size, Bytes))
299           return MCDisassembler::Fail;
300         int64_t CatchOpcode = MI.getOperand(MI.getNumOperands() - 1).getImm();
301         if (CatchOpcode == wasm::WASM_OPCODE_CATCH ||
302             CatchOpcode == wasm::WASM_OPCODE_CATCH_REF) {
303           if (!parseLEBImmediate(MI, Size, Bytes, false)) // tag index
304             return MCDisassembler::Fail;
305         }
306         if (!parseLEBImmediate(MI, Size, Bytes, false)) // destination
307           return MCDisassembler::Fail;
308       }
309       break;
310     }
311     case MCOI::OPERAND_REGISTER:
312       // The tablegen header currently does not have any register operands since
313       // we use only the stack (_S) instructions.
314       // If you hit this that probably means a bad instruction definition in
315       // tablegen.
316       llvm_unreachable("Register operand in WebAssemblyDisassembler");
317     default:
318       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
319     }
320   }
321   return MCDisassembler::Success;
322 }
323