1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h" 18 #include "TargetInfo/WebAssemblyTargetInfo.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDecoderOps.h" 21 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCSymbol.h" 26 #include "llvm/MC/MCSymbolWasm.h" 27 #include "llvm/MC/TargetRegistry.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Endian.h" 30 #include "llvm/Support/LEB128.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "wasm-disassembler" 35 36 using DecodeStatus = MCDisassembler::DecodeStatus; 37 38 #include "WebAssemblyGenDisassemblerTables.inc" 39 40 namespace { 41 static constexpr int WebAssemblyInstructionTableSize = 256; 42 43 class WebAssemblyDisassembler final : public MCDisassembler { 44 std::unique_ptr<const MCInstrInfo> MCII; 45 46 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 47 ArrayRef<uint8_t> Bytes, uint64_t Address, 48 raw_ostream &CStream) const override; 49 std::optional<DecodeStatus> 50 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 51 uint64_t Address, raw_ostream &CStream) const override; 52 53 public: 54 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 55 std::unique_ptr<const MCInstrInfo> MCII) 56 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 57 }; 58 } // end anonymous namespace 59 60 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 61 const MCSubtargetInfo &STI, 62 MCContext &Ctx) { 63 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 64 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 65 } 66 67 extern "C" LLVM_EXTERNAL_VISIBILITY void 68 LLVMInitializeWebAssemblyDisassembler() { 69 // Register the disassembler for each target. 70 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 71 createWebAssemblyDisassembler); 72 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 73 createWebAssemblyDisassembler); 74 } 75 76 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 77 if (Size >= Bytes.size()) 78 return -1; 79 auto V = Bytes[Size]; 80 Size++; 81 return V; 82 } 83 84 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 85 bool Signed) { 86 unsigned N = 0; 87 const char *Error = nullptr; 88 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 89 Bytes.data() + Bytes.size(), &Error) 90 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 91 Bytes.data() + Bytes.size(), 92 &Error)); 93 if (Error) 94 return false; 95 Size += N; 96 return true; 97 } 98 99 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 100 ArrayRef<uint8_t> Bytes, bool Signed) { 101 int64_t Val; 102 if (!nextLEB(Val, Bytes, Size, Signed)) 103 return false; 104 MI.addOperand(MCOperand::createImm(Val)); 105 return true; 106 } 107 108 template <typename T> 109 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 110 if (Size + sizeof(T) > Bytes.size()) 111 return false; 112 T Val = 113 support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size); 114 Size += sizeof(T); 115 if (std::is_floating_point<T>::value) { 116 MI.addOperand( 117 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 118 } else { 119 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 120 } 121 return true; 122 } 123 124 std::optional<MCDisassembler::DecodeStatus> 125 WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 126 ArrayRef<uint8_t> Bytes, 127 uint64_t Address, 128 raw_ostream &CStream) const { 129 Size = 0; 130 if (Address == 0) { 131 // Start of a code section: we're parsing only the function count. 132 int64_t FunctionCount; 133 if (!nextLEB(FunctionCount, Bytes, Size, false)) 134 return std::nullopt; 135 outs() << " # " << FunctionCount << " functions in section."; 136 } else { 137 // Parse the start of a single function. 138 int64_t BodySize, LocalEntryCount; 139 if (!nextLEB(BodySize, Bytes, Size, false) || 140 !nextLEB(LocalEntryCount, Bytes, Size, false)) 141 return std::nullopt; 142 if (LocalEntryCount) { 143 outs() << " .local "; 144 for (int64_t I = 0; I < LocalEntryCount; I++) { 145 int64_t Count, Type; 146 if (!nextLEB(Count, Bytes, Size, false) || 147 !nextLEB(Type, Bytes, Size, false)) 148 return std::nullopt; 149 for (int64_t J = 0; J < Count; J++) { 150 if (I || J) 151 outs() << ", "; 152 outs() << WebAssembly::anyTypeToString(Type); 153 } 154 } 155 } 156 } 157 outs() << "\n"; 158 return MCDisassembler::Success; 159 } 160 161 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 162 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 163 raw_ostream &CS) const { 164 CommentStream = &CS; 165 Size = 0; 166 int Opc = nextByte(Bytes, Size); 167 if (Opc < 0) 168 return MCDisassembler::Fail; 169 const auto *WasmInst = &InstructionTable0[Opc]; 170 // If this is a prefix byte, indirect to another table. 171 if (WasmInst->ET == ET_Prefix) { 172 WasmInst = nullptr; 173 // Linear search, so far only 2 entries. 174 for (auto PT = PrefixTable; PT->Table; PT++) { 175 if (PT->Prefix == Opc) { 176 WasmInst = PT->Table; 177 break; 178 } 179 } 180 if (!WasmInst) 181 return MCDisassembler::Fail; 182 int64_t PrefixedOpc; 183 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 184 return MCDisassembler::Fail; 185 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 186 return MCDisassembler::Fail; 187 WasmInst += PrefixedOpc; 188 } 189 if (WasmInst->ET == ET_Unused) 190 return MCDisassembler::Fail; 191 // At this point we must have a valid instruction to decode. 192 assert(WasmInst->ET == ET_Instruction); 193 MI.setOpcode(WasmInst->Opcode); 194 // Parse any operands. 195 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 196 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 197 switch (OT) { 198 // ULEB operands: 199 case WebAssembly::OPERAND_BASIC_BLOCK: 200 case WebAssembly::OPERAND_LOCAL: 201 case WebAssembly::OPERAND_GLOBAL: 202 case WebAssembly::OPERAND_FUNCTION32: 203 case WebAssembly::OPERAND_TABLE: 204 case WebAssembly::OPERAND_OFFSET32: 205 case WebAssembly::OPERAND_OFFSET64: 206 case WebAssembly::OPERAND_P2ALIGN: 207 case WebAssembly::OPERAND_TYPEINDEX: 208 case WebAssembly::OPERAND_TAG: 209 case MCOI::OPERAND_IMMEDIATE: { 210 if (!parseLEBImmediate(MI, Size, Bytes, false)) 211 return MCDisassembler::Fail; 212 break; 213 } 214 // SLEB operands: 215 case WebAssembly::OPERAND_I32IMM: 216 case WebAssembly::OPERAND_I64IMM: { 217 if (!parseLEBImmediate(MI, Size, Bytes, true)) 218 return MCDisassembler::Fail; 219 break; 220 } 221 // block_type operands: 222 case WebAssembly::OPERAND_SIGNATURE: { 223 int64_t Val; 224 uint64_t PrevSize = Size; 225 if (!nextLEB(Val, Bytes, Size, true)) 226 return MCDisassembler::Fail; 227 if (Val < 0) { 228 // Negative values are single septet value types or empty types 229 if (Size != PrevSize + 1) { 230 MI.addOperand( 231 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 232 } else { 233 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 234 } 235 } else { 236 // We don't have access to the signature, so create a symbol without one 237 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 238 auto *WasmSym = cast<MCSymbolWasm>(Sym); 239 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 240 const MCExpr *Expr = MCSymbolRefExpr::create( 241 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 242 MI.addOperand(MCOperand::createExpr(Expr)); 243 } 244 break; 245 } 246 // FP operands. 247 case WebAssembly::OPERAND_F32IMM: { 248 if (!parseImmediate<float>(MI, Size, Bytes)) 249 return MCDisassembler::Fail; 250 break; 251 } 252 case WebAssembly::OPERAND_F64IMM: { 253 if (!parseImmediate<double>(MI, Size, Bytes)) 254 return MCDisassembler::Fail; 255 break; 256 } 257 // Vector lane operands (not LEB encoded). 258 case WebAssembly::OPERAND_VEC_I8IMM: { 259 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 260 return MCDisassembler::Fail; 261 break; 262 } 263 case WebAssembly::OPERAND_VEC_I16IMM: { 264 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 265 return MCDisassembler::Fail; 266 break; 267 } 268 case WebAssembly::OPERAND_VEC_I32IMM: { 269 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 270 return MCDisassembler::Fail; 271 break; 272 } 273 case WebAssembly::OPERAND_VEC_I64IMM: { 274 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 275 return MCDisassembler::Fail; 276 break; 277 } 278 case WebAssembly::OPERAND_BRLIST: { 279 int64_t TargetTableLen; 280 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 281 return MCDisassembler::Fail; 282 for (int64_t I = 0; I < TargetTableLen; I++) { 283 if (!parseLEBImmediate(MI, Size, Bytes, false)) 284 return MCDisassembler::Fail; 285 } 286 // Default case. 287 if (!parseLEBImmediate(MI, Size, Bytes, false)) 288 return MCDisassembler::Fail; 289 break; 290 } 291 case MCOI::OPERAND_REGISTER: 292 // The tablegen header currently does not have any register operands since 293 // we use only the stack (_S) instructions. 294 // If you hit this that probably means a bad instruction definition in 295 // tablegen. 296 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 297 default: 298 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 299 } 300 } 301 return MCDisassembler::Success; 302 } 303