1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyMCAsmInfo.h" 18 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h" 19 #include "TargetInfo/WebAssemblyTargetInfo.h" 20 #include "llvm/BinaryFormat/Wasm.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/MC/MCSymbol.h" 27 #include "llvm/MC/MCSymbolWasm.h" 28 #include "llvm/MC/TargetRegistry.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/Compiler.h" 31 #include "llvm/Support/Endian.h" 32 #include "llvm/Support/LEB128.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "wasm-disassembler" 37 38 using DecodeStatus = MCDisassembler::DecodeStatus; 39 40 #include "WebAssemblyGenDisassemblerTables.inc" 41 42 static constexpr int WebAssemblyInstructionTableSize = 256; 43 44 namespace { 45 class WebAssemblyDisassembler final : public MCDisassembler { 46 std::unique_ptr<const MCInstrInfo> MCII; 47 48 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 49 ArrayRef<uint8_t> Bytes, uint64_t Address, 50 raw_ostream &CStream) const override; 51 52 Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 53 ArrayRef<uint8_t> Bytes, 54 uint64_t Address) const override; 55 56 public: 57 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 58 std::unique_ptr<const MCInstrInfo> MCII) 59 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 60 }; 61 } // end anonymous namespace 62 63 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 64 const MCSubtargetInfo &STI, 65 MCContext &Ctx) { 66 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 67 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 68 } 69 70 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void 71 LLVMInitializeWebAssemblyDisassembler() { 72 // Register the disassembler for each target. 73 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 74 createWebAssemblyDisassembler); 75 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 76 createWebAssemblyDisassembler); 77 } 78 79 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 80 if (Size >= Bytes.size()) 81 return -1; 82 auto V = Bytes[Size]; 83 Size++; 84 return V; 85 } 86 87 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 88 bool Signed) { 89 unsigned N = 0; 90 const char *Error = nullptr; 91 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 92 Bytes.data() + Bytes.size(), &Error) 93 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 94 Bytes.data() + Bytes.size(), 95 &Error)); 96 if (Error) 97 return false; 98 Size += N; 99 return true; 100 } 101 102 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 103 ArrayRef<uint8_t> Bytes, bool Signed) { 104 int64_t Val; 105 if (!nextLEB(Val, Bytes, Size, Signed)) 106 return false; 107 MI.addOperand(MCOperand::createImm(Val)); 108 return true; 109 } 110 111 template <typename T> 112 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 113 if (Size + sizeof(T) > Bytes.size()) 114 return false; 115 T Val = 116 support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size); 117 Size += sizeof(T); 118 if (std::is_floating_point<T>::value) { 119 MI.addOperand( 120 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 121 } else { 122 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 123 } 124 return true; 125 } 126 127 Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, 128 uint64_t &Size, 129 ArrayRef<uint8_t> Bytes, 130 uint64_t Address) const { 131 Size = 0; 132 if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) { 133 // Start of a code section: we're parsing only the function count. 134 int64_t FunctionCount; 135 if (!nextLEB(FunctionCount, Bytes, Size, false)) 136 return false; 137 outs() << " # " << FunctionCount << " functions in section."; 138 } else { 139 // Parse the start of a single function. 140 int64_t BodySize, LocalEntryCount; 141 if (!nextLEB(BodySize, Bytes, Size, false) || 142 !nextLEB(LocalEntryCount, Bytes, Size, false)) 143 return false; 144 if (LocalEntryCount) { 145 outs() << " .local "; 146 for (int64_t I = 0; I < LocalEntryCount; I++) { 147 int64_t Count, Type; 148 if (!nextLEB(Count, Bytes, Size, false) || 149 !nextLEB(Type, Bytes, Size, false)) 150 return false; 151 for (int64_t J = 0; J < Count; J++) { 152 if (I || J) 153 outs() << ", "; 154 outs() << WebAssembly::anyTypeToString(Type); 155 } 156 } 157 } 158 } 159 outs() << "\n"; 160 return true; 161 } 162 163 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 164 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 165 raw_ostream &CS) const { 166 CommentStream = &CS; 167 Size = 0; 168 int Opc = nextByte(Bytes, Size); 169 if (Opc < 0) 170 return MCDisassembler::Fail; 171 const auto *WasmInst = &InstructionTable0[Opc]; 172 // If this is a prefix byte, indirect to another table. 173 if (WasmInst->ET == ET_Prefix) { 174 WasmInst = nullptr; 175 // Linear search, so far only 4 entries. 176 for (const auto &[Prefix, Table] : PrefixTable) { 177 if (Prefix == Opc) { 178 WasmInst = Table; 179 break; 180 } 181 } 182 if (!WasmInst) 183 return MCDisassembler::Fail; 184 int64_t PrefixedOpc; 185 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 186 return MCDisassembler::Fail; 187 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 188 return MCDisassembler::Fail; 189 WasmInst += PrefixedOpc; 190 } 191 if (WasmInst->ET == ET_Unused) 192 return MCDisassembler::Fail; 193 // At this point we must have a valid instruction to decode. 194 assert(WasmInst->ET == ET_Instruction); 195 MI.setOpcode(WasmInst->Opcode); 196 // Parse any operands. 197 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 198 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 199 switch (OT) { 200 // ULEB operands: 201 case WebAssembly::OPERAND_BASIC_BLOCK: 202 case WebAssembly::OPERAND_LOCAL: 203 case WebAssembly::OPERAND_GLOBAL: 204 case WebAssembly::OPERAND_FUNCTION32: 205 case WebAssembly::OPERAND_TABLE: 206 case WebAssembly::OPERAND_OFFSET32: 207 case WebAssembly::OPERAND_OFFSET64: 208 case WebAssembly::OPERAND_P2ALIGN: 209 case WebAssembly::OPERAND_TYPEINDEX: 210 case WebAssembly::OPERAND_TAG: 211 case MCOI::OPERAND_IMMEDIATE: { 212 if (!parseLEBImmediate(MI, Size, Bytes, false)) 213 return MCDisassembler::Fail; 214 break; 215 } 216 // SLEB operands: 217 case WebAssembly::OPERAND_I32IMM: 218 case WebAssembly::OPERAND_I64IMM: { 219 if (!parseLEBImmediate(MI, Size, Bytes, true)) 220 return MCDisassembler::Fail; 221 break; 222 } 223 // block_type operands: 224 case WebAssembly::OPERAND_SIGNATURE: { 225 int64_t Val; 226 uint64_t PrevSize = Size; 227 if (!nextLEB(Val, Bytes, Size, true)) 228 return MCDisassembler::Fail; 229 if (Val < 0) { 230 // Negative values are single septet value types or empty types 231 if (Size != PrevSize + 1) { 232 MI.addOperand( 233 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 234 } else { 235 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 236 } 237 } else { 238 // We don't have access to the signature, so create a symbol without one 239 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 240 auto *WasmSym = cast<MCSymbolWasm>(Sym); 241 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 242 const MCExpr *Expr = MCSymbolRefExpr::create( 243 WasmSym, WebAssembly::S_TYPEINDEX, getContext()); 244 MI.addOperand(MCOperand::createExpr(Expr)); 245 } 246 break; 247 } 248 // FP operands. 249 case WebAssembly::OPERAND_F32IMM: { 250 if (!parseImmediate<float>(MI, Size, Bytes)) 251 return MCDisassembler::Fail; 252 break; 253 } 254 case WebAssembly::OPERAND_F64IMM: { 255 if (!parseImmediate<double>(MI, Size, Bytes)) 256 return MCDisassembler::Fail; 257 break; 258 } 259 // Vector lane operands (not LEB encoded). 260 case WebAssembly::OPERAND_VEC_I8IMM: { 261 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 262 return MCDisassembler::Fail; 263 break; 264 } 265 case WebAssembly::OPERAND_VEC_I16IMM: { 266 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 267 return MCDisassembler::Fail; 268 break; 269 } 270 case WebAssembly::OPERAND_VEC_I32IMM: { 271 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 272 return MCDisassembler::Fail; 273 break; 274 } 275 case WebAssembly::OPERAND_VEC_I64IMM: { 276 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 277 return MCDisassembler::Fail; 278 break; 279 } 280 case WebAssembly::OPERAND_BRLIST: { 281 int64_t TargetTableLen; 282 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 283 return MCDisassembler::Fail; 284 for (int64_t I = 0; I < TargetTableLen; I++) { 285 if (!parseLEBImmediate(MI, Size, Bytes, false)) 286 return MCDisassembler::Fail; 287 } 288 // Default case. 289 if (!parseLEBImmediate(MI, Size, Bytes, false)) 290 return MCDisassembler::Fail; 291 break; 292 } 293 case WebAssembly::OPERAND_CATCH_LIST: { 294 if (!parseLEBImmediate(MI, Size, Bytes, false)) 295 return MCDisassembler::Fail; 296 int64_t NumCatches = MI.getOperand(MI.getNumOperands() - 1).getImm(); 297 for (int64_t I = 0; I < NumCatches; I++) { 298 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 299 return MCDisassembler::Fail; 300 int64_t CatchOpcode = MI.getOperand(MI.getNumOperands() - 1).getImm(); 301 if (CatchOpcode == wasm::WASM_OPCODE_CATCH || 302 CatchOpcode == wasm::WASM_OPCODE_CATCH_REF) { 303 if (!parseLEBImmediate(MI, Size, Bytes, false)) // tag index 304 return MCDisassembler::Fail; 305 } 306 if (!parseLEBImmediate(MI, Size, Bytes, false)) // destination 307 return MCDisassembler::Fail; 308 } 309 break; 310 } 311 case MCOI::OPERAND_REGISTER: 312 // The tablegen header currently does not have any register operands since 313 // we use only the stack (_S) instructions. 314 // If you hit this that probably means a bad instruction definition in 315 // tablegen. 316 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 317 default: 318 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 319 } 320 } 321 return MCDisassembler::Success; 322 } 323