1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h" 18 #include "TargetInfo/WebAssemblyTargetInfo.h" 19 #include "llvm/BinaryFormat/Wasm.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCDecoderOps.h" 22 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/MC/MCSymbol.h" 27 #include "llvm/MC/MCSymbolWasm.h" 28 #include "llvm/MC/TargetRegistry.h" 29 #include "llvm/Support/Casting.h" 30 #include "llvm/Support/Endian.h" 31 #include "llvm/Support/LEB128.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "wasm-disassembler" 36 37 using DecodeStatus = MCDisassembler::DecodeStatus; 38 39 #include "WebAssemblyGenDisassemblerTables.inc" 40 41 namespace { 42 static constexpr int WebAssemblyInstructionTableSize = 256; 43 44 class WebAssemblyDisassembler final : public MCDisassembler { 45 std::unique_ptr<const MCInstrInfo> MCII; 46 47 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 48 ArrayRef<uint8_t> Bytes, uint64_t Address, 49 raw_ostream &CStream) const override; 50 51 Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 52 ArrayRef<uint8_t> Bytes, 53 uint64_t Address) const override; 54 55 public: 56 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 57 std::unique_ptr<const MCInstrInfo> MCII) 58 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 59 }; 60 } // end anonymous namespace 61 62 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 63 const MCSubtargetInfo &STI, 64 MCContext &Ctx) { 65 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 66 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 67 } 68 69 extern "C" LLVM_EXTERNAL_VISIBILITY void 70 LLVMInitializeWebAssemblyDisassembler() { 71 // Register the disassembler for each target. 72 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 73 createWebAssemblyDisassembler); 74 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 75 createWebAssemblyDisassembler); 76 } 77 78 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 79 if (Size >= Bytes.size()) 80 return -1; 81 auto V = Bytes[Size]; 82 Size++; 83 return V; 84 } 85 86 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 87 bool Signed) { 88 unsigned N = 0; 89 const char *Error = nullptr; 90 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 91 Bytes.data() + Bytes.size(), &Error) 92 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 93 Bytes.data() + Bytes.size(), 94 &Error)); 95 if (Error) 96 return false; 97 Size += N; 98 return true; 99 } 100 101 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 102 ArrayRef<uint8_t> Bytes, bool Signed) { 103 int64_t Val; 104 if (!nextLEB(Val, Bytes, Size, Signed)) 105 return false; 106 MI.addOperand(MCOperand::createImm(Val)); 107 return true; 108 } 109 110 template <typename T> 111 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 112 if (Size + sizeof(T) > Bytes.size()) 113 return false; 114 T Val = 115 support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size); 116 Size += sizeof(T); 117 if (std::is_floating_point<T>::value) { 118 MI.addOperand( 119 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 120 } else { 121 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 122 } 123 return true; 124 } 125 126 Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, 127 uint64_t &Size, 128 ArrayRef<uint8_t> Bytes, 129 uint64_t Address) const { 130 Size = 0; 131 if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) { 132 // Start of a code section: we're parsing only the function count. 133 int64_t FunctionCount; 134 if (!nextLEB(FunctionCount, Bytes, Size, false)) 135 return false; 136 outs() << " # " << FunctionCount << " functions in section."; 137 } else { 138 // Parse the start of a single function. 139 int64_t BodySize, LocalEntryCount; 140 if (!nextLEB(BodySize, Bytes, Size, false) || 141 !nextLEB(LocalEntryCount, Bytes, Size, false)) 142 return false; 143 if (LocalEntryCount) { 144 outs() << " .local "; 145 for (int64_t I = 0; I < LocalEntryCount; I++) { 146 int64_t Count, Type; 147 if (!nextLEB(Count, Bytes, Size, false) || 148 !nextLEB(Type, Bytes, Size, false)) 149 return false; 150 for (int64_t J = 0; J < Count; J++) { 151 if (I || J) 152 outs() << ", "; 153 outs() << WebAssembly::anyTypeToString(Type); 154 } 155 } 156 } 157 } 158 outs() << "\n"; 159 return true; 160 } 161 162 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 163 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 164 raw_ostream &CS) const { 165 CommentStream = &CS; 166 Size = 0; 167 int Opc = nextByte(Bytes, Size); 168 if (Opc < 0) 169 return MCDisassembler::Fail; 170 const auto *WasmInst = &InstructionTable0[Opc]; 171 // If this is a prefix byte, indirect to another table. 172 if (WasmInst->ET == ET_Prefix) { 173 WasmInst = nullptr; 174 // Linear search, so far only 2 entries. 175 for (auto PT = PrefixTable; PT->Table; PT++) { 176 if (PT->Prefix == Opc) { 177 WasmInst = PT->Table; 178 break; 179 } 180 } 181 if (!WasmInst) 182 return MCDisassembler::Fail; 183 int64_t PrefixedOpc; 184 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 185 return MCDisassembler::Fail; 186 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 187 return MCDisassembler::Fail; 188 WasmInst += PrefixedOpc; 189 } 190 if (WasmInst->ET == ET_Unused) 191 return MCDisassembler::Fail; 192 // At this point we must have a valid instruction to decode. 193 assert(WasmInst->ET == ET_Instruction); 194 MI.setOpcode(WasmInst->Opcode); 195 // Parse any operands. 196 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 197 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 198 switch (OT) { 199 // ULEB operands: 200 case WebAssembly::OPERAND_BASIC_BLOCK: 201 case WebAssembly::OPERAND_LOCAL: 202 case WebAssembly::OPERAND_GLOBAL: 203 case WebAssembly::OPERAND_FUNCTION32: 204 case WebAssembly::OPERAND_TABLE: 205 case WebAssembly::OPERAND_OFFSET32: 206 case WebAssembly::OPERAND_OFFSET64: 207 case WebAssembly::OPERAND_P2ALIGN: 208 case WebAssembly::OPERAND_TYPEINDEX: 209 case WebAssembly::OPERAND_TAG: 210 case MCOI::OPERAND_IMMEDIATE: { 211 if (!parseLEBImmediate(MI, Size, Bytes, false)) 212 return MCDisassembler::Fail; 213 break; 214 } 215 // SLEB operands: 216 case WebAssembly::OPERAND_I32IMM: 217 case WebAssembly::OPERAND_I64IMM: { 218 if (!parseLEBImmediate(MI, Size, Bytes, true)) 219 return MCDisassembler::Fail; 220 break; 221 } 222 // block_type operands: 223 case WebAssembly::OPERAND_SIGNATURE: { 224 int64_t Val; 225 uint64_t PrevSize = Size; 226 if (!nextLEB(Val, Bytes, Size, true)) 227 return MCDisassembler::Fail; 228 if (Val < 0) { 229 // Negative values are single septet value types or empty types 230 if (Size != PrevSize + 1) { 231 MI.addOperand( 232 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 233 } else { 234 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 235 } 236 } else { 237 // We don't have access to the signature, so create a symbol without one 238 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 239 auto *WasmSym = cast<MCSymbolWasm>(Sym); 240 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 241 const MCExpr *Expr = MCSymbolRefExpr::create( 242 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 243 MI.addOperand(MCOperand::createExpr(Expr)); 244 } 245 break; 246 } 247 // FP operands. 248 case WebAssembly::OPERAND_F32IMM: { 249 if (!parseImmediate<float>(MI, Size, Bytes)) 250 return MCDisassembler::Fail; 251 break; 252 } 253 case WebAssembly::OPERAND_F64IMM: { 254 if (!parseImmediate<double>(MI, Size, Bytes)) 255 return MCDisassembler::Fail; 256 break; 257 } 258 // Vector lane operands (not LEB encoded). 259 case WebAssembly::OPERAND_VEC_I8IMM: { 260 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 261 return MCDisassembler::Fail; 262 break; 263 } 264 case WebAssembly::OPERAND_VEC_I16IMM: { 265 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 266 return MCDisassembler::Fail; 267 break; 268 } 269 case WebAssembly::OPERAND_VEC_I32IMM: { 270 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 271 return MCDisassembler::Fail; 272 break; 273 } 274 case WebAssembly::OPERAND_VEC_I64IMM: { 275 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 276 return MCDisassembler::Fail; 277 break; 278 } 279 case WebAssembly::OPERAND_BRLIST: { 280 int64_t TargetTableLen; 281 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 282 return MCDisassembler::Fail; 283 for (int64_t I = 0; I < TargetTableLen; I++) { 284 if (!parseLEBImmediate(MI, Size, Bytes, false)) 285 return MCDisassembler::Fail; 286 } 287 // Default case. 288 if (!parseLEBImmediate(MI, Size, Bytes, false)) 289 return MCDisassembler::Fail; 290 break; 291 } 292 case MCOI::OPERAND_REGISTER: 293 // The tablegen header currently does not have any register operands since 294 // we use only the stack (_S) instructions. 295 // If you hit this that probably means a bad instruction definition in 296 // tablegen. 297 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 298 default: 299 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 300 } 301 } 302 return MCDisassembler::Success; 303 } 304