1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "TargetInfo/WebAssemblyTargetInfo.h" 18 #include "Utils/WebAssemblyTypeUtilities.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDecoderOps.h" 21 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCSymbol.h" 26 #include "llvm/MC/MCSymbolWasm.h" 27 #include "llvm/MC/TargetRegistry.h" 28 #include "llvm/Support/Endian.h" 29 #include "llvm/Support/LEB128.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "wasm-disassembler" 34 35 using DecodeStatus = MCDisassembler::DecodeStatus; 36 37 #include "WebAssemblyGenDisassemblerTables.inc" 38 39 namespace { 40 static constexpr int WebAssemblyInstructionTableSize = 256; 41 42 class WebAssemblyDisassembler final : public MCDisassembler { 43 std::unique_ptr<const MCInstrInfo> MCII; 44 45 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 46 ArrayRef<uint8_t> Bytes, uint64_t Address, 47 raw_ostream &CStream) const override; 48 std::optional<DecodeStatus> 49 onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 50 uint64_t Address, raw_ostream &CStream) const override; 51 52 public: 53 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 54 std::unique_ptr<const MCInstrInfo> MCII) 55 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 56 }; 57 } // end anonymous namespace 58 59 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 60 const MCSubtargetInfo &STI, 61 MCContext &Ctx) { 62 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 63 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 64 } 65 66 extern "C" LLVM_EXTERNAL_VISIBILITY void 67 LLVMInitializeWebAssemblyDisassembler() { 68 // Register the disassembler for each target. 69 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 70 createWebAssemblyDisassembler); 71 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 72 createWebAssemblyDisassembler); 73 } 74 75 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 76 if (Size >= Bytes.size()) 77 return -1; 78 auto V = Bytes[Size]; 79 Size++; 80 return V; 81 } 82 83 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 84 bool Signed) { 85 unsigned N = 0; 86 const char *Error = nullptr; 87 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 88 Bytes.data() + Bytes.size(), &Error) 89 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 90 Bytes.data() + Bytes.size(), 91 &Error)); 92 if (Error) 93 return false; 94 Size += N; 95 return true; 96 } 97 98 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 99 ArrayRef<uint8_t> Bytes, bool Signed) { 100 int64_t Val; 101 if (!nextLEB(Val, Bytes, Size, Signed)) 102 return false; 103 MI.addOperand(MCOperand::createImm(Val)); 104 return true; 105 } 106 107 template <typename T> 108 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 109 if (Size + sizeof(T) > Bytes.size()) 110 return false; 111 T Val = support::endian::read<T, support::endianness::little, 1>( 112 Bytes.data() + Size); 113 Size += sizeof(T); 114 if (std::is_floating_point<T>::value) { 115 MI.addOperand( 116 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 117 } else { 118 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 119 } 120 return true; 121 } 122 123 std::optional<MCDisassembler::DecodeStatus> 124 WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 125 ArrayRef<uint8_t> Bytes, 126 uint64_t Address, 127 raw_ostream &CStream) const { 128 Size = 0; 129 if (Address == 0) { 130 // Start of a code section: we're parsing only the function count. 131 int64_t FunctionCount; 132 if (!nextLEB(FunctionCount, Bytes, Size, false)) 133 return std::nullopt; 134 outs() << " # " << FunctionCount << " functions in section."; 135 } else { 136 // Parse the start of a single function. 137 int64_t BodySize, LocalEntryCount; 138 if (!nextLEB(BodySize, Bytes, Size, false) || 139 !nextLEB(LocalEntryCount, Bytes, Size, false)) 140 return std::nullopt; 141 if (LocalEntryCount) { 142 outs() << " .local "; 143 for (int64_t I = 0; I < LocalEntryCount; I++) { 144 int64_t Count, Type; 145 if (!nextLEB(Count, Bytes, Size, false) || 146 !nextLEB(Type, Bytes, Size, false)) 147 return std::nullopt; 148 for (int64_t J = 0; J < Count; J++) { 149 if (I || J) 150 outs() << ", "; 151 outs() << WebAssembly::anyTypeToString(Type); 152 } 153 } 154 } 155 } 156 outs() << "\n"; 157 return MCDisassembler::Success; 158 } 159 160 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 161 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 162 raw_ostream &CS) const { 163 CommentStream = &CS; 164 Size = 0; 165 int Opc = nextByte(Bytes, Size); 166 if (Opc < 0) 167 return MCDisassembler::Fail; 168 const auto *WasmInst = &InstructionTable0[Opc]; 169 // If this is a prefix byte, indirect to another table. 170 if (WasmInst->ET == ET_Prefix) { 171 WasmInst = nullptr; 172 // Linear search, so far only 2 entries. 173 for (auto PT = PrefixTable; PT->Table; PT++) { 174 if (PT->Prefix == Opc) { 175 WasmInst = PT->Table; 176 break; 177 } 178 } 179 if (!WasmInst) 180 return MCDisassembler::Fail; 181 int64_t PrefixedOpc; 182 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 183 return MCDisassembler::Fail; 184 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 185 return MCDisassembler::Fail; 186 WasmInst += PrefixedOpc; 187 } 188 if (WasmInst->ET == ET_Unused) 189 return MCDisassembler::Fail; 190 // At this point we must have a valid instruction to decode. 191 assert(WasmInst->ET == ET_Instruction); 192 MI.setOpcode(WasmInst->Opcode); 193 // Parse any operands. 194 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 195 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 196 switch (OT) { 197 // ULEB operands: 198 case WebAssembly::OPERAND_BASIC_BLOCK: 199 case WebAssembly::OPERAND_LOCAL: 200 case WebAssembly::OPERAND_GLOBAL: 201 case WebAssembly::OPERAND_FUNCTION32: 202 case WebAssembly::OPERAND_TABLE: 203 case WebAssembly::OPERAND_OFFSET32: 204 case WebAssembly::OPERAND_OFFSET64: 205 case WebAssembly::OPERAND_P2ALIGN: 206 case WebAssembly::OPERAND_TYPEINDEX: 207 case WebAssembly::OPERAND_TAG: 208 case MCOI::OPERAND_IMMEDIATE: { 209 if (!parseLEBImmediate(MI, Size, Bytes, false)) 210 return MCDisassembler::Fail; 211 break; 212 } 213 // SLEB operands: 214 case WebAssembly::OPERAND_I32IMM: 215 case WebAssembly::OPERAND_I64IMM: { 216 if (!parseLEBImmediate(MI, Size, Bytes, true)) 217 return MCDisassembler::Fail; 218 break; 219 } 220 // block_type operands: 221 case WebAssembly::OPERAND_SIGNATURE: { 222 int64_t Val; 223 uint64_t PrevSize = Size; 224 if (!nextLEB(Val, Bytes, Size, true)) 225 return MCDisassembler::Fail; 226 if (Val < 0) { 227 // Negative values are single septet value types or empty types 228 if (Size != PrevSize + 1) { 229 MI.addOperand( 230 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 231 } else { 232 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 233 } 234 } else { 235 // We don't have access to the signature, so create a symbol without one 236 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 237 auto *WasmSym = cast<MCSymbolWasm>(Sym); 238 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 239 const MCExpr *Expr = MCSymbolRefExpr::create( 240 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 241 MI.addOperand(MCOperand::createExpr(Expr)); 242 } 243 break; 244 } 245 // FP operands. 246 case WebAssembly::OPERAND_F32IMM: { 247 if (!parseImmediate<float>(MI, Size, Bytes)) 248 return MCDisassembler::Fail; 249 break; 250 } 251 case WebAssembly::OPERAND_F64IMM: { 252 if (!parseImmediate<double>(MI, Size, Bytes)) 253 return MCDisassembler::Fail; 254 break; 255 } 256 // Vector lane operands (not LEB encoded). 257 case WebAssembly::OPERAND_VEC_I8IMM: { 258 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 259 return MCDisassembler::Fail; 260 break; 261 } 262 case WebAssembly::OPERAND_VEC_I16IMM: { 263 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 264 return MCDisassembler::Fail; 265 break; 266 } 267 case WebAssembly::OPERAND_VEC_I32IMM: { 268 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 269 return MCDisassembler::Fail; 270 break; 271 } 272 case WebAssembly::OPERAND_VEC_I64IMM: { 273 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 274 return MCDisassembler::Fail; 275 break; 276 } 277 case WebAssembly::OPERAND_BRLIST: { 278 int64_t TargetTableLen; 279 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 280 return MCDisassembler::Fail; 281 for (int64_t I = 0; I < TargetTableLen; I++) { 282 if (!parseLEBImmediate(MI, Size, Bytes, false)) 283 return MCDisassembler::Fail; 284 } 285 // Default case. 286 if (!parseLEBImmediate(MI, Size, Bytes, false)) 287 return MCDisassembler::Fail; 288 break; 289 } 290 case MCOI::OPERAND_REGISTER: 291 // The tablegen header currently does not have any register operands since 292 // we use only the stack (_S) instructions. 293 // If you hit this that probably means a bad instruction definition in 294 // tablegen. 295 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 296 default: 297 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 298 } 299 } 300 return MCDisassembler::Success; 301 } 302