1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "MCTargetDesc/WebAssemblyInstPrinter.h" 18 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 19 #include "TargetInfo/WebAssemblyTargetInfo.h" 20 #include "llvm/MC/MCContext.h" 21 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 22 #include "llvm/MC/MCFixedLenDisassembler.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCSubtargetInfo.h" 26 #include "llvm/MC/MCSymbol.h" 27 #include "llvm/MC/MCSymbolWasm.h" 28 #include "llvm/Support/Endian.h" 29 #include "llvm/Support/LEB128.h" 30 #include "llvm/Support/TargetRegistry.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "wasm-disassembler" 35 36 using DecodeStatus = MCDisassembler::DecodeStatus; 37 38 #include "WebAssemblyGenDisassemblerTables.inc" 39 40 namespace { 41 static constexpr int WebAssemblyInstructionTableSize = 256; 42 43 class WebAssemblyDisassembler final : public MCDisassembler { 44 std::unique_ptr<const MCInstrInfo> MCII; 45 46 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 47 ArrayRef<uint8_t> Bytes, uint64_t Address, 48 raw_ostream &VStream, 49 raw_ostream &CStream) const override; 50 DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size, 51 ArrayRef<uint8_t> Bytes, uint64_t Address, 52 raw_ostream &VStream, 53 raw_ostream &CStream) const override; 54 55 public: 56 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 57 std::unique_ptr<const MCInstrInfo> MCII) 58 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 59 }; 60 } // end anonymous namespace 61 62 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 63 const MCSubtargetInfo &STI, 64 MCContext &Ctx) { 65 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 66 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 67 } 68 69 extern "C" void LLVMInitializeWebAssemblyDisassembler() { 70 // Register the disassembler for each target. 71 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 72 createWebAssemblyDisassembler); 73 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 74 createWebAssemblyDisassembler); 75 } 76 77 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 78 if (Size >= Bytes.size()) 79 return -1; 80 auto V = Bytes[Size]; 81 Size++; 82 return V; 83 } 84 85 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 86 bool Signed) { 87 unsigned N = 0; 88 const char *Error = nullptr; 89 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 90 Bytes.data() + Bytes.size(), &Error) 91 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 92 Bytes.data() + Bytes.size(), 93 &Error)); 94 if (Error) 95 return false; 96 Size += N; 97 return true; 98 } 99 100 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 101 ArrayRef<uint8_t> Bytes, bool Signed) { 102 int64_t Val; 103 if (!nextLEB(Val, Bytes, Size, Signed)) 104 return false; 105 MI.addOperand(MCOperand::createImm(Val)); 106 return true; 107 } 108 109 template <typename T> 110 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 111 if (Size + sizeof(T) > Bytes.size()) 112 return false; 113 T Val = support::endian::read<T, support::endianness::little, 1>( 114 Bytes.data() + Size); 115 Size += sizeof(T); 116 if (std::is_floating_point<T>::value) { 117 MI.addOperand(MCOperand::createFPImm(static_cast<double>(Val))); 118 } else { 119 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 120 } 121 return true; 122 } 123 124 MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart( 125 StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, 126 raw_ostream &VStream, raw_ostream &CStream) const { 127 Size = 0; 128 if (Address == 0) { 129 // Start of a code section: we're parsing only the function count. 130 int64_t FunctionCount; 131 if (!nextLEB(FunctionCount, Bytes, Size, false)) 132 return MCDisassembler::Fail; 133 outs() << " # " << FunctionCount << " functions in section."; 134 } else { 135 // Parse the start of a single function. 136 int64_t BodySize, LocalEntryCount; 137 if (!nextLEB(BodySize, Bytes, Size, false) || 138 !nextLEB(LocalEntryCount, Bytes, Size, false)) 139 return MCDisassembler::Fail; 140 if (LocalEntryCount) { 141 outs() << " .local "; 142 for (int64_t I = 0; I < LocalEntryCount; I++) { 143 int64_t Count, Type; 144 if (!nextLEB(Count, Bytes, Size, false) || 145 !nextLEB(Type, Bytes, Size, false)) 146 return MCDisassembler::Fail; 147 for (int64_t J = 0; J < Count; J++) { 148 if (I || J) 149 outs() << ", "; 150 outs() << WebAssembly::anyTypeToString(Type); 151 } 152 } 153 } 154 } 155 outs() << "\n"; 156 return MCDisassembler::Success; 157 } 158 159 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 160 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 161 raw_ostream & /*OS*/, raw_ostream &CS) const { 162 CommentStream = &CS; 163 Size = 0; 164 int Opc = nextByte(Bytes, Size); 165 if (Opc < 0) 166 return MCDisassembler::Fail; 167 const auto *WasmInst = &InstructionTable0[Opc]; 168 // If this is a prefix byte, indirect to another table. 169 if (WasmInst->ET == ET_Prefix) { 170 WasmInst = nullptr; 171 // Linear search, so far only 2 entries. 172 for (auto PT = PrefixTable; PT->Table; PT++) { 173 if (PT->Prefix == Opc) { 174 WasmInst = PT->Table; 175 break; 176 } 177 } 178 if (!WasmInst) 179 return MCDisassembler::Fail; 180 int64_t PrefixedOpc; 181 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 182 return MCDisassembler::Fail; 183 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 184 return MCDisassembler::Fail; 185 WasmInst += PrefixedOpc; 186 } 187 if (WasmInst->ET == ET_Unused) 188 return MCDisassembler::Fail; 189 // At this point we must have a valid instruction to decode. 190 assert(WasmInst->ET == ET_Instruction); 191 MI.setOpcode(WasmInst->Opcode); 192 // Parse any operands. 193 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 194 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 195 switch (OT) { 196 // ULEB operands: 197 case WebAssembly::OPERAND_BASIC_BLOCK: 198 case WebAssembly::OPERAND_LOCAL: 199 case WebAssembly::OPERAND_GLOBAL: 200 case WebAssembly::OPERAND_FUNCTION32: 201 case WebAssembly::OPERAND_OFFSET32: 202 case WebAssembly::OPERAND_P2ALIGN: 203 case WebAssembly::OPERAND_TYPEINDEX: 204 case WebAssembly::OPERAND_EVENT: 205 case MCOI::OPERAND_IMMEDIATE: { 206 if (!parseLEBImmediate(MI, Size, Bytes, false)) 207 return MCDisassembler::Fail; 208 break; 209 } 210 // SLEB operands: 211 case WebAssembly::OPERAND_I32IMM: 212 case WebAssembly::OPERAND_I64IMM: { 213 if (!parseLEBImmediate(MI, Size, Bytes, true)) 214 return MCDisassembler::Fail; 215 break; 216 } 217 // block_type operands: 218 case WebAssembly::OPERAND_SIGNATURE: { 219 int64_t Val; 220 uint64_t PrevSize = Size; 221 if (!nextLEB(Val, Bytes, Size, true)) 222 return MCDisassembler::Fail; 223 if (Val < 0) { 224 // Negative values are single septet value types or empty types 225 if (Size != PrevSize + 1) { 226 MI.addOperand( 227 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 228 } else { 229 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 230 } 231 } else { 232 // We don't have access to the signature, so create a symbol without one 233 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 234 auto *WasmSym = cast<MCSymbolWasm>(Sym); 235 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 236 const MCExpr *Expr = MCSymbolRefExpr::create( 237 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 238 MI.addOperand(MCOperand::createExpr(Expr)); 239 } 240 break; 241 } 242 // FP operands. 243 case WebAssembly::OPERAND_F32IMM: { 244 if (!parseImmediate<float>(MI, Size, Bytes)) 245 return MCDisassembler::Fail; 246 break; 247 } 248 case WebAssembly::OPERAND_F64IMM: { 249 if (!parseImmediate<double>(MI, Size, Bytes)) 250 return MCDisassembler::Fail; 251 break; 252 } 253 // Vector lane operands (not LEB encoded). 254 case WebAssembly::OPERAND_VEC_I8IMM: { 255 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 256 return MCDisassembler::Fail; 257 break; 258 } 259 case WebAssembly::OPERAND_VEC_I16IMM: { 260 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 261 return MCDisassembler::Fail; 262 break; 263 } 264 case WebAssembly::OPERAND_VEC_I32IMM: { 265 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 266 return MCDisassembler::Fail; 267 break; 268 } 269 case WebAssembly::OPERAND_VEC_I64IMM: { 270 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 271 return MCDisassembler::Fail; 272 break; 273 } 274 case WebAssembly::OPERAND_BRLIST: { 275 int64_t TargetTableLen; 276 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 277 return MCDisassembler::Fail; 278 for (int64_t I = 0; I < TargetTableLen; I++) { 279 if (!parseLEBImmediate(MI, Size, Bytes, false)) 280 return MCDisassembler::Fail; 281 } 282 // Default case. 283 if (!parseLEBImmediate(MI, Size, Bytes, false)) 284 return MCDisassembler::Fail; 285 break; 286 } 287 case MCOI::OPERAND_REGISTER: 288 // The tablegen header currently does not have any register operands since 289 // we use only the stack (_S) instructions. 290 // If you hit this that probably means a bad instruction definition in 291 // tablegen. 292 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 293 default: 294 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 295 } 296 } 297 return MCDisassembler::Success; 298 } 299