1 //==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file is part of the WebAssembly Disassembler. 11 /// 12 /// It contains code to translate the data produced by the decoder into 13 /// MCInsts. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #include "TargetInfo/WebAssemblyTargetInfo.h" 18 #include "Utils/WebAssemblyTypeUtilities.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDisassembler/MCDisassembler.h" 21 #include "llvm/MC/MCFixedLenDisassembler.h" 22 #include "llvm/MC/MCInst.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCSubtargetInfo.h" 25 #include "llvm/MC/MCSymbol.h" 26 #include "llvm/MC/MCSymbolWasm.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/LEB128.h" 29 #include "llvm/Support/TargetRegistry.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "wasm-disassembler" 34 35 using DecodeStatus = MCDisassembler::DecodeStatus; 36 37 #include "WebAssemblyGenDisassemblerTables.inc" 38 39 namespace { 40 static constexpr int WebAssemblyInstructionTableSize = 256; 41 42 class WebAssemblyDisassembler final : public MCDisassembler { 43 std::unique_ptr<const MCInstrInfo> MCII; 44 45 DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 46 ArrayRef<uint8_t> Bytes, uint64_t Address, 47 raw_ostream &CStream) const override; 48 Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 49 ArrayRef<uint8_t> Bytes, 50 uint64_t Address, 51 raw_ostream &CStream) const override; 52 53 public: 54 WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, 55 std::unique_ptr<const MCInstrInfo> MCII) 56 : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {} 57 }; 58 } // end anonymous namespace 59 60 static MCDisassembler *createWebAssemblyDisassembler(const Target &T, 61 const MCSubtargetInfo &STI, 62 MCContext &Ctx) { 63 std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo()); 64 return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII)); 65 } 66 67 extern "C" LLVM_EXTERNAL_VISIBILITY void 68 LLVMInitializeWebAssemblyDisassembler() { 69 // Register the disassembler for each target. 70 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(), 71 createWebAssemblyDisassembler); 72 TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(), 73 createWebAssemblyDisassembler); 74 } 75 76 static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) { 77 if (Size >= Bytes.size()) 78 return -1; 79 auto V = Bytes[Size]; 80 Size++; 81 return V; 82 } 83 84 static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size, 85 bool Signed) { 86 unsigned N = 0; 87 const char *Error = nullptr; 88 Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N, 89 Bytes.data() + Bytes.size(), &Error) 90 : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N, 91 Bytes.data() + Bytes.size(), 92 &Error)); 93 if (Error) 94 return false; 95 Size += N; 96 return true; 97 } 98 99 static bool parseLEBImmediate(MCInst &MI, uint64_t &Size, 100 ArrayRef<uint8_t> Bytes, bool Signed) { 101 int64_t Val; 102 if (!nextLEB(Val, Bytes, Size, Signed)) 103 return false; 104 MI.addOperand(MCOperand::createImm(Val)); 105 return true; 106 } 107 108 template <typename T> 109 bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) { 110 if (Size + sizeof(T) > Bytes.size()) 111 return false; 112 T Val = support::endian::read<T, support::endianness::little, 1>( 113 Bytes.data() + Size); 114 Size += sizeof(T); 115 if (std::is_floating_point<T>::value) { 116 MI.addOperand( 117 MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val)))); 118 } else { 119 MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val))); 120 } 121 return true; 122 } 123 124 Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart( 125 SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes, 126 uint64_t Address, raw_ostream &CStream) const { 127 Size = 0; 128 if (Address == 0) { 129 // Start of a code section: we're parsing only the function count. 130 int64_t FunctionCount; 131 if (!nextLEB(FunctionCount, Bytes, Size, false)) 132 return None; 133 outs() << " # " << FunctionCount << " functions in section."; 134 } else { 135 // Parse the start of a single function. 136 int64_t BodySize, LocalEntryCount; 137 if (!nextLEB(BodySize, Bytes, Size, false) || 138 !nextLEB(LocalEntryCount, Bytes, Size, false)) 139 return None; 140 if (LocalEntryCount) { 141 outs() << " .local "; 142 for (int64_t I = 0; I < LocalEntryCount; I++) { 143 int64_t Count, Type; 144 if (!nextLEB(Count, Bytes, Size, false) || 145 !nextLEB(Type, Bytes, Size, false)) 146 return None; 147 for (int64_t J = 0; J < Count; J++) { 148 if (I || J) 149 outs() << ", "; 150 outs() << WebAssembly::anyTypeToString(Type); 151 } 152 } 153 } 154 } 155 outs() << "\n"; 156 return MCDisassembler::Success; 157 } 158 159 MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( 160 MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/, 161 raw_ostream &CS) const { 162 CommentStream = &CS; 163 Size = 0; 164 int Opc = nextByte(Bytes, Size); 165 if (Opc < 0) 166 return MCDisassembler::Fail; 167 const auto *WasmInst = &InstructionTable0[Opc]; 168 // If this is a prefix byte, indirect to another table. 169 if (WasmInst->ET == ET_Prefix) { 170 WasmInst = nullptr; 171 // Linear search, so far only 2 entries. 172 for (auto PT = PrefixTable; PT->Table; PT++) { 173 if (PT->Prefix == Opc) { 174 WasmInst = PT->Table; 175 break; 176 } 177 } 178 if (!WasmInst) 179 return MCDisassembler::Fail; 180 int64_t PrefixedOpc; 181 if (!nextLEB(PrefixedOpc, Bytes, Size, false)) 182 return MCDisassembler::Fail; 183 if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize) 184 return MCDisassembler::Fail; 185 WasmInst += PrefixedOpc; 186 } 187 if (WasmInst->ET == ET_Unused) 188 return MCDisassembler::Fail; 189 // At this point we must have a valid instruction to decode. 190 assert(WasmInst->ET == ET_Instruction); 191 MI.setOpcode(WasmInst->Opcode); 192 // Parse any operands. 193 for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) { 194 auto OT = OperandTable[WasmInst->OperandStart + OPI]; 195 switch (OT) { 196 // ULEB operands: 197 case WebAssembly::OPERAND_BASIC_BLOCK: 198 case WebAssembly::OPERAND_LOCAL: 199 case WebAssembly::OPERAND_GLOBAL: 200 case WebAssembly::OPERAND_FUNCTION32: 201 case WebAssembly::OPERAND_TABLE: 202 case WebAssembly::OPERAND_OFFSET32: 203 case WebAssembly::OPERAND_OFFSET64: 204 case WebAssembly::OPERAND_P2ALIGN: 205 case WebAssembly::OPERAND_TYPEINDEX: 206 case WebAssembly::OPERAND_TAG: 207 case MCOI::OPERAND_IMMEDIATE: { 208 if (!parseLEBImmediate(MI, Size, Bytes, false)) 209 return MCDisassembler::Fail; 210 break; 211 } 212 // SLEB operands: 213 case WebAssembly::OPERAND_I32IMM: 214 case WebAssembly::OPERAND_I64IMM: { 215 if (!parseLEBImmediate(MI, Size, Bytes, true)) 216 return MCDisassembler::Fail; 217 break; 218 } 219 // block_type operands: 220 case WebAssembly::OPERAND_SIGNATURE: { 221 int64_t Val; 222 uint64_t PrevSize = Size; 223 if (!nextLEB(Val, Bytes, Size, true)) 224 return MCDisassembler::Fail; 225 if (Val < 0) { 226 // Negative values are single septet value types or empty types 227 if (Size != PrevSize + 1) { 228 MI.addOperand( 229 MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid))); 230 } else { 231 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 232 } 233 } else { 234 // We don't have access to the signature, so create a symbol without one 235 MCSymbol *Sym = getContext().createTempSymbol("typeindex", true); 236 auto *WasmSym = cast<MCSymbolWasm>(Sym); 237 WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); 238 const MCExpr *Expr = MCSymbolRefExpr::create( 239 WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext()); 240 MI.addOperand(MCOperand::createExpr(Expr)); 241 } 242 break; 243 } 244 // heap_type operands, for e.g. ref.null: 245 case WebAssembly::OPERAND_HEAPTYPE: { 246 int64_t Val; 247 uint64_t PrevSize = Size; 248 if (!nextLEB(Val, Bytes, Size, true)) 249 return MCDisassembler::Fail; 250 if (Val < 0 && Size == PrevSize + 1) { 251 // The HeapType encoding is like BlockType, in that encodings that 252 // decode as negative values indicate ValTypes. In practice we expect 253 // either wasm::ValType::EXTERNREF or wasm::ValType::FUNCREF here. 254 // 255 // The positive SLEB values are reserved for future expansion and are 256 // expected to be type indices in the typed function references 257 // proposal, and should disassemble as MCSymbolRefExpr as in BlockType 258 // above. 259 MI.addOperand(MCOperand::createImm(Val & 0x7f)); 260 } else { 261 MI.addOperand( 262 MCOperand::createImm(int64_t(WebAssembly::HeapType::Invalid))); 263 } 264 break; 265 } 266 // FP operands. 267 case WebAssembly::OPERAND_F32IMM: { 268 if (!parseImmediate<float>(MI, Size, Bytes)) 269 return MCDisassembler::Fail; 270 break; 271 } 272 case WebAssembly::OPERAND_F64IMM: { 273 if (!parseImmediate<double>(MI, Size, Bytes)) 274 return MCDisassembler::Fail; 275 break; 276 } 277 // Vector lane operands (not LEB encoded). 278 case WebAssembly::OPERAND_VEC_I8IMM: { 279 if (!parseImmediate<uint8_t>(MI, Size, Bytes)) 280 return MCDisassembler::Fail; 281 break; 282 } 283 case WebAssembly::OPERAND_VEC_I16IMM: { 284 if (!parseImmediate<uint16_t>(MI, Size, Bytes)) 285 return MCDisassembler::Fail; 286 break; 287 } 288 case WebAssembly::OPERAND_VEC_I32IMM: { 289 if (!parseImmediate<uint32_t>(MI, Size, Bytes)) 290 return MCDisassembler::Fail; 291 break; 292 } 293 case WebAssembly::OPERAND_VEC_I64IMM: { 294 if (!parseImmediate<uint64_t>(MI, Size, Bytes)) 295 return MCDisassembler::Fail; 296 break; 297 } 298 case WebAssembly::OPERAND_BRLIST: { 299 int64_t TargetTableLen; 300 if (!nextLEB(TargetTableLen, Bytes, Size, false)) 301 return MCDisassembler::Fail; 302 for (int64_t I = 0; I < TargetTableLen; I++) { 303 if (!parseLEBImmediate(MI, Size, Bytes, false)) 304 return MCDisassembler::Fail; 305 } 306 // Default case. 307 if (!parseLEBImmediate(MI, Size, Bytes, false)) 308 return MCDisassembler::Fail; 309 break; 310 } 311 case MCOI::OPERAND_REGISTER: 312 // The tablegen header currently does not have any register operands since 313 // we use only the stack (_S) instructions. 314 // If you hit this that probably means a bad instruction definition in 315 // tablegen. 316 llvm_unreachable("Register operand in WebAssemblyDisassembler"); 317 default: 318 llvm_unreachable("Unknown operand type in WebAssemblyDisassembler"); 319 } 320 } 321 return MCDisassembler::Success; 322 } 323