xref: /freebsd/contrib/llvm-project/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //==- WebAssemblyAsmParser.cpp - Assembler for WebAssembly -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file is part of the WebAssembly Assembler.
11 ///
12 /// It contains code to translate a parsed .s file into MCInsts.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "AsmParser/WebAssemblyAsmTypeCheck.h"
17 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
18 #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
19 #include "MCTargetDesc/WebAssemblyTargetStreamer.h"
20 #include "TargetInfo/WebAssemblyTargetInfo.h"
21 #include "WebAssembly.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
29 #include "llvm/MC/MCSectionWasm.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCSymbol.h"
33 #include "llvm/MC/MCSymbolWasm.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/SourceMgr.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "wasm-asm-parser"
40 
41 static const char *getSubtargetFeatureName(uint64_t Val);
42 
43 namespace {
44 
45 /// WebAssemblyOperand - Instances of this class represent the operands in a
46 /// parsed Wasm machine instruction.
47 struct WebAssemblyOperand : public MCParsedAsmOperand {
48   enum KindTy { Token, Integer, Float, Symbol, BrList } Kind;
49 
50   SMLoc StartLoc, EndLoc;
51 
52   struct TokOp {
53     StringRef Tok;
54   };
55 
56   struct IntOp {
57     int64_t Val;
58   };
59 
60   struct FltOp {
61     double Val;
62   };
63 
64   struct SymOp {
65     const MCExpr *Exp;
66   };
67 
68   struct BrLOp {
69     std::vector<unsigned> List;
70   };
71 
72   union {
73     struct TokOp Tok;
74     struct IntOp Int;
75     struct FltOp Flt;
76     struct SymOp Sym;
77     struct BrLOp BrL;
78   };
79 
80   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, TokOp T)
81       : Kind(K), StartLoc(Start), EndLoc(End), Tok(T) {}
82   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, IntOp I)
83       : Kind(K), StartLoc(Start), EndLoc(End), Int(I) {}
84   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, FltOp F)
85       : Kind(K), StartLoc(Start), EndLoc(End), Flt(F) {}
86   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End, SymOp S)
87       : Kind(K), StartLoc(Start), EndLoc(End), Sym(S) {}
88   WebAssemblyOperand(KindTy K, SMLoc Start, SMLoc End)
89       : Kind(K), StartLoc(Start), EndLoc(End), BrL() {}
90 
91   ~WebAssemblyOperand() {
92     if (isBrList())
93       BrL.~BrLOp();
94   }
95 
96   bool isToken() const override { return Kind == Token; }
97   bool isImm() const override { return Kind == Integer || Kind == Symbol; }
98   bool isFPImm() const { return Kind == Float; }
99   bool isMem() const override { return false; }
100   bool isReg() const override { return false; }
101   bool isBrList() const { return Kind == BrList; }
102 
103   unsigned getReg() const override {
104     llvm_unreachable("Assembly inspects a register operand");
105     return 0;
106   }
107 
108   StringRef getToken() const {
109     assert(isToken());
110     return Tok.Tok;
111   }
112 
113   SMLoc getStartLoc() const override { return StartLoc; }
114   SMLoc getEndLoc() const override { return EndLoc; }
115 
116   void addRegOperands(MCInst &, unsigned) const {
117     // Required by the assembly matcher.
118     llvm_unreachable("Assembly matcher creates register operands");
119   }
120 
121   void addImmOperands(MCInst &Inst, unsigned N) const {
122     assert(N == 1 && "Invalid number of operands!");
123     if (Kind == Integer)
124       Inst.addOperand(MCOperand::createImm(Int.Val));
125     else if (Kind == Symbol)
126       Inst.addOperand(MCOperand::createExpr(Sym.Exp));
127     else
128       llvm_unreachable("Should be integer immediate or symbol!");
129   }
130 
131   void addFPImmf32Operands(MCInst &Inst, unsigned N) const {
132     assert(N == 1 && "Invalid number of operands!");
133     if (Kind == Float)
134       Inst.addOperand(
135           MCOperand::createSFPImm(bit_cast<uint32_t>(float(Flt.Val))));
136     else
137       llvm_unreachable("Should be float immediate!");
138   }
139 
140   void addFPImmf64Operands(MCInst &Inst, unsigned N) const {
141     assert(N == 1 && "Invalid number of operands!");
142     if (Kind == Float)
143       Inst.addOperand(MCOperand::createDFPImm(bit_cast<uint64_t>(Flt.Val)));
144     else
145       llvm_unreachable("Should be float immediate!");
146   }
147 
148   void addBrListOperands(MCInst &Inst, unsigned N) const {
149     assert(N == 1 && isBrList() && "Invalid BrList!");
150     for (auto Br : BrL.List)
151       Inst.addOperand(MCOperand::createImm(Br));
152   }
153 
154   void print(raw_ostream &OS) const override {
155     switch (Kind) {
156     case Token:
157       OS << "Tok:" << Tok.Tok;
158       break;
159     case Integer:
160       OS << "Int:" << Int.Val;
161       break;
162     case Float:
163       OS << "Flt:" << Flt.Val;
164       break;
165     case Symbol:
166       OS << "Sym:" << Sym.Exp;
167       break;
168     case BrList:
169       OS << "BrList:" << BrL.List.size();
170       break;
171     }
172   }
173 };
174 
175 // Perhaps this should go somewhere common.
176 static wasm::WasmLimits DefaultLimits() {
177   return {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
178 }
179 
180 static MCSymbolWasm *GetOrCreateFunctionTableSymbol(MCContext &Ctx,
181                                                     const StringRef &Name) {
182   MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(Name));
183   if (Sym) {
184     if (!Sym->isFunctionTable())
185       Ctx.reportError(SMLoc(), "symbol is not a wasm funcref table");
186   } else {
187     Sym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(Name));
188     Sym->setFunctionTable();
189     // The default function table is synthesized by the linker.
190     Sym->setUndefined();
191   }
192   return Sym;
193 }
194 
195 class WebAssemblyAsmParser final : public MCTargetAsmParser {
196   MCAsmParser &Parser;
197   MCAsmLexer &Lexer;
198 
199   // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
200   std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
201   std::vector<std::unique_ptr<std::string>> Names;
202 
  // The order of labels, directives and instructions in a .s file has no
  // syntactical enforcement. This class is a callback from the actual parser,
  // and yet we have to be feeding data to the streamer in a very particular
  // order to ensure a correct binary encoding that matches the regular backend
  // (the streamer does not enforce this). This "state machine" enum helps
  // guarantee that correct order.
  enum ParserState {
    FileStart, // Initial state.
    FunctionLabel,
    FunctionStart,
    FunctionLocals,
    Instructions,
    EndFunction, // Entered when an end_function instruction is parsed.
    DataSection, // Entered by CheckDataSection().
  } CurrentState = FileStart;
218 
  // For ensuring blocks are properly nested. Each value identifies the
  // construct that opened a scope on the nesting stack.
  enum NestingType {
    Function,  // Closed by end_function.
    Block,     // Opened by block, closed by end_block.
    Loop,      // Opened by loop, closed by end_loop.
    Try,       // Opened by try (and re-entered after catch); closed by
               // end_try or delegate.
    CatchAll,  // Replaces Try once catch_all is seen; closed by end_try.
    If,        // Opened by if, closed by end_if.
    Else,      // Replaces If once else is seen; closed by end_if.
    Undefined, // Default for pop()'s second accepted kind, i.e. "none".
  };
  // One entry of the nesting stack: the kind of construct that was opened and
  // the signature recorded for it.
  struct Nested {
    NestingType NT;
    wasm::WasmSignature Sig;
  };
234   std::vector<Nested> NestingStack;
235 
236   MCSymbolWasm *DefaultFunctionTable = nullptr;
237   MCSymbol *LastFunctionLabel = nullptr;
238 
239   bool is64;
240 
241   WebAssemblyAsmTypeCheck TC;
242   // Don't type check if -no-type-check was set.
243   bool SkipTypeCheck;
244 
245 public:
246   WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
247                        const MCInstrInfo &MII, const MCTargetOptions &Options)
248       : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
249         Lexer(Parser.getLexer()), is64(STI.getTargetTriple().isArch64Bit()),
250         TC(Parser, MII, is64), SkipTypeCheck(Options.MCNoTypeCheck) {
251     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
252     // Don't type check if this is inline asm, since that is a naked sequence of
253     // instructions without a function/locals decl.
254     auto &SM = Parser.getSourceManager();
255     auto BufferName =
256         SM.getBufferInfo(SM.getMainFileID()).Buffer->getBufferIdentifier();
257     if (BufferName == "<inline asm>")
258       SkipTypeCheck = true;
259   }
260 
261   void Initialize(MCAsmParser &Parser) override {
262     MCAsmParserExtension::Initialize(Parser);
263 
264     DefaultFunctionTable = GetOrCreateFunctionTableSymbol(
265         getContext(), "__indirect_function_table");
266     if (!STI->checkFeatures("+reference-types"))
267       DefaultFunctionTable->setOmitFromLinkingSection();
268   }
269 
270 #define GET_ASSEMBLER_HEADER
271 #include "WebAssemblyGenAsmMatcher.inc"
272 
273   // TODO: This is required to be implemented, but appears unused.
274   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override {
275     llvm_unreachable("parseRegister is not implemented.");
276   }
277   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
278                                SMLoc &EndLoc) override {
279     llvm_unreachable("tryParseRegister is not implemented.");
280   }
281 
282   bool error(const Twine &Msg, const AsmToken &Tok) {
283     return Parser.Error(Tok.getLoc(), Msg + Tok.getString());
284   }
285 
286   bool error(const Twine &Msg, SMLoc Loc = SMLoc()) {
287     return Parser.Error(Loc.isValid() ? Loc : Lexer.getTok().getLoc(), Msg);
288   }
289 
290   void addSignature(std::unique_ptr<wasm::WasmSignature> &&Sig) {
291     Signatures.push_back(std::move(Sig));
292   }
293 
294   StringRef storeName(StringRef Name) {
295     std::unique_ptr<std::string> N = std::make_unique<std::string>(Name);
296     Names.push_back(std::move(N));
297     return *Names.back();
298   }
299 
300   std::pair<StringRef, StringRef> nestingString(NestingType NT) {
301     switch (NT) {
302     case Function:
303       return {"function", "end_function"};
304     case Block:
305       return {"block", "end_block"};
306     case Loop:
307       return {"loop", "end_loop"};
308     case Try:
309       return {"try", "end_try/delegate"};
310     case CatchAll:
311       return {"catch_all", "end_try"};
312     case If:
313       return {"if", "end_if"};
314     case Else:
315       return {"else", "end_if"};
316     default:
317       llvm_unreachable("unknown NestingType");
318     }
319   }
320 
321   void push(NestingType NT, wasm::WasmSignature Sig = wasm::WasmSignature()) {
322     NestingStack.push_back({NT, Sig});
323   }
324 
325   bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
326     if (NestingStack.empty())
327       return error(Twine("End of block construct with no start: ") + Ins);
328     auto Top = NestingStack.back();
329     if (Top.NT != NT1 && Top.NT != NT2)
330       return error(Twine("Block construct type mismatch, expected: ") +
331                    nestingString(Top.NT).second + ", instead got: " + Ins);
332     TC.setLastSig(Top.Sig);
333     NestingStack.pop_back();
334     return false;
335   }
336 
337   // Pop a NestingType and push a new NestingType with the same signature. Used
338   // for if-else and try-catch(_all).
339   bool popAndPushWithSameSignature(StringRef Ins, NestingType PopNT,
340                                    NestingType PushNT) {
341     if (NestingStack.empty())
342       return error(Twine("End of block construct with no start: ") + Ins);
343     auto Sig = NestingStack.back().Sig;
344     if (pop(Ins, PopNT))
345       return true;
346     push(PushNT, Sig);
347     return false;
348   }
349 
350   bool ensureEmptyNestingStack(SMLoc Loc = SMLoc()) {
351     auto Err = !NestingStack.empty();
352     while (!NestingStack.empty()) {
353       error(Twine("Unmatched block construct(s) at function end: ") +
354                 nestingString(NestingStack.back().NT).first,
355             Loc);
356       NestingStack.pop_back();
357     }
358     return Err;
359   }
360 
361   bool isNext(AsmToken::TokenKind Kind) {
362     auto Ok = Lexer.is(Kind);
363     if (Ok)
364       Parser.Lex();
365     return Ok;
366   }
367 
368   bool expect(AsmToken::TokenKind Kind, const char *KindName) {
369     if (!isNext(Kind))
370       return error(std::string("Expected ") + KindName + ", instead got: ",
371                    Lexer.getTok());
372     return false;
373   }
374 
375   StringRef expectIdent() {
376     if (!Lexer.is(AsmToken::Identifier)) {
377       error("Expected identifier, got: ", Lexer.getTok());
378       return StringRef();
379     }
380     auto Name = Lexer.getTok().getString();
381     Parser.Lex();
382     return Name;
383   }
384 
385   bool parseRegTypeList(SmallVectorImpl<wasm::ValType> &Types) {
386     while (Lexer.is(AsmToken::Identifier)) {
387       auto Type = WebAssembly::parseType(Lexer.getTok().getString());
388       if (!Type)
389         return error("unknown type: ", Lexer.getTok());
390       Types.push_back(*Type);
391       Parser.Lex();
392       if (!isNext(AsmToken::Comma))
393         break;
394     }
395     return false;
396   }
397 
398   void parseSingleInteger(bool IsNegative, OperandVector &Operands) {
399     auto &Int = Lexer.getTok();
400     int64_t Val = Int.getIntVal();
401     if (IsNegative)
402       Val = -Val;
403     Operands.push_back(std::make_unique<WebAssemblyOperand>(
404         WebAssemblyOperand::Integer, Int.getLoc(), Int.getEndLoc(),
405         WebAssemblyOperand::IntOp{Val}));
406     Parser.Lex();
407   }
408 
409   bool parseSingleFloat(bool IsNegative, OperandVector &Operands) {
410     auto &Flt = Lexer.getTok();
411     double Val;
412     if (Flt.getString().getAsDouble(Val, false))
413       return error("Cannot parse real: ", Flt);
414     if (IsNegative)
415       Val = -Val;
416     Operands.push_back(std::make_unique<WebAssemblyOperand>(
417         WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
418         WebAssemblyOperand::FltOp{Val}));
419     Parser.Lex();
420     return false;
421   }
422 
423   bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) {
424     if (Lexer.isNot(AsmToken::Identifier))
425       return true;
426     auto &Flt = Lexer.getTok();
427     auto S = Flt.getString();
428     double Val;
429     if (S.compare_insensitive("infinity") == 0) {
430       Val = std::numeric_limits<double>::infinity();
431     } else if (S.compare_insensitive("nan") == 0) {
432       Val = std::numeric_limits<double>::quiet_NaN();
433     } else {
434       return true;
435     }
436     if (IsNegative)
437       Val = -Val;
438     Operands.push_back(std::make_unique<WebAssemblyOperand>(
439         WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(),
440         WebAssemblyOperand::FltOp{Val}));
441     Parser.Lex();
442     return false;
443   }
444 
445   bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
446     // FIXME: there is probably a cleaner way to do this.
447     auto IsLoadStore = InstName.contains(".load") ||
448                        InstName.contains(".store") ||
449                        InstName.contains("prefetch");
450     auto IsAtomic = InstName.contains("atomic.");
451     if (IsLoadStore || IsAtomic) {
452       // Parse load/store operands of the form: offset:p2align=align
453       if (IsLoadStore && isNext(AsmToken::Colon)) {
454         auto Id = expectIdent();
455         if (Id != "p2align")
456           return error("Expected p2align, instead got: " + Id);
457         if (expect(AsmToken::Equal, "="))
458           return true;
459         if (!Lexer.is(AsmToken::Integer))
460           return error("Expected integer constant");
461         parseSingleInteger(false, Operands);
462       } else {
463         // v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
464         // index. We need to avoid parsing an extra alignment operand for the
465         // lane index.
466         auto IsLoadStoreLane = InstName.contains("_lane");
467         if (IsLoadStoreLane && Operands.size() == 4)
468           return false;
469         // Alignment not specified (or atomics, must use default alignment).
470         // We can't just call WebAssembly::GetDefaultP2Align since we don't have
471         // an opcode until after the assembly matcher, so set a default to fix
472         // up later.
473         auto Tok = Lexer.getTok();
474         Operands.push_back(std::make_unique<WebAssemblyOperand>(
475             WebAssemblyOperand::Integer, Tok.getLoc(), Tok.getEndLoc(),
476             WebAssemblyOperand::IntOp{-1}));
477       }
478     }
479     return false;
480   }
481 
482   void addBlockTypeOperand(OperandVector &Operands, SMLoc NameLoc,
483                            WebAssembly::BlockType BT) {
484     if (BT != WebAssembly::BlockType::Void) {
485       wasm::WasmSignature Sig({static_cast<wasm::ValType>(BT)}, {});
486       TC.setLastSig(Sig);
487       NestingStack.back().Sig = Sig;
488     }
489     Operands.push_back(std::make_unique<WebAssemblyOperand>(
490         WebAssemblyOperand::Integer, NameLoc, NameLoc,
491         WebAssemblyOperand::IntOp{static_cast<int64_t>(BT)}));
492   }
493 
494   bool parseLimits(wasm::WasmLimits *Limits) {
495     auto Tok = Lexer.getTok();
496     if (!Tok.is(AsmToken::Integer))
497       return error("Expected integer constant, instead got: ", Tok);
498     int64_t Val = Tok.getIntVal();
499     assert(Val >= 0);
500     Limits->Minimum = Val;
501     Parser.Lex();
502 
503     if (isNext(AsmToken::Comma)) {
504       Limits->Flags |= wasm::WASM_LIMITS_FLAG_HAS_MAX;
505       auto Tok = Lexer.getTok();
506       if (!Tok.is(AsmToken::Integer))
507         return error("Expected integer constant, instead got: ", Tok);
508       int64_t Val = Tok.getIntVal();
509       assert(Val >= 0);
510       Limits->Maximum = Val;
511       Parser.Lex();
512     }
513     return false;
514   }
515 
516   bool parseFunctionTableOperand(std::unique_ptr<WebAssemblyOperand> *Op) {
517     if (STI->checkFeatures("+reference-types")) {
518       // If the reference-types feature is enabled, there is an explicit table
519       // operand.  To allow the same assembly to be compiled with or without
520       // reference types, we allow the operand to be omitted, in which case we
521       // default to __indirect_function_table.
522       auto &Tok = Lexer.getTok();
523       if (Tok.is(AsmToken::Identifier)) {
524         auto *Sym =
525             GetOrCreateFunctionTableSymbol(getContext(), Tok.getString());
526         const auto *Val = MCSymbolRefExpr::create(Sym, getContext());
527         *Op = std::make_unique<WebAssemblyOperand>(
528             WebAssemblyOperand::Symbol, Tok.getLoc(), Tok.getEndLoc(),
529             WebAssemblyOperand::SymOp{Val});
530         Parser.Lex();
531         return expect(AsmToken::Comma, ",");
532       } else {
533         const auto *Val =
534             MCSymbolRefExpr::create(DefaultFunctionTable, getContext());
535         *Op = std::make_unique<WebAssemblyOperand>(
536             WebAssemblyOperand::Symbol, SMLoc(), SMLoc(),
537             WebAssemblyOperand::SymOp{Val});
538         return false;
539       }
540     } else {
541       // For the MVP there is at most one table whose number is 0, but we can't
542       // write a table symbol or issue relocations.  Instead we just ensure the
543       // table is live and write a zero.
544       getStreamer().emitSymbolAttribute(DefaultFunctionTable, MCSA_NoDeadStrip);
545       *Op = std::make_unique<WebAssemblyOperand>(WebAssemblyOperand::Integer,
546                                                  SMLoc(), SMLoc(),
547                                                  WebAssemblyOperand::IntOp{0});
548       return false;
549     }
550   }
551 
552   bool ParseInstruction(ParseInstructionInfo & /*Info*/, StringRef Name,
553                         SMLoc NameLoc, OperandVector &Operands) override {
554     // Note: Name does NOT point into the sourcecode, but to a local, so
555     // use NameLoc instead.
556     Name = StringRef(NameLoc.getPointer(), Name.size());
557 
558     // WebAssembly has instructions with / in them, which AsmLexer parses
559     // as separate tokens, so if we find such tokens immediately adjacent (no
560     // whitespace), expand the name to include them:
561     for (;;) {
562       auto &Sep = Lexer.getTok();
563       if (Sep.getLoc().getPointer() != Name.end() ||
564           Sep.getKind() != AsmToken::Slash)
565         break;
566       // Extend name with /
567       Name = StringRef(Name.begin(), Name.size() + Sep.getString().size());
568       Parser.Lex();
569       // We must now find another identifier, or error.
570       auto &Id = Lexer.getTok();
571       if (Id.getKind() != AsmToken::Identifier ||
572           Id.getLoc().getPointer() != Name.end())
573         return error("Incomplete instruction name: ", Id);
574       Name = StringRef(Name.begin(), Name.size() + Id.getString().size());
575       Parser.Lex();
576     }
577 
578     // Now construct the name as first operand.
579     Operands.push_back(std::make_unique<WebAssemblyOperand>(
580         WebAssemblyOperand::Token, NameLoc, SMLoc::getFromPointer(Name.end()),
581         WebAssemblyOperand::TokOp{Name}));
582 
583     // If this instruction is part of a control flow structure, ensure
584     // proper nesting.
585     bool ExpectBlockType = false;
586     bool ExpectFuncType = false;
587     std::unique_ptr<WebAssemblyOperand> FunctionTable;
588     if (Name == "block") {
589       push(Block);
590       ExpectBlockType = true;
591     } else if (Name == "loop") {
592       push(Loop);
593       ExpectBlockType = true;
594     } else if (Name == "try") {
595       push(Try);
596       ExpectBlockType = true;
597     } else if (Name == "if") {
598       push(If);
599       ExpectBlockType = true;
600     } else if (Name == "else") {
601       if (popAndPushWithSameSignature(Name, If, Else))
602         return true;
603     } else if (Name == "catch") {
604       if (popAndPushWithSameSignature(Name, Try, Try))
605         return true;
606     } else if (Name == "catch_all") {
607       if (popAndPushWithSameSignature(Name, Try, CatchAll))
608         return true;
609     } else if (Name == "end_if") {
610       if (pop(Name, If, Else))
611         return true;
612     } else if (Name == "end_try") {
613       if (pop(Name, Try, CatchAll))
614         return true;
615     } else if (Name == "delegate") {
616       if (pop(Name, Try))
617         return true;
618     } else if (Name == "end_loop") {
619       if (pop(Name, Loop))
620         return true;
621     } else if (Name == "end_block") {
622       if (pop(Name, Block))
623         return true;
624     } else if (Name == "end_function") {
625       ensureLocals(getStreamer());
626       CurrentState = EndFunction;
627       if (pop(Name, Function) || ensureEmptyNestingStack())
628         return true;
629     } else if (Name == "call_indirect" || Name == "return_call_indirect") {
630       // These instructions have differing operand orders in the text format vs
631       // the binary formats.  The MC instructions follow the binary format, so
632       // here we stash away the operand and append it later.
633       if (parseFunctionTableOperand(&FunctionTable))
634         return true;
635       ExpectFuncType = true;
636     }
637 
638     if (ExpectFuncType || (ExpectBlockType && Lexer.is(AsmToken::LParen))) {
639       // This has a special TYPEINDEX operand which in text we
640       // represent as a signature, such that we can re-build this signature,
641       // attach it to an anonymous symbol, which is what WasmObjectWriter
642       // expects to be able to recreate the actual unique-ified type indices.
643       auto Loc = Parser.getTok();
644       auto Signature = std::make_unique<wasm::WasmSignature>();
645       if (parseSignature(Signature.get()))
646         return true;
647       // Got signature as block type, don't need more
648       TC.setLastSig(*Signature.get());
649       if (ExpectBlockType)
650         NestingStack.back().Sig = *Signature.get();
651       ExpectBlockType = false;
652       auto &Ctx = getContext();
653       // The "true" here will cause this to be a nameless symbol.
654       MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
655       auto *WasmSym = cast<MCSymbolWasm>(Sym);
656       WasmSym->setSignature(Signature.get());
657       addSignature(std::move(Signature));
658       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
659       const MCExpr *Expr = MCSymbolRefExpr::create(
660           WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, Ctx);
661       Operands.push_back(std::make_unique<WebAssemblyOperand>(
662           WebAssemblyOperand::Symbol, Loc.getLoc(), Loc.getEndLoc(),
663           WebAssemblyOperand::SymOp{Expr}));
664     }
665 
666     while (Lexer.isNot(AsmToken::EndOfStatement)) {
667       auto &Tok = Lexer.getTok();
668       switch (Tok.getKind()) {
669       case AsmToken::Identifier: {
670         if (!parseSpecialFloatMaybe(false, Operands))
671           break;
672         auto &Id = Lexer.getTok();
673         if (ExpectBlockType) {
674           // Assume this identifier is a block_type.
675           auto BT = WebAssembly::parseBlockType(Id.getString());
676           if (BT == WebAssembly::BlockType::Invalid)
677             return error("Unknown block type: ", Id);
678           addBlockTypeOperand(Operands, NameLoc, BT);
679           Parser.Lex();
680         } else {
681           // Assume this identifier is a label.
682           const MCExpr *Val;
683           SMLoc Start = Id.getLoc();
684           SMLoc End;
685           if (Parser.parseExpression(Val, End))
686             return error("Cannot parse symbol: ", Lexer.getTok());
687           Operands.push_back(std::make_unique<WebAssemblyOperand>(
688               WebAssemblyOperand::Symbol, Start, End,
689               WebAssemblyOperand::SymOp{Val}));
690           if (checkForP2AlignIfLoadStore(Operands, Name))
691             return true;
692         }
693         break;
694       }
695       case AsmToken::Minus:
696         Parser.Lex();
697         if (Lexer.is(AsmToken::Integer)) {
698           parseSingleInteger(true, Operands);
699           if (checkForP2AlignIfLoadStore(Operands, Name))
700             return true;
701         } else if (Lexer.is(AsmToken::Real)) {
702           if (parseSingleFloat(true, Operands))
703             return true;
704         } else if (!parseSpecialFloatMaybe(true, Operands)) {
705         } else {
706           return error("Expected numeric constant instead got: ",
707                        Lexer.getTok());
708         }
709         break;
710       case AsmToken::Integer:
711         parseSingleInteger(false, Operands);
712         if (checkForP2AlignIfLoadStore(Operands, Name))
713           return true;
714         break;
715       case AsmToken::Real: {
716         if (parseSingleFloat(false, Operands))
717           return true;
718         break;
719       }
720       case AsmToken::LCurly: {
721         Parser.Lex();
722         auto Op = std::make_unique<WebAssemblyOperand>(
723             WebAssemblyOperand::BrList, Tok.getLoc(), Tok.getEndLoc());
724         if (!Lexer.is(AsmToken::RCurly))
725           for (;;) {
726             Op->BrL.List.push_back(Lexer.getTok().getIntVal());
727             expect(AsmToken::Integer, "integer");
728             if (!isNext(AsmToken::Comma))
729               break;
730           }
731         expect(AsmToken::RCurly, "}");
732         Operands.push_back(std::move(Op));
733         break;
734       }
735       default:
736         return error("Unexpected token in operand: ", Tok);
737       }
738       if (Lexer.isNot(AsmToken::EndOfStatement)) {
739         if (expect(AsmToken::Comma, ","))
740           return true;
741       }
742     }
743     if (ExpectBlockType && Operands.size() == 1) {
744       // Support blocks with no operands as default to void.
745       addBlockTypeOperand(Operands, NameLoc, WebAssembly::BlockType::Void);
746     }
747     if (FunctionTable)
748       Operands.push_back(std::move(FunctionTable));
749     Parser.Lex();
750     return false;
751   }
752 
753   bool parseSignature(wasm::WasmSignature *Signature) {
754     if (expect(AsmToken::LParen, "("))
755       return true;
756     if (parseRegTypeList(Signature->Params))
757       return true;
758     if (expect(AsmToken::RParen, ")"))
759       return true;
760     if (expect(AsmToken::MinusGreater, "->"))
761       return true;
762     if (expect(AsmToken::LParen, "("))
763       return true;
764     if (parseRegTypeList(Signature->Returns))
765       return true;
766     if (expect(AsmToken::RParen, ")"))
767       return true;
768     return false;
769   }
770 
771   bool CheckDataSection() {
772     if (CurrentState != DataSection) {
773       auto WS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
774       if (WS && WS->getKind().isText())
775         return error("data directive must occur in a data segment: ",
776                      Lexer.getTok());
777     }
778     CurrentState = DataSection;
779     return false;
780   }
781 
782   // This function processes wasm-specific directives streamed to
783   // WebAssemblyTargetStreamer, all others go to the generic parser
784   // (see WasmAsmParser).
785   ParseStatus parseDirective(AsmToken DirectiveID) override {
786     assert(DirectiveID.getKind() == AsmToken::Identifier);
787     auto &Out = getStreamer();
788     auto &TOut =
789         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
790     auto &Ctx = Out.getContext();
791 
792     if (DirectiveID.getString() == ".globaltype") {
793       auto SymName = expectIdent();
794       if (SymName.empty())
795         return ParseStatus::Failure;
796       if (expect(AsmToken::Comma, ","))
797         return ParseStatus::Failure;
798       auto TypeTok = Lexer.getTok();
799       auto TypeName = expectIdent();
800       if (TypeName.empty())
801         return ParseStatus::Failure;
802       auto Type = WebAssembly::parseType(TypeName);
803       if (!Type)
804         return error("Unknown type in .globaltype directive: ", TypeTok);
805       // Optional mutable modifier. Default to mutable for historical reasons.
806       // Ideally we would have gone with immutable as the default and used `mut`
807       // as the modifier to match the `.wat` format.
808       bool Mutable = true;
809       if (isNext(AsmToken::Comma)) {
810         TypeTok = Lexer.getTok();
811         auto Id = expectIdent();
812         if (Id.empty())
813           return ParseStatus::Failure;
814         if (Id == "immutable")
815           Mutable = false;
816         else
817           // Should we also allow `mutable` and `mut` here for clarity?
818           return error("Unknown type in .globaltype modifier: ", TypeTok);
819       }
820       // Now set this symbol with the correct type.
821       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
822       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
823       WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(*Type), Mutable});
824       // And emit the directive again.
825       TOut.emitGlobalType(WasmSym);
826       return expect(AsmToken::EndOfStatement, "EOL");
827     }
828 
829     if (DirectiveID.getString() == ".tabletype") {
830       // .tabletype SYM, ELEMTYPE[, MINSIZE[, MAXSIZE]]
831       auto SymName = expectIdent();
832       if (SymName.empty())
833         return ParseStatus::Failure;
834       if (expect(AsmToken::Comma, ","))
835         return ParseStatus::Failure;
836 
837       auto ElemTypeTok = Lexer.getTok();
838       auto ElemTypeName = expectIdent();
839       if (ElemTypeName.empty())
840         return ParseStatus::Failure;
841       std::optional<wasm::ValType> ElemType =
842           WebAssembly::parseType(ElemTypeName);
843       if (!ElemType)
844         return error("Unknown type in .tabletype directive: ", ElemTypeTok);
845 
846       wasm::WasmLimits Limits = DefaultLimits();
847       if (isNext(AsmToken::Comma) && parseLimits(&Limits))
848         return ParseStatus::Failure;
849 
850       // Now that we have the name and table type, we can actually create the
851       // symbol
852       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
853       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TABLE);
854       wasm::WasmTableType Type = {*ElemType, Limits};
855       WasmSym->setTableType(Type);
856       TOut.emitTableType(WasmSym);
857       return expect(AsmToken::EndOfStatement, "EOL");
858     }
859 
860     if (DirectiveID.getString() == ".functype") {
861       // This code has to send things to the streamer similar to
862       // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
863       // TODO: would be good to factor this into a common function, but the
864       // assembler and backend really don't share any common code, and this code
865       // parses the locals separately.
866       auto SymName = expectIdent();
867       if (SymName.empty())
868         return ParseStatus::Failure;
869       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
870       if (WasmSym->isDefined()) {
871         // We push 'Function' either when a label is parsed or a .functype
872         // directive is parsed. The reason it is not easy to do this uniformly
873         // in a single place is,
874         // 1. We can't do this at label parsing time only because there are
875         //    cases we don't have .functype directive before a function label,
876         //    in which case we don't know if the label is a function at the time
877         //    of parsing.
878         // 2. We can't do this at .functype parsing time only because we want to
879         //    detect a function started with a label and not ended correctly
880         //    without encountering a .functype directive after the label.
881         if (CurrentState != FunctionLabel) {
882           // This .functype indicates a start of a function.
883           if (ensureEmptyNestingStack())
884             return ParseStatus::Failure;
885           push(Function);
886         }
887         CurrentState = FunctionStart;
888         LastFunctionLabel = WasmSym;
889       }
890       auto Signature = std::make_unique<wasm::WasmSignature>();
891       if (parseSignature(Signature.get()))
892         return ParseStatus::Failure;
893       TC.funcDecl(*Signature);
894       WasmSym->setSignature(Signature.get());
895       addSignature(std::move(Signature));
896       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
897       TOut.emitFunctionType(WasmSym);
898       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
899       return expect(AsmToken::EndOfStatement, "EOL");
900     }
901 
902     if (DirectiveID.getString() == ".export_name") {
903       auto SymName = expectIdent();
904       if (SymName.empty())
905         return ParseStatus::Failure;
906       if (expect(AsmToken::Comma, ","))
907         return ParseStatus::Failure;
908       auto ExportName = expectIdent();
909       if (ExportName.empty())
910         return ParseStatus::Failure;
911       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
912       WasmSym->setExportName(storeName(ExportName));
913       TOut.emitExportName(WasmSym, ExportName);
914       return expect(AsmToken::EndOfStatement, "EOL");
915     }
916 
917     if (DirectiveID.getString() == ".import_module") {
918       auto SymName = expectIdent();
919       if (SymName.empty())
920         return ParseStatus::Failure;
921       if (expect(AsmToken::Comma, ","))
922         return ParseStatus::Failure;
923       auto ImportModule = expectIdent();
924       if (ImportModule.empty())
925         return ParseStatus::Failure;
926       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
927       WasmSym->setImportModule(storeName(ImportModule));
928       TOut.emitImportModule(WasmSym, ImportModule);
929       return expect(AsmToken::EndOfStatement, "EOL");
930     }
931 
932     if (DirectiveID.getString() == ".import_name") {
933       auto SymName = expectIdent();
934       if (SymName.empty())
935         return ParseStatus::Failure;
936       if (expect(AsmToken::Comma, ","))
937         return ParseStatus::Failure;
938       auto ImportName = expectIdent();
939       if (ImportName.empty())
940         return ParseStatus::Failure;
941       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
942       WasmSym->setImportName(storeName(ImportName));
943       TOut.emitImportName(WasmSym, ImportName);
944       return expect(AsmToken::EndOfStatement, "EOL");
945     }
946 
947     if (DirectiveID.getString() == ".tagtype") {
948       auto SymName = expectIdent();
949       if (SymName.empty())
950         return ParseStatus::Failure;
951       auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
952       auto Signature = std::make_unique<wasm::WasmSignature>();
953       if (parseRegTypeList(Signature->Params))
954         return ParseStatus::Failure;
955       WasmSym->setSignature(Signature.get());
956       addSignature(std::move(Signature));
957       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
958       TOut.emitTagType(WasmSym);
959       // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
960       return expect(AsmToken::EndOfStatement, "EOL");
961     }
962 
963     if (DirectiveID.getString() == ".local") {
964       if (CurrentState != FunctionStart)
965         return error(".local directive should follow the start of a function: ",
966                      Lexer.getTok());
967       SmallVector<wasm::ValType, 4> Locals;
968       if (parseRegTypeList(Locals))
969         return ParseStatus::Failure;
970       TC.localDecl(Locals);
971       TOut.emitLocal(Locals);
972       CurrentState = FunctionLocals;
973       return expect(AsmToken::EndOfStatement, "EOL");
974     }
975 
976     if (DirectiveID.getString() == ".int8" ||
977         DirectiveID.getString() == ".int16" ||
978         DirectiveID.getString() == ".int32" ||
979         DirectiveID.getString() == ".int64") {
980       if (CheckDataSection())
981         return ParseStatus::Failure;
982       const MCExpr *Val;
983       SMLoc End;
984       if (Parser.parseExpression(Val, End))
985         return error("Cannot parse .int expression: ", Lexer.getTok());
986       size_t NumBits = 0;
987       DirectiveID.getString().drop_front(4).getAsInteger(10, NumBits);
988       Out.emitValue(Val, NumBits / 8, End);
989       return expect(AsmToken::EndOfStatement, "EOL");
990     }
991 
992     if (DirectiveID.getString() == ".asciz") {
993       if (CheckDataSection())
994         return ParseStatus::Failure;
995       std::string S;
996       if (Parser.parseEscapedString(S))
997         return error("Cannot parse string constant: ", Lexer.getTok());
998       Out.emitBytes(StringRef(S.c_str(), S.length() + 1));
999       return expect(AsmToken::EndOfStatement, "EOL");
1000     }
1001 
1002     return ParseStatus::NoMatch; // We didn't process this directive.
1003   }
1004 
1005   // Called either when the first instruction is parsed of the function ends.
1006   void ensureLocals(MCStreamer &Out) {
1007     if (CurrentState == FunctionStart) {
1008       // We haven't seen a .local directive yet. The streamer requires locals to
1009       // be encoded as a prelude to the instructions, so emit an empty list of
1010       // locals here.
1011       auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
1012           *Out.getTargetStreamer());
1013       TOut.emitLocal(SmallVector<wasm::ValType, 0>());
1014       CurrentState = FunctionLocals;
1015     }
1016   }
1017 
1018   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,
1019                                OperandVector &Operands, MCStreamer &Out,
1020                                uint64_t &ErrorInfo,
1021                                bool MatchingInlineAsm) override {
1022     MCInst Inst;
1023     Inst.setLoc(IDLoc);
1024     FeatureBitset MissingFeatures;
1025     unsigned MatchResult = MatchInstructionImpl(
1026         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm);
1027     switch (MatchResult) {
1028     case Match_Success: {
1029       ensureLocals(Out);
1030       // Fix unknown p2align operands.
1031       auto Align = WebAssembly::GetDefaultP2AlignAny(Inst.getOpcode());
1032       if (Align != -1U) {
1033         auto &Op0 = Inst.getOperand(0);
1034         if (Op0.getImm() == -1)
1035           Op0.setImm(Align);
1036       }
1037       if (is64) {
1038         // Upgrade 32-bit loads/stores to 64-bit. These mostly differ by having
1039         // an offset64 arg instead of offset32, but to the assembler matcher
1040         // they're both immediates so don't get selected for.
1041         auto Opc64 = WebAssembly::getWasm64Opcode(
1042             static_cast<uint16_t>(Inst.getOpcode()));
1043         if (Opc64 >= 0) {
1044           Inst.setOpcode(Opc64);
1045         }
1046       }
1047       if (!SkipTypeCheck && TC.typeCheck(IDLoc, Inst, Operands))
1048         return true;
1049       Out.emitInstruction(Inst, getSTI());
1050       if (CurrentState == EndFunction) {
1051         onEndOfFunction(IDLoc);
1052       } else {
1053         CurrentState = Instructions;
1054       }
1055       return false;
1056     }
1057     case Match_MissingFeature: {
1058       assert(MissingFeatures.count() > 0 && "Expected missing features");
1059       SmallString<128> Message;
1060       raw_svector_ostream OS(Message);
1061       OS << "instruction requires:";
1062       for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i)
1063         if (MissingFeatures.test(i))
1064           OS << ' ' << getSubtargetFeatureName(i);
1065       return Parser.Error(IDLoc, Message);
1066     }
1067     case Match_MnemonicFail:
1068       return Parser.Error(IDLoc, "invalid instruction");
1069     case Match_NearMisses:
1070       return Parser.Error(IDLoc, "ambiguous instruction");
1071     case Match_InvalidTiedOperand:
1072     case Match_InvalidOperand: {
1073       SMLoc ErrorLoc = IDLoc;
1074       if (ErrorInfo != ~0ULL) {
1075         if (ErrorInfo >= Operands.size())
1076           return Parser.Error(IDLoc, "too few operands for instruction");
1077         ErrorLoc = Operands[ErrorInfo]->getStartLoc();
1078         if (ErrorLoc == SMLoc())
1079           ErrorLoc = IDLoc;
1080       }
1081       return Parser.Error(ErrorLoc, "invalid operand for instruction");
1082     }
1083     }
1084     llvm_unreachable("Implement any new match types added!");
1085   }
1086 
1087   void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) override {
1088     // Code below only applies to labels in text sections.
1089     auto CWS = cast<MCSectionWasm>(getStreamer().getCurrentSection().first);
1090     if (!CWS || !CWS->getKind().isText())
1091       return;
1092 
1093     auto WasmSym = cast<MCSymbolWasm>(Symbol);
1094     // Unlike other targets, we don't allow data in text sections (labels
1095     // declared with .type @object).
1096     if (WasmSym->getType() == wasm::WASM_SYMBOL_TYPE_DATA) {
1097       Parser.Error(IDLoc,
1098                    "Wasm doesn\'t support data symbols in text sections");
1099       return;
1100     }
1101 
1102     // Start a new section for the next function automatically, since our
1103     // object writer expects each function to have its own section. This way
1104     // The user can't forget this "convention".
1105     auto SymName = Symbol->getName();
1106     if (SymName.starts_with(".L"))
1107       return; // Local Symbol.
1108 
1109     // TODO: If the user explicitly creates a new function section, we ignore
1110     // its name when we create this one. It would be nice to honor their
1111     // choice, while still ensuring that we create one if they forget.
1112     // (that requires coordination with WasmAsmParser::parseSectionDirective)
1113     auto SecName = ".text." + SymName;
1114 
1115     auto *Group = CWS->getGroup();
1116     // If the current section is a COMDAT, also set the flag on the symbol.
1117     // TODO: Currently the only place that the symbols' comdat flag matters is
1118     // for importing comdat functions. But there's no way to specify that in
1119     // assembly currently.
1120     if (Group)
1121       WasmSym->setComdat(true);
1122     auto *WS =
1123         getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group,
1124                                     MCContext::GenericSectionID, nullptr);
1125     getStreamer().switchSection(WS);
1126     // Also generate DWARF for this section if requested.
1127     if (getContext().getGenDwarfForAssembly())
1128       getContext().addGenDwarfSection(WS);
1129 
1130     if (WasmSym->isFunction()) {
1131       // We give the location of the label (IDLoc) here, because otherwise the
1132       // lexer's next location will be used, which can be confusing. For
1133       // example:
1134       //
1135       // test0: ; This function does not end properly
1136       //   ...
1137       //
1138       // test1: ; We would like to point to this line for error
1139       //   ...  . Not this line, which can contain any instruction
1140       ensureEmptyNestingStack(IDLoc);
1141       CurrentState = FunctionLabel;
1142       LastFunctionLabel = Symbol;
1143       push(Function);
1144     }
1145   }
1146 
1147   void onEndOfFunction(SMLoc ErrorLoc) {
1148     if (!SkipTypeCheck)
1149       TC.endOfFunction(ErrorLoc);
1150     // Reset the type checker state.
1151     TC.Clear();
1152   }
1153 
1154   void onEndOfFile() override { ensureEmptyNestingStack(); }
1155 };
1156 } // end anonymous namespace
1157 
1158 // Force static initialization.
1159 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyAsmParser() {
1160   RegisterMCAsmParser<WebAssemblyAsmParser> X(getTheWebAssemblyTarget32());
1161   RegisterMCAsmParser<WebAssemblyAsmParser> Y(getTheWebAssemblyTarget64());
1162 }
1163 
1164 #define GET_REGISTER_MATCHER
1165 #define GET_SUBTARGET_FEATURE_NAME
1166 #define GET_MATCHER_IMPLEMENTATION
1167 #include "WebAssemblyGenAsmMatcher.inc"
1168 
1169 StringRef GetMnemonic(unsigned Opc) {
1170   // FIXME: linear search!
1171   for (auto &ME : MatchTable0) {
1172     if (ME.Opcode == Opc) {
1173       return ME.getMnemonic();
1174     }
1175   }
1176   assert(false && "mnemonic not found");
1177   return StringRef();
1178 }
1179