xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
//===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This class implements the parser for MASM-style assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCCodeView.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDirectives.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInstPrinter.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/MC/MCInstrInfo.h"
33 #include "llvm/MC/MCParser/AsmCond.h"
34 #include "llvm/MC/MCParser/AsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCSection.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/MC/MCTargetOptions.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/Format.h"
48 #include "llvm/Support/MD5.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/MemoryBuffer.h"
51 #include "llvm/Support/Path.h"
52 #include "llvm/Support/SMLoc.h"
53 #include "llvm/Support/SourceMgr.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <climits>
58 #include <cstddef>
59 #include <cstdint>
60 #include <ctime>
61 #include <deque>
62 #include <memory>
63 #include <optional>
64 #include <sstream>
65 #include <string>
66 #include <tuple>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 namespace {
73 
74 /// Helper types for tracking macro definitions.
75 typedef std::vector<AsmToken> MCAsmMacroArgument;
76 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
77 
78 /// Helper class for storing information about an active macro instantiation.
79 struct MacroInstantiation {
80   /// The location of the instantiation.
81   SMLoc InstantiationLoc;
82 
83   /// The buffer where parsing should resume upon instantiation completion.
84   unsigned ExitBuffer;
85 
86   /// The location where parsing should resume upon instantiation completion.
87   SMLoc ExitLoc;
88 
89   /// The depth of TheCondStack at the start of the instantiation.
90   size_t CondStackDepth;
91 };
92 
93 struct ParseStatementInfo {
94   /// The parsed operands from the last parsed statement.
95   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
96 
97   /// The opcode from the last parsed instruction.
98   unsigned Opcode = ~0U;
99 
100   /// Was there an error parsing the inline assembly?
101   bool ParseError = false;
102 
103   /// The value associated with a macro exit.
104   std::optional<std::string> ExitValue;
105 
106   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
107 
108   ParseStatementInfo() = delete;
ParseStatementInfo__anon60b61cd60111::ParseStatementInfo109   ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
110       : AsmRewrites(rewrites) {}
111 };
112 
/// Kind of data held by a field; each kind stores its initializer differently.
enum FieldType {
  // Initializer: integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  // Initializer: real number, stored as an APInt.
  FT_REAL,
  // Initializer: struct initializer, stored recursively.
  FT_STRUCT
};
118 
119 struct FieldInfo;
120 struct StructInfo {
121   StringRef Name;
122   bool IsUnion = false;
123   bool Initializable = true;
124   unsigned Alignment = 0;
125   unsigned AlignmentSize = 0;
126   unsigned NextOffset = 0;
127   unsigned Size = 0;
128   std::vector<FieldInfo> Fields;
129   StringMap<size_t> FieldsByName;
130 
131   FieldInfo &addField(StringRef FieldName, FieldType FT,
132                       unsigned FieldAlignmentSize);
133 
134   StructInfo() = default;
135   StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
136 };
137 
138 // FIXME: This should probably use a class hierarchy, raw pointers between the
139 // objects, and dynamic type resolution instead of a union. On the other hand,
140 // ownership then becomes much more complicated; the obvious thing would be to
141 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
142 
143 struct StructInitializer;
144 struct IntFieldInfo {
145   SmallVector<const MCExpr *, 1> Values;
146 
147   IntFieldInfo() = default;
IntFieldInfo__anon60b61cd60111::IntFieldInfo148   IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon60b61cd60111::IntFieldInfo149   IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
150 };
151 struct RealFieldInfo {
152   SmallVector<APInt, 1> AsIntValues;
153 
154   RealFieldInfo() = default;
RealFieldInfo__anon60b61cd60111::RealFieldInfo155   RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon60b61cd60111::RealFieldInfo156   RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
157 };
158 struct StructFieldInfo {
159   std::vector<StructInitializer> Initializers;
160   StructInfo Structure;
161 
162   StructFieldInfo() = default;
163   StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
164 };
165 
166 class FieldInitializer {
167 public:
168   FieldType FT;
169   union {
170     IntFieldInfo IntInfo;
171     RealFieldInfo RealInfo;
172     StructFieldInfo StructInfo;
173   };
174 
175   ~FieldInitializer();
176   FieldInitializer(FieldType FT);
177 
178   FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
179   FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
180   FieldInitializer(std::vector<StructInitializer> &&Initializers,
181                    struct StructInfo Structure);
182 
183   FieldInitializer(const FieldInitializer &Initializer);
184   FieldInitializer(FieldInitializer &&Initializer);
185 
186   FieldInitializer &operator=(const FieldInitializer &Initializer);
187   FieldInitializer &operator=(FieldInitializer &&Initializer);
188 };
189 
190 struct StructInitializer {
191   std::vector<FieldInitializer> FieldInitializers;
192 };
193 
194 struct FieldInfo {
195   // Offset of the field within the containing STRUCT.
196   unsigned Offset = 0;
197 
198   // Total size of the field (= LengthOf * Type).
199   unsigned SizeOf = 0;
200 
201   // Number of elements in the field (1 if scalar, >1 if an array).
202   unsigned LengthOf = 0;
203 
204   // Size of a single entry in this field, in bytes ("type" in MASM standards).
205   unsigned Type = 0;
206 
207   FieldInitializer Contents;
208 
FieldInfo__anon60b61cd60111::FieldInfo209   FieldInfo(FieldType FT) : Contents(FT) {}
210 };
211 
StructFieldInfo(std::vector<StructInitializer> V,StructInfo S)212 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
213                                  StructInfo S) {
214   Initializers = std::move(V);
215   Structure = S;
216 }
217 
StructInfo(StringRef StructName,bool Union,unsigned AlignmentValue)218 StructInfo::StructInfo(StringRef StructName, bool Union,
219                        unsigned AlignmentValue)
220     : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
221 
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)222 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
223                                 unsigned FieldAlignmentSize) {
224   if (!FieldName.empty())
225     FieldsByName[FieldName.lower()] = Fields.size();
226   Fields.emplace_back(FT);
227   FieldInfo &Field = Fields.back();
228   Field.Offset =
229       llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
230   if (!IsUnion) {
231     NextOffset = std::max(NextOffset, Field.Offset);
232   }
233   AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
234   return Field;
235 }
236 
~FieldInitializer()237 FieldInitializer::~FieldInitializer() {
238   switch (FT) {
239   case FT_INTEGRAL:
240     IntInfo.~IntFieldInfo();
241     break;
242   case FT_REAL:
243     RealInfo.~RealFieldInfo();
244     break;
245   case FT_STRUCT:
246     StructInfo.~StructFieldInfo();
247     break;
248   }
249 }
250 
FieldInitializer(FieldType FT)251 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
252   switch (FT) {
253   case FT_INTEGRAL:
254     new (&IntInfo) IntFieldInfo();
255     break;
256   case FT_REAL:
257     new (&RealInfo) RealFieldInfo();
258     break;
259   case FT_STRUCT:
260     new (&StructInfo) StructFieldInfo();
261     break;
262   }
263 }
264 
FieldInitializer(SmallVector<const MCExpr *,1> && Values)265 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
266     : FT(FT_INTEGRAL) {
267   new (&IntInfo) IntFieldInfo(std::move(Values));
268 }
269 
FieldInitializer(SmallVector<APInt,1> && AsIntValues)270 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
271     : FT(FT_REAL) {
272   new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
273 }
274 
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)275 FieldInitializer::FieldInitializer(
276     std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
277     : FT(FT_STRUCT) {
278   new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
279 }
280 
/// Copy-construct: adopt the source's kind and copy its active union member
/// into freshly constructed storage.
FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
    : FT(Initializer.FT) {
  switch (FT) {
  case FT_INTEGRAL:
    ::new (static_cast<void *>(&IntInfo)) IntFieldInfo(Initializer.IntInfo);
    return;
  case FT_REAL:
    ::new (static_cast<void *>(&RealInfo)) RealFieldInfo(Initializer.RealInfo);
    return;
  case FT_STRUCT:
    ::new (static_cast<void *>(&StructInfo))
        StructFieldInfo(Initializer.StructInfo);
    return;
  }
}
295 
FieldInitializer(FieldInitializer && Initializer)296 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
297     : FT(Initializer.FT) {
298   switch (FT) {
299   case FT_INTEGRAL:
300     new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
301     break;
302   case FT_REAL:
303     new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
304     break;
305   case FT_STRUCT:
306     new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
307     break;
308   }
309 }
310 
311 FieldInitializer &
operator =(const FieldInitializer & Initializer)312 FieldInitializer::operator=(const FieldInitializer &Initializer) {
313   if (FT != Initializer.FT) {
314     switch (FT) {
315     case FT_INTEGRAL:
316       IntInfo.~IntFieldInfo();
317       break;
318     case FT_REAL:
319       RealInfo.~RealFieldInfo();
320       break;
321     case FT_STRUCT:
322       StructInfo.~StructFieldInfo();
323       break;
324     }
325   }
326   FT = Initializer.FT;
327   switch (FT) {
328   case FT_INTEGRAL:
329     IntInfo = Initializer.IntInfo;
330     break;
331   case FT_REAL:
332     RealInfo = Initializer.RealInfo;
333     break;
334   case FT_STRUCT:
335     StructInfo = Initializer.StructInfo;
336     break;
337   }
338   return *this;
339 }
340 
operator =(FieldInitializer && Initializer)341 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
342   if (FT != Initializer.FT) {
343     switch (FT) {
344     case FT_INTEGRAL:
345       IntInfo.~IntFieldInfo();
346       break;
347     case FT_REAL:
348       RealInfo.~RealFieldInfo();
349       break;
350     case FT_STRUCT:
351       StructInfo.~StructFieldInfo();
352       break;
353     }
354   }
355   FT = Initializer.FT;
356   switch (FT) {
357   case FT_INTEGRAL:
358     IntInfo = Initializer.IntInfo;
359     break;
360   case FT_REAL:
361     RealInfo = Initializer.RealInfo;
362     break;
363   case FT_STRUCT:
364     StructInfo = Initializer.StructInfo;
365     break;
366   }
367   return *this;
368 }
369 
370 /// The concrete assembly parser instance.
371 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
372 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
373 class MasmParser : public MCAsmParser {
374 private:
375   SourceMgr::DiagHandlerTy SavedDiagHandler;
376   void *SavedDiagContext;
377   std::unique_ptr<MCAsmParserExtension> PlatformParser;
378 
379   /// This is the current buffer index we're lexing from as managed by the
380   /// SourceMgr object.
381   unsigned CurBuffer;
382 
383   /// time of assembly
384   struct tm TM;
385 
386   BitVector EndStatementAtEOFStack;
387 
388   AsmCond TheCondState;
389   std::vector<AsmCond> TheCondStack;
390 
391   /// maps directive names to handler methods in parser
392   /// extensions. Extensions register themselves in this map by calling
393   /// addDirectiveHandler.
394   StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
395 
396   /// maps assembly-time variable names to variables.
397   struct Variable {
398     enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
399 
400     StringRef Name;
401     RedefinableKind Redefinable = REDEFINABLE;
402     bool IsText = false;
403     std::string TextValue;
404   };
405   StringMap<Variable> Variables;
406 
407   /// Stack of active struct definitions.
408   SmallVector<StructInfo, 1> StructInProgress;
409 
410   /// Maps struct tags to struct definitions.
411   StringMap<StructInfo> Structs;
412 
413   /// Maps data location names to types.
414   StringMap<AsmTypeInfo> KnownType;
415 
416   /// Stack of active macro instantiations.
417   std::vector<MacroInstantiation*> ActiveMacros;
418 
419   /// List of bodies of anonymous macros.
420   std::deque<MCAsmMacro> MacroLikeBodies;
421 
422   /// Keeps track of how many .macro's have been instantiated.
423   unsigned NumOfMacroInstantiations;
424 
425   /// The values from the last parsed cpp hash file line comment if any.
426   struct CppHashInfoTy {
427     StringRef Filename;
428     int64_t LineNumber;
429     SMLoc Loc;
430     unsigned Buf;
CppHashInfoTy__anon60b61cd60111::MasmParser::CppHashInfoTy431     CppHashInfoTy() : LineNumber(0), Buf(0) {}
432   };
433   CppHashInfoTy CppHashInfo;
434 
435   /// The filename from the first cpp hash file line comment, if any.
436   StringRef FirstCppHashFilename;
437 
438   /// List of forward directional labels for diagnosis at the end.
439   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
440 
  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
442   /// Defaults to 1U, meaning Intel.
443   unsigned AssemblerDialect = 1U;
444 
445   /// Are we parsing ms-style inline assembly?
446   bool ParsingMSInlineAsm = false;
447 
448   // Current <...> expression depth.
449   unsigned AngleBracketDepth = 0U;
450 
451   // Number of locals defined.
452   uint16_t LocalCounter = 0;
453 
454 public:
455   MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
456              const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
457   MasmParser(const MasmParser &) = delete;
458   MasmParser &operator=(const MasmParser &) = delete;
459   ~MasmParser() override;
460 
461   bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
462 
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)463   void addDirectiveHandler(StringRef Directive,
464                            ExtensionDirectiveHandler Handler) override {
465     ExtensionDirectiveMap[Directive] = Handler;
466     DirectiveKindMap.try_emplace(Directive, DK_HANDLER_DIRECTIVE);
467   }
468 
addAliasForDirective(StringRef Directive,StringRef Alias)469   void addAliasForDirective(StringRef Directive, StringRef Alias) override {
470     DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
471   }
472 
473   /// @name MCAsmParser Interface
474   /// {
475 
getAssemblerDialect()476   unsigned getAssemblerDialect() override {
477     if (AssemblerDialect == ~0U)
478       return MAI.getAssemblerDialect();
479     else
480       return AssemblerDialect;
481   }
setAssemblerDialect(unsigned i)482   void setAssemblerDialect(unsigned i) override {
483     AssemblerDialect = i;
484   }
485 
486   void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
487   bool Warning(SMLoc L, const Twine &Msg,
488                SMRange Range = std::nullopt) override;
489   bool printError(SMLoc L, const Twine &Msg,
490                   SMRange Range = std::nullopt) override;
491 
492   enum ExpandKind { ExpandMacros, DoNotExpandMacros };
493   const AsmToken &Lex(ExpandKind ExpandNextToken);
Lex()494   const AsmToken &Lex() override { return Lex(ExpandMacros); }
495 
setParsingMSInlineAsm(bool V)496   void setParsingMSInlineAsm(bool V) override {
497     ParsingMSInlineAsm = V;
498     // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
499     // hex integer literals.
500     Lexer.setLexMasmIntegers(V);
501   }
isParsingMSInlineAsm()502   bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
503 
isParsingMasm() const504   bool isParsingMasm() const override { return true; }
505 
506   bool defineMacro(StringRef Name, StringRef Value) override;
507 
508   bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
509   bool lookUpField(StringRef Base, StringRef Member,
510                    AsmFieldInfo &Info) const override;
511 
512   bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
513 
514   bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
515                         unsigned &NumInputs,
516                         SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
517                         SmallVectorImpl<std::string> &Constraints,
518                         SmallVectorImpl<std::string> &Clobbers,
519                         const MCInstrInfo *MII, MCInstPrinter *IP,
520                         MCAsmParserSemaCallback &SI) override;
521 
522   bool parseExpression(const MCExpr *&Res);
523   bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
524   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
525                         AsmTypeInfo *TypeInfo) override;
526   bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
527   bool parseAbsoluteExpression(int64_t &Res) override;
528 
529   /// Parse a floating point expression using the float \p Semantics
530   /// and set \p Res to the value.
531   bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
532 
533   /// Parse an identifier or string (as a quoted identifier)
534   /// and set \p Res to the identifier contents.
535   enum IdentifierPositionKind { StandardPosition, StartOfStatement };
536   bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
parseIdentifier(StringRef & Res)537   bool parseIdentifier(StringRef &Res) override {
538     return parseIdentifier(Res, StandardPosition);
539   }
540   void eatToEndOfStatement() override;
541 
542   bool checkForValidSection() override;
543 
544   /// }
545 
546 private:
547   bool expandMacros();
548   const AsmToken peekTok(bool ShouldSkipSpace = true);
549 
550   bool parseStatement(ParseStatementInfo &Info,
551                       MCAsmParserSemaCallback *SI);
552   bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
553   bool parseCppHashLineFilenameComment(SMLoc L);
554 
555   bool expandMacro(raw_svector_ostream &OS, StringRef Body,
556                    ArrayRef<MCAsmMacroParameter> Parameters,
557                    ArrayRef<MCAsmMacroArgument> A,
558                    const std::vector<std::string> &Locals, SMLoc L);
559 
560   /// Are we inside a macro instantiation?
isInsideMacroInstantiation()561   bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
562 
563   /// Handle entry to macro instantiation.
564   ///
565   /// \param M The macro.
566   /// \param NameLoc Instantiation location.
567   bool handleMacroEntry(
568       const MCAsmMacro *M, SMLoc NameLoc,
569       AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
570 
571   /// Handle invocation of macro function.
572   ///
573   /// \param M The macro.
574   /// \param NameLoc Invocation location.
575   bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
576 
577   /// Handle exit from macro instantiation.
578   void handleMacroExit();
579 
580   /// Extract AsmTokens for a macro argument.
581   bool
582   parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
583                      AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
584 
585   /// Parse all macro arguments for a given macro.
586   bool
587   parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
588                       AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
589 
590   void printMacroInstantiations();
591 
592   bool expandStatement(SMLoc Loc);
593 
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=std::nullopt) const594   void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
595                     SMRange Range = std::nullopt) const {
596     ArrayRef<SMRange> Ranges(Range);
597     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
598   }
599   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
600 
601   bool lookUpField(const StructInfo &Structure, StringRef Member,
602                    AsmFieldInfo &Info) const;
603 
604   /// Enter the specified file. This returns true on failure.
605   bool enterIncludeFile(const std::string &Filename);
606 
607   /// Reset the current lexer position to that given by \p Loc. The
608   /// current token is not set; clients should ensure Lex() is called
609   /// subsequently.
610   ///
611   /// \param InBuffer If not 0, should be the known buffer id that contains the
612   /// location.
613   void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
614                  bool EndStatementAtEOF = true);
615 
616   /// Parse up to a token of kind \p EndTok and return the contents from the
617   /// current token up to (but not including) this token; the current token on
618   /// exit will be either this kind or EOF. Reads through instantiated macro
619   /// functions and text macros.
620   SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
621   std::string parseStringTo(AsmToken::TokenKind EndTok);
622 
623   /// Parse up to the end of statement and return the contents from the current
624   /// token until the end of the statement; the current token on exit will be
625   /// either the EndOfStatement or EOF.
626   StringRef parseStringToEndOfStatement() override;
627 
628   bool parseTextItem(std::string &Data);
629 
630   unsigned getBinOpPrecedence(AsmToken::TokenKind K,
631                               MCBinaryExpr::Opcode &Kind);
632 
633   bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
634   bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
635   bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
636 
637   // Generic (target and platform independent) directive parsing.
638   enum DirectiveKind {
639     DK_NO_DIRECTIVE, // Placeholder
640     DK_HANDLER_DIRECTIVE,
641     DK_ASSIGN,
642     DK_EQU,
643     DK_TEXTEQU,
644     DK_ASCII,
645     DK_ASCIZ,
646     DK_STRING,
647     DK_BYTE,
648     DK_SBYTE,
649     DK_WORD,
650     DK_SWORD,
651     DK_DWORD,
652     DK_SDWORD,
653     DK_FWORD,
654     DK_QWORD,
655     DK_SQWORD,
656     DK_DB,
657     DK_DD,
658     DK_DF,
659     DK_DQ,
660     DK_DW,
661     DK_REAL4,
662     DK_REAL8,
663     DK_REAL10,
664     DK_ALIGN,
665     DK_EVEN,
666     DK_ORG,
667     DK_ENDR,
668     DK_EXTERN,
669     DK_PUBLIC,
670     DK_COMM,
671     DK_COMMENT,
672     DK_INCLUDE,
673     DK_REPEAT,
674     DK_WHILE,
675     DK_FOR,
676     DK_FORC,
677     DK_IF,
678     DK_IFE,
679     DK_IFB,
680     DK_IFNB,
681     DK_IFDEF,
682     DK_IFNDEF,
683     DK_IFDIF,
684     DK_IFDIFI,
685     DK_IFIDN,
686     DK_IFIDNI,
687     DK_ELSEIF,
688     DK_ELSEIFE,
689     DK_ELSEIFB,
690     DK_ELSEIFNB,
691     DK_ELSEIFDEF,
692     DK_ELSEIFNDEF,
693     DK_ELSEIFDIF,
694     DK_ELSEIFDIFI,
695     DK_ELSEIFIDN,
696     DK_ELSEIFIDNI,
697     DK_ELSE,
698     DK_ENDIF,
699 
700     DK_MACRO,
701     DK_EXITM,
702     DK_ENDM,
703     DK_PURGE,
704     DK_ERR,
705     DK_ERRB,
706     DK_ERRNB,
707     DK_ERRDEF,
708     DK_ERRNDEF,
709     DK_ERRDIF,
710     DK_ERRDIFI,
711     DK_ERRIDN,
712     DK_ERRIDNI,
713     DK_ERRE,
714     DK_ERRNZ,
715     DK_ECHO,
716     DK_STRUCT,
717     DK_UNION,
718     DK_ENDS,
719     DK_END,
720     DK_PUSHFRAME,
721     DK_PUSHREG,
722     DK_SAVEREG,
723     DK_SAVEXMM128,
724     DK_SETFRAME,
725     DK_RADIX,
726   };
727 
728   /// Maps directive name --> DirectiveKind enum, for directives parsed by this
729   /// class.
730   StringMap<DirectiveKind> DirectiveKindMap;
731 
732   bool isMacroLikeDirective();
733 
  // Builtin symbols handled directly by this class.
735   enum BuiltinSymbol {
736     BI_NO_SYMBOL, // Placeholder
737     BI_DATE,
738     BI_TIME,
739     BI_VERSION,
740     BI_FILECUR,
741     BI_FILENAME,
742     BI_LINE,
743     BI_CURSEG,
744     BI_CPU,
745     BI_INTERFACE,
746     BI_CODE,
747     BI_DATA,
748     BI_FARDATA,
749     BI_WORDSIZE,
750     BI_CODESIZE,
751     BI_DATASIZE,
752     BI_MODEL,
753     BI_STACK,
754   };
755 
756   /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
757   /// class.
758   StringMap<BuiltinSymbol> BuiltinSymbolMap;
759 
760   const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
761 
762   std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
763                                                       SMLoc StartLoc);
764 
  // Builtin macro functions handled directly by this class.
766   enum BuiltinFunction {
767     BI_NO_FUNCTION, // Placeholder
768     BI_CATSTR,
769   };
770 
771   /// Maps builtin name --> BuiltinFunction enum, for builtins handled by this
772   /// class.
773   StringMap<BuiltinFunction> BuiltinFunctionMap;
774 
775   bool evaluateBuiltinMacroFunction(BuiltinFunction Function, StringRef Name,
776                                     std::string &Res);
777 
778   // ".ascii", ".asciz", ".string"
779   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
780 
781   // "byte", "word", ...
782   bool emitIntValue(const MCExpr *Value, unsigned Size);
783   bool parseScalarInitializer(unsigned Size,
784                               SmallVectorImpl<const MCExpr *> &Values,
785                               unsigned StringPadLength = 0);
786   bool parseScalarInstList(
787       unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
788       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
789   bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
790   bool addIntegralField(StringRef Name, unsigned Size);
791   bool parseDirectiveValue(StringRef IDVal, unsigned Size);
792   bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
793                                 StringRef Name, SMLoc NameLoc);
794 
795   // "real4", "real8", "real10"
796   bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
797   bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
798   bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
799                                size_t Size);
800   bool parseRealInstList(
801       const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
802       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
803   bool parseDirectiveNamedRealValue(StringRef TypeName,
804                                     const fltSemantics &Semantics,
805                                     unsigned Size, StringRef Name,
806                                     SMLoc NameLoc);
807 
808   bool parseOptionalAngleBracketOpen();
809   bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
810 
811   bool parseFieldInitializer(const FieldInfo &Field,
812                              FieldInitializer &Initializer);
813   bool parseFieldInitializer(const FieldInfo &Field,
814                              const IntFieldInfo &Contents,
815                              FieldInitializer &Initializer);
816   bool parseFieldInitializer(const FieldInfo &Field,
817                              const RealFieldInfo &Contents,
818                              FieldInitializer &Initializer);
819   bool parseFieldInitializer(const FieldInfo &Field,
820                              const StructFieldInfo &Contents,
821                              FieldInitializer &Initializer);
822 
823   bool parseStructInitializer(const StructInfo &Structure,
824                               StructInitializer &Initializer);
825   bool parseStructInstList(
826       const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
827       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
828 
829   bool emitFieldValue(const FieldInfo &Field);
830   bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
831   bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
832   bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
833 
834   bool emitFieldInitializer(const FieldInfo &Field,
835                             const FieldInitializer &Initializer);
836   bool emitFieldInitializer(const FieldInfo &Field,
837                             const IntFieldInfo &Contents,
838                             const IntFieldInfo &Initializer);
839   bool emitFieldInitializer(const FieldInfo &Field,
840                             const RealFieldInfo &Contents,
841                             const RealFieldInfo &Initializer);
842   bool emitFieldInitializer(const FieldInfo &Field,
843                             const StructFieldInfo &Contents,
844                             const StructFieldInfo &Initializer);
845 
846   bool emitStructInitializer(const StructInfo &Structure,
847                              const StructInitializer &Initializer);
848 
849   // User-defined types (structs, unions):
850   bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
851   bool addStructField(StringRef Name, const StructInfo &Structure);
852   bool parseDirectiveStructValue(const StructInfo &Structure,
853                                  StringRef Directive, SMLoc DirLoc);
854   bool parseDirectiveNamedStructValue(const StructInfo &Structure,
855                                       StringRef Directive, SMLoc DirLoc,
856                                       StringRef Name);
857 
858   // "=", "equ", "textequ"
859   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
860                             DirectiveKind DirKind, SMLoc NameLoc);
861 
862   bool parseDirectiveOrg(); // "org"
863 
864   bool emitAlignTo(int64_t Alignment);
865   bool parseDirectiveAlign();  // "align"
866   bool parseDirectiveEven();   // "even"
867 
868   // macro directives
869   bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
870   bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
871                                std::string &Value);
872   bool parseDirectiveEndMacro(StringRef Directive);
873   bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
874 
875   bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
876                             StringRef Name, SMLoc NameLoc);
877   bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
878   bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
879   bool parseDirectiveNestedEnds();
880 
881   bool parseDirectiveExtern();
882 
883   /// Parse a directive like ".globl" which accepts a single symbol (which
884   /// should be a label or an external).
885   bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
886 
887   bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
888 
889   bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
890 
891   bool parseDirectiveInclude(); // "include"
892 
893   // "if" or "ife"
894   bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
895   // "ifb" or "ifnb", depending on ExpectBlank.
896   bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
897   // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
898   // CaseInsensitive.
899   bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
900                            bool CaseInsensitive);
901   // "ifdef" or "ifndef", depending on expect_defined
902   bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
903   // "elseif" or "elseife"
904   bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
905   // "elseifb" or "elseifnb", depending on ExpectBlank.
906   bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
907   // ".elseifdef" or ".elseifndef", depending on expect_defined
908   bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
909   // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
910   // ExpectEqual and CaseInsensitive.
911   bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
912                                bool CaseInsensitive);
913   bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
914   bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"
915   bool parseEscapedString(std::string &Data) override;
916   bool parseAngleBracketString(std::string &Data) override;
917 
918   // Macro-like directives
919   MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
920   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
921                                 raw_svector_ostream &OS);
922   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
923                                 SMLoc ExitLoc, raw_svector_ostream &OS);
924   bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
925   bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
926   bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
927   bool parseDirectiveWhile(SMLoc DirectiveLoc);
928 
929   // "_emit" or "__emit"
930   bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
931                             size_t Len);
932 
933   // "align"
934   bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
935 
936   // "end"
937   bool parseDirectiveEnd(SMLoc DirectiveLoc);
938 
939   // ".err"
940   bool parseDirectiveError(SMLoc DirectiveLoc);
941   // ".errb" or ".errnb", depending on ExpectBlank.
942   bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
943   // ".errdef" or ".errndef", depending on ExpectDefined.
944   bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
945   // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
946   // and CaseInsensitive.
947   bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
948                                 bool CaseInsensitive);
949   // ".erre" or ".errnz", depending on ExpectZero.
950   bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
951 
952   // ".radix"
953   bool parseDirectiveRadix(SMLoc DirectiveLoc);
954 
955   // "echo"
956   bool parseDirectiveEcho(SMLoc DirectiveLoc);
957 
958   void initializeDirectiveKindMap();
959   void initializeBuiltinSymbolMaps();
960 };
961 
962 } // end anonymous namespace
963 
// Declarations of entities that are defined in other translation units.
964 namespace llvm {
965 
// Command-line option declared here and defined elsewhere.
// NOTE(review): by its name this bounds macro nesting depth -- confirm at the
// definition.
966 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
967 
// Factory for the COFF platform parser; the MasmParser constructor hands its
// result to PlatformParser.reset().
968 extern MCAsmParserExtension *createCOFFMasmParser();
969 
970 } // end namespace llvm
971 
// NOTE(review): no use of this constant is visible in this part of the file.
972 enum { DEFAULT_ADDRSPACE = 0 };
973 
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)974 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
975                        const MCAsmInfo &MAI, struct tm TM, unsigned CB)
976     : MCAsmParser(Ctx, Out, SM, MAI), CurBuffer(CB ? CB : SM.getMainFileID()),
977       TM(TM) {
978   HadError = false;
979   // Save the old handler.
980   SavedDiagHandler = SrcMgr.getDiagHandler();
981   SavedDiagContext = SrcMgr.getDiagContext();
982   // Set our own handler which calls the saved handler.
983   SrcMgr.setDiagHandler(DiagHandler, this);
984   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
985   EndStatementAtEOFStack.push_back(true);
986 
987   // Initialize the platform / file format parser.
988   switch (Ctx.getObjectFileType()) {
989   case MCContext::IsCOFF:
990     PlatformParser.reset(createCOFFMasmParser());
991     break;
992   default:
993     report_fatal_error("llvm-ml currently supports only COFF output.");
994     break;
995   }
996 
997   initializeDirectiveKindMap();
998   PlatformParser->Initialize(*this);
999   initializeBuiltinSymbolMaps();
1000 
1001   NumOfMacroInstantiations = 0;
1002 }
1003 
~MasmParser()1004 MasmParser::~MasmParser() {
1005   assert((HadError || ActiveMacros.empty()) &&
1006          "Unexpected active macro instantiation!");
1007 
1008   // Restore the saved diagnostics handler and context for use during
1009   // finalization.
1010   SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1011 }
1012 
printMacroInstantiations()1013 void MasmParser::printMacroInstantiations() {
1014   // Print the active macro instantiation stack.
1015   for (std::vector<MacroInstantiation *>::const_reverse_iterator
1016            it = ActiveMacros.rbegin(),
1017            ie = ActiveMacros.rend();
1018        it != ie; ++it)
1019     printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1020                  "while in macro instantiation");
1021 }
1022 
Note(SMLoc L,const Twine & Msg,SMRange Range)1023 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1024   printPendingErrors();
1025   printMessage(L, SourceMgr::DK_Note, Msg, Range);
1026   printMacroInstantiations();
1027 }
1028 
Warning(SMLoc L,const Twine & Msg,SMRange Range)1029 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1030   if (getTargetParser().getTargetOptions().MCNoWarn)
1031     return false;
1032   if (getTargetParser().getTargetOptions().MCFatalWarnings)
1033     return Error(L, Msg, Range);
1034   printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1035   printMacroInstantiations();
1036   return false;
1037 }
1038 
printError(SMLoc L,const Twine & Msg,SMRange Range)1039 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1040   HadError = true;
1041   printMessage(L, SourceMgr::DK_Error, Msg, Range);
1042   printMacroInstantiations();
1043   return true;
1044 }
1045 
enterIncludeFile(const std::string & Filename)1046 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1047   std::string IncludedFile;
1048   unsigned NewBuf =
1049       SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1050   if (!NewBuf)
1051     return true;
1052 
1053   CurBuffer = NewBuf;
1054   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1055   EndStatementAtEOFStack.push_back(true);
1056   return false;
1057 }
1058 
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1059 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1060                            bool EndStatementAtEOF) {
1061   CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1062   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1063                   Loc.getPointer(), EndStatementAtEOF);
1064 }
1065 
expandMacros()1066 bool MasmParser::expandMacros() {
1067   const AsmToken &Tok = getTok();
1068   const std::string IDLower = Tok.getIdentifier().lower();
1069 
1070   const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1071   if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1072     // This is a macro function invocation; expand it in place.
1073     const SMLoc MacroLoc = Tok.getLoc();
1074     const StringRef MacroId = Tok.getIdentifier();
1075     Lexer.Lex();
1076     if (handleMacroInvocation(M, MacroLoc)) {
1077       Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1078       Lexer.Lex();
1079     }
1080     return false;
1081   }
1082 
1083   std::optional<std::string> ExpandedValue;
1084 
1085   if (auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1086       BuiltinIt != BuiltinSymbolMap.end()) {
1087     ExpandedValue =
1088         evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1089   } else if (auto BuiltinFuncIt = BuiltinFunctionMap.find(IDLower);
1090              BuiltinFuncIt != BuiltinFunctionMap.end()) {
1091     StringRef Name;
1092     if (parseIdentifier(Name)) {
1093       return true;
1094     }
1095     std::string Res;
1096     if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), Name, Res)) {
1097       return true;
1098     }
1099     ExpandedValue = Res;
1100   } else if (auto VarIt = Variables.find(IDLower);
1101              VarIt != Variables.end() && VarIt->getValue().IsText) {
1102     ExpandedValue = VarIt->getValue().TextValue;
1103   }
1104 
1105   if (!ExpandedValue)
1106     return true;
1107   std::unique_ptr<MemoryBuffer> Instantiation =
1108       MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1109 
1110   // Jump to the macro instantiation and prime the lexer.
1111   CurBuffer =
1112       SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1113   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1114                   /*EndStatementAtEOF=*/false);
1115   EndStatementAtEOFStack.push_back(false);
1116   Lexer.Lex();
1117   return false;
1118 }
1119 
Lex(ExpandKind ExpandNextToken)1120 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1121   if (Lexer.getTok().is(AsmToken::Error))
1122     Error(Lexer.getErrLoc(), Lexer.getErr());
1123   bool StartOfStatement = false;
1124 
1125   // if it's a end of statement with a comment in it
1126   if (getTok().is(AsmToken::EndOfStatement)) {
1127     // if this is a line comment output it.
1128     if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1129         getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1130       Out.addExplicitComment(Twine(getTok().getString()));
1131     StartOfStatement = true;
1132   }
1133 
1134   const AsmToken *tok = &Lexer.Lex();
1135 
1136   while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1137     if (StartOfStatement) {
1138       AsmToken NextTok;
1139       MutableArrayRef<AsmToken> Buf(NextTok);
1140       size_t ReadCount = Lexer.peekTokens(Buf);
1141       if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1142           (NextTok.getString().equals_insensitive("equ") ||
1143            NextTok.getString().equals_insensitive("textequ"))) {
1144         // This looks like an EQU or TEXTEQU directive; don't expand the
1145         // identifier, allowing for redefinitions.
1146         break;
1147       }
1148     }
1149     if (expandMacros())
1150       break;
1151   }
1152 
1153   // Parse comments here to be deferred until end of next statement.
1154   while (tok->is(AsmToken::Comment)) {
1155     if (MAI.preserveAsmComments())
1156       Out.addExplicitComment(Twine(tok->getString()));
1157     tok = &Lexer.Lex();
1158   }
1159 
1160   // Recognize and bypass line continuations.
1161   while (tok->is(AsmToken::BackSlash) &&
1162          peekTok().is(AsmToken::EndOfStatement)) {
1163     // Eat both the backslash and the end of statement.
1164     Lexer.Lex();
1165     tok = &Lexer.Lex();
1166   }
1167 
1168   if (tok->is(AsmToken::Eof)) {
1169     // If this is the end of an included file, pop the parent file off the
1170     // include stack.
1171     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1172     if (ParentIncludeLoc != SMLoc()) {
1173       EndStatementAtEOFStack.pop_back();
1174       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1175       return Lex();
1176     }
1177     EndStatementAtEOFStack.pop_back();
1178     assert(EndStatementAtEOFStack.empty());
1179   }
1180 
1181   return *tok;
1182 }
1183 
peekTok(bool ShouldSkipSpace)1184 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1185   AsmToken Tok;
1186 
1187   MutableArrayRef<AsmToken> Buf(Tok);
1188   size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1189 
1190   if (ReadCount == 0) {
1191     // If this is the end of an included file, pop the parent file off the
1192     // include stack.
1193     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1194     if (ParentIncludeLoc != SMLoc()) {
1195       EndStatementAtEOFStack.pop_back();
1196       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1197       return peekTok(ShouldSkipSpace);
1198     }
1199     EndStatementAtEOFStack.pop_back();
1200     assert(EndStatementAtEOFStack.empty());
1201   }
1202 
1203   assert(ReadCount == 1);
1204   return Tok;
1205 }
1206 
Run(bool NoInitialTextSection,bool NoFinalize)1207 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1208   // Create the initial section, if requested.
1209   if (!NoInitialTextSection)
1210     Out.initSections(false, getTargetParser().getSTI());
1211 
1212   // Prime the lexer.
1213   Lex();
1214 
1215   HadError = false;
1216   AsmCond StartingCondState = TheCondState;
1217   SmallVector<AsmRewrite, 4> AsmStrRewrites;
1218 
1219   // While we have input, parse each statement.
1220   while (Lexer.isNot(AsmToken::Eof) ||
1221          SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1222     // Skip through the EOF at the end of an inclusion.
1223     if (Lexer.is(AsmToken::Eof))
1224       Lex();
1225 
1226     ParseStatementInfo Info(&AsmStrRewrites);
1227     bool HasError = parseStatement(Info, nullptr);
1228 
1229     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1230     // for printing ErrMsg via Lex() only if no (presumably better) parser error
1231     // exists.
1232     if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error))
1233       Lex();
1234 
1235     // parseStatement returned true so may need to emit an error.
1236     printPendingErrors();
1237 
1238     // Skipping to the next line if needed.
1239     if (HasError && !getLexer().justConsumedEOL())
1240       eatToEndOfStatement();
1241   }
1242 
1243   printPendingErrors();
1244 
1245   // All errors should have been emitted.
1246   assert(!hasPendingError() && "unexpected error from parseStatement");
1247 
1248   if (TheCondState.TheCond != StartingCondState.TheCond ||
1249       TheCondState.Ignore != StartingCondState.Ignore)
1250     printError(getTok().getLoc(), "unmatched .ifs or .elses");
1251 
1252   // Check to see that all assembler local symbols were actually defined.
1253   // Targets that don't do subsections via symbols may not want this, though,
1254   // so conservatively exclude them. Only do this if we're finalizing, though,
1255   // as otherwise we won't necessarily have seen everything yet.
1256   if (!NoFinalize) {
1257     // Temporary symbols like the ones for directional jumps don't go in the
1258     // symbol table. They also need to be diagnosed in all (final) cases.
1259     for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1260       if (std::get<2>(LocSym)->isUndefined()) {
1261         // Reset the state of any "# line file" directives we've seen to the
1262         // context as it was at the diagnostic site.
1263         CppHashInfo = std::get<1>(LocSym);
1264         printError(std::get<0>(LocSym), "directional label undefined");
1265       }
1266     }
1267   }
1268 
1269   // Finalize the output stream if there are no errors and if the client wants
1270   // us to.
1271   if (!HadError && !NoFinalize)
1272     Out.finish(Lexer.getLoc());
1273 
1274   return HadError || getContext().hadError();
1275 }
1276 
checkForValidSection()1277 bool MasmParser::checkForValidSection() {
1278   if (!ParsingMSInlineAsm && !(getStreamer().getCurrentFragment() &&
1279                                getStreamer().getCurrentSectionOnly())) {
1280     Out.initSections(false, getTargetParser().getSTI());
1281     return Error(getTok().getLoc(),
1282                  "expected section directive before assembly directive");
1283   }
1284   return false;
1285 }
1286 
1287 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1288 void MasmParser::eatToEndOfStatement() {
1289   while (Lexer.isNot(AsmToken::EndOfStatement)) {
1290     if (Lexer.is(AsmToken::Eof)) {
1291       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1292       if (ParentIncludeLoc == SMLoc()) {
1293         break;
1294       }
1295 
1296       EndStatementAtEOFStack.pop_back();
1297       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1298     }
1299 
1300     Lexer.Lex();
1301   }
1302 
1303   // Eat EOL.
1304   if (Lexer.is(AsmToken::EndOfStatement))
1305     Lexer.Lex();
1306 }
1307 
1308 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1309 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1310   SmallVector<StringRef, 1> Refs;
1311   const char *Start = getTok().getLoc().getPointer();
1312   while (Lexer.isNot(EndTok)) {
1313     if (Lexer.is(AsmToken::Eof)) {
1314       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1315       if (ParentIncludeLoc == SMLoc()) {
1316         break;
1317       }
1318       Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1319 
1320       EndStatementAtEOFStack.pop_back();
1321       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1322       Lexer.Lex();
1323       Start = getTok().getLoc().getPointer();
1324     } else {
1325       Lexer.Lex();
1326     }
1327   }
1328   Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1329   return Refs;
1330 }
1331 
parseStringTo(AsmToken::TokenKind EndTok)1332 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1333   SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1334   std::string Str;
1335   for (StringRef S : Refs) {
1336     Str.append(S.str());
1337   }
1338   return Str;
1339 }
1340 
parseStringToEndOfStatement()1341 StringRef MasmParser::parseStringToEndOfStatement() {
1342   const char *Start = getTok().getLoc().getPointer();
1343 
1344   while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1345     Lexer.Lex();
1346 
1347   const char *End = getTok().getLoc().getPointer();
1348   return StringRef(Start, End - Start);
1349 }
1350 
1351 /// Parse a paren expression and return it.
1352 /// NOTE: This assumes the leading '(' has already been consumed.
1353 ///
1354 /// parenexpr ::= expr)
1355 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1356 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1357   if (parseExpression(Res))
1358     return true;
1359   EndLoc = Lexer.getTok().getEndLoc();
1360   return parseRParen();
1361 }
1362 
1363 /// Parse a bracket expression and return it.
1364 /// NOTE: This assumes the leading '[' has already been consumed.
1365 ///
1366 /// bracketexpr ::= expr]
1367 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1368 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1369   if (parseExpression(Res))
1370     return true;
1371   EndLoc = getTok().getEndLoc();
1372   if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1373     return true;
1374   return false;
1375 }
1376 
1377 /// Parse a primary expression and return it.
1378 ///  primaryexpr ::= (parenexpr
1379 ///  primaryexpr ::= symbol
1380 ///  primaryexpr ::= number
1381 ///  primaryexpr ::= '.'
1382 ///  primaryexpr ::= ~,+,-,'not' primaryexpr
1383 ///  primaryexpr ::= string
1384 ///          (a string is interpreted as a 64-bit number in big-endian base-256)
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc,AsmTypeInfo * TypeInfo)1385 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1386                                   AsmTypeInfo *TypeInfo) {
1387   SMLoc FirstTokenLoc = getLexer().getLoc();
1388   AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1389   switch (FirstTokenKind) {
1390   default:
1391     return TokError("unknown token in expression");
1392   // If we have an error assume that we've already handled it.
1393   case AsmToken::Error:
1394     return true;
1395   case AsmToken::Exclaim:
1396     Lex(); // Eat the operator.
1397     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1398       return true;
1399     Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1400     return false;
1401   case AsmToken::Dollar:
1402   case AsmToken::At:
1403   case AsmToken::Identifier: {
1404     StringRef Identifier;
1405     if (parseIdentifier(Identifier)) {
1406       // We may have failed but $ may be a valid token.
1407       if (getTok().is(AsmToken::Dollar)) {
1408         if (Lexer.getMAI().getDollarIsPC()) {
1409           Lex();
1410           // This is a '$' reference, which references the current PC.  Emit a
1411           // temporary label to the streamer and refer to it.
1412           MCSymbol *Sym = Ctx.createTempSymbol();
1413           Out.emitLabel(Sym);
1414           Res = MCSymbolRefExpr::create(Sym, getContext());
1415           EndLoc = FirstTokenLoc;
1416           return false;
1417         }
1418         return Error(FirstTokenLoc, "invalid token in expression");
1419       }
1420     }
1421     // Parse named bitwise negation.
1422     if (Identifier.equals_insensitive("not")) {
1423       if (parsePrimaryExpr(Res, EndLoc, nullptr))
1424         return true;
1425       Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1426       return false;
1427     }
1428     // Parse directional local label references.
1429     if (Identifier.equals_insensitive("@b") ||
1430         Identifier.equals_insensitive("@f")) {
1431       bool Before = Identifier.equals_insensitive("@b");
1432       MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1433       if (Before && Sym->isUndefined())
1434         return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1435       Res = MCSymbolRefExpr::create(Sym, getContext());
1436       return false;
1437     }
1438 
1439     EndLoc = SMLoc::getFromPointer(Identifier.end());
1440 
1441     // This is a symbol reference.
1442     StringRef SymbolName = Identifier;
1443     if (SymbolName.empty())
1444       return Error(getLexer().getLoc(), "expected a symbol reference");
1445 
1446     // Find the field offset if used.
1447     AsmFieldInfo Info;
1448     auto Split = SymbolName.split('.');
1449     if (Split.second.empty()) {
1450     } else {
1451       SymbolName = Split.first;
1452       if (lookUpField(SymbolName, Split.second, Info)) {
1453         std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1454         StringRef Base = BaseMember.first, Member = BaseMember.second;
1455         lookUpField(Base, Member, Info);
1456       } else if (Structs.count(SymbolName.lower())) {
1457         // This is actually a reference to a field offset.
1458         Res = MCConstantExpr::create(Info.Offset, getContext());
1459         return false;
1460       }
1461     }
1462 
1463     MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1464     if (!Sym) {
1465       // If this is a built-in numeric value, treat it as a constant.
1466       auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1467       const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1468                                        ? BI_NO_SYMBOL
1469                                        : BuiltinIt->getValue();
1470       if (Symbol != BI_NO_SYMBOL) {
1471         const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1472         if (Value) {
1473           Res = Value;
1474           return false;
1475         }
1476       }
1477 
1478       // Variables use case-insensitive symbol names; if this is a variable, we
1479       // find the symbol using its canonical name.
1480       auto VarIt = Variables.find(SymbolName.lower());
1481       if (VarIt != Variables.end())
1482         SymbolName = VarIt->second.Name;
1483       Sym = getContext().parseSymbol(SymbolName);
1484     }
1485 
1486     // If this is an absolute variable reference, substitute it now to preserve
1487     // semantics in the face of reassignment.
1488     if (Sym->isVariable()) {
1489       auto V = Sym->getVariableValue();
1490       bool DoInline = isa<MCConstantExpr>(V);
1491       if (auto TV = dyn_cast<MCTargetExpr>(V))
1492         DoInline = TV->inlineAssignedExpr();
1493       if (DoInline) {
1494         Res = Sym->getVariableValue();
1495         return false;
1496       }
1497     }
1498 
1499     // Otherwise create a symbol ref.
1500     const MCExpr *SymRef =
1501         MCSymbolRefExpr::create(Sym, getContext(), FirstTokenLoc);
1502     if (Info.Offset) {
1503       Res = MCBinaryExpr::create(
1504           MCBinaryExpr::Add, SymRef,
1505           MCConstantExpr::create(Info.Offset, getContext()), getContext());
1506     } else {
1507       Res = SymRef;
1508     }
1509     if (TypeInfo) {
1510       if (Info.Type.Name.empty()) {
1511         auto TypeIt = KnownType.find(Identifier.lower());
1512         if (TypeIt != KnownType.end()) {
1513           Info.Type = TypeIt->second;
1514         }
1515       }
1516 
1517       *TypeInfo = Info.Type;
1518     }
1519     return false;
1520   }
1521   case AsmToken::BigNum:
1522     return TokError("literal value out of range for directive");
1523   case AsmToken::Integer: {
1524     int64_t IntVal = getTok().getIntVal();
1525     Res = MCConstantExpr::create(IntVal, getContext());
1526     EndLoc = Lexer.getTok().getEndLoc();
1527     Lex(); // Eat token.
1528     return false;
1529   }
1530   case AsmToken::String: {
1531     // MASM strings (used as constants) are interpreted as big-endian base-256.
1532     SMLoc ValueLoc = getTok().getLoc();
1533     std::string Value;
1534     if (parseEscapedString(Value))
1535       return true;
1536     if (Value.size() > 8)
1537       return Error(ValueLoc, "literal value out of range");
1538     uint64_t IntValue = 0;
1539     for (const unsigned char CharVal : Value)
1540       IntValue = (IntValue << 8) | CharVal;
1541     Res = MCConstantExpr::create(IntValue, getContext());
1542     return false;
1543   }
1544   case AsmToken::Real: {
1545     APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1546     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1547     Res = MCConstantExpr::create(IntVal, getContext());
1548     EndLoc = Lexer.getTok().getEndLoc();
1549     Lex(); // Eat token.
1550     return false;
1551   }
1552   case AsmToken::Dot: {
1553     // This is a '.' reference, which references the current PC.  Emit a
1554     // temporary label to the streamer and refer to it.
1555     MCSymbol *Sym = Ctx.createTempSymbol();
1556     Out.emitLabel(Sym);
1557     Res = MCSymbolRefExpr::create(Sym, getContext());
1558     EndLoc = Lexer.getTok().getEndLoc();
1559     Lex(); // Eat identifier.
1560     return false;
1561   }
1562   case AsmToken::LParen:
1563     Lex(); // Eat the '('.
1564     return parseParenExpr(Res, EndLoc);
1565   case AsmToken::LBrac:
1566     if (!PlatformParser->HasBracketExpressions())
1567       return TokError("brackets expression not supported on this target");
1568     Lex(); // Eat the '['.
1569     return parseBracketExpr(Res, EndLoc);
1570   case AsmToken::Minus:
1571     Lex(); // Eat the operator.
1572     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1573       return true;
1574     Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1575     return false;
1576   case AsmToken::Plus:
1577     Lex(); // Eat the operator.
1578     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1579       return true;
1580     Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1581     return false;
1582   case AsmToken::Tilde:
1583     Lex(); // Eat the operator.
1584     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1585       return true;
1586     Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1587     return false;
1588   }
1589 }
1590 
parseExpression(const MCExpr * & Res)1591 bool MasmParser::parseExpression(const MCExpr *&Res) {
1592   SMLoc EndLoc;
1593   return parseExpression(Res, EndLoc);
1594 }
1595 
1596 /// This function checks if the next token is <string> type or arithmetic.
1597 /// string that begin with character '<' must end with character '>'.
1598 /// otherwise it is arithmetics.
1599 /// If the function returns a 'true' value,
1600 /// the End argument will be filled with the last location pointed to the '>'
1601 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1602 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1603   assert((StrLoc.getPointer() != nullptr) &&
1604          "Argument to the function cannot be a NULL value");
1605   const char *CharPtr = StrLoc.getPointer();
1606   while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1607          (*CharPtr != '\0')) {
1608     if (*CharPtr == '!')
1609       CharPtr++;
1610     CharPtr++;
1611   }
1612   if (*CharPtr == '>') {
1613     EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1614     return true;
1615   }
1616   return false;
1617 }
1618 
1619 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1620 static std::string angleBracketString(StringRef BracketContents) {
1621   std::string Res;
1622   for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1623     if (BracketContents[Pos] == '!')
1624       Pos++;
1625     Res += BracketContents[Pos];
1626   }
1627   return Res;
1628 }
1629 
1630 /// Parse an expression and return it.
1631 ///
1632 ///  expr ::= expr &&,|| expr               -> lowest.
1633 ///  expr ::= expr |,^,&,! expr
1634 ///  expr ::= expr ==,!=,<>,<,<=,>,>= expr
1635 ///  expr ::= expr <<,>> expr
1636 ///  expr ::= expr +,- expr
1637 ///  expr ::= expr *,/,% expr               -> highest.
1638 ///  expr ::= primaryexpr
1639 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1640 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1641   // Parse the expression.
1642   Res = nullptr;
1643   if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1644       parseBinOpRHS(1, Res, EndLoc))
1645     return true;
1646 
1647   // Try to constant fold it up front, if possible. Do not exploit
1648   // assembler here.
1649   int64_t Value;
1650   if (Res->evaluateAsAbsolute(Value))
1651     Res = MCConstantExpr::create(Value, getContext());
1652 
1653   return false;
1654 }
1655 
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1656 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1657   Res = nullptr;
1658   return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1659 }
1660 
parseAbsoluteExpression(int64_t & Res)1661 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1662   const MCExpr *Expr;
1663 
1664   SMLoc StartLoc = Lexer.getLoc();
1665   if (parseExpression(Expr))
1666     return true;
1667 
1668   if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1669     return Error(StartLoc, "expected absolute expression");
1670 
1671   return false;
1672 }
1673 
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1674 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1675                                       MCBinaryExpr::Opcode &Kind,
1676                                       bool ShouldUseLogicalShr,
1677                                       bool EndExpressionAtGreater) {
1678   switch (K) {
1679   default:
1680     return 0; // not a binop.
1681 
1682   // Lowest Precedence: &&, ||
1683   case AsmToken::AmpAmp:
1684     Kind = MCBinaryExpr::LAnd;
1685     return 2;
1686   case AsmToken::PipePipe:
1687     Kind = MCBinaryExpr::LOr;
1688     return 1;
1689 
1690   // Low Precedence: ==, !=, <>, <, <=, >, >=
1691   case AsmToken::EqualEqual:
1692     Kind = MCBinaryExpr::EQ;
1693     return 3;
1694   case AsmToken::ExclaimEqual:
1695   case AsmToken::LessGreater:
1696     Kind = MCBinaryExpr::NE;
1697     return 3;
1698   case AsmToken::Less:
1699     Kind = MCBinaryExpr::LT;
1700     return 3;
1701   case AsmToken::LessEqual:
1702     Kind = MCBinaryExpr::LTE;
1703     return 3;
1704   case AsmToken::Greater:
1705     if (EndExpressionAtGreater)
1706       return 0;
1707     Kind = MCBinaryExpr::GT;
1708     return 3;
1709   case AsmToken::GreaterEqual:
1710     Kind = MCBinaryExpr::GTE;
1711     return 3;
1712 
1713   // Low Intermediate Precedence: +, -
1714   case AsmToken::Plus:
1715     Kind = MCBinaryExpr::Add;
1716     return 4;
1717   case AsmToken::Minus:
1718     Kind = MCBinaryExpr::Sub;
1719     return 4;
1720 
1721   // High Intermediate Precedence: |, &, ^
1722   case AsmToken::Pipe:
1723     Kind = MCBinaryExpr::Or;
1724     return 5;
1725   case AsmToken::Caret:
1726     Kind = MCBinaryExpr::Xor;
1727     return 5;
1728   case AsmToken::Amp:
1729     Kind = MCBinaryExpr::And;
1730     return 5;
1731 
1732   // Highest Precedence: *, /, %, <<, >>
1733   case AsmToken::Star:
1734     Kind = MCBinaryExpr::Mul;
1735     return 6;
1736   case AsmToken::Slash:
1737     Kind = MCBinaryExpr::Div;
1738     return 6;
1739   case AsmToken::Percent:
1740     Kind = MCBinaryExpr::Mod;
1741     return 6;
1742   case AsmToken::LessLess:
1743     Kind = MCBinaryExpr::Shl;
1744     return 6;
1745   case AsmToken::GreaterGreater:
1746     if (EndExpressionAtGreater)
1747       return 0;
1748     Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
1749     return 6;
1750   }
1751 }
1752 
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)1753 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
1754                                         MCBinaryExpr::Opcode &Kind) {
1755   bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
1756   return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
1757                                AngleBracketDepth > 0);
1758 }
1759 
1760 /// Parse all binary operators with precedence >= 'Precedence'.
1761 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)1762 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
1763                                SMLoc &EndLoc) {
1764   SMLoc StartLoc = Lexer.getLoc();
1765   while (true) {
1766     AsmToken::TokenKind TokKind = Lexer.getKind();
1767     if (Lexer.getKind() == AsmToken::Identifier) {
1768       TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
1769                     .CaseLower("and", AsmToken::Amp)
1770                     .CaseLower("not", AsmToken::Exclaim)
1771                     .CaseLower("or", AsmToken::Pipe)
1772                     .CaseLower("xor", AsmToken::Caret)
1773                     .CaseLower("shl", AsmToken::LessLess)
1774                     .CaseLower("shr", AsmToken::GreaterGreater)
1775                     .CaseLower("eq", AsmToken::EqualEqual)
1776                     .CaseLower("ne", AsmToken::ExclaimEqual)
1777                     .CaseLower("lt", AsmToken::Less)
1778                     .CaseLower("le", AsmToken::LessEqual)
1779                     .CaseLower("gt", AsmToken::Greater)
1780                     .CaseLower("ge", AsmToken::GreaterEqual)
1781                     .Default(TokKind);
1782     }
1783     MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
1784     unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
1785 
1786     // If the next token is lower precedence than we are allowed to eat, return
1787     // successfully with what we ate already.
1788     if (TokPrec < Precedence)
1789       return false;
1790 
1791     Lex();
1792 
1793     // Eat the next primary expression.
1794     const MCExpr *RHS;
1795     if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
1796       return true;
1797 
1798     // If BinOp binds less tightly with RHS than the operator after RHS, let
1799     // the pending operator take RHS as its LHS.
1800     MCBinaryExpr::Opcode Dummy;
1801     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
1802     if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
1803       return true;
1804 
1805     // Merge LHS and RHS according to operator.
1806     Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
1807   }
1808 }
1809 
1810 /// ParseStatement:
1811 ///   ::= % statement
1812 ///   ::= EndOfStatement
1813 ///   ::= Label* Directive ...Operands... EndOfStatement
1814 ///   ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)1815 bool MasmParser::parseStatement(ParseStatementInfo &Info,
1816                                 MCAsmParserSemaCallback *SI) {
1817   assert(!hasPendingError() && "parseStatement started with pending error");
1818   // Eat initial spaces and comments.
1819   while (Lexer.is(AsmToken::Space))
1820     Lex();
1821   if (Lexer.is(AsmToken::EndOfStatement)) {
1822     // If this is a line comment we can drop it safely.
1823     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
1824         getTok().getString().front() == '\n')
1825       Out.addBlankLine();
1826     Lex();
1827     return false;
1828   }
1829 
1830   // If preceded by an expansion operator, first expand all text macros and
1831   // macro functions.
1832   if (getTok().is(AsmToken::Percent)) {
1833     SMLoc ExpansionLoc = getTok().getLoc();
1834     if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
1835       return true;
1836   }
1837 
1838   // Statements always start with an identifier, unless we're dealing with a
1839   // processor directive (.386, .686, etc.) that lexes as a real.
1840   AsmToken ID = getTok();
1841   SMLoc IDLoc = ID.getLoc();
1842   StringRef IDVal;
1843   if (Lexer.is(AsmToken::HashDirective))
1844     return parseCppHashLineFilenameComment(IDLoc);
1845   if (Lexer.is(AsmToken::Dot)) {
1846     // Treat '.' as a valid identifier in this context.
1847     Lex();
1848     IDVal = ".";
1849   } else if (Lexer.is(AsmToken::Real)) {
1850     // Treat ".<number>" as a valid identifier in this context.
1851     IDVal = getTok().getString();
1852     Lex(); // always eat a token
1853     if (!IDVal.starts_with("."))
1854       return Error(IDLoc, "unexpected token at start of statement");
1855   } else if (parseIdentifier(IDVal, StartOfStatement)) {
1856     if (!TheCondState.Ignore) {
1857       Lex(); // always eat a token
1858       return Error(IDLoc, "unexpected token at start of statement");
1859     }
1860     IDVal = "";
1861   }
1862 
1863   // Handle conditional assembly here before checking for skipping.  We
1864   // have to do this so that .endif isn't skipped in a ".if 0" block for
1865   // example.
1866   StringMap<DirectiveKind>::const_iterator DirKindIt =
1867       DirectiveKindMap.find(IDVal.lower());
1868   DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
1869                               ? DK_NO_DIRECTIVE
1870                               : DirKindIt->getValue();
1871   switch (DirKind) {
1872   default:
1873     break;
1874   case DK_IF:
1875   case DK_IFE:
1876     return parseDirectiveIf(IDLoc, DirKind);
1877   case DK_IFB:
1878     return parseDirectiveIfb(IDLoc, true);
1879   case DK_IFNB:
1880     return parseDirectiveIfb(IDLoc, false);
1881   case DK_IFDEF:
1882     return parseDirectiveIfdef(IDLoc, true);
1883   case DK_IFNDEF:
1884     return parseDirectiveIfdef(IDLoc, false);
1885   case DK_IFDIF:
1886     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
1887                                /*CaseInsensitive=*/false);
1888   case DK_IFDIFI:
1889     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
1890                                /*CaseInsensitive=*/true);
1891   case DK_IFIDN:
1892     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
1893                                /*CaseInsensitive=*/false);
1894   case DK_IFIDNI:
1895     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
1896                                /*CaseInsensitive=*/true);
1897   case DK_ELSEIF:
1898   case DK_ELSEIFE:
1899     return parseDirectiveElseIf(IDLoc, DirKind);
1900   case DK_ELSEIFB:
1901     return parseDirectiveElseIfb(IDLoc, true);
1902   case DK_ELSEIFNB:
1903     return parseDirectiveElseIfb(IDLoc, false);
1904   case DK_ELSEIFDEF:
1905     return parseDirectiveElseIfdef(IDLoc, true);
1906   case DK_ELSEIFNDEF:
1907     return parseDirectiveElseIfdef(IDLoc, false);
1908   case DK_ELSEIFDIF:
1909     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
1910                                    /*CaseInsensitive=*/false);
1911   case DK_ELSEIFDIFI:
1912     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
1913                                    /*CaseInsensitive=*/true);
1914   case DK_ELSEIFIDN:
1915     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
1916                                    /*CaseInsensitive=*/false);
1917   case DK_ELSEIFIDNI:
1918     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
1919                                    /*CaseInsensitive=*/true);
1920   case DK_ELSE:
1921     return parseDirectiveElse(IDLoc);
1922   case DK_ENDIF:
1923     return parseDirectiveEndIf(IDLoc);
1924   }
1925 
1926   // Ignore the statement if in the middle of inactive conditional
1927   // (e.g. ".if 0").
1928   if (TheCondState.Ignore) {
1929     eatToEndOfStatement();
1930     return false;
1931   }
1932 
1933   // FIXME: Recurse on local labels?
1934 
1935   // Check for a label.
1936   //   ::= identifier ':'
1937   //   ::= number ':'
1938   if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
1939     if (checkForValidSection())
1940       return true;
1941 
1942     // identifier ':'   -> Label.
1943     Lex();
1944 
1945     // Diagnose attempt to use '.' as a label.
1946     if (IDVal == ".")
1947       return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
1948 
1949     // Diagnose attempt to use a variable as a label.
1950     //
1951     // FIXME: Diagnostics. Note the location of the definition as a label.
1952     // FIXME: This doesn't diagnose assignment to a symbol which has been
1953     // implicitly marked as external.
1954     MCSymbol *Sym;
1955     if (ParsingMSInlineAsm && SI) {
1956       StringRef RewrittenLabel =
1957           SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
1958       assert(!RewrittenLabel.empty() &&
1959              "We should have an internal name here.");
1960       Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
1961                                      RewrittenLabel);
1962       IDVal = RewrittenLabel;
1963     }
1964     // Handle directional local labels
1965     if (IDVal == "@@") {
1966       Sym = Ctx.createDirectionalLocalSymbol(0);
1967     } else {
1968       Sym = getContext().parseSymbol(IDVal);
1969     }
1970 
1971     // End of Labels should be treated as end of line for lexing
1972     // purposes but that information is not available to the Lexer who
1973     // does not understand Labels. This may cause us to see a Hash
1974     // here instead of a preprocessor line comment.
1975     if (getTok().is(AsmToken::Hash)) {
1976       std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
1977       Lexer.Lex();
1978       Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
1979     }
1980 
1981     // Consume any end of statement token, if present, to avoid spurious
1982     // addBlankLine calls().
1983     if (getTok().is(AsmToken::EndOfStatement)) {
1984       Lex();
1985     }
1986 
1987     // Emit the label.
1988     if (!getTargetParser().isParsingMSInlineAsm())
1989       Out.emitLabel(Sym, IDLoc);
1990     return false;
1991   }
1992 
1993   // If macros are enabled, check to see if this is a macro instantiation.
1994   if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
1995     AsmToken::TokenKind ArgumentEndTok = parseOptionalToken(AsmToken::LParen)
1996                                              ? AsmToken::RParen
1997                                              : AsmToken::EndOfStatement;
1998     return handleMacroEntry(M, IDLoc, ArgumentEndTok);
1999   }
2000 
2001   // Otherwise, we have a normal instruction or directive.
2002 
2003   if (DirKind != DK_NO_DIRECTIVE) {
2004     // There are several entities interested in parsing directives:
2005     //
2006     // 1. Asm parser extensions. For example, platform-specific parsers
2007     //    (like the ELF parser) register themselves as extensions.
2008     // 2. The target-specific assembly parser. Some directives are target
2009     //    specific or may potentially behave differently on certain targets.
2010     // 3. The generic directive parser implemented by this class. These are
2011     //    all the directives that behave in a target and platform independent
2012     //    manner, or at least have a default behavior that's shared between
2013     //    all targets and platforms.
2014 
2015     // Special-case handling of structure-end directives at higher priority,
2016     // since ENDS is overloaded as a segment-end directive.
2017     if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2018         getTok().is(AsmToken::EndOfStatement)) {
2019       return parseDirectiveNestedEnds();
2020     }
2021 
2022     // First, check the extension directive map to see if any extension has
2023     // registered itself to parse this directive.
2024     std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2025         ExtensionDirectiveMap.lookup(IDVal.lower());
2026     if (Handler.first)
2027       return (*Handler.second)(Handler.first, IDVal, IDLoc);
2028 
2029     // Next, let the target-specific assembly parser try.
2030     if (ID.isNot(AsmToken::Identifier))
2031       return false;
2032 
2033     ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2034     assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2035            "Should only return Failure iff there was an error");
2036     if (TPDirectiveReturn.isFailure())
2037       return true;
2038     if (TPDirectiveReturn.isSuccess())
2039       return false;
2040 
2041     // Finally, if no one else is interested in this directive, it must be
2042     // generic and familiar to this class.
2043     switch (DirKind) {
2044     default:
2045       break;
2046     case DK_ASCII:
2047       return parseDirectiveAscii(IDVal, false);
2048     case DK_ASCIZ:
2049     case DK_STRING:
2050       return parseDirectiveAscii(IDVal, true);
2051     case DK_BYTE:
2052     case DK_SBYTE:
2053     case DK_DB:
2054       return parseDirectiveValue(IDVal, 1);
2055     case DK_WORD:
2056     case DK_SWORD:
2057     case DK_DW:
2058       return parseDirectiveValue(IDVal, 2);
2059     case DK_DWORD:
2060     case DK_SDWORD:
2061     case DK_DD:
2062       return parseDirectiveValue(IDVal, 4);
2063     case DK_FWORD:
2064     case DK_DF:
2065       return parseDirectiveValue(IDVal, 6);
2066     case DK_QWORD:
2067     case DK_SQWORD:
2068     case DK_DQ:
2069       return parseDirectiveValue(IDVal, 8);
2070     case DK_REAL4:
2071       return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2072     case DK_REAL8:
2073       return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2074     case DK_REAL10:
2075       return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2076     case DK_STRUCT:
2077     case DK_UNION:
2078       return parseDirectiveNestedStruct(IDVal, DirKind);
2079     case DK_ENDS:
2080       return parseDirectiveNestedEnds();
2081     case DK_ALIGN:
2082       return parseDirectiveAlign();
2083     case DK_EVEN:
2084       return parseDirectiveEven();
2085     case DK_ORG:
2086       return parseDirectiveOrg();
2087     case DK_EXTERN:
2088       return parseDirectiveExtern();
2089     case DK_PUBLIC:
2090       return parseDirectiveSymbolAttribute(MCSA_Global);
2091     case DK_COMM:
2092       return parseDirectiveComm(/*IsLocal=*/false);
2093     case DK_COMMENT:
2094       return parseDirectiveComment(IDLoc);
2095     case DK_INCLUDE:
2096       return parseDirectiveInclude();
2097     case DK_REPEAT:
2098       return parseDirectiveRepeat(IDLoc, IDVal);
2099     case DK_WHILE:
2100       return parseDirectiveWhile(IDLoc);
2101     case DK_FOR:
2102       return parseDirectiveFor(IDLoc, IDVal);
2103     case DK_FORC:
2104       return parseDirectiveForc(IDLoc, IDVal);
2105     case DK_EXITM:
2106       Info.ExitValue = "";
2107       return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2108     case DK_ENDM:
2109       Info.ExitValue = "";
2110       return parseDirectiveEndMacro(IDVal);
2111     case DK_PURGE:
2112       return parseDirectivePurgeMacro(IDLoc);
2113     case DK_END:
2114       return parseDirectiveEnd(IDLoc);
2115     case DK_ERR:
2116       return parseDirectiveError(IDLoc);
2117     case DK_ERRB:
2118       return parseDirectiveErrorIfb(IDLoc, true);
2119     case DK_ERRNB:
2120       return parseDirectiveErrorIfb(IDLoc, false);
2121     case DK_ERRDEF:
2122       return parseDirectiveErrorIfdef(IDLoc, true);
2123     case DK_ERRNDEF:
2124       return parseDirectiveErrorIfdef(IDLoc, false);
2125     case DK_ERRDIF:
2126       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2127                                       /*CaseInsensitive=*/false);
2128     case DK_ERRDIFI:
2129       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2130                                       /*CaseInsensitive=*/true);
2131     case DK_ERRIDN:
2132       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2133                                       /*CaseInsensitive=*/false);
2134     case DK_ERRIDNI:
2135       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2136                                       /*CaseInsensitive=*/true);
2137     case DK_ERRE:
2138       return parseDirectiveErrorIfe(IDLoc, true);
2139     case DK_ERRNZ:
2140       return parseDirectiveErrorIfe(IDLoc, false);
2141     case DK_RADIX:
2142       return parseDirectiveRadix(IDLoc);
2143     case DK_ECHO:
2144       return parseDirectiveEcho(IDLoc);
2145     }
2146 
2147     return Error(IDLoc, "unknown directive");
2148   }
2149 
2150   // We also check if this is allocating memory with user-defined type.
2151   auto IDIt = Structs.find(IDVal.lower());
2152   if (IDIt != Structs.end())
2153     return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2154                                      IDLoc);
2155 
2156   // Non-conditional Microsoft directives sometimes follow their first argument.
2157   const AsmToken nextTok = getTok();
2158   const StringRef nextVal = nextTok.getString();
2159   const SMLoc nextLoc = nextTok.getLoc();
2160 
2161   const AsmToken afterNextTok = peekTok();
2162 
2163   // There are several entities interested in parsing infix directives:
2164   //
2165   // 1. Asm parser extensions. For example, platform-specific parsers
2166   //    (like the ELF parser) register themselves as extensions.
2167   // 2. The generic directive parser implemented by this class. These are
2168   //    all the directives that behave in a target and platform independent
2169   //    manner, or at least have a default behavior that's shared between
2170   //    all targets and platforms.
2171 
2172   getTargetParser().flushPendingInstructions(getStreamer());
2173 
2174   // Special-case handling of structure-end directives at higher priority, since
2175   // ENDS is overloaded as a segment-end directive.
2176   if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2177     Lex();
2178     return parseDirectiveEnds(IDVal, IDLoc);
2179   }
2180 
2181   // First, check the extension directive map to see if any extension has
2182   // registered itself to parse this directive.
2183   std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2184       ExtensionDirectiveMap.lookup(nextVal.lower());
2185   if (Handler.first) {
2186     Lex();
2187     Lexer.UnLex(ID);
2188     return (*Handler.second)(Handler.first, nextVal, nextLoc);
2189   }
2190 
2191   // If no one else is interested in this directive, it must be
2192   // generic and familiar to this class.
2193   DirKindIt = DirectiveKindMap.find(nextVal.lower());
2194   DirKind = (DirKindIt == DirectiveKindMap.end())
2195                 ? DK_NO_DIRECTIVE
2196                 : DirKindIt->getValue();
2197   switch (DirKind) {
2198   default:
2199     break;
2200   case DK_ASSIGN:
2201   case DK_EQU:
2202   case DK_TEXTEQU:
2203     Lex();
2204     return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2205   case DK_BYTE:
2206     if (afterNextTok.is(AsmToken::Identifier) &&
2207         afterNextTok.getString().equals_insensitive("ptr")) {
2208       // Size directive; part of an instruction.
2209       break;
2210     }
2211     [[fallthrough]];
2212   case DK_SBYTE:
2213   case DK_DB:
2214     Lex();
2215     return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2216   case DK_WORD:
2217     if (afterNextTok.is(AsmToken::Identifier) &&
2218         afterNextTok.getString().equals_insensitive("ptr")) {
2219       // Size directive; part of an instruction.
2220       break;
2221     }
2222     [[fallthrough]];
2223   case DK_SWORD:
2224   case DK_DW:
2225     Lex();
2226     return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2227   case DK_DWORD:
2228     if (afterNextTok.is(AsmToken::Identifier) &&
2229         afterNextTok.getString().equals_insensitive("ptr")) {
2230       // Size directive; part of an instruction.
2231       break;
2232     }
2233     [[fallthrough]];
2234   case DK_SDWORD:
2235   case DK_DD:
2236     Lex();
2237     return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2238   case DK_FWORD:
2239     if (afterNextTok.is(AsmToken::Identifier) &&
2240         afterNextTok.getString().equals_insensitive("ptr")) {
2241       // Size directive; part of an instruction.
2242       break;
2243     }
2244     [[fallthrough]];
2245   case DK_DF:
2246     Lex();
2247     return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2248   case DK_QWORD:
2249     if (afterNextTok.is(AsmToken::Identifier) &&
2250         afterNextTok.getString().equals_insensitive("ptr")) {
2251       // Size directive; part of an instruction.
2252       break;
2253     }
2254     [[fallthrough]];
2255   case DK_SQWORD:
2256   case DK_DQ:
2257     Lex();
2258     return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2259   case DK_REAL4:
2260     Lex();
2261     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2262                                         IDVal, IDLoc);
2263   case DK_REAL8:
2264     Lex();
2265     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2266                                         IDVal, IDLoc);
2267   case DK_REAL10:
2268     Lex();
2269     return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2270                                         10, IDVal, IDLoc);
2271   case DK_STRUCT:
2272   case DK_UNION:
2273     Lex();
2274     return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2275   case DK_ENDS:
2276     Lex();
2277     return parseDirectiveEnds(IDVal, IDLoc);
2278   case DK_MACRO:
2279     Lex();
2280     return parseDirectiveMacro(IDVal, IDLoc);
2281   }
2282 
2283   // Finally, we check if this is allocating a variable with user-defined type.
2284   auto NextIt = Structs.find(nextVal.lower());
2285   if (NextIt != Structs.end()) {
2286     Lex();
2287     return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2288                                           nextVal, nextLoc, IDVal);
2289   }
2290 
2291   // __asm _emit or __asm __emit
2292   if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2293                              IDVal == "_EMIT" || IDVal == "__EMIT"))
2294     return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2295 
2296   // __asm align
2297   if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2298     return parseDirectiveMSAlign(IDLoc, Info);
2299 
2300   if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2301     Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2302   if (checkForValidSection())
2303     return true;
2304 
2305   // Canonicalize the opcode to lower case.
2306   std::string OpcodeStr = IDVal.lower();
2307   ParseInstructionInfo IInfo(Info.AsmRewrites);
2308   bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
2309                                                           Info.ParsedOperands);
2310   Info.ParseError = ParseHadError;
2311 
2312   // Dump the parsed representation, if requested.
2313   if (getShowParsedOperands()) {
2314     SmallString<256> Str;
2315     raw_svector_ostream OS(Str);
2316     OS << "parsed instruction: [";
2317     for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2318       if (i != 0)
2319         OS << ", ";
2320       Info.ParsedOperands[i]->print(OS, MAI);
2321     }
2322     OS << "]";
2323 
2324     printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2325   }
2326 
2327   // Fail even if ParseInstruction erroneously returns false.
2328   if (hasPendingError() || ParseHadError)
2329     return true;
2330 
2331   // If parsing succeeded, match the instruction.
2332   if (!ParseHadError) {
2333     uint64_t ErrorInfo;
2334     if (getTargetParser().matchAndEmitInstruction(
2335             IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2336             getTargetParser().isParsingMSInlineAsm()))
2337       return true;
2338   }
2339   return false;
2340 }
2341 
2342 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2343 bool MasmParser::parseCurlyBlockScope(
2344     SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2345   // Identify curly brace marking block start/end.
2346   if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2347     return false;
2348 
2349   SMLoc StartLoc = Lexer.getLoc();
2350   Lex(); // Eat the brace.
2351   if (Lexer.is(AsmToken::EndOfStatement))
2352     Lex(); // Eat EndOfStatement following the brace.
2353 
2354   // Erase the block start/end brace from the output asm string.
2355   AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2356                                                   StartLoc.getPointer());
2357   return true;
2358 }
2359 
2360 /// parseCppHashLineFilenameComment as this:
2361 ///   ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2362 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2363   Lex(); // Eat the hash token.
2364   // Lexer only ever emits HashDirective if it fully formed if it's
2365   // done the checking already so this is an internal error.
2366   assert(getTok().is(AsmToken::Integer) &&
2367          "Lexing Cpp line comment: Expected Integer");
2368   int64_t LineNumber = getTok().getIntVal();
2369   Lex();
2370   assert(getTok().is(AsmToken::String) &&
2371          "Lexing Cpp line comment: Expected String");
2372   StringRef Filename = getTok().getString();
2373   Lex();
2374 
2375   // Get rid of the enclosing quotes.
2376   Filename = Filename.substr(1, Filename.size() - 2);
2377 
2378   // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2379   // and possibly DWARF file info.
2380   CppHashInfo.Loc = L;
2381   CppHashInfo.Filename = Filename;
2382   CppHashInfo.LineNumber = LineNumber;
2383   CppHashInfo.Buf = CurBuffer;
2384   if (FirstCppHashFilename.empty())
2385     FirstCppHashFilename = Filename;
2386   return false;
2387 }
2388 
2389 /// will use the last parsed cpp hash line filename comment
2390 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2391 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2392   const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2393   raw_ostream &OS = errs();
2394 
2395   const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2396   SMLoc DiagLoc = Diag.getLoc();
2397   unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2398   unsigned CppHashBuf =
2399       Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2400 
2401   // Like SourceMgr::printMessage() we need to print the include stack if any
2402   // before printing the message.
2403   unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2404   if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2405       DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2406     SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2407     DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2408   }
2409 
2410   // If we have not parsed a cpp hash line filename comment or the source
2411   // manager changed or buffer changed (like in a nested include) then just
2412   // print the normal diagnostic using its Filename and LineNo.
2413   if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2414       DiagBuf != CppHashBuf) {
2415     if (Parser->SavedDiagHandler)
2416       Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2417     else
2418       Diag.print(nullptr, OS);
2419     return;
2420   }
2421 
2422   // Use the CppHashFilename and calculate a line number based on the
2423   // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2424   // for the diagnostic.
2425   const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2426 
2427   int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2428   int CppHashLocLineNo =
2429       Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2430   int LineNo =
2431       Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2432 
2433   SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2434                        Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2435                        Diag.getLineContents(), Diag.getRanges());
2436 
2437   if (Parser->SavedDiagHandler)
2438     Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2439   else
2440     NewDiag.print(nullptr, OS);
2441 }
2442 
2443 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2444 // not accept '.'.
isMacroParameterChar(char C)2445 static bool isMacroParameterChar(char C) {
2446   return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2447 }
2448 
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2449 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2450                              ArrayRef<MCAsmMacroParameter> Parameters,
2451                              ArrayRef<MCAsmMacroArgument> A,
2452                              const std::vector<std::string> &Locals, SMLoc L) {
2453   unsigned NParameters = Parameters.size();
2454   if (NParameters != A.size())
2455     return Error(L, "Wrong number of arguments");
2456   StringMap<std::string> LocalSymbols;
2457   std::string Name;
2458   Name.reserve(6);
2459   for (StringRef Local : Locals) {
2460     raw_string_ostream LocalName(Name);
2461     LocalName << "??"
2462               << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2463     LocalSymbols.insert({Local, Name});
2464     Name.clear();
2465   }
2466 
2467   std::optional<char> CurrentQuote;
2468   while (!Body.empty()) {
2469     // Scan for the next substitution.
2470     std::size_t End = Body.size(), Pos = 0;
2471     std::size_t IdentifierPos = End;
2472     for (; Pos != End; ++Pos) {
2473       // Find the next possible macro parameter, including preceding a '&'
2474       // inside quotes.
2475       if (Body[Pos] == '&')
2476         break;
2477       if (isMacroParameterChar(Body[Pos])) {
2478         if (!CurrentQuote)
2479           break;
2480         if (IdentifierPos == End)
2481           IdentifierPos = Pos;
2482       } else {
2483         IdentifierPos = End;
2484       }
2485 
2486       // Track quotation status
2487       if (!CurrentQuote) {
2488         if (Body[Pos] == '\'' || Body[Pos] == '"')
2489           CurrentQuote = Body[Pos];
2490       } else if (Body[Pos] == CurrentQuote) {
2491         if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2492           // Escaped quote, and quotes aren't identifier chars; skip
2493           ++Pos;
2494           continue;
2495         } else {
2496           CurrentQuote.reset();
2497         }
2498       }
2499     }
2500     if (IdentifierPos != End) {
2501       // We've recognized an identifier before an apostrophe inside quotes;
2502       // check once to see if we can expand it.
2503       Pos = IdentifierPos;
2504       IdentifierPos = End;
2505     }
2506 
2507     // Add the prefix.
2508     OS << Body.slice(0, Pos);
2509 
2510     // Check if we reached the end.
2511     if (Pos == End)
2512       break;
2513 
2514     unsigned I = Pos;
2515     bool InitialAmpersand = (Body[I] == '&');
2516     if (InitialAmpersand) {
2517       ++I;
2518       ++Pos;
2519     }
2520     while (I < End && isMacroParameterChar(Body[I]))
2521       ++I;
2522 
2523     const char *Begin = Body.data() + Pos;
2524     StringRef Argument(Begin, I - Pos);
2525     const std::string ArgumentLower = Argument.lower();
2526     unsigned Index = 0;
2527 
2528     for (; Index < NParameters; ++Index)
2529       if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2530         break;
2531 
2532     if (Index == NParameters) {
2533       if (InitialAmpersand)
2534         OS << '&';
2535       auto it = LocalSymbols.find(ArgumentLower);
2536       if (it != LocalSymbols.end())
2537         OS << it->second;
2538       else
2539         OS << Argument;
2540       Pos = I;
2541     } else {
2542       for (const AsmToken &Token : A[Index]) {
2543         // In MASM, you can write '%expr'.
2544         // The prefix '%' evaluates the expression 'expr'
2545         // and uses the result as a string (e.g. replace %(1+2) with the
2546         // string "3").
2547         // Here, we identify the integer token which is the result of the
2548         // absolute expression evaluation and replace it with its string
2549         // representation.
2550         if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2551           // Emit an integer value to the buffer.
2552           OS << Token.getIntVal();
2553         else
2554           OS << Token.getString();
2555       }
2556 
2557       Pos += Argument.size();
2558       if (Pos < End && Body[Pos] == '&') {
2559         ++Pos;
2560       }
2561     }
2562     // Update the scan point.
2563     Body = Body.substr(Pos);
2564   }
2565 
2566   return false;
2567 }
2568 
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)2569 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2570                                     MCAsmMacroArgument &MA,
2571                                     AsmToken::TokenKind EndTok) {
2572   if (MP && MP->Vararg) {
2573     if (Lexer.isNot(EndTok)) {
2574       SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
2575       for (StringRef S : Str) {
2576         MA.emplace_back(AsmToken::String, S);
2577       }
2578     }
2579     return false;
2580   }
2581 
2582   SMLoc StrLoc = Lexer.getLoc(), EndLoc;
2583   if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
2584     const char *StrChar = StrLoc.getPointer() + 1;
2585     const char *EndChar = EndLoc.getPointer() - 1;
2586     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
2587     /// Eat from '<' to '>'.
2588     Lex();
2589     MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
2590     return false;
2591   }
2592 
2593   unsigned ParenLevel = 0;
2594 
2595   while (true) {
2596     if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
2597       return TokError("unexpected token");
2598 
2599     if (ParenLevel == 0 && Lexer.is(AsmToken::Comma))
2600       break;
2601 
2602     // handleMacroEntry relies on not advancing the lexer here
2603     // to be able to fill in the remaining default parameter values
2604     if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
2605       break;
2606 
2607     // Adjust the current parentheses level.
2608     if (Lexer.is(AsmToken::LParen))
2609       ++ParenLevel;
2610     else if (Lexer.is(AsmToken::RParen) && ParenLevel)
2611       --ParenLevel;
2612 
2613     // Append the token to the current argument list.
2614     MA.push_back(getTok());
2615     Lex();
2616   }
2617 
2618   if (ParenLevel != 0)
2619     return TokError("unbalanced parentheses in argument");
2620 
2621   if (MA.empty() && MP) {
2622     if (MP->Required) {
2623       return TokError("missing value for required parameter '" + MP->Name +
2624                       "'");
2625     } else {
2626       MA = MP->Value;
2627     }
2628   }
2629   return false;
2630 }
2631 
2632 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)2633 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
2634                                      MCAsmMacroArguments &A,
2635                                      AsmToken::TokenKind EndTok) {
2636   const unsigned NParameters = M ? M->Parameters.size() : 0;
2637   bool NamedParametersFound = false;
2638   SmallVector<SMLoc, 4> FALocs;
2639 
2640   A.resize(NParameters);
2641   FALocs.resize(NParameters);
2642 
2643   // Parse two kinds of macro invocations:
2644   // - macros defined without any parameters accept an arbitrary number of them
2645   // - macros defined with parameters accept at most that many of them
2646   for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
2647        ++Parameter) {
2648     SMLoc IDLoc = Lexer.getLoc();
2649     MCAsmMacroParameter FA;
2650 
2651     if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
2652       if (parseIdentifier(FA.Name))
2653         return Error(IDLoc, "invalid argument identifier for formal argument");
2654 
2655       if (Lexer.isNot(AsmToken::Equal))
2656         return TokError("expected '=' after formal parameter identifier");
2657 
2658       Lex();
2659 
2660       NamedParametersFound = true;
2661     }
2662 
2663     if (NamedParametersFound && FA.Name.empty())
2664       return Error(IDLoc, "cannot mix positional and keyword arguments");
2665 
2666     unsigned PI = Parameter;
2667     if (!FA.Name.empty()) {
2668       assert(M && "expected macro to be defined");
2669       unsigned FAI = 0;
2670       for (FAI = 0; FAI < NParameters; ++FAI)
2671         if (M->Parameters[FAI].Name == FA.Name)
2672           break;
2673 
2674       if (FAI >= NParameters) {
2675         return Error(IDLoc, "parameter named '" + FA.Name +
2676                                 "' does not exist for macro '" + M->Name + "'");
2677       }
2678       PI = FAI;
2679     }
2680     const MCAsmMacroParameter *MP = nullptr;
2681     if (M && PI < NParameters)
2682       MP = &M->Parameters[PI];
2683 
2684     SMLoc StrLoc = Lexer.getLoc();
2685     SMLoc EndLoc;
2686     if (Lexer.is(AsmToken::Percent)) {
2687       const MCExpr *AbsoluteExp;
2688       int64_t Value;
2689       /// Eat '%'.
2690       Lex();
2691       if (parseExpression(AbsoluteExp, EndLoc))
2692         return false;
2693       if (!AbsoluteExp->evaluateAsAbsolute(Value,
2694                                            getStreamer().getAssemblerPtr()))
2695         return Error(StrLoc, "expected absolute expression");
2696       const char *StrChar = StrLoc.getPointer();
2697       const char *EndChar = EndLoc.getPointer();
2698       AsmToken newToken(AsmToken::Integer,
2699                         StringRef(StrChar, EndChar - StrChar), Value);
2700       FA.Value.push_back(newToken);
2701     } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
2702       if (M)
2703         return addErrorSuffix(" in '" + M->Name + "' macro");
2704       else
2705         return true;
2706     }
2707 
2708     if (!FA.Value.empty()) {
2709       if (A.size() <= PI)
2710         A.resize(PI + 1);
2711       A[PI] = FA.Value;
2712 
2713       if (FALocs.size() <= PI)
2714         FALocs.resize(PI + 1);
2715 
2716       FALocs[PI] = Lexer.getLoc();
2717     }
2718 
2719     // At the end of the statement, fill in remaining arguments that have
2720     // default values. If there aren't any, then the next argument is
2721     // required but missing
2722     if (Lexer.is(EndTok)) {
2723       bool Failure = false;
2724       for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
2725         if (A[FAI].empty()) {
2726           if (M->Parameters[FAI].Required) {
2727             Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
2728                   "missing value for required parameter "
2729                   "'" +
2730                       M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
2731             Failure = true;
2732           }
2733 
2734           if (!M->Parameters[FAI].Value.empty())
2735             A[FAI] = M->Parameters[FAI].Value;
2736         }
2737       }
2738       return Failure;
2739     }
2740 
2741     if (Lexer.is(AsmToken::Comma))
2742       Lex();
2743   }
2744 
2745   return TokError("too many positional arguments");
2746 }
2747 
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)2748 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
2749                                   AsmToken::TokenKind ArgumentEndTok) {
2750   // Arbitrarily limit macro nesting depth (default matches 'as'). We can
2751   // eliminate this, although we should protect against infinite loops.
2752   unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
2753   if (ActiveMacros.size() == MaxNestingDepth) {
2754     std::ostringstream MaxNestingDepthError;
2755     MaxNestingDepthError << "macros cannot be nested more than "
2756                          << MaxNestingDepth << " levels deep."
2757                          << " Use -asm-macro-max-nesting-depth to increase "
2758                             "this limit.";
2759     return TokError(MaxNestingDepthError.str());
2760   }
2761 
2762   MCAsmMacroArguments A;
2763   if (parseMacroArguments(M, A, ArgumentEndTok) || parseToken(ArgumentEndTok))
2764     return true;
2765 
2766   // Macro instantiation is lexical, unfortunately. We construct a new buffer
2767   // to hold the macro body with substitutions.
2768   SmallString<256> Buf;
2769   StringRef Body = M->Body;
2770   raw_svector_ostream OS(Buf);
2771 
2772   if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
2773     return true;
2774 
2775   // We include the endm in the buffer as our cue to exit the macro
2776   // instantiation.
2777   OS << "endm\n";
2778 
2779   std::unique_ptr<MemoryBuffer> Instantiation =
2780       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
2781 
2782   // Create the macro instantiation object and add to the current macro
2783   // instantiation stack.
2784   MacroInstantiation *MI = new MacroInstantiation{
2785       NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
2786   ActiveMacros.push_back(MI);
2787 
2788   ++NumOfMacroInstantiations;
2789 
2790   // Jump to the macro instantiation and prime the lexer.
2791   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
2792   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
2793   EndStatementAtEOFStack.push_back(true);
2794   Lex();
2795 
2796   return false;
2797 }
2798 
handleMacroExit()2799 void MasmParser::handleMacroExit() {
2800   // Jump to the token we should return to, and consume it.
2801   EndStatementAtEOFStack.pop_back();
2802   jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
2803             EndStatementAtEOFStack.back());
2804   Lex();
2805 
2806   // Pop the instantiation entry.
2807   delete ActiveMacros.back();
2808   ActiveMacros.pop_back();
2809 }
2810 
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)2811 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
2812   if (!M->IsFunction)
2813     return Error(NameLoc, "cannot invoke macro procedure as function");
2814 
2815   if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
2816                                        "' requires arguments in parentheses") ||
2817       handleMacroEntry(M, NameLoc, AsmToken::RParen))
2818     return true;
2819 
2820   // Parse all statements in the macro, retrieving the exit value when it ends.
2821   std::string ExitValue;
2822   SmallVector<AsmRewrite, 4> AsmStrRewrites;
2823   while (Lexer.isNot(AsmToken::Eof)) {
2824     ParseStatementInfo Info(&AsmStrRewrites);
2825     bool HasError = parseStatement(Info, nullptr);
2826 
2827     if (!HasError && Info.ExitValue) {
2828       ExitValue = std::move(*Info.ExitValue);
2829       break;
2830     }
2831 
2832     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
2833     // for printing ErrMsg via Lex() only if no (presumably better) parser error
2834     // exists.
2835     if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error))
2836       Lex();
2837 
2838     // parseStatement returned true so may need to emit an error.
2839     printPendingErrors();
2840 
2841     // Skipping to the next line if needed.
2842     if (HasError && !getLexer().justConsumedEOL())
2843       eatToEndOfStatement();
2844   }
2845 
2846   // Exit values may require lexing, unfortunately. We construct a new buffer to
2847   // hold the exit value.
2848   std::unique_ptr<MemoryBuffer> MacroValue =
2849       MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
2850 
2851   // Jump from this location to the instantiated exit value, and prime the
2852   // lexer.
2853   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
2854   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
2855                   /*EndStatementAtEOF=*/false);
2856   EndStatementAtEOFStack.push_back(false);
2857   Lex();
2858 
2859   return false;
2860 }
2861 
2862 /// parseIdentifier:
2863 ///   ::= identifier
2864 ///   ::= string
parseIdentifier(StringRef & Res,IdentifierPositionKind Position)2865 bool MasmParser::parseIdentifier(StringRef &Res,
2866                                  IdentifierPositionKind Position) {
2867   // The assembler has relaxed rules for accepting identifiers, in particular we
2868   // allow things like '.globl $foo' and '.def @feat.00', which would normally
2869   // be separate tokens. At this level, we have already lexed so we cannot
2870   // (currently) handle this as a context dependent token, instead we detect
2871   // adjacent tokens and return the combined identifier.
2872   if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
2873     SMLoc PrefixLoc = getLexer().getLoc();
2874 
2875     // Consume the prefix character, and check for a following identifier.
2876 
2877     AsmToken nextTok = peekTok(false);
2878 
2879     if (nextTok.isNot(AsmToken::Identifier))
2880       return true;
2881 
2882     // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
2883     if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
2884       return true;
2885 
2886     // eat $ or @
2887     Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
2888     // Construct the joined identifier and consume the token.
2889     Res =
2890         StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
2891     Lex(); // Parser Lex to maintain invariants.
2892     return false;
2893   }
2894 
2895   if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
2896     return true;
2897 
2898   Res = getTok().getIdentifier();
2899 
2900   // Consume the identifier token - but if parsing certain directives, avoid
2901   // lexical expansion of the next token.
2902   ExpandKind ExpandNextToken = ExpandMacros;
2903   if (Position == StartOfStatement &&
2904       StringSwitch<bool>(Res)
2905           .CaseLower("echo", true)
2906           .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
2907           .Default(false)) {
2908     ExpandNextToken = DoNotExpandMacros;
2909   }
2910   Lex(ExpandNextToken);
2911 
2912   return false;
2913 }
2914 
2915 /// parseDirectiveEquate:
2916 ///  ::= name "=" expression
2917 ///    | name "equ" expression    (not redefinable)
2918 ///    | name "equ" text-list
2919 ///    | name "textequ" text-list (redefinability unspecified)
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind,SMLoc NameLoc)2920 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
2921                                       DirectiveKind DirKind, SMLoc NameLoc) {
2922   auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
2923   if (BuiltinIt != BuiltinSymbolMap.end())
2924     return Error(NameLoc, "cannot redefine a built-in symbol");
2925 
2926   Variable &Var = Variables[Name.lower()];
2927   if (Var.Name.empty()) {
2928     Var.Name = Name;
2929   }
2930 
2931   SMLoc StartLoc = Lexer.getLoc();
2932   if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
2933     // "equ" and "textequ" both allow text expressions.
2934     std::string Value;
2935     std::string TextItem;
2936     if (!parseTextItem(TextItem)) {
2937       Value += TextItem;
2938 
2939       // Accept a text-list, not just one text-item.
2940       auto parseItem = [&]() -> bool {
2941         if (parseTextItem(TextItem))
2942           return TokError("expected text item");
2943         Value += TextItem;
2944         return false;
2945       };
2946       if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
2947         return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2948 
2949       if (!Var.IsText || Var.TextValue != Value) {
2950         switch (Var.Redefinable) {
2951         case Variable::NOT_REDEFINABLE:
2952           return Error(getTok().getLoc(), "invalid variable redefinition");
2953         case Variable::WARN_ON_REDEFINITION:
2954           if (Warning(NameLoc, "redefining '" + Name +
2955                                    "', already defined on the command line")) {
2956             return true;
2957           }
2958           break;
2959         default:
2960           break;
2961         }
2962       }
2963       Var.IsText = true;
2964       Var.TextValue = Value;
2965       Var.Redefinable = Variable::REDEFINABLE;
2966 
2967       return false;
2968     }
2969   }
2970   if (DirKind == DK_TEXTEQU)
2971     return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
2972 
2973   // Parse as expression assignment.
2974   const MCExpr *Expr;
2975   SMLoc EndLoc;
2976   if (parseExpression(Expr, EndLoc))
2977     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2978   StringRef ExprAsString = StringRef(
2979       StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
2980 
2981   int64_t Value;
2982   if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
2983     if (DirKind == DK_ASSIGN)
2984       return Error(
2985           StartLoc,
2986           "expected absolute expression; not all symbols have known values",
2987           {StartLoc, EndLoc});
2988 
2989     // Not an absolute expression; define as a text replacement.
2990     if (!Var.IsText || Var.TextValue != ExprAsString) {
2991       switch (Var.Redefinable) {
2992       case Variable::NOT_REDEFINABLE:
2993         return Error(getTok().getLoc(), "invalid variable redefinition");
2994       case Variable::WARN_ON_REDEFINITION:
2995         if (Warning(NameLoc, "redefining '" + Name +
2996                                  "', already defined on the command line")) {
2997           return true;
2998         }
2999         break;
3000       default:
3001         break;
3002       }
3003     }
3004 
3005     Var.IsText = true;
3006     Var.TextValue = ExprAsString.str();
3007     Var.Redefinable = Variable::REDEFINABLE;
3008 
3009     return false;
3010   }
3011 
3012   auto *Sym = getContext().parseSymbol(Var.Name);
3013   const MCConstantExpr *PrevValue =
3014       Sym->isVariable()
3015           ? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
3016           : nullptr;
3017   if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3018     switch (Var.Redefinable) {
3019     case Variable::NOT_REDEFINABLE:
3020       return Error(getTok().getLoc(), "invalid variable redefinition");
3021     case Variable::WARN_ON_REDEFINITION:
3022       if (Warning(NameLoc, "redefining '" + Name +
3023                                "', already defined on the command line")) {
3024         return true;
3025       }
3026       break;
3027     default:
3028       break;
3029     }
3030   }
3031 
3032   Var.IsText = false;
3033   Var.TextValue.clear();
3034   Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3035                                            : Variable::NOT_REDEFINABLE;
3036 
3037   Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3038   Sym->setVariableValue(Expr);
3039   Sym->setExternal(false);
3040 
3041   return false;
3042 }
3043 
parseEscapedString(std::string & Data)3044 bool MasmParser::parseEscapedString(std::string &Data) {
3045   if (check(getTok().isNot(AsmToken::String), "expected string"))
3046     return true;
3047 
3048   Data = "";
3049   char Quote = getTok().getString().front();
3050   StringRef Str = getTok().getStringContents();
3051   Data.reserve(Str.size());
3052   for (size_t i = 0, e = Str.size(); i != e; ++i) {
3053     Data.push_back(Str[i]);
3054     if (Str[i] == Quote) {
3055       // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3056       // If we're escaping the string's trailing delimiter, we're definitely
3057       // missing a quotation mark.
3058       if (i + 1 == Str.size())
3059         return Error(getTok().getLoc(), "missing quotation mark in string");
3060       if (Str[i + 1] == Quote)
3061         ++i;
3062     }
3063   }
3064 
3065   Lex();
3066   return false;
3067 }
3068 
parseAngleBracketString(std::string & Data)3069 bool MasmParser::parseAngleBracketString(std::string &Data) {
3070   SMLoc EndLoc, StartLoc = getTok().getLoc();
3071   if (isAngleBracketString(StartLoc, EndLoc)) {
3072     const char *StartChar = StartLoc.getPointer() + 1;
3073     const char *EndChar = EndLoc.getPointer() - 1;
3074     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3075     // Eat from '<' to '>'.
3076     Lex();
3077 
3078     Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3079     return false;
3080   }
3081   return true;
3082 }
3083 
3084 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3085 bool MasmParser::parseTextItem(std::string &Data) {
3086   switch (getTok().getKind()) {
3087   default:
3088     return true;
3089   case AsmToken::Percent: {
3090     int64_t Res;
3091     if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3092       return true;
3093     Data = std::to_string(Res);
3094     return false;
3095   }
3096   case AsmToken::Less:
3097   case AsmToken::LessEqual:
3098   case AsmToken::LessLess:
3099   case AsmToken::LessGreater:
3100     return parseAngleBracketString(Data);
3101   case AsmToken::Identifier: {
3102     // This must be a text macro; we need to expand it accordingly.
3103     StringRef ID;
3104     SMLoc StartLoc = getTok().getLoc();
3105     if (parseIdentifier(ID))
3106       return true;
3107     Data = ID.str();
3108 
3109     bool Expanded = false;
3110     while (true) {
3111       // Try to resolve as a built-in text macro
3112       auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3113       if (BuiltinIt != BuiltinSymbolMap.end()) {
3114         std::optional<std::string> BuiltinText =
3115             evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3116         if (!BuiltinText) {
3117           // Not a text macro; break without substituting
3118           break;
3119         }
3120         Data = std::move(*BuiltinText);
3121         ID = StringRef(Data);
3122         Expanded = true;
3123         continue;
3124       }
3125 
3126       // Try to resolve as a built-in macro function
3127       auto BuiltinFuncIt = BuiltinFunctionMap.find(ID.lower());
3128       if (BuiltinFuncIt != BuiltinFunctionMap.end()) {
3129         Data.clear();
3130         if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), ID, Data)) {
3131           return true;
3132         }
3133         ID = StringRef(Data);
3134         Expanded = true;
3135         continue;
3136       }
3137 
3138       // Try to resolve as a variable text macro
3139       auto VarIt = Variables.find(ID.lower());
3140       if (VarIt != Variables.end()) {
3141         const Variable &Var = VarIt->getValue();
3142         if (!Var.IsText) {
3143           // Not a text macro; break without substituting
3144           break;
3145         }
3146         Data = Var.TextValue;
3147         ID = StringRef(Data);
3148         Expanded = true;
3149         continue;
3150       }
3151 
3152       break;
3153     }
3154 
3155     if (!Expanded) {
3156       // Not a text macro; not usable in TextItem context. Since we haven't used
3157       // the token, put it back for better error recovery.
3158       getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3159       return true;
3160     }
3161     return false;
3162   }
3163   }
3164   llvm_unreachable("unhandled token kind");
3165 }
3166 
3167 /// parseDirectiveAscii:
3168 ///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3169 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3170   auto parseOp = [&]() -> bool {
3171     std::string Data;
3172     if (checkForValidSection() || parseEscapedString(Data))
3173       return true;
3174     getStreamer().emitBytes(Data);
3175     if (ZeroTerminated)
3176       getStreamer().emitBytes(StringRef("\0", 1));
3177     return false;
3178   };
3179 
3180   if (parseMany(parseOp))
3181     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3182   return false;
3183 }
3184 
emitIntValue(const MCExpr * Value,unsigned Size)3185 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3186   // Special case constant expressions to match code generator.
3187   if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3188     assert(Size <= 8 && "Invalid size");
3189     int64_t IntValue = MCE->getValue();
3190     if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3191       return Error(MCE->getLoc(), "out of range literal value");
3192     getStreamer().emitIntValue(IntValue, Size);
3193   } else {
3194     const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3195     if (MSE && MSE->getSymbol().getName() == "?") {
3196       // ? initializer; treat as 0.
3197       getStreamer().emitIntValue(0, Size);
3198     } else {
3199       getStreamer().emitValue(Value, Size, Value->getLoc());
3200     }
3201   }
3202   return false;
3203 }
3204 
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3205 bool MasmParser::parseScalarInitializer(unsigned Size,
3206                                         SmallVectorImpl<const MCExpr *> &Values,
3207                                         unsigned StringPadLength) {
3208   if (Size == 1 && getTok().is(AsmToken::String)) {
3209     std::string Value;
3210     if (parseEscapedString(Value))
3211       return true;
3212     // Treat each character as an initializer.
3213     for (const unsigned char CharVal : Value)
3214       Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3215 
3216     // Pad the string with spaces to the specified length.
3217     for (size_t i = Value.size(); i < StringPadLength; ++i)
3218       Values.push_back(MCConstantExpr::create(' ', getContext()));
3219   } else {
3220     const MCExpr *Value;
3221     if (parseExpression(Value))
3222       return true;
3223     if (getTok().is(AsmToken::Identifier) &&
3224         getTok().getString().equals_insensitive("dup")) {
3225       Lex(); // Eat 'dup'.
3226       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3227       if (!MCE)
3228         return Error(Value->getLoc(),
3229                      "cannot repeat value a non-constant number of times");
3230       const int64_t Repetitions = MCE->getValue();
3231       if (Repetitions < 0)
3232         return Error(Value->getLoc(),
3233                      "cannot repeat value a negative number of times");
3234 
3235       SmallVector<const MCExpr *, 1> DuplicatedValues;
3236       if (parseToken(AsmToken::LParen,
3237                      "parentheses required for 'dup' contents") ||
3238           parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3239         return true;
3240 
3241       for (int i = 0; i < Repetitions; ++i)
3242         Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3243     } else {
3244       Values.push_back(Value);
3245     }
3246   }
3247   return false;
3248 }
3249 
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3250 bool MasmParser::parseScalarInstList(unsigned Size,
3251                                      SmallVectorImpl<const MCExpr *> &Values,
3252                                      const AsmToken::TokenKind EndToken) {
3253   while (getTok().isNot(EndToken) &&
3254          (EndToken != AsmToken::Greater ||
3255           getTok().isNot(AsmToken::GreaterGreater))) {
3256     parseScalarInitializer(Size, Values);
3257 
3258     // If we see a comma, continue, and allow line continuation.
3259     if (!parseOptionalToken(AsmToken::Comma))
3260       break;
3261     parseOptionalToken(AsmToken::EndOfStatement);
3262   }
3263   return false;
3264 }
3265 
emitIntegralValues(unsigned Size,unsigned * Count)3266 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3267   SmallVector<const MCExpr *, 1> Values;
3268   if (checkForValidSection() || parseScalarInstList(Size, Values))
3269     return true;
3270 
3271   for (const auto *Value : Values) {
3272     emitIntValue(Value, Size);
3273   }
3274   if (Count)
3275     *Count = Values.size();
3276   return false;
3277 }
3278 
3279 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3280 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3281   StructInfo &Struct = StructInProgress.back();
3282   FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3283   IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3284 
3285   Field.Type = Size;
3286 
3287   if (parseScalarInstList(Size, IntInfo.Values))
3288     return true;
3289 
3290   Field.SizeOf = Field.Type * IntInfo.Values.size();
3291   Field.LengthOf = IntInfo.Values.size();
3292   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3293   if (!Struct.IsUnion) {
3294     Struct.NextOffset = FieldEnd;
3295   }
3296   Struct.Size = std::max(Struct.Size, FieldEnd);
3297   return false;
3298 }
3299 
3300 /// parseDirectiveValue
3301 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3302 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3303   if (StructInProgress.empty()) {
3304     // Initialize data value.
3305     if (emitIntegralValues(Size))
3306       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3307   } else if (addIntegralField("", Size)) {
3308     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3309   }
3310 
3311   return false;
3312 }
3313 
3314 /// parseDirectiveNamedValue
3315 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3316 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3317                                           StringRef Name, SMLoc NameLoc) {
3318   if (StructInProgress.empty()) {
3319     // Initialize named data value.
3320     MCSymbol *Sym = getContext().parseSymbol(Name);
3321     getStreamer().emitLabel(Sym);
3322     unsigned Count;
3323     if (emitIntegralValues(Size, &Count))
3324       return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3325 
3326     AsmTypeInfo Type;
3327     Type.Name = TypeName;
3328     Type.Size = Size * Count;
3329     Type.ElementSize = Size;
3330     Type.Length = Count;
3331     KnownType[Name.lower()] = Type;
3332   } else if (addIntegralField(Name, Size)) {
3333     return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3334   }
3335 
3336   return false;
3337 }
3338 
parseRealValue(const fltSemantics & Semantics,APInt & Res)3339 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3340   // We don't truly support arithmetic on floating point expressions, so we
3341   // have to manually parse unary prefixes.
3342   bool IsNeg = false;
3343   SMLoc SignLoc;
3344   if (getLexer().is(AsmToken::Minus)) {
3345     SignLoc = getLexer().getLoc();
3346     Lexer.Lex();
3347     IsNeg = true;
3348   } else if (getLexer().is(AsmToken::Plus)) {
3349     SignLoc = getLexer().getLoc();
3350     Lexer.Lex();
3351   }
3352 
3353   if (Lexer.is(AsmToken::Error))
3354     return TokError(Lexer.getErr());
3355   if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3356       Lexer.isNot(AsmToken::Identifier))
3357     return TokError("unexpected token in directive");
3358 
3359   // Convert to an APFloat.
3360   APFloat Value(Semantics);
3361   StringRef IDVal = getTok().getString();
3362   if (getLexer().is(AsmToken::Identifier)) {
3363     if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3364       Value = APFloat::getInf(Semantics);
3365     else if (IDVal.equals_insensitive("nan"))
3366       Value = APFloat::getNaN(Semantics, false, ~0);
3367     else if (IDVal.equals_insensitive("?"))
3368       Value = APFloat::getZero(Semantics);
3369     else
3370       return TokError("invalid floating point literal");
3371   } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3372     // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3373     // To match ML64.exe, ignore the initial sign.
3374     unsigned SizeInBits = Value.getSizeInBits(Semantics);
3375     if (SizeInBits != (IDVal.size() << 2))
3376       return TokError("invalid floating point literal");
3377 
3378     // Consume the numeric token.
3379     Lex();
3380 
3381     Res = APInt(SizeInBits, IDVal, 16);
3382     if (SignLoc.isValid())
3383       return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3384     return false;
3385   } else if (errorToBool(
3386                  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3387                      .takeError())) {
3388     return TokError("invalid floating point literal");
3389   }
3390   if (IsNeg)
3391     Value.changeSign();
3392 
3393   // Consume the numeric token.
3394   Lex();
3395 
3396   Res = Value.bitcastToAPInt();
3397 
3398   return false;
3399 }
3400 
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3401 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3402                                    SmallVectorImpl<APInt> &ValuesAsInt,
3403                                    const AsmToken::TokenKind EndToken) {
3404   while (getTok().isNot(EndToken) ||
3405          (EndToken == AsmToken::Greater &&
3406           getTok().isNot(AsmToken::GreaterGreater))) {
3407     const AsmToken NextTok = peekTok();
3408     if (NextTok.is(AsmToken::Identifier) &&
3409         NextTok.getString().equals_insensitive("dup")) {
3410       const MCExpr *Value;
3411       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3412         return true;
3413       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3414       if (!MCE)
3415         return Error(Value->getLoc(),
3416                      "cannot repeat value a non-constant number of times");
3417       const int64_t Repetitions = MCE->getValue();
3418       if (Repetitions < 0)
3419         return Error(Value->getLoc(),
3420                      "cannot repeat value a negative number of times");
3421 
3422       SmallVector<APInt, 1> DuplicatedValues;
3423       if (parseToken(AsmToken::LParen,
3424                      "parentheses required for 'dup' contents") ||
3425           parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3426         return true;
3427 
3428       for (int i = 0; i < Repetitions; ++i)
3429         ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3430     } else {
3431       APInt AsInt;
3432       if (parseRealValue(Semantics, AsInt))
3433         return true;
3434       ValuesAsInt.push_back(AsInt);
3435     }
3436 
3437     // Continue if we see a comma. (Also, allow line continuation.)
3438     if (!parseOptionalToken(AsmToken::Comma))
3439       break;
3440     parseOptionalToken(AsmToken::EndOfStatement);
3441   }
3442 
3443   return false;
3444 }
3445 
3446 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3447 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3448                                 unsigned *Count) {
3449   if (checkForValidSection())
3450     return true;
3451 
3452   SmallVector<APInt, 1> ValuesAsInt;
3453   if (parseRealInstList(Semantics, ValuesAsInt))
3454     return true;
3455 
3456   for (const APInt &AsInt : ValuesAsInt) {
3457     getStreamer().emitIntValue(AsInt);
3458   }
3459   if (Count)
3460     *Count = ValuesAsInt.size();
3461   return false;
3462 }
3463 
3464 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3465 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3466                               size_t Size) {
3467   StructInfo &Struct = StructInProgress.back();
3468   FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3469   RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3470 
3471   Field.SizeOf = 0;
3472 
3473   if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3474     return true;
3475 
3476   Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3477   Field.LengthOf = RealInfo.AsIntValues.size();
3478   Field.SizeOf = Field.Type * Field.LengthOf;
3479 
3480   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3481   if (!Struct.IsUnion) {
3482     Struct.NextOffset = FieldEnd;
3483   }
3484   Struct.Size = std::max(Struct.Size, FieldEnd);
3485   return false;
3486 }
3487 
3488 /// parseDirectiveRealValue
3489 ///  ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3490 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3491                                          const fltSemantics &Semantics,
3492                                          size_t Size) {
3493   if (StructInProgress.empty()) {
3494     // Initialize data value.
3495     if (emitRealValues(Semantics))
3496       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3497   } else if (addRealField("", Semantics, Size)) {
3498     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3499   }
3500   return false;
3501 }
3502 
3503 /// parseDirectiveNamedRealValue
3504 ///  ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3505 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3506                                               const fltSemantics &Semantics,
3507                                               unsigned Size, StringRef Name,
3508                                               SMLoc NameLoc) {
3509   if (StructInProgress.empty()) {
3510     // Initialize named data value.
3511     MCSymbol *Sym = getContext().parseSymbol(Name);
3512     getStreamer().emitLabel(Sym);
3513     unsigned Count;
3514     if (emitRealValues(Semantics, &Count))
3515       return addErrorSuffix(" in '" + TypeName + "' directive");
3516 
3517     AsmTypeInfo Type;
3518     Type.Name = TypeName;
3519     Type.Size = Size * Count;
3520     Type.ElementSize = Size;
3521     Type.Length = Count;
3522     KnownType[Name.lower()] = Type;
3523   } else if (addRealField(Name, Semantics, Size)) {
3524     return addErrorSuffix(" in '" + TypeName + "' directive");
3525   }
3526   return false;
3527 }
3528 
parseOptionalAngleBracketOpen()3529 bool MasmParser::parseOptionalAngleBracketOpen() {
3530   const AsmToken Tok = getTok();
3531   if (parseOptionalToken(AsmToken::LessLess)) {
3532     AngleBracketDepth++;
3533     Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
3534     return true;
3535   } else if (parseOptionalToken(AsmToken::LessGreater)) {
3536     AngleBracketDepth++;
3537     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3538     return true;
3539   } else if (parseOptionalToken(AsmToken::Less)) {
3540     AngleBracketDepth++;
3541     return true;
3542   }
3543 
3544   return false;
3545 }
3546 
parseAngleBracketClose(const Twine & Msg)3547 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
3548   const AsmToken Tok = getTok();
3549   if (parseOptionalToken(AsmToken::GreaterGreater)) {
3550     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3551   } else if (parseToken(AsmToken::Greater, Msg)) {
3552     return true;
3553   }
3554   AngleBracketDepth--;
3555   return false;
3556 }
3557 
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)3558 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3559                                        const IntFieldInfo &Contents,
3560                                        FieldInitializer &Initializer) {
3561   SMLoc Loc = getTok().getLoc();
3562 
3563   SmallVector<const MCExpr *, 1> Values;
3564   if (parseOptionalToken(AsmToken::LCurly)) {
3565     if (Field.LengthOf == 1 && Field.Type > 1)
3566       return Error(Loc, "Cannot initialize scalar field with array value");
3567     if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
3568         parseToken(AsmToken::RCurly))
3569       return true;
3570   } else if (parseOptionalAngleBracketOpen()) {
3571     if (Field.LengthOf == 1 && Field.Type > 1)
3572       return Error(Loc, "Cannot initialize scalar field with array value");
3573     if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
3574         parseAngleBracketClose())
3575       return true;
3576   } else if (Field.LengthOf > 1 && Field.Type > 1) {
3577     return Error(Loc, "Cannot initialize array field with scalar value");
3578   } else if (parseScalarInitializer(Field.Type, Values,
3579                                     /*StringPadLength=*/Field.LengthOf)) {
3580     return true;
3581   }
3582 
3583   if (Values.size() > Field.LengthOf) {
3584     return Error(Loc, "Initializer too long for field; expected at most " +
3585                           std::to_string(Field.LengthOf) + " elements, got " +
3586                           std::to_string(Values.size()));
3587   }
3588   // Default-initialize all remaining values.
3589   Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
3590 
3591   Initializer = FieldInitializer(std::move(Values));
3592   return false;
3593 }
3594 
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)3595 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3596                                        const RealFieldInfo &Contents,
3597                                        FieldInitializer &Initializer) {
3598   const fltSemantics *Semantics;
3599   switch (Field.Type) {
3600   case 4:
3601     Semantics = &APFloat::IEEEsingle();
3602     break;
3603   case 8:
3604     Semantics = &APFloat::IEEEdouble();
3605     break;
3606   case 10:
3607     Semantics = &APFloat::x87DoubleExtended();
3608     break;
3609   default:
3610     llvm_unreachable("unknown real field type");
3611   }
3612 
3613   SMLoc Loc = getTok().getLoc();
3614 
3615   SmallVector<APInt, 1> AsIntValues;
3616   if (parseOptionalToken(AsmToken::LCurly)) {
3617     if (Field.LengthOf == 1)
3618       return Error(Loc, "Cannot initialize scalar field with array value");
3619     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
3620         parseToken(AsmToken::RCurly))
3621       return true;
3622   } else if (parseOptionalAngleBracketOpen()) {
3623     if (Field.LengthOf == 1)
3624       return Error(Loc, "Cannot initialize scalar field with array value");
3625     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
3626         parseAngleBracketClose())
3627       return true;
3628   } else if (Field.LengthOf > 1) {
3629     return Error(Loc, "Cannot initialize array field with scalar value");
3630   } else {
3631     AsIntValues.emplace_back();
3632     if (parseRealValue(*Semantics, AsIntValues.back()))
3633       return true;
3634   }
3635 
3636   if (AsIntValues.size() > Field.LengthOf) {
3637     return Error(Loc, "Initializer too long for field; expected at most " +
3638                           std::to_string(Field.LengthOf) + " elements, got " +
3639                           std::to_string(AsIntValues.size()));
3640   }
3641   // Default-initialize all remaining values.
3642   AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
3643                      Contents.AsIntValues.end());
3644 
3645   Initializer = FieldInitializer(std::move(AsIntValues));
3646   return false;
3647 }
3648 
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)3649 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3650                                        const StructFieldInfo &Contents,
3651                                        FieldInitializer &Initializer) {
3652   SMLoc Loc = getTok().getLoc();
3653 
3654   std::vector<StructInitializer> Initializers;
3655   if (Field.LengthOf > 1) {
3656     if (parseOptionalToken(AsmToken::LCurly)) {
3657       if (parseStructInstList(Contents.Structure, Initializers,
3658                               AsmToken::RCurly) ||
3659           parseToken(AsmToken::RCurly))
3660         return true;
3661     } else if (parseOptionalAngleBracketOpen()) {
3662       if (parseStructInstList(Contents.Structure, Initializers,
3663                               AsmToken::Greater) ||
3664           parseAngleBracketClose())
3665         return true;
3666     } else {
3667       return Error(Loc, "Cannot initialize array field with scalar value");
3668     }
3669   } else {
3670     Initializers.emplace_back();
3671     if (parseStructInitializer(Contents.Structure, Initializers.back()))
3672       return true;
3673   }
3674 
3675   if (Initializers.size() > Field.LengthOf) {
3676     return Error(Loc, "Initializer too long for field; expected at most " +
3677                           std::to_string(Field.LengthOf) + " elements, got " +
3678                           std::to_string(Initializers.size()));
3679   }
3680   // Default-initialize all remaining values.
3681   llvm::append_range(Initializers, llvm::drop_begin(Contents.Initializers,
3682                                                     Initializers.size()));
3683 
3684   Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
3685   return false;
3686 }
3687 
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)3688 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3689                                        FieldInitializer &Initializer) {
3690   switch (Field.Contents.FT) {
3691   case FT_INTEGRAL:
3692     return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
3693   case FT_REAL:
3694     return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
3695   case FT_STRUCT:
3696     return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
3697   }
3698   llvm_unreachable("Unhandled FieldType enum");
3699 }
3700 
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)3701 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
3702                                         StructInitializer &Initializer) {
3703   const AsmToken FirstToken = getTok();
3704 
3705   std::optional<AsmToken::TokenKind> EndToken;
3706   if (parseOptionalToken(AsmToken::LCurly)) {
3707     EndToken = AsmToken::RCurly;
3708   } else if (parseOptionalAngleBracketOpen()) {
3709     EndToken = AsmToken::Greater;
3710     AngleBracketDepth++;
3711   } else if (FirstToken.is(AsmToken::Identifier) &&
3712              FirstToken.getString() == "?") {
3713     // ? initializer; leave EndToken uninitialized to treat as empty.
3714     if (parseToken(AsmToken::Identifier))
3715       return true;
3716   } else {
3717     return Error(FirstToken.getLoc(), "Expected struct initializer");
3718   }
3719 
3720   auto &FieldInitializers = Initializer.FieldInitializers;
3721   size_t FieldIndex = 0;
3722   if (EndToken) {
3723     // Initialize all fields with given initializers.
3724     while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
3725       const FieldInfo &Field = Structure.Fields[FieldIndex++];
3726       if (parseOptionalToken(AsmToken::Comma)) {
3727         // Empty initializer; use the default and continue. (Also, allow line
3728         // continuation.)
3729         FieldInitializers.push_back(Field.Contents);
3730         parseOptionalToken(AsmToken::EndOfStatement);
3731         continue;
3732       }
3733       FieldInitializers.emplace_back(Field.Contents.FT);
3734       if (parseFieldInitializer(Field, FieldInitializers.back()))
3735         return true;
3736 
3737       // Continue if we see a comma. (Also, allow line continuation.)
3738       SMLoc CommaLoc = getTok().getLoc();
3739       if (!parseOptionalToken(AsmToken::Comma))
3740         break;
3741       if (FieldIndex == Structure.Fields.size())
3742         return Error(CommaLoc, "'" + Structure.Name +
3743                                    "' initializer initializes too many fields");
3744       parseOptionalToken(AsmToken::EndOfStatement);
3745     }
3746   }
3747   // Default-initialize all remaining fields.
3748   for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
3749     FieldInitializers.push_back(Field.Contents);
3750 
3751   if (EndToken) {
3752     if (*EndToken == AsmToken::Greater)
3753       return parseAngleBracketClose();
3754 
3755     return parseToken(*EndToken);
3756   }
3757 
3758   return false;
3759 }
3760 
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)3761 bool MasmParser::parseStructInstList(
3762     const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
3763     const AsmToken::TokenKind EndToken) {
3764   while (getTok().isNot(EndToken) ||
3765          (EndToken == AsmToken::Greater &&
3766           getTok().isNot(AsmToken::GreaterGreater))) {
3767     const AsmToken NextTok = peekTok();
3768     if (NextTok.is(AsmToken::Identifier) &&
3769         NextTok.getString().equals_insensitive("dup")) {
3770       const MCExpr *Value;
3771       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3772         return true;
3773       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3774       if (!MCE)
3775         return Error(Value->getLoc(),
3776                      "cannot repeat value a non-constant number of times");
3777       const int64_t Repetitions = MCE->getValue();
3778       if (Repetitions < 0)
3779         return Error(Value->getLoc(),
3780                      "cannot repeat value a negative number of times");
3781 
3782       std::vector<StructInitializer> DuplicatedValues;
3783       if (parseToken(AsmToken::LParen,
3784                      "parentheses required for 'dup' contents") ||
3785           parseStructInstList(Structure, DuplicatedValues) || parseRParen())
3786         return true;
3787 
3788       for (int i = 0; i < Repetitions; ++i)
3789         llvm::append_range(Initializers, DuplicatedValues);
3790     } else {
3791       Initializers.emplace_back();
3792       if (parseStructInitializer(Structure, Initializers.back()))
3793         return true;
3794     }
3795 
3796     // Continue if we see a comma. (Also, allow line continuation.)
3797     if (!parseOptionalToken(AsmToken::Comma))
3798       break;
3799     parseOptionalToken(AsmToken::EndOfStatement);
3800   }
3801 
3802   return false;
3803 }
3804 
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)3805 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3806                                 const IntFieldInfo &Contents) {
3807   // Default-initialize all values.
3808   for (const MCExpr *Value : Contents.Values) {
3809     if (emitIntValue(Value, Field.Type))
3810       return true;
3811   }
3812   return false;
3813 }
3814 
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)3815 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3816                                 const RealFieldInfo &Contents) {
3817   for (const APInt &AsInt : Contents.AsIntValues) {
3818     getStreamer().emitIntValue(AsInt.getLimitedValue(),
3819                                AsInt.getBitWidth() / 8);
3820   }
3821   return false;
3822 }
3823 
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)3824 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3825                                 const StructFieldInfo &Contents) {
3826   for (const auto &Initializer : Contents.Initializers) {
3827     size_t Index = 0, Offset = 0;
3828     for (const auto &SubField : Contents.Structure.Fields) {
3829       getStreamer().emitZeros(SubField.Offset - Offset);
3830       Offset = SubField.Offset + SubField.SizeOf;
3831       emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
3832     }
3833   }
3834   return false;
3835 }
3836 
emitFieldValue(const FieldInfo & Field)3837 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
3838   switch (Field.Contents.FT) {
3839   case FT_INTEGRAL:
3840     return emitFieldValue(Field, Field.Contents.IntInfo);
3841   case FT_REAL:
3842     return emitFieldValue(Field, Field.Contents.RealInfo);
3843   case FT_STRUCT:
3844     return emitFieldValue(Field, Field.Contents.StructInfo);
3845   }
3846   llvm_unreachable("Unhandled FieldType enum");
3847 }
3848 
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)3849 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3850                                       const IntFieldInfo &Contents,
3851                                       const IntFieldInfo &Initializer) {
3852   for (const auto &Value : Initializer.Values) {
3853     if (emitIntValue(Value, Field.Type))
3854       return true;
3855   }
3856   // Default-initialize all remaining values.
3857   for (const auto &Value :
3858            llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
3859     if (emitIntValue(Value, Field.Type))
3860       return true;
3861   }
3862   return false;
3863 }
3864 
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)3865 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3866                                       const RealFieldInfo &Contents,
3867                                       const RealFieldInfo &Initializer) {
3868   for (const auto &AsInt : Initializer.AsIntValues) {
3869     getStreamer().emitIntValue(AsInt.getLimitedValue(),
3870                                AsInt.getBitWidth() / 8);
3871   }
3872   // Default-initialize all remaining values.
3873   for (const auto &AsInt :
3874        llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
3875     getStreamer().emitIntValue(AsInt.getLimitedValue(),
3876                                AsInt.getBitWidth() / 8);
3877   }
3878   return false;
3879 }
3880 
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)3881 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3882                                       const StructFieldInfo &Contents,
3883                                       const StructFieldInfo &Initializer) {
3884   for (const auto &Init : Initializer.Initializers) {
3885     if (emitStructInitializer(Contents.Structure, Init))
3886       return true;
3887   }
3888   // Default-initialize all remaining values.
3889   for (const auto &Init : llvm::drop_begin(Contents.Initializers,
3890                                            Initializer.Initializers.size())) {
3891     if (emitStructInitializer(Contents.Structure, Init))
3892       return true;
3893   }
3894   return false;
3895 }
3896 
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)3897 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3898                                       const FieldInitializer &Initializer) {
3899   switch (Field.Contents.FT) {
3900   case FT_INTEGRAL:
3901     return emitFieldInitializer(Field, Field.Contents.IntInfo,
3902                                 Initializer.IntInfo);
3903   case FT_REAL:
3904     return emitFieldInitializer(Field, Field.Contents.RealInfo,
3905                                 Initializer.RealInfo);
3906   case FT_STRUCT:
3907     return emitFieldInitializer(Field, Field.Contents.StructInfo,
3908                                 Initializer.StructInfo);
3909   }
3910   llvm_unreachable("Unhandled FieldType enum");
3911 }
3912 
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)3913 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
3914                                        const StructInitializer &Initializer) {
3915   if (!Structure.Initializable)
3916     return Error(getLexer().getLoc(),
3917                  "cannot initialize a value of type '" + Structure.Name +
3918                      "'; 'org' was used in the type's declaration");
3919   size_t Index = 0, Offset = 0;
3920   for (const auto &Init : Initializer.FieldInitializers) {
3921     const auto &Field = Structure.Fields[Index++];
3922     getStreamer().emitZeros(Field.Offset - Offset);
3923     Offset = Field.Offset + Field.SizeOf;
3924     if (emitFieldInitializer(Field, Init))
3925       return true;
3926   }
3927   // Default-initialize all remaining fields.
3928   for (const auto &Field : llvm::drop_begin(
3929            Structure.Fields, Initializer.FieldInitializers.size())) {
3930     getStreamer().emitZeros(Field.Offset - Offset);
3931     Offset = Field.Offset + Field.SizeOf;
3932     if (emitFieldValue(Field))
3933       return true;
3934   }
3935   // Add final padding.
3936   if (Offset != Structure.Size)
3937     getStreamer().emitZeros(Structure.Size - Offset);
3938   return false;
3939 }
3940 
3941 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)3942 bool MasmParser::emitStructValues(const StructInfo &Structure,
3943                                   unsigned *Count) {
3944   std::vector<StructInitializer> Initializers;
3945   if (parseStructInstList(Structure, Initializers))
3946     return true;
3947 
3948   for (const auto &Initializer : Initializers) {
3949     if (emitStructInitializer(Structure, Initializer))
3950       return true;
3951   }
3952 
3953   if (Count)
3954     *Count = Initializers.size();
3955   return false;
3956 }
3957 
3958 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)3959 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
3960   StructInfo &OwningStruct = StructInProgress.back();
3961   FieldInfo &Field =
3962       OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
3963   StructFieldInfo &StructInfo = Field.Contents.StructInfo;
3964 
3965   StructInfo.Structure = Structure;
3966   Field.Type = Structure.Size;
3967 
3968   if (parseStructInstList(Structure, StructInfo.Initializers))
3969     return true;
3970 
3971   Field.LengthOf = StructInfo.Initializers.size();
3972   Field.SizeOf = Field.Type * Field.LengthOf;
3973 
3974   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3975   if (!OwningStruct.IsUnion) {
3976     OwningStruct.NextOffset = FieldEnd;
3977   }
3978   OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
3979 
3980   return false;
3981 }
3982 
3983 /// parseDirectiveStructValue
3984 ///  ::= struct-id (<struct-initializer> | {struct-initializer})
3985 ///                [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)3986 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
3987                                            StringRef Directive, SMLoc DirLoc) {
3988   if (StructInProgress.empty()) {
3989     if (emitStructValues(Structure))
3990       return true;
3991   } else if (addStructField("", Structure)) {
3992     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
3993   }
3994 
3995   return false;
3996 }
3997 
3998 /// parseDirectiveNamedValue
3999 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4000 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4001                                                 StringRef Directive,
4002                                                 SMLoc DirLoc, StringRef Name) {
4003   if (StructInProgress.empty()) {
4004     // Initialize named data value.
4005     MCSymbol *Sym = getContext().parseSymbol(Name);
4006     getStreamer().emitLabel(Sym);
4007     unsigned Count;
4008     if (emitStructValues(Structure, &Count))
4009       return true;
4010     AsmTypeInfo Type;
4011     Type.Name = Structure.Name;
4012     Type.Size = Structure.Size * Count;
4013     Type.ElementSize = Structure.Size;
4014     Type.Length = Count;
4015     KnownType[Name.lower()] = Type;
4016   } else if (addStructField(Name, Structure)) {
4017     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4018   }
4019 
4020   return false;
4021 }
4022 
4023 /// parseDirectiveStruct
4024 ///  ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4025 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4026 ///      <name> ENDS
4027 ////// dataDir = data declaration
4028 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4029 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4030                                       DirectiveKind DirKind, StringRef Name,
4031                                       SMLoc NameLoc) {
4032   // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4033   // anyway, so all field accesses must be qualified.
4034   AsmToken NextTok = getTok();
4035   int64_t AlignmentValue = 1;
4036   if (NextTok.isNot(AsmToken::Comma) &&
4037       NextTok.isNot(AsmToken::EndOfStatement) &&
4038       parseAbsoluteExpression(AlignmentValue)) {
4039     return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4040                           "' directive");
4041   }
4042   if (!isPowerOf2_64(AlignmentValue)) {
4043     return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4044                                        std::to_string(AlignmentValue));
4045   }
4046 
4047   StringRef Qualifier;
4048   SMLoc QualifierLoc;
4049   if (parseOptionalToken(AsmToken::Comma)) {
4050     QualifierLoc = getTok().getLoc();
4051     if (parseIdentifier(Qualifier))
4052       return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4053     if (!Qualifier.equals_insensitive("nonunique"))
4054       return Error(QualifierLoc, "Unrecognized qualifier for '" +
4055                                      Twine(Directive) +
4056                                      "' directive; expected none or NONUNIQUE");
4057   }
4058 
4059   if (parseEOL())
4060     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4061 
4062   StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4063   return false;
4064 }
4065 
4066 /// parseDirectiveNestedStruct
4067 ///  ::= (STRUC | STRUCT | UNION) [name]
4068 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4069 ///      ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4070 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4071                                             DirectiveKind DirKind) {
4072   if (StructInProgress.empty())
4073     return TokError("missing name in top-level '" + Twine(Directive) +
4074                     "' directive");
4075 
4076   StringRef Name;
4077   if (getTok().is(AsmToken::Identifier)) {
4078     Name = getTok().getIdentifier();
4079     parseToken(AsmToken::Identifier);
4080   }
4081   if (parseEOL())
4082     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4083 
4084   // Reserve space to ensure Alignment doesn't get invalidated when
4085   // StructInProgress grows.
4086   StructInProgress.reserve(StructInProgress.size() + 1);
4087   StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4088                                 StructInProgress.back().Alignment);
4089   return false;
4090 }
4091 
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4092 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4093   if (StructInProgress.empty())
4094     return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4095   if (StructInProgress.size() > 1)
4096     return Error(NameLoc, "unexpected name in nested ENDS directive");
4097   if (StructInProgress.back().Name.compare_insensitive(Name))
4098     return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4099                               StructInProgress.back().Name + "'");
4100   StructInfo Structure = StructInProgress.pop_back_val();
4101   // Pad to make the structure's size divisible by the smaller of its alignment
4102   // and the size of its largest field.
4103   Structure.Size = llvm::alignTo(
4104       Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4105   Structs[Name.lower()] = Structure;
4106 
4107   if (parseEOL())
4108     return addErrorSuffix(" in ENDS directive");
4109 
4110   return false;
4111 }
4112 
parseDirectiveNestedEnds()4113 bool MasmParser::parseDirectiveNestedEnds() {
4114   if (StructInProgress.empty())
4115     return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4116   if (StructInProgress.size() == 1)
4117     return TokError("missing name in top-level ENDS directive");
4118 
4119   if (parseEOL())
4120     return addErrorSuffix(" in nested ENDS directive");
4121 
4122   StructInfo Structure = StructInProgress.pop_back_val();
4123   // Pad to make the structure's size divisible by its alignment.
4124   Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4125 
4126   StructInfo &ParentStruct = StructInProgress.back();
4127   if (Structure.Name.empty()) {
4128     // Anonymous substructures' fields are addressed as if they belong to the
4129     // parent structure - so we transfer them to the parent here.
4130     const size_t OldFields = ParentStruct.Fields.size();
4131     ParentStruct.Fields.insert(
4132         ParentStruct.Fields.end(),
4133         std::make_move_iterator(Structure.Fields.begin()),
4134         std::make_move_iterator(Structure.Fields.end()));
4135     for (const auto &FieldByName : Structure.FieldsByName) {
4136       ParentStruct.FieldsByName[FieldByName.getKey()] =
4137           FieldByName.getValue() + OldFields;
4138     }
4139 
4140     unsigned FirstFieldOffset = 0;
4141     if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4142       FirstFieldOffset = llvm::alignTo(
4143           ParentStruct.NextOffset,
4144           std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4145     }
4146 
4147     if (ParentStruct.IsUnion) {
4148       ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4149     } else {
4150       for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4151         Field.Offset += FirstFieldOffset;
4152 
4153       const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4154       if (!ParentStruct.IsUnion) {
4155         ParentStruct.NextOffset = StructureEnd;
4156       }
4157       ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4158     }
4159   } else {
4160     FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4161                                              Structure.AlignmentSize);
4162     StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4163     Field.Type = Structure.Size;
4164     Field.LengthOf = 1;
4165     Field.SizeOf = Structure.Size;
4166 
4167     const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4168     if (!ParentStruct.IsUnion) {
4169       ParentStruct.NextOffset = StructureEnd;
4170     }
4171     ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4172 
4173     StructInfo.Structure = Structure;
4174     StructInfo.Initializers.emplace_back();
4175     auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4176     for (const auto &SubField : Structure.Fields) {
4177       FieldInitializers.push_back(SubField.Contents);
4178     }
4179   }
4180 
4181   return false;
4182 }
4183 
4184 /// parseDirectiveOrg
4185 ///  ::= org expression
parseDirectiveOrg()4186 bool MasmParser::parseDirectiveOrg() {
4187   const MCExpr *Offset;
4188   SMLoc OffsetLoc = Lexer.getLoc();
4189   if (checkForValidSection() || parseExpression(Offset))
4190     return true;
4191   if (parseEOL())
4192     return addErrorSuffix(" in 'org' directive");
4193 
4194   if (StructInProgress.empty()) {
4195     // Not in a struct; change the offset for the next instruction or data
4196     if (checkForValidSection())
4197       return addErrorSuffix(" in 'org' directive");
4198 
4199     getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4200   } else {
4201     // Offset the next field of this struct
4202     StructInfo &Structure = StructInProgress.back();
4203     int64_t OffsetRes;
4204     if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4205       return Error(OffsetLoc,
4206                    "expected absolute expression in 'org' directive");
4207     if (OffsetRes < 0)
4208       return Error(
4209           OffsetLoc,
4210           "expected non-negative value in struct's 'org' directive; was " +
4211               std::to_string(OffsetRes));
4212     Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4213 
4214     // ORG-affected structures cannot be initialized
4215     Structure.Initializable = false;
4216   }
4217 
4218   return false;
4219 }
4220 
emitAlignTo(int64_t Alignment)4221 bool MasmParser::emitAlignTo(int64_t Alignment) {
4222   if (StructInProgress.empty()) {
4223     // Not in a struct; align the next instruction or data
4224     if (checkForValidSection())
4225       return true;
4226 
4227     // Check whether we should use optimal code alignment for this align
4228     // directive.
4229     const MCSection *Section = getStreamer().getCurrentSectionOnly();
4230     assert(Section && "must have section to emit alignment");
4231     if (Section->useCodeAlign()) {
4232       getStreamer().emitCodeAlignment(Align(Alignment),
4233                                       &getTargetParser().getSTI(),
4234                                       /*MaxBytesToEmit=*/0);
4235     } else {
4236       // FIXME: Target specific behavior about how the "extra" bytes are filled.
4237       getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4238                                          /*ValueSize=*/1,
4239                                          /*MaxBytesToEmit=*/0);
4240     }
4241   } else {
4242     // Align the next field of this struct
4243     StructInfo &Structure = StructInProgress.back();
4244     Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4245   }
4246 
4247   return false;
4248 }
4249 
4250 /// parseDirectiveAlign
4251 ///  ::= align expression
parseDirectiveAlign()4252 bool MasmParser::parseDirectiveAlign() {
4253   SMLoc AlignmentLoc = getLexer().getLoc();
4254   int64_t Alignment;
4255 
4256   // Ignore empty 'align' directives.
4257   if (getTok().is(AsmToken::EndOfStatement)) {
4258     return Warning(AlignmentLoc,
4259                    "align directive with no operand is ignored") &&
4260            parseEOL();
4261   }
4262   if (parseAbsoluteExpression(Alignment) || parseEOL())
4263     return addErrorSuffix(" in align directive");
4264 
4265   // Always emit an alignment here even if we throw an error.
4266   bool ReturnVal = false;
4267 
4268   // Reject alignments that aren't either a power of two or zero, for ML.exe
4269   // compatibility. Alignment of zero is silently rounded up to one.
4270   if (Alignment == 0)
4271     Alignment = 1;
4272   if (!isPowerOf2_64(Alignment))
4273     ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4274                                          std::to_string(Alignment));
4275 
4276   if (emitAlignTo(Alignment))
4277     ReturnVal |= addErrorSuffix(" in align directive");
4278 
4279   return ReturnVal;
4280 }
4281 
4282 /// parseDirectiveEven
4283 ///  ::= even
parseDirectiveEven()4284 bool MasmParser::parseDirectiveEven() {
4285   if (parseEOL() || emitAlignTo(2))
4286     return addErrorSuffix(" in even directive");
4287 
4288   return false;
4289 }
4290 
4291 /// parseDirectiveMacro
4292 /// ::= name macro [parameters]
4293 ///     ["LOCAL" identifiers]
4294 ///   parameters ::= parameter [, parameter]*
4295 ///   parameter ::= name ":" qualifier
4296 ///   qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)4297 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
4298   MCAsmMacroParameters Parameters;
4299   while (getLexer().isNot(AsmToken::EndOfStatement)) {
4300     if (!Parameters.empty() && Parameters.back().Vararg)
4301       return Error(Lexer.getLoc(),
4302                    "Vararg parameter '" + Parameters.back().Name +
4303                        "' should be last in the list of parameters");
4304 
4305     MCAsmMacroParameter Parameter;
4306     if (parseIdentifier(Parameter.Name))
4307       return TokError("expected identifier in 'macro' directive");
4308 
4309     // Emit an error if two (or more) named parameters share the same name.
4310     for (const MCAsmMacroParameter& CurrParam : Parameters)
4311       if (CurrParam.Name.equals_insensitive(Parameter.Name))
4312         return TokError("macro '" + Name + "' has multiple parameters"
4313                         " named '" + Parameter.Name + "'");
4314 
4315     if (Lexer.is(AsmToken::Colon)) {
4316       Lex();  // consume ':'
4317 
4318       if (parseOptionalToken(AsmToken::Equal)) {
4319         // Default value
4320         SMLoc ParamLoc;
4321 
4322         ParamLoc = Lexer.getLoc();
4323         if (parseMacroArgument(nullptr, Parameter.Value))
4324           return true;
4325       } else {
4326         SMLoc QualLoc;
4327         StringRef Qualifier;
4328 
4329         QualLoc = Lexer.getLoc();
4330         if (parseIdentifier(Qualifier))
4331           return Error(QualLoc, "missing parameter qualifier for "
4332                                 "'" +
4333                                     Parameter.Name + "' in macro '" + Name +
4334                                     "'");
4335 
4336         if (Qualifier.equals_insensitive("req"))
4337           Parameter.Required = true;
4338         else if (Qualifier.equals_insensitive("vararg"))
4339           Parameter.Vararg = true;
4340         else
4341           return Error(QualLoc,
4342                        Qualifier + " is not a valid parameter qualifier for '" +
4343                            Parameter.Name + "' in macro '" + Name + "'");
4344       }
4345     }
4346 
4347     Parameters.push_back(std::move(Parameter));
4348 
4349     if (getLexer().is(AsmToken::Comma))
4350       Lex();
4351   }
4352 
4353   // Eat just the end of statement.
4354   Lexer.Lex();
4355 
4356   std::vector<std::string> Locals;
4357   if (getTok().is(AsmToken::Identifier) &&
4358       getTok().getIdentifier().equals_insensitive("local")) {
4359     Lex(); // Eat the LOCAL directive.
4360 
4361     StringRef ID;
4362     while (true) {
4363       if (parseIdentifier(ID))
4364         return true;
4365       Locals.push_back(ID.lower());
4366 
4367       // If we see a comma, continue (and allow line continuation).
4368       if (!parseOptionalToken(AsmToken::Comma))
4369         break;
4370       parseOptionalToken(AsmToken::EndOfStatement);
4371     }
4372   }
4373 
4374   // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
4375   AsmToken EndToken, StartToken = getTok();
4376   unsigned MacroDepth = 0;
4377   bool IsMacroFunction = false;
4378   // Lex the macro definition.
4379   while (true) {
4380     // Ignore Lexing errors in macros.
4381     while (Lexer.is(AsmToken::Error)) {
4382       Lexer.Lex();
4383     }
4384 
4385     // Check whether we have reached the end of the file.
4386     if (getLexer().is(AsmToken::Eof))
4387       return Error(NameLoc, "no matching 'endm' in definition");
4388 
4389     // Otherwise, check whether we have reached the 'endm'... and determine if
4390     // this is a macro function.
4391     if (getLexer().is(AsmToken::Identifier)) {
4392       if (getTok().getIdentifier().equals_insensitive("endm")) {
4393         if (MacroDepth == 0) { // Outermost macro.
4394           EndToken = getTok();
4395           Lexer.Lex();
4396           if (getLexer().isNot(AsmToken::EndOfStatement))
4397             return TokError("unexpected token in '" + EndToken.getIdentifier() +
4398                             "' directive");
4399           break;
4400         } else {
4401           // Otherwise we just found the end of an inner macro.
4402           --MacroDepth;
4403         }
4404       } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
4405         if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
4406           IsMacroFunction = true;
4407         }
4408       } else if (isMacroLikeDirective()) {
4409         // We allow nested macros. Those aren't instantiated until the
4410         // outermost macro is expanded so just ignore them for now.
4411         ++MacroDepth;
4412       }
4413     }
4414 
4415     // Otherwise, scan til the end of the statement.
4416     eatToEndOfStatement();
4417   }
4418 
4419   if (getContext().lookupMacro(Name.lower())) {
4420     return Error(NameLoc, "macro '" + Name + "' is already defined");
4421   }
4422 
4423   const char *BodyStart = StartToken.getLoc().getPointer();
4424   const char *BodyEnd = EndToken.getLoc().getPointer();
4425   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
4426   MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
4427                    IsMacroFunction);
4428   DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
4429                   Macro.dump());
4430   getContext().defineMacro(Name.lower(), std::move(Macro));
4431   return false;
4432 }
4433 
4434 /// parseDirectiveExitMacro
4435 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)4436 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
4437                                          StringRef Directive,
4438                                          std::string &Value) {
4439   SMLoc EndLoc = getTok().getLoc();
4440   if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
4441     return Error(EndLoc,
4442                  "unable to parse text item in '" + Directive + "' directive");
4443   eatToEndOfStatement();
4444 
4445   if (!isInsideMacroInstantiation())
4446     return TokError("unexpected '" + Directive + "' in file, "
4447                                                  "no current macro definition");
4448 
4449   // Exit all conditionals that are active in the current macro.
4450   while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
4451     TheCondState = TheCondStack.back();
4452     TheCondStack.pop_back();
4453   }
4454 
4455   handleMacroExit();
4456   return false;
4457 }
4458 
4459 /// parseDirectiveEndMacro
4460 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)4461 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
4462   if (getLexer().isNot(AsmToken::EndOfStatement))
4463     return TokError("unexpected token in '" + Directive + "' directive");
4464 
4465   // If we are inside a macro instantiation, terminate the current
4466   // instantiation.
4467   if (isInsideMacroInstantiation()) {
4468     handleMacroExit();
4469     return false;
4470   }
4471 
4472   // Otherwise, this .endmacro is a stray entry in the file; well formed
4473   // .endmacro directives are handled during the macro definition parsing.
4474   return TokError("unexpected '" + Directive + "' in file, "
4475                                                "no current macro definition");
4476 }
4477 
4478 /// parseDirectivePurgeMacro
4479 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)4480 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
4481   StringRef Name;
4482   while (true) {
4483     SMLoc NameLoc;
4484     if (parseTokenLoc(NameLoc) ||
4485         check(parseIdentifier(Name), NameLoc,
4486               "expected identifier in 'purge' directive"))
4487       return true;
4488 
4489     DEBUG_WITH_TYPE("asm-macros", dbgs()
4490                                       << "Un-defining macro: " << Name << "\n");
4491     if (!getContext().lookupMacro(Name.lower()))
4492       return Error(NameLoc, "macro '" + Name + "' is not defined");
4493     getContext().undefineMacro(Name.lower());
4494 
4495     if (!parseOptionalToken(AsmToken::Comma))
4496       break;
4497     parseOptionalToken(AsmToken::EndOfStatement);
4498   }
4499 
4500   return false;
4501 }
4502 
parseDirectiveExtern()4503 bool MasmParser::parseDirectiveExtern() {
4504   // .extern is the default - but we still need to take any provided type info.
4505   auto parseOp = [&]() -> bool {
4506     MCSymbol *Sym;
4507     SMLoc NameLoc = getTok().getLoc();
4508     if (parseSymbol(Sym))
4509       return Error(NameLoc, "expected name");
4510     if (parseToken(AsmToken::Colon))
4511       return true;
4512 
4513     StringRef TypeName;
4514     SMLoc TypeLoc = getTok().getLoc();
4515     if (parseIdentifier(TypeName))
4516       return Error(TypeLoc, "expected type");
4517     if (!TypeName.equals_insensitive("proc")) {
4518       AsmTypeInfo Type;
4519       if (lookUpType(TypeName, Type))
4520         return Error(TypeLoc, "unrecognized type");
4521       KnownType[Sym->getName().lower()] = Type;
4522     }
4523 
4524     Sym->setExternal(true);
4525     getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
4526 
4527     return false;
4528   };
4529 
4530   if (parseMany(parseOp))
4531     return addErrorSuffix(" in directive 'extern'");
4532   return false;
4533 }
4534 
4535 /// parseDirectiveSymbolAttribute
4536 ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)4537 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
4538   auto parseOp = [&]() -> bool {
4539     SMLoc Loc = getTok().getLoc();
4540     MCSymbol *Sym;
4541     if (parseSymbol(Sym))
4542       return Error(Loc, "expected identifier");
4543 
4544     // Assembler local symbols don't make any sense here. Complain loudly.
4545     if (Sym->isTemporary())
4546       return Error(Loc, "non-local symbol required");
4547 
4548     if (!getStreamer().emitSymbolAttribute(Sym, Attr))
4549       return Error(Loc, "unable to emit symbol attribute");
4550     return false;
4551   };
4552 
4553   if (parseMany(parseOp))
4554     return addErrorSuffix(" in directive");
4555   return false;
4556 }
4557 
4558 /// parseDirectiveComm
4559 ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)4560 bool MasmParser::parseDirectiveComm(bool IsLocal) {
4561   if (checkForValidSection())
4562     return true;
4563 
4564   SMLoc IDLoc = getLexer().getLoc();
4565   MCSymbol *Sym;
4566   if (parseSymbol(Sym))
4567     return TokError("expected identifier in directive");
4568 
4569   if (getLexer().isNot(AsmToken::Comma))
4570     return TokError("unexpected token in directive");
4571   Lex();
4572 
4573   int64_t Size;
4574   SMLoc SizeLoc = getLexer().getLoc();
4575   if (parseAbsoluteExpression(Size))
4576     return true;
4577 
4578   int64_t Pow2Alignment = 0;
4579   SMLoc Pow2AlignmentLoc;
4580   if (getLexer().is(AsmToken::Comma)) {
4581     Lex();
4582     Pow2AlignmentLoc = getLexer().getLoc();
4583     if (parseAbsoluteExpression(Pow2Alignment))
4584       return true;
4585 
4586     LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
4587     if (IsLocal && LCOMM == LCOMM::NoAlignment)
4588       return Error(Pow2AlignmentLoc, "alignment not supported on this target");
4589 
4590     // If this target takes alignments in bytes (not log) validate and convert.
4591     if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
4592         (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
4593       if (!isPowerOf2_64(Pow2Alignment))
4594         return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
4595       Pow2Alignment = Log2_64(Pow2Alignment);
4596     }
4597   }
4598 
4599   if (parseEOL())
4600     return true;
4601 
4602   // NOTE: a size of zero for a .comm should create a undefined symbol
4603   // but a size of .lcomm creates a bss symbol of size zero.
4604   if (Size < 0)
4605     return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
4606                           "be less than zero");
4607 
4608   // NOTE: The alignment in the directive is a power of 2 value, the assembler
4609   // may internally end up wanting an alignment in bytes.
4610   // FIXME: Diagnose overflow.
4611   if (Pow2Alignment < 0)
4612     return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
4613                                    "alignment, can't be less than zero");
4614 
4615   Sym->redefineIfPossible();
4616   if (!Sym->isUndefined())
4617     return Error(IDLoc, "invalid symbol redefinition");
4618 
4619   // Create the Symbol as a common or local common with Size and Pow2Alignment.
4620   if (IsLocal) {
4621     getStreamer().emitLocalCommonSymbol(Sym, Size,
4622                                         Align(1ULL << Pow2Alignment));
4623     return false;
4624   }
4625 
4626   getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
4627   return false;
4628 }
4629 
4630 /// parseDirectiveComment
4631 ///  ::= comment delimiter [[text]]
4632 ///              [[text]]
4633 ///              [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)4634 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
4635   std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
4636   size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
4637   assert(DelimiterEnd != std::string::npos);
4638   StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
4639   if (Delimiter.empty())
4640     return Error(DirectiveLoc, "no delimiter in 'comment' directive");
4641   do {
4642     if (getTok().is(AsmToken::Eof))
4643       return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
4644     Lex();  // eat end of statement
4645   } while (
4646       !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
4647   return parseEOL();
4648 }
4649 
4650 /// parseDirectiveInclude
4651 ///  ::= include <filename>
4652 ///    | include filename
parseDirectiveInclude()4653 bool MasmParser::parseDirectiveInclude() {
4654   // Allow the strings to have escaped octal character sequence.
4655   std::string Filename;
4656   SMLoc IncludeLoc = getTok().getLoc();
4657 
4658   if (parseAngleBracketString(Filename))
4659     Filename = parseStringTo(AsmToken::EndOfStatement);
4660   if (check(Filename.empty(), "missing filename in 'include' directive") ||
4661       check(getTok().isNot(AsmToken::EndOfStatement),
4662             "unexpected token in 'include' directive") ||
4663       // Attempt to switch the lexer to the included file before consuming the
4664       // end of statement to avoid losing it when we switch.
4665       check(enterIncludeFile(Filename), IncludeLoc,
4666             "Could not find include file '" + Filename + "'"))
4667     return true;
4668 
4669   return false;
4670 }
4671 
4672 /// parseDirectiveIf
4673 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)4674 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
4675   TheCondStack.push_back(TheCondState);
4676   TheCondState.TheCond = AsmCond::IfCond;
4677   if (TheCondState.Ignore) {
4678     eatToEndOfStatement();
4679   } else {
4680     int64_t ExprValue;
4681     if (parseAbsoluteExpression(ExprValue) || parseEOL())
4682       return true;
4683 
4684     switch (DirKind) {
4685     default:
4686       llvm_unreachable("unsupported directive");
4687     case DK_IF:
4688       break;
4689     case DK_IFE:
4690       ExprValue = ExprValue == 0;
4691       break;
4692     }
4693 
4694     TheCondState.CondMet = ExprValue;
4695     TheCondState.Ignore = !TheCondState.CondMet;
4696   }
4697 
4698   return false;
4699 }
4700 
4701 /// parseDirectiveIfb
4702 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)4703 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
4704   TheCondStack.push_back(TheCondState);
4705   TheCondState.TheCond = AsmCond::IfCond;
4706 
4707   if (TheCondState.Ignore) {
4708     eatToEndOfStatement();
4709   } else {
4710     std::string Str;
4711     if (parseTextItem(Str))
4712       return TokError("expected text item parameter for 'ifb' directive");
4713 
4714     if (parseEOL())
4715       return true;
4716 
4717     TheCondState.CondMet = ExpectBlank == Str.empty();
4718     TheCondState.Ignore = !TheCondState.CondMet;
4719   }
4720 
4721   return false;
4722 }
4723 
4724 /// parseDirectiveIfidn
4725 ///   ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)4726 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
4727                                      bool CaseInsensitive) {
4728   std::string String1, String2;
4729 
4730   if (parseTextItem(String1)) {
4731     if (ExpectEqual)
4732       return TokError("expected text item parameter for 'ifidn' directive");
4733     return TokError("expected text item parameter for 'ifdif' directive");
4734   }
4735 
4736   if (Lexer.isNot(AsmToken::Comma)) {
4737     if (ExpectEqual)
4738       return TokError(
4739           "expected comma after first string for 'ifidn' directive");
4740     return TokError("expected comma after first string for 'ifdif' directive");
4741   }
4742   Lex();
4743 
4744   if (parseTextItem(String2)) {
4745     if (ExpectEqual)
4746       return TokError("expected text item parameter for 'ifidn' directive");
4747     return TokError("expected text item parameter for 'ifdif' directive");
4748   }
4749 
4750   TheCondStack.push_back(TheCondState);
4751   TheCondState.TheCond = AsmCond::IfCond;
4752   if (CaseInsensitive)
4753     TheCondState.CondMet =
4754         ExpectEqual == (StringRef(String1).equals_insensitive(String2));
4755   else
4756     TheCondState.CondMet = ExpectEqual == (String1 == String2);
4757   TheCondState.Ignore = !TheCondState.CondMet;
4758 
4759   return false;
4760 }
4761 
4762 /// parseDirectiveIfdef
4763 /// ::= ifdef symbol
4764 ///   | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)4765 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
4766   TheCondStack.push_back(TheCondState);
4767   TheCondState.TheCond = AsmCond::IfCond;
4768 
4769   if (TheCondState.Ignore) {
4770     eatToEndOfStatement();
4771   } else {
4772     bool is_defined = false;
4773     MCRegister Reg;
4774     SMLoc StartLoc, EndLoc;
4775     is_defined =
4776         getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
4777     if (!is_defined) {
4778       StringRef Name;
4779       if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
4780           parseEOL())
4781         return true;
4782 
4783       if (BuiltinSymbolMap.contains(Name.lower())) {
4784         is_defined = true;
4785       } else if (Variables.contains(Name.lower())) {
4786         is_defined = true;
4787       } else {
4788         MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
4789         is_defined = (Sym && !Sym->isUndefined());
4790       }
4791     }
4792 
4793     TheCondState.CondMet = (is_defined == expect_defined);
4794     TheCondState.Ignore = !TheCondState.CondMet;
4795   }
4796 
4797   return false;
4798 }
4799 
4800 /// parseDirectiveElseIf
4801 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)4802 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
4803                                       DirectiveKind DirKind) {
4804   if (TheCondState.TheCond != AsmCond::IfCond &&
4805       TheCondState.TheCond != AsmCond::ElseIfCond)
4806     return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
4807                                " .if or  an .elseif");
4808   TheCondState.TheCond = AsmCond::ElseIfCond;
4809 
4810   bool LastIgnoreState = false;
4811   if (!TheCondStack.empty())
4812     LastIgnoreState = TheCondStack.back().Ignore;
4813   if (LastIgnoreState || TheCondState.CondMet) {
4814     TheCondState.Ignore = true;
4815     eatToEndOfStatement();
4816   } else {
4817     int64_t ExprValue;
4818     if (parseAbsoluteExpression(ExprValue))
4819       return true;
4820 
4821     if (parseEOL())
4822       return true;
4823 
4824     switch (DirKind) {
4825     default:
4826       llvm_unreachable("unsupported directive");
4827     case DK_ELSEIF:
4828       break;
4829     case DK_ELSEIFE:
4830       ExprValue = ExprValue == 0;
4831       break;
4832     }
4833 
4834     TheCondState.CondMet = ExprValue;
4835     TheCondState.Ignore = !TheCondState.CondMet;
4836   }
4837 
4838   return false;
4839 }
4840 
4841 /// parseDirectiveElseIfb
4842 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)4843 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
4844   if (TheCondState.TheCond != AsmCond::IfCond &&
4845       TheCondState.TheCond != AsmCond::ElseIfCond)
4846     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4847                                " if or an elseif");
4848   TheCondState.TheCond = AsmCond::ElseIfCond;
4849 
4850   bool LastIgnoreState = false;
4851   if (!TheCondStack.empty())
4852     LastIgnoreState = TheCondStack.back().Ignore;
4853   if (LastIgnoreState || TheCondState.CondMet) {
4854     TheCondState.Ignore = true;
4855     eatToEndOfStatement();
4856   } else {
4857     std::string Str;
4858     if (parseTextItem(Str)) {
4859       if (ExpectBlank)
4860         return TokError("expected text item parameter for 'elseifb' directive");
4861       return TokError("expected text item parameter for 'elseifnb' directive");
4862     }
4863 
4864     if (parseEOL())
4865       return true;
4866 
4867     TheCondState.CondMet = ExpectBlank == Str.empty();
4868     TheCondState.Ignore = !TheCondState.CondMet;
4869   }
4870 
4871   return false;
4872 }
4873 
4874 /// parseDirectiveElseIfdef
4875 /// ::= elseifdef symbol
4876 ///   | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)4877 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
4878                                          bool expect_defined) {
4879   if (TheCondState.TheCond != AsmCond::IfCond &&
4880       TheCondState.TheCond != AsmCond::ElseIfCond)
4881     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4882                                " if or an elseif");
4883   TheCondState.TheCond = AsmCond::ElseIfCond;
4884 
4885   bool LastIgnoreState = false;
4886   if (!TheCondStack.empty())
4887     LastIgnoreState = TheCondStack.back().Ignore;
4888   if (LastIgnoreState || TheCondState.CondMet) {
4889     TheCondState.Ignore = true;
4890     eatToEndOfStatement();
4891   } else {
4892     bool is_defined = false;
4893     MCRegister Reg;
4894     SMLoc StartLoc, EndLoc;
4895     is_defined =
4896         getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
4897     if (!is_defined) {
4898       StringRef Name;
4899       if (check(parseIdentifier(Name),
4900                 "expected identifier after 'elseifdef'") ||
4901           parseEOL())
4902         return true;
4903 
4904       if (BuiltinSymbolMap.contains(Name.lower())) {
4905         is_defined = true;
4906       } else if (Variables.contains(Name.lower())) {
4907         is_defined = true;
4908       } else {
4909         MCSymbol *Sym = getContext().lookupSymbol(Name);
4910         is_defined = (Sym && !Sym->isUndefined());
4911       }
4912     }
4913 
4914     TheCondState.CondMet = (is_defined == expect_defined);
4915     TheCondState.Ignore = !TheCondState.CondMet;
4916   }
4917 
4918   return false;
4919 }
4920 
4921 /// parseDirectiveElseIfidn
4922 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)4923 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
4924                                          bool CaseInsensitive) {
4925   if (TheCondState.TheCond != AsmCond::IfCond &&
4926       TheCondState.TheCond != AsmCond::ElseIfCond)
4927     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4928                                " if or an elseif");
4929   TheCondState.TheCond = AsmCond::ElseIfCond;
4930 
4931   bool LastIgnoreState = false;
4932   if (!TheCondStack.empty())
4933     LastIgnoreState = TheCondStack.back().Ignore;
4934   if (LastIgnoreState || TheCondState.CondMet) {
4935     TheCondState.Ignore = true;
4936     eatToEndOfStatement();
4937   } else {
4938     std::string String1, String2;
4939 
4940     if (parseTextItem(String1)) {
4941       if (ExpectEqual)
4942         return TokError(
4943             "expected text item parameter for 'elseifidn' directive");
4944       return TokError("expected text item parameter for 'elseifdif' directive");
4945     }
4946 
4947     if (Lexer.isNot(AsmToken::Comma)) {
4948       if (ExpectEqual)
4949         return TokError(
4950             "expected comma after first string for 'elseifidn' directive");
4951       return TokError(
4952           "expected comma after first string for 'elseifdif' directive");
4953     }
4954     Lex();
4955 
4956     if (parseTextItem(String2)) {
4957       if (ExpectEqual)
4958         return TokError(
4959             "expected text item parameter for 'elseifidn' directive");
4960       return TokError("expected text item parameter for 'elseifdif' directive");
4961     }
4962 
4963     if (CaseInsensitive)
4964       TheCondState.CondMet =
4965           ExpectEqual == (StringRef(String1).equals_insensitive(String2));
4966     else
4967       TheCondState.CondMet = ExpectEqual == (String1 == String2);
4968     TheCondState.Ignore = !TheCondState.CondMet;
4969   }
4970 
4971   return false;
4972 }
4973 
4974 /// parseDirectiveElse
4975 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)4976 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
4977   if (parseEOL())
4978     return true;
4979 
4980   if (TheCondState.TheCond != AsmCond::IfCond &&
4981       TheCondState.TheCond != AsmCond::ElseIfCond)
4982     return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
4983                                " or an elseif");
4984   TheCondState.TheCond = AsmCond::ElseCond;
4985   bool LastIgnoreState = false;
4986   if (!TheCondStack.empty())
4987     LastIgnoreState = TheCondStack.back().Ignore;
4988   if (LastIgnoreState || TheCondState.CondMet)
4989     TheCondState.Ignore = true;
4990   else
4991     TheCondState.Ignore = false;
4992 
4993   return false;
4994 }
4995 
4996 /// parseDirectiveEnd
4997 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)4998 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
4999   if (parseEOL())
5000     return true;
5001 
5002   while (Lexer.isNot(AsmToken::Eof))
5003     Lexer.Lex();
5004 
5005   return false;
5006 }
5007 
5008 /// parseDirectiveError
5009 ///   ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)5010 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
5011   if (!TheCondStack.empty()) {
5012     if (TheCondStack.back().Ignore) {
5013       eatToEndOfStatement();
5014       return false;
5015     }
5016   }
5017 
5018   std::string Message = ".err directive invoked in source file";
5019   if (Lexer.isNot(AsmToken::EndOfStatement))
5020     Message = parseStringTo(AsmToken::EndOfStatement);
5021   Lex();
5022 
5023   return Error(DirectiveLoc, Message);
5024 }
5025 
5026 /// parseDirectiveErrorIfb
5027 ///   ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)5028 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
5029   if (!TheCondStack.empty()) {
5030     if (TheCondStack.back().Ignore) {
5031       eatToEndOfStatement();
5032       return false;
5033     }
5034   }
5035 
5036   std::string Text;
5037   if (parseTextItem(Text))
5038     return Error(getTok().getLoc(), "missing text item in '.errb' directive");
5039 
5040   std::string Message = ".errb directive invoked in source file";
5041   if (Lexer.isNot(AsmToken::EndOfStatement)) {
5042     if (parseToken(AsmToken::Comma))
5043       return addErrorSuffix(" in '.errb' directive");
5044     Message = parseStringTo(AsmToken::EndOfStatement);
5045   }
5046   Lex();
5047 
5048   if (Text.empty() == ExpectBlank)
5049     return Error(DirectiveLoc, Message);
5050   return false;
5051 }
5052 
5053 /// parseDirectiveErrorIfdef
5054 ///   ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)5055 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
5056                                           bool ExpectDefined) {
5057   if (!TheCondStack.empty()) {
5058     if (TheCondStack.back().Ignore) {
5059       eatToEndOfStatement();
5060       return false;
5061     }
5062   }
5063 
5064   bool IsDefined = false;
5065   MCRegister Reg;
5066   SMLoc StartLoc, EndLoc;
5067   IsDefined =
5068       getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
5069   if (!IsDefined) {
5070     StringRef Name;
5071     if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
5072       return true;
5073 
5074     if (BuiltinSymbolMap.contains(Name.lower())) {
5075       IsDefined = true;
5076     } else if (Variables.contains(Name.lower())) {
5077       IsDefined = true;
5078     } else {
5079       MCSymbol *Sym = getContext().lookupSymbol(Name);
5080       IsDefined = (Sym && !Sym->isUndefined());
5081     }
5082   }
5083 
5084   std::string Message = ".errdef directive invoked in source file";
5085   if (Lexer.isNot(AsmToken::EndOfStatement)) {
5086     if (parseToken(AsmToken::Comma))
5087       return addErrorSuffix(" in '.errdef' directive");
5088     Message = parseStringTo(AsmToken::EndOfStatement);
5089   }
5090   Lex();
5091 
5092   if (IsDefined == ExpectDefined)
5093     return Error(DirectiveLoc, Message);
5094   return false;
5095 }
5096 
5097 /// parseDirectiveErrorIfidn
5098 ///   ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)5099 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
5100                                           bool CaseInsensitive) {
5101   if (!TheCondStack.empty()) {
5102     if (TheCondStack.back().Ignore) {
5103       eatToEndOfStatement();
5104       return false;
5105     }
5106   }
5107 
5108   std::string String1, String2;
5109 
5110   if (parseTextItem(String1)) {
5111     if (ExpectEqual)
5112       return TokError("expected string parameter for '.erridn' directive");
5113     return TokError("expected string parameter for '.errdif' directive");
5114   }
5115 
5116   if (Lexer.isNot(AsmToken::Comma)) {
5117     if (ExpectEqual)
5118       return TokError(
5119           "expected comma after first string for '.erridn' directive");
5120     return TokError(
5121         "expected comma after first string for '.errdif' directive");
5122   }
5123   Lex();
5124 
5125   if (parseTextItem(String2)) {
5126     if (ExpectEqual)
5127       return TokError("expected string parameter for '.erridn' directive");
5128     return TokError("expected string parameter for '.errdif' directive");
5129   }
5130 
5131   std::string Message;
5132   if (ExpectEqual)
5133     Message = ".erridn directive invoked in source file";
5134   else
5135     Message = ".errdif directive invoked in source file";
5136   if (Lexer.isNot(AsmToken::EndOfStatement)) {
5137     if (parseToken(AsmToken::Comma))
5138       return addErrorSuffix(" in '.erridn' directive");
5139     Message = parseStringTo(AsmToken::EndOfStatement);
5140   }
5141   Lex();
5142 
5143   if (CaseInsensitive)
5144     TheCondState.CondMet =
5145         ExpectEqual == (StringRef(String1).equals_insensitive(String2));
5146   else
5147     TheCondState.CondMet = ExpectEqual == (String1 == String2);
5148   TheCondState.Ignore = !TheCondState.CondMet;
5149 
5150   if ((CaseInsensitive &&
5151        ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
5152       (ExpectEqual == (String1 == String2)))
5153     return Error(DirectiveLoc, Message);
5154   return false;
5155 }
5156 
5157 /// parseDirectiveErrorIfe
5158 ///   ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)5159 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
5160   if (!TheCondStack.empty()) {
5161     if (TheCondStack.back().Ignore) {
5162       eatToEndOfStatement();
5163       return false;
5164     }
5165   }
5166 
5167   int64_t ExprValue;
5168   if (parseAbsoluteExpression(ExprValue))
5169     return addErrorSuffix(" in '.erre' directive");
5170 
5171   std::string Message = ".erre directive invoked in source file";
5172   if (Lexer.isNot(AsmToken::EndOfStatement)) {
5173     if (parseToken(AsmToken::Comma))
5174       return addErrorSuffix(" in '.erre' directive");
5175     Message = parseStringTo(AsmToken::EndOfStatement);
5176   }
5177   Lex();
5178 
5179   if ((ExprValue == 0) == ExpectZero)
5180     return Error(DirectiveLoc, Message);
5181   return false;
5182 }
5183 
5184 /// parseDirectiveEndIf
5185 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)5186 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
5187   if (parseEOL())
5188     return true;
5189 
5190   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
5191     return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
5192                                "an .if or .else");
5193   if (!TheCondStack.empty()) {
5194     TheCondState = TheCondStack.back();
5195     TheCondStack.pop_back();
5196   }
5197 
5198   return false;
5199 }
5200 
initializeDirectiveKindMap()5201 void MasmParser::initializeDirectiveKindMap() {
5202   DirectiveKindMap["="] = DK_ASSIGN;
5203   DirectiveKindMap["equ"] = DK_EQU;
5204   DirectiveKindMap["textequ"] = DK_TEXTEQU;
5205   // DirectiveKindMap[".ascii"] = DK_ASCII;
5206   // DirectiveKindMap[".asciz"] = DK_ASCIZ;
5207   // DirectiveKindMap[".string"] = DK_STRING;
5208   DirectiveKindMap["byte"] = DK_BYTE;
5209   DirectiveKindMap["sbyte"] = DK_SBYTE;
5210   DirectiveKindMap["word"] = DK_WORD;
5211   DirectiveKindMap["sword"] = DK_SWORD;
5212   DirectiveKindMap["dword"] = DK_DWORD;
5213   DirectiveKindMap["sdword"] = DK_SDWORD;
5214   DirectiveKindMap["fword"] = DK_FWORD;
5215   DirectiveKindMap["qword"] = DK_QWORD;
5216   DirectiveKindMap["sqword"] = DK_SQWORD;
5217   DirectiveKindMap["real4"] = DK_REAL4;
5218   DirectiveKindMap["real8"] = DK_REAL8;
5219   DirectiveKindMap["real10"] = DK_REAL10;
5220   DirectiveKindMap["align"] = DK_ALIGN;
5221   DirectiveKindMap["even"] = DK_EVEN;
5222   DirectiveKindMap["org"] = DK_ORG;
5223   DirectiveKindMap["extern"] = DK_EXTERN;
5224   DirectiveKindMap["extrn"] = DK_EXTERN;
5225   DirectiveKindMap["public"] = DK_PUBLIC;
5226   // DirectiveKindMap[".comm"] = DK_COMM;
5227   DirectiveKindMap["comment"] = DK_COMMENT;
5228   DirectiveKindMap["include"] = DK_INCLUDE;
5229   DirectiveKindMap["repeat"] = DK_REPEAT;
5230   DirectiveKindMap["rept"] = DK_REPEAT;
5231   DirectiveKindMap["while"] = DK_WHILE;
5232   DirectiveKindMap["for"] = DK_FOR;
5233   DirectiveKindMap["irp"] = DK_FOR;
5234   DirectiveKindMap["forc"] = DK_FORC;
5235   DirectiveKindMap["irpc"] = DK_FORC;
5236   DirectiveKindMap["if"] = DK_IF;
5237   DirectiveKindMap["ife"] = DK_IFE;
5238   DirectiveKindMap["ifb"] = DK_IFB;
5239   DirectiveKindMap["ifnb"] = DK_IFNB;
5240   DirectiveKindMap["ifdef"] = DK_IFDEF;
5241   DirectiveKindMap["ifndef"] = DK_IFNDEF;
5242   DirectiveKindMap["ifdif"] = DK_IFDIF;
5243   DirectiveKindMap["ifdifi"] = DK_IFDIFI;
5244   DirectiveKindMap["ifidn"] = DK_IFIDN;
5245   DirectiveKindMap["ifidni"] = DK_IFIDNI;
5246   DirectiveKindMap["elseif"] = DK_ELSEIF;
5247   DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
5248   DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
5249   DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
5250   DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
5251   DirectiveKindMap["else"] = DK_ELSE;
5252   DirectiveKindMap["end"] = DK_END;
5253   DirectiveKindMap["endif"] = DK_ENDIF;
5254   // DirectiveKindMap[".file"] = DK_FILE;
5255   // DirectiveKindMap[".line"] = DK_LINE;
5256   // DirectiveKindMap[".loc"] = DK_LOC;
5257   // DirectiveKindMap[".stabs"] = DK_STABS;
5258   // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
5259   // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
5260   // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
5261   // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
5262   // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
5263   // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
5264   // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
5265   // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
5266   // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
5267   // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
5268   // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
5269   // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
5270   // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
5271   // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
5272   // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
5273   // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
5274   // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
5275   // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
5276   // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
5277   // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
5278   // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
5279   // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
5280   // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
5281   // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
5282   // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
5283   // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
5284   // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
5285   // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
5286   // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
5287   // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
5288   // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
5289   // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
5290   // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
5291   // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
5292   // DirectiveKindMap[".cfi_val_offset"] = DK_CFI_VAL_OFFSET;
5293   DirectiveKindMap["macro"] = DK_MACRO;
5294   DirectiveKindMap["exitm"] = DK_EXITM;
5295   DirectiveKindMap["endm"] = DK_ENDM;
5296   DirectiveKindMap["purge"] = DK_PURGE;
5297   DirectiveKindMap[".err"] = DK_ERR;
5298   DirectiveKindMap[".errb"] = DK_ERRB;
5299   DirectiveKindMap[".errnb"] = DK_ERRNB;
5300   DirectiveKindMap[".errdef"] = DK_ERRDEF;
5301   DirectiveKindMap[".errndef"] = DK_ERRNDEF;
5302   DirectiveKindMap[".errdif"] = DK_ERRDIF;
5303   DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
5304   DirectiveKindMap[".erridn"] = DK_ERRIDN;
5305   DirectiveKindMap[".erridni"] = DK_ERRIDNI;
5306   DirectiveKindMap[".erre"] = DK_ERRE;
5307   DirectiveKindMap[".errnz"] = DK_ERRNZ;
5308   DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
5309   DirectiveKindMap[".pushreg"] = DK_PUSHREG;
5310   DirectiveKindMap[".savereg"] = DK_SAVEREG;
5311   DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
5312   DirectiveKindMap[".setframe"] = DK_SETFRAME;
5313   DirectiveKindMap[".radix"] = DK_RADIX;
5314   DirectiveKindMap["db"] = DK_DB;
5315   DirectiveKindMap["dd"] = DK_DD;
5316   DirectiveKindMap["df"] = DK_DF;
5317   DirectiveKindMap["dq"] = DK_DQ;
5318   DirectiveKindMap["dw"] = DK_DW;
5319   DirectiveKindMap["echo"] = DK_ECHO;
5320   DirectiveKindMap["struc"] = DK_STRUCT;
5321   DirectiveKindMap["struct"] = DK_STRUCT;
5322   DirectiveKindMap["union"] = DK_UNION;
5323   DirectiveKindMap["ends"] = DK_ENDS;
5324 }
5325 
isMacroLikeDirective()5326 bool MasmParser::isMacroLikeDirective() {
5327   if (getLexer().is(AsmToken::Identifier)) {
5328     bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
5329                            .CasesLower("repeat", "rept", true)
5330                            .CaseLower("while", true)
5331                            .CasesLower("for", "irp", true)
5332                            .CasesLower("forc", "irpc", true)
5333                            .Default(false);
5334     if (IsMacroLike)
5335       return true;
5336   }
5337   if (peekTok().is(AsmToken::Identifier) &&
5338       peekTok().getIdentifier().equals_insensitive("macro"))
5339     return true;
5340 
5341   return false;
5342 }
5343 
parseMacroLikeBody(SMLoc DirectiveLoc)5344 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
5345   AsmToken EndToken, StartToken = getTok();
5346 
5347   unsigned NestLevel = 0;
5348   while (true) {
5349     // Check whether we have reached the end of the file.
5350     if (getLexer().is(AsmToken::Eof)) {
5351       printError(DirectiveLoc, "no matching 'endm' in definition");
5352       return nullptr;
5353     }
5354 
5355     if (isMacroLikeDirective())
5356       ++NestLevel;
5357 
5358     // Otherwise, check whether we have reached the endm.
5359     if (Lexer.is(AsmToken::Identifier) &&
5360         getTok().getIdentifier().equals_insensitive("endm")) {
5361       if (NestLevel == 0) {
5362         EndToken = getTok();
5363         Lex();
5364         if (Lexer.isNot(AsmToken::EndOfStatement)) {
5365           printError(getTok().getLoc(), "unexpected token in 'endm' directive");
5366           return nullptr;
5367         }
5368         break;
5369       }
5370       --NestLevel;
5371     }
5372 
5373     // Otherwise, scan till the end of the statement.
5374     eatToEndOfStatement();
5375   }
5376 
5377   const char *BodyStart = StartToken.getLoc().getPointer();
5378   const char *BodyEnd = EndToken.getLoc().getPointer();
5379   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5380 
5381   // We Are Anonymous.
5382   MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
5383   return &MacroLikeBodies.back();
5384 }
5385 
expandStatement(SMLoc Loc)5386 bool MasmParser::expandStatement(SMLoc Loc) {
5387   std::string Body = parseStringTo(AsmToken::EndOfStatement);
5388   SMLoc EndLoc = getTok().getLoc();
5389 
5390   MCAsmMacroParameters Parameters;
5391   MCAsmMacroArguments Arguments;
5392 
5393   StringMap<std::string> BuiltinValues;
5394   for (const auto &S : BuiltinSymbolMap) {
5395     const BuiltinSymbol &Sym = S.getValue();
5396     if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
5397       BuiltinValues[S.getKey().lower()] = std::move(*Text);
5398     }
5399   }
5400   for (const auto &B : BuiltinValues) {
5401     MCAsmMacroParameter P;
5402     MCAsmMacroArgument A;
5403     P.Name = B.getKey();
5404     P.Required = true;
5405     A.push_back(AsmToken(AsmToken::String, B.getValue()));
5406 
5407     Parameters.push_back(std::move(P));
5408     Arguments.push_back(std::move(A));
5409   }
5410 
5411   for (const auto &V : Variables) {
5412     const Variable &Var = V.getValue();
5413     if (Var.IsText) {
5414       MCAsmMacroParameter P;
5415       MCAsmMacroArgument A;
5416       P.Name = Var.Name;
5417       P.Required = true;
5418       A.push_back(AsmToken(AsmToken::String, Var.TextValue));
5419 
5420       Parameters.push_back(std::move(P));
5421       Arguments.push_back(std::move(A));
5422     }
5423   }
5424   MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
5425   MCAsmMacro M = MacroLikeBodies.back();
5426 
5427   // Expand the statement in a new buffer.
5428   SmallString<80> Buf;
5429   raw_svector_ostream OS(Buf);
5430   if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
5431     return true;
5432   std::unique_ptr<MemoryBuffer> Expansion =
5433       MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
5434 
5435   // Jump to the expanded statement and prime the lexer.
5436   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
5437   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
5438   EndStatementAtEOFStack.push_back(false);
5439   Lex();
5440   return false;
5441 }
5442 
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)5443 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
5444                                           raw_svector_ostream &OS) {
5445   instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
5446 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)5447 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
5448                                           SMLoc ExitLoc,
5449                                           raw_svector_ostream &OS) {
5450   OS << "endm\n";
5451 
5452   std::unique_ptr<MemoryBuffer> Instantiation =
5453       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
5454 
5455   // Create the macro instantiation object and add to the current macro
5456   // instantiation stack.
5457   MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
5458                                                   ExitLoc, TheCondStack.size()};
5459   ActiveMacros.push_back(MI);
5460 
5461   // Jump to the macro instantiation and prime the lexer.
5462   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
5463   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
5464   EndStatementAtEOFStack.push_back(true);
5465   Lex();
5466 }
5467 
5468 /// parseDirectiveRepeat
5469 ///   ::= ("repeat" | "rept") count
5470 ///       body
5471 ///     endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)5472 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
5473   const MCExpr *CountExpr;
5474   SMLoc CountLoc = getTok().getLoc();
5475   if (parseExpression(CountExpr))
5476     return true;
5477 
5478   int64_t Count;
5479   if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
5480     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
5481   }
5482 
5483   if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
5484     return true;
5485 
5486   // Lex the repeat definition.
5487   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5488   if (!M)
5489     return true;
5490 
5491   // Macro instantiation is lexical, unfortunately. We construct a new buffer
5492   // to hold the macro body with substitutions.
5493   SmallString<256> Buf;
5494   raw_svector_ostream OS(Buf);
5495   while (Count--) {
5496     if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
5497       return true;
5498   }
5499   instantiateMacroLikeBody(M, DirectiveLoc, OS);
5500 
5501   return false;
5502 }
5503 
5504 /// parseDirectiveWhile
5505 /// ::= "while" expression
5506 ///       body
5507 ///     endm
parseDirectiveWhile(SMLoc DirectiveLoc)5508 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
5509   const MCExpr *CondExpr;
5510   SMLoc CondLoc = getTok().getLoc();
5511   if (parseExpression(CondExpr))
5512     return true;
5513 
5514   // Lex the repeat definition.
5515   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5516   if (!M)
5517     return true;
5518 
5519   // Macro instantiation is lexical, unfortunately. We construct a new buffer
5520   // to hold the macro body with substitutions.
5521   SmallString<256> Buf;
5522   raw_svector_ostream OS(Buf);
5523   int64_t Condition;
5524   if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
5525     return Error(CondLoc, "expected absolute expression in 'while' directive");
5526   if (Condition) {
5527     // Instantiate the macro, then resume at this directive to recheck the
5528     // condition.
5529     if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
5530       return true;
5531     instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
5532   }
5533 
5534   return false;
5535 }
5536 
5537 /// parseDirectiveFor
5538 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
5539 ///       body
5540 ///     endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)5541 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
5542   MCAsmMacroParameter Parameter;
5543   MCAsmMacroArguments A;
5544   if (check(parseIdentifier(Parameter.Name),
5545             "expected identifier in '" + Dir + "' directive"))
5546     return true;
5547 
5548   // Parse optional qualifier (default value, or "req")
5549   if (parseOptionalToken(AsmToken::Colon)) {
5550     if (parseOptionalToken(AsmToken::Equal)) {
5551       // Default value
5552       SMLoc ParamLoc;
5553 
5554       ParamLoc = Lexer.getLoc();
5555       if (parseMacroArgument(nullptr, Parameter.Value))
5556         return true;
5557     } else {
5558       SMLoc QualLoc;
5559       StringRef Qualifier;
5560 
5561       QualLoc = Lexer.getLoc();
5562       if (parseIdentifier(Qualifier))
5563         return Error(QualLoc, "missing parameter qualifier for "
5564                               "'" +
5565                                   Parameter.Name + "' in '" + Dir +
5566                                   "' directive");
5567 
5568       if (Qualifier.equals_insensitive("req"))
5569         Parameter.Required = true;
5570       else
5571         return Error(QualLoc,
5572                      Qualifier + " is not a valid parameter qualifier for '" +
5573                          Parameter.Name + "' in '" + Dir + "' directive");
5574     }
5575   }
5576 
5577   if (parseToken(AsmToken::Comma,
5578                  "expected comma in '" + Dir + "' directive") ||
5579       parseToken(AsmToken::Less,
5580                  "values in '" + Dir +
5581                      "' directive must be enclosed in angle brackets"))
5582     return true;
5583 
5584   while (true) {
5585     A.emplace_back();
5586     if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
5587       return addErrorSuffix(" in arguments for '" + Dir + "' directive");
5588 
5589     // If we see a comma, continue, and allow line continuation.
5590     if (!parseOptionalToken(AsmToken::Comma))
5591       break;
5592     parseOptionalToken(AsmToken::EndOfStatement);
5593   }
5594 
5595   if (parseToken(AsmToken::Greater,
5596                  "values in '" + Dir +
5597                      "' directive must be enclosed in angle brackets") ||
5598       parseEOL())
5599     return true;
5600 
5601   // Lex the for definition.
5602   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5603   if (!M)
5604     return true;
5605 
5606   // Macro instantiation is lexical, unfortunately. We construct a new buffer
5607   // to hold the macro body with substitutions.
5608   SmallString<256> Buf;
5609   raw_svector_ostream OS(Buf);
5610 
5611   for (const MCAsmMacroArgument &Arg : A) {
5612     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
5613       return true;
5614   }
5615 
5616   instantiateMacroLikeBody(M, DirectiveLoc, OS);
5617 
5618   return false;
5619 }
5620 
5621 /// parseDirectiveForc
5622 /// ::= ("forc" | "irpc") symbol, <string>
5623 ///       body
5624 ///     endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)5625 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
5626   MCAsmMacroParameter Parameter;
5627 
5628   std::string Argument;
5629   if (check(parseIdentifier(Parameter.Name),
5630             "expected identifier in '" + Directive + "' directive") ||
5631       parseToken(AsmToken::Comma,
5632                  "expected comma in '" + Directive + "' directive"))
5633     return true;
5634   if (parseAngleBracketString(Argument)) {
5635     // Match ml64.exe; treat all characters to end of statement as a string,
5636     // ignoring comment markers, then discard anything following a space (using
5637     // the C locale).
5638     Argument = parseStringTo(AsmToken::EndOfStatement);
5639     if (getTok().is(AsmToken::EndOfStatement))
5640       Argument += getTok().getString();
5641     size_t End = 0;
5642     for (; End < Argument.size(); ++End) {
5643       if (isSpace(Argument[End]))
5644         break;
5645     }
5646     Argument.resize(End);
5647   }
5648   if (parseEOL())
5649     return true;
5650 
5651   // Lex the irpc definition.
5652   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5653   if (!M)
5654     return true;
5655 
5656   // Macro instantiation is lexical, unfortunately. We construct a new buffer
5657   // to hold the macro body with substitutions.
5658   SmallString<256> Buf;
5659   raw_svector_ostream OS(Buf);
5660 
5661   StringRef Values(Argument);
5662   for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
5663     MCAsmMacroArgument Arg;
5664     Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1));
5665 
5666     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
5667       return true;
5668   }
5669 
5670   instantiateMacroLikeBody(M, DirectiveLoc, OS);
5671 
5672   return false;
5673 }
5674 
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)5675 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
5676                                       size_t Len) {
5677   const MCExpr *Value;
5678   SMLoc ExprLoc = getLexer().getLoc();
5679   if (parseExpression(Value))
5680     return true;
5681   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
5682   if (!MCE)
5683     return Error(ExprLoc, "unexpected expression in _emit");
5684   uint64_t IntValue = MCE->getValue();
5685   if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
5686     return Error(ExprLoc, "literal value out of range for directive");
5687 
5688   Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
5689   return false;
5690 }
5691 
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)5692 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
5693   const MCExpr *Value;
5694   SMLoc ExprLoc = getLexer().getLoc();
5695   if (parseExpression(Value))
5696     return true;
5697   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
5698   if (!MCE)
5699     return Error(ExprLoc, "unexpected expression in align");
5700   uint64_t IntValue = MCE->getValue();
5701   if (!isPowerOf2_64(IntValue))
5702     return Error(ExprLoc, "literal value not a power of two greater then zero");
5703 
5704   Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
5705   return false;
5706 }
5707 
parseDirectiveRadix(SMLoc DirectiveLoc)5708 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
5709   const SMLoc Loc = getLexer().getLoc();
5710   std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
5711   StringRef RadixString = StringRef(RadixStringRaw).trim();
5712   unsigned Radix;
5713   if (RadixString.getAsInteger(10, Radix)) {
5714     return Error(Loc,
5715                  "radix must be a decimal number in the range 2 to 16; was " +
5716                      RadixString);
5717   }
5718   if (Radix < 2 || Radix > 16)
5719     return Error(Loc, "radix must be in the range 2 to 16; was " +
5720                           std::to_string(Radix));
5721   getLexer().setMasmDefaultRadix(Radix);
5722   return false;
5723 }
5724 
5725 /// parseDirectiveEcho
5726 ///   ::= "echo" message
parseDirectiveEcho(SMLoc DirectiveLoc)5727 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
5728   std::string Message = parseStringTo(AsmToken::EndOfStatement);
5729   llvm::outs() << Message;
5730   if (!StringRef(Message).ends_with("\n"))
5731     llvm::outs() << '\n';
5732   return false;
5733 }
5734 
5735 // We are comparing pointers, but the pointers are relative to a single string.
5736 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)5737 static int rewritesSort(const AsmRewrite *AsmRewriteA,
5738                         const AsmRewrite *AsmRewriteB) {
5739   if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
5740     return -1;
5741   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
5742     return 1;
5743 
5744   // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
5745   // rewrite to the same location.  Make sure the SizeDirective rewrite is
5746   // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
5747   // ensures the sort algorithm is stable.
5748   if (AsmRewritePrecedence[AsmRewriteA->Kind] >
5749       AsmRewritePrecedence[AsmRewriteB->Kind])
5750     return -1;
5751 
5752   if (AsmRewritePrecedence[AsmRewriteA->Kind] <
5753       AsmRewritePrecedence[AsmRewriteB->Kind])
5754     return 1;
5755   llvm_unreachable("Unstable rewrite sort.");
5756 }
5757 
defineMacro(StringRef Name,StringRef Value)5758 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
5759   Variable &Var = Variables[Name.lower()];
5760   if (Var.Name.empty()) {
5761     Var.Name = Name;
5762   } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
5763     return Error(SMLoc(), "invalid variable redefinition");
5764   } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
5765              Warning(SMLoc(), "redefining '" + Name +
5766                                   "', already defined on the command line")) {
5767     return true;
5768   }
5769   Var.Redefinable = Variable::WARN_ON_REDEFINITION;
5770   Var.IsText = true;
5771   Var.TextValue = Value.str();
5772   return false;
5773 }
5774 
lookUpField(StringRef Name,AsmFieldInfo & Info) const5775 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
5776   const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
5777   const StringRef Base = BaseMember.first, Member = BaseMember.second;
5778   return lookUpField(Base, Member, Info);
5779 }
5780 
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const5781 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
5782                              AsmFieldInfo &Info) const {
5783   if (Base.empty())
5784     return true;
5785 
5786   AsmFieldInfo BaseInfo;
5787   if (Base.contains('.') && !lookUpField(Base, BaseInfo))
5788     Base = BaseInfo.Type.Name;
5789 
5790   auto StructIt = Structs.find(Base.lower());
5791   auto TypeIt = KnownType.find(Base.lower());
5792   if (TypeIt != KnownType.end()) {
5793     StructIt = Structs.find(TypeIt->second.Name.lower());
5794   }
5795   if (StructIt != Structs.end())
5796     return lookUpField(StructIt->second, Member, Info);
5797 
5798   return true;
5799 }
5800 
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const5801 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
5802                              AsmFieldInfo &Info) const {
5803   if (Member.empty()) {
5804     Info.Type.Name = Structure.Name;
5805     Info.Type.Size = Structure.Size;
5806     Info.Type.ElementSize = Structure.Size;
5807     Info.Type.Length = 1;
5808     return false;
5809   }
5810 
5811   std::pair<StringRef, StringRef> Split = Member.split('.');
5812   const StringRef FieldName = Split.first, FieldMember = Split.second;
5813 
5814   auto StructIt = Structs.find(FieldName.lower());
5815   if (StructIt != Structs.end())
5816     return lookUpField(StructIt->second, FieldMember, Info);
5817 
5818   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
5819   if (FieldIt == Structure.FieldsByName.end())
5820     return true;
5821 
5822   const FieldInfo &Field = Structure.Fields[FieldIt->second];
5823   if (FieldMember.empty()) {
5824     Info.Offset += Field.Offset;
5825     Info.Type.Size = Field.SizeOf;
5826     Info.Type.ElementSize = Field.Type;
5827     Info.Type.Length = Field.LengthOf;
5828     if (Field.Contents.FT == FT_STRUCT)
5829       Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
5830     else
5831       Info.Type.Name = "";
5832     return false;
5833   }
5834 
5835   if (Field.Contents.FT != FT_STRUCT)
5836     return true;
5837   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
5838 
5839   if (lookUpField(StructInfo.Structure, FieldMember, Info))
5840     return true;
5841 
5842   Info.Offset += Field.Offset;
5843   return false;
5844 }
5845 
lookUpType(StringRef Name,AsmTypeInfo & Info) const5846 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
5847   unsigned Size = StringSwitch<unsigned>(Name)
5848                       .CasesLower("byte", "db", "sbyte", 1)
5849                       .CasesLower("word", "dw", "sword", 2)
5850                       .CasesLower("dword", "dd", "sdword", 4)
5851                       .CasesLower("fword", "df", 6)
5852                       .CasesLower("qword", "dq", "sqword", 8)
5853                       .CaseLower("real4", 4)
5854                       .CaseLower("real8", 8)
5855                       .CaseLower("real10", 10)
5856                       .Default(0);
5857   if (Size) {
5858     Info.Name = Name;
5859     Info.ElementSize = Size;
5860     Info.Length = 1;
5861     Info.Size = Size;
5862     return false;
5863   }
5864 
5865   auto StructIt = Structs.find(Name.lower());
5866   if (StructIt != Structs.end()) {
5867     const StructInfo &Structure = StructIt->second;
5868     Info.Name = Name;
5869     Info.ElementSize = Structure.Size;
5870     Info.Length = 1;
5871     Info.Size = Structure.Size;
5872     return false;
5873   }
5874 
5875   return true;
5876 }
5877 
parseMSInlineAsm(std::string & AsmString,unsigned & NumOutputs,unsigned & NumInputs,SmallVectorImpl<std::pair<void *,bool>> & OpDecls,SmallVectorImpl<std::string> & Constraints,SmallVectorImpl<std::string> & Clobbers,const MCInstrInfo * MII,MCInstPrinter * IP,MCAsmParserSemaCallback & SI)5878 bool MasmParser::parseMSInlineAsm(
5879     std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
5880     SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
5881     SmallVectorImpl<std::string> &Constraints,
5882     SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
5883     MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
5884   SmallVector<void *, 4> InputDecls;
5885   SmallVector<void *, 4> OutputDecls;
5886   SmallVector<bool, 4> InputDeclsAddressOf;
5887   SmallVector<bool, 4> OutputDeclsAddressOf;
5888   SmallVector<std::string, 4> InputConstraints;
5889   SmallVector<std::string, 4> OutputConstraints;
5890   SmallVector<MCRegister, 4> ClobberRegs;
5891 
5892   SmallVector<AsmRewrite, 4> AsmStrRewrites;
5893 
5894   // Prime the lexer.
5895   Lex();
5896 
5897   // While we have input, parse each statement.
5898   unsigned InputIdx = 0;
5899   unsigned OutputIdx = 0;
5900   while (getLexer().isNot(AsmToken::Eof)) {
5901     // Parse curly braces marking block start/end.
5902     if (parseCurlyBlockScope(AsmStrRewrites))
5903       continue;
5904 
5905     ParseStatementInfo Info(&AsmStrRewrites);
5906     bool StatementErr = parseStatement(Info, &SI);
5907 
5908     if (StatementErr || Info.ParseError) {
5909       // Emit pending errors if any exist.
5910       printPendingErrors();
5911       return true;
5912     }
5913 
5914     // No pending error should exist here.
5915     assert(!hasPendingError() && "unexpected error from parseStatement");
5916 
5917     if (Info.Opcode == ~0U)
5918       continue;
5919 
5920     const MCInstrDesc &Desc = MII->get(Info.Opcode);
5921 
5922     // Build the list of clobbers, outputs and inputs.
5923     for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
5924       MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
5925 
5926       // Register operand.
5927       if (Operand.isReg() && !Operand.needAddressOf() &&
5928           !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) {
5929         unsigned NumDefs = Desc.getNumDefs();
5930         // Clobber.
5931         if (NumDefs && Operand.getMCOperandNum() < NumDefs)
5932           ClobberRegs.push_back(Operand.getReg());
5933         continue;
5934       }
5935 
5936       // Expr/Input or Output.
5937       StringRef SymName = Operand.getSymName();
5938       if (SymName.empty())
5939         continue;
5940 
5941       void *OpDecl = Operand.getOpDecl();
5942       if (!OpDecl)
5943         continue;
5944 
5945       StringRef Constraint = Operand.getConstraint();
5946       if (Operand.isImm()) {
5947         // Offset as immediate.
5948         if (Operand.isOffsetOfLocal())
5949           Constraint = "r";
5950         else
5951           Constraint = "i";
5952       }
5953 
5954       bool isOutput = (i == 1) && Desc.mayStore();
5955       SMLoc Start = SMLoc::getFromPointer(SymName.data());
5956       if (isOutput) {
5957         ++InputIdx;
5958         OutputDecls.push_back(OpDecl);
5959         OutputDeclsAddressOf.push_back(Operand.needAddressOf());
5960         OutputConstraints.push_back(("=" + Constraint).str());
5961         AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
5962       } else {
5963         InputDecls.push_back(OpDecl);
5964         InputDeclsAddressOf.push_back(Operand.needAddressOf());
5965         InputConstraints.push_back(Constraint.str());
5966         if (Desc.operands()[i - 1].isBranchTarget())
5967           AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
5968         else
5969           AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
5970       }
5971     }
5972 
5973     // Consider implicit defs to be clobbers.  Think of cpuid and push.
5974     llvm::append_range(ClobberRegs, Desc.implicit_defs());
5975   }
5976 
5977   // Set the number of Outputs and Inputs.
5978   NumOutputs = OutputDecls.size();
5979   NumInputs = InputDecls.size();
5980 
5981   // Set the unique clobbers.
5982   array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
5983   ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
5984   Clobbers.assign(ClobberRegs.size(), std::string());
5985   for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
5986     raw_string_ostream OS(Clobbers[I]);
5987     IP->printRegName(OS, ClobberRegs[I]);
5988   }
5989 
5990   // Merge the various outputs and inputs.  Output are expected first.
5991   if (NumOutputs || NumInputs) {
5992     unsigned NumExprs = NumOutputs + NumInputs;
5993     OpDecls.resize(NumExprs);
5994     Constraints.resize(NumExprs);
5995     for (unsigned i = 0; i < NumOutputs; ++i) {
5996       OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
5997       Constraints[i] = OutputConstraints[i];
5998     }
5999     for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
6000       OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
6001       Constraints[j] = InputConstraints[i];
6002     }
6003   }
6004 
6005   // Build the IR assembly string.
6006   std::string AsmStringIR;
6007   raw_string_ostream OS(AsmStringIR);
6008   StringRef ASMString =
6009       SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
6010   const char *AsmStart = ASMString.begin();
6011   const char *AsmEnd = ASMString.end();
6012   array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
6013   for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
6014     const AsmRewrite &AR = *I;
6015     // Check if this has already been covered by another rewrite...
6016     if (AR.Done)
6017       continue;
6018     AsmRewriteKind Kind = AR.Kind;
6019 
6020     const char *Loc = AR.Loc.getPointer();
6021     assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
6022 
6023     // Emit everything up to the immediate/expression.
6024     if (unsigned Len = Loc - AsmStart)
6025       OS << StringRef(AsmStart, Len);
6026 
6027     // Skip the original expression.
6028     if (Kind == AOK_Skip) {
6029       AsmStart = Loc + AR.Len;
6030       continue;
6031     }
6032 
6033     unsigned AdditionalSkip = 0;
6034     // Rewrite expressions in $N notation.
6035     switch (Kind) {
6036     default:
6037       break;
6038     case AOK_IntelExpr:
6039       assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
6040       if (AR.IntelExp.NeedBracs)
6041         OS << "[";
6042       if (AR.IntelExp.hasBaseReg())
6043         OS << AR.IntelExp.BaseReg;
6044       if (AR.IntelExp.hasIndexReg())
6045         OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
6046            << AR.IntelExp.IndexReg;
6047       if (AR.IntelExp.Scale > 1)
6048         OS << " * $$" << AR.IntelExp.Scale;
6049       if (AR.IntelExp.hasOffset()) {
6050         if (AR.IntelExp.hasRegs())
6051           OS << " + ";
6052         // Fuse this rewrite with a rewrite of the offset name, if present.
6053         StringRef OffsetName = AR.IntelExp.OffsetName;
6054         SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
6055         size_t OffsetLen = OffsetName.size();
6056         auto rewrite_it = std::find_if(
6057             I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
6058               return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
6059                      (FusingAR.Kind == AOK_Input ||
6060                       FusingAR.Kind == AOK_CallInput);
6061             });
6062         if (rewrite_it == AsmStrRewrites.end()) {
6063           OS << "offset " << OffsetName;
6064         } else if (rewrite_it->Kind == AOK_CallInput) {
6065           OS << "${" << InputIdx++ << ":P}";
6066           rewrite_it->Done = true;
6067         } else {
6068           OS << '$' << InputIdx++;
6069           rewrite_it->Done = true;
6070         }
6071       }
6072       if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
6073         OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
6074       if (AR.IntelExp.NeedBracs)
6075         OS << "]";
6076       break;
6077     case AOK_Label:
6078       OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
6079       break;
6080     case AOK_Input:
6081       OS << '$' << InputIdx++;
6082       break;
6083     case AOK_CallInput:
6084       OS << "${" << InputIdx++ << ":P}";
6085       break;
6086     case AOK_Output:
6087       OS << '$' << OutputIdx++;
6088       break;
6089     case AOK_SizeDirective:
6090       switch (AR.Val) {
6091       default: break;
6092       case 8:  OS << "byte ptr "; break;
6093       case 16: OS << "word ptr "; break;
6094       case 32: OS << "dword ptr "; break;
6095       case 64: OS << "qword ptr "; break;
6096       case 80: OS << "xword ptr "; break;
6097       case 128: OS << "xmmword ptr "; break;
6098       case 256: OS << "ymmword ptr "; break;
6099       }
6100       break;
6101     case AOK_Emit:
6102       OS << ".byte";
6103       break;
6104     case AOK_Align: {
6105       // MS alignment directives are measured in bytes. If the native assembler
6106       // measures alignment in bytes, we can pass it straight through.
6107       OS << ".align";
6108       if (getContext().getAsmInfo()->getAlignmentIsInBytes())
6109         break;
6110 
6111       // Alignment is in log2 form, so print that instead and skip the original
6112       // immediate.
6113       unsigned Val = AR.Val;
6114       OS << ' ' << Val;
6115       assert(Val < 10 && "Expected alignment less then 2^10.");
6116       AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
6117       break;
6118     }
6119     case AOK_EVEN:
6120       OS << ".even";
6121       break;
6122     case AOK_EndOfStatement:
6123       OS << "\n\t";
6124       break;
6125     }
6126 
6127     // Skip the original expression.
6128     AsmStart = Loc + AR.Len + AdditionalSkip;
6129   }
6130 
6131   // Emit the remainder of the asm string.
6132   if (AsmStart != AsmEnd)
6133     OS << StringRef(AsmStart, AsmEnd - AsmStart);
6134 
6135   AsmString = OS.str();
6136   return false;
6137 }
6138 
initializeBuiltinSymbolMaps()6139 void MasmParser::initializeBuiltinSymbolMaps() {
6140   // Numeric built-ins (supported in all versions)
6141   BuiltinSymbolMap["@version"] = BI_VERSION;
6142   BuiltinSymbolMap["@line"] = BI_LINE;
6143 
6144   // Text built-ins (supported in all versions)
6145   BuiltinSymbolMap["@date"] = BI_DATE;
6146   BuiltinSymbolMap["@time"] = BI_TIME;
6147   BuiltinSymbolMap["@filecur"] = BI_FILECUR;
6148   BuiltinSymbolMap["@filename"] = BI_FILENAME;
6149   BuiltinSymbolMap["@curseg"] = BI_CURSEG;
6150 
6151   // Function built-ins (supported in all versions)
6152   BuiltinFunctionMap["@catstr"] = BI_CATSTR;
6153 
6154   // Some built-ins exist only for MASM32 (32-bit x86)
6155   if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
6156       Triple::x86) {
6157     // Numeric built-ins
6158     // BuiltinSymbolMap["@cpu"] = BI_CPU;
6159     // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
6160     // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
6161     // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
6162     // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
6163     // BuiltinSymbolMap["@model"] = BI_MODEL;
6164 
6165     // Text built-ins
6166     // BuiltinSymbolMap["@code"] = BI_CODE;
6167     // BuiltinSymbolMap["@data"] = BI_DATA;
6168     // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
6169     // BuiltinSymbolMap["@stack"] = BI_STACK;
6170   }
6171 }
6172 
evaluateBuiltinValue(BuiltinSymbol Symbol,SMLoc StartLoc)6173 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
6174                                                SMLoc StartLoc) {
6175   switch (Symbol) {
6176   default:
6177     return nullptr;
6178   case BI_VERSION:
6179     // Match a recent version of ML.EXE.
6180     return MCConstantExpr::create(1427, getContext());
6181   case BI_LINE: {
6182     int64_t Line;
6183     if (ActiveMacros.empty())
6184       Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
6185     else
6186       Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
6187                                    ActiveMacros.front()->ExitBuffer);
6188     return MCConstantExpr::create(Line, getContext());
6189   }
6190   }
6191   llvm_unreachable("unhandled built-in symbol");
6192 }
6193 
6194 std::optional<std::string>
evaluateBuiltinTextMacro(BuiltinSymbol Symbol,SMLoc StartLoc)6195 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
6196   switch (Symbol) {
6197   default:
6198     return {};
6199   case BI_DATE: {
6200     // Current local date, formatted MM/DD/YY
6201     char TmpBuffer[sizeof("mm/dd/yy")];
6202     const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
6203     return std::string(TmpBuffer, Len);
6204   }
6205   case BI_TIME: {
6206     // Current local time, formatted HH:MM:SS (24-hour clock)
6207     char TmpBuffer[sizeof("hh:mm:ss")];
6208     const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
6209     return std::string(TmpBuffer, Len);
6210   }
6211   case BI_FILECUR:
6212     return SrcMgr
6213         .getMemoryBuffer(
6214             ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
6215         ->getBufferIdentifier()
6216         .str();
6217   case BI_FILENAME:
6218     return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
6219                                ->getBufferIdentifier())
6220         .upper();
6221   case BI_CURSEG:
6222     return getStreamer().getCurrentSectionOnly()->getName().str();
6223   }
6224   llvm_unreachable("unhandled built-in symbol");
6225 }
6226 
evaluateBuiltinMacroFunction(BuiltinFunction Function,StringRef Name,std::string & Res)6227 bool MasmParser::evaluateBuiltinMacroFunction(BuiltinFunction Function,
6228                                               StringRef Name,
6229                                               std::string &Res) {
6230   if (parseToken(AsmToken::LParen, "invoking macro function '" + Name +
6231                                        "' requires arguments in parentheses")) {
6232     return true;
6233   }
6234 
6235   MCAsmMacroParameters P;
6236   switch (Function) {
6237   default:
6238     return true;
6239   case BI_CATSTR:
6240     break;
6241   }
6242   MCAsmMacro M(Name, "", P, {}, true);
6243 
6244   MCAsmMacroArguments A;
6245   if (parseMacroArguments(&M, A, AsmToken::RParen) || parseRParen()) {
6246     return true;
6247   }
6248 
6249   switch (Function) {
6250   default:
6251     llvm_unreachable("unhandled built-in function");
6252   case BI_CATSTR: {
6253     for (const MCAsmMacroArgument &Arg : A) {
6254       for (const AsmToken &Tok : Arg) {
6255         if (Tok.is(AsmToken::String)) {
6256           Res.append(Tok.getStringContents());
6257         } else {
6258           Res.append(Tok.getString());
6259         }
6260       }
6261     }
6262     return false;
6263   }
6264   }
6265   llvm_unreachable("unhandled built-in function");
6266   return true;
6267 }
6268 
6269 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)6270 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
6271                                       MCStreamer &Out, const MCAsmInfo &MAI,
6272                                       struct tm TM, unsigned CB) {
6273   return new MasmParser(SM, C, Out, MAI, TM, CB);
6274 }
6275