xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/BinaryFormat/Dwarf.h"
26 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeView.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDirectives.h"
31 #include "llvm/MC/MCDwarf.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInstPrinter.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrInfo.h"
36 #include "llvm/MC/MCParser/AsmCond.h"
37 #include "llvm/MC/MCParser/AsmLexer.h"
38 #include "llvm/MC/MCParser/MCAsmLexer.h"
39 #include "llvm/MC/MCParser/MCAsmParser.h"
40 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
41 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
42 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
43 #include "llvm/MC/MCRegisterInfo.h"
44 #include "llvm/MC/MCSection.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSubtargetInfo.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/MC/MCTargetOptions.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/Format.h"
53 #include "llvm/Support/MD5.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/Path.h"
57 #include "llvm/Support/SMLoc.h"
58 #include "llvm/Support/SourceMgr.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <climits>
63 #include <cstddef>
64 #include <cstdint>
65 #include <ctime>
66 #include <deque>
67 #include <memory>
68 #include <optional>
69 #include <sstream>
70 #include <string>
71 #include <tuple>
72 #include <utility>
73 #include <vector>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 /// Helper types for tracking macro definitions.
80 typedef std::vector<AsmToken> MCAsmMacroArgument;
81 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
82 
83 /// Helper class for storing information about an active macro instantiation.
84 struct MacroInstantiation {
85   /// The location of the instantiation.
86   SMLoc InstantiationLoc;
87 
88   /// The buffer where parsing should resume upon instantiation completion.
89   unsigned ExitBuffer;
90 
91   /// The location where parsing should resume upon instantiation completion.
92   SMLoc ExitLoc;
93 
94   /// The depth of TheCondStack at the start of the instantiation.
95   size_t CondStackDepth;
96 };
97 
98 struct ParseStatementInfo {
99   /// The parsed operands from the last parsed statement.
100   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
101 
102   /// The opcode from the last parsed instruction.
103   unsigned Opcode = ~0U;
104 
105   /// Was there an error parsing the inline assembly?
106   bool ParseError = false;
107 
108   /// The value associated with a macro exit.
109   std::optional<std::string> ExitValue;
110 
111   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
112 
113   ParseStatementInfo() = delete;
ParseStatementInfo__anon60b61cd60111::ParseStatementInfo114   ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115       : AsmRewrites(rewrites) {}
116 };
117 
/// Kind of value a field initializer carries.
enum FieldType {
  /// Initializer: integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  /// Initializer: real number, stored as an APInt.
  FT_REAL,
  /// Initializer: struct initializer, stored recursively.
  FT_STRUCT
};
123 
124 struct FieldInfo;
125 struct StructInfo {
126   StringRef Name;
127   bool IsUnion = false;
128   bool Initializable = true;
129   unsigned Alignment = 0;
130   unsigned AlignmentSize = 0;
131   unsigned NextOffset = 0;
132   unsigned Size = 0;
133   std::vector<FieldInfo> Fields;
134   StringMap<size_t> FieldsByName;
135 
136   FieldInfo &addField(StringRef FieldName, FieldType FT,
137                       unsigned FieldAlignmentSize);
138 
139   StructInfo() = default;
140   StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
141 };
142 
143 // FIXME: This should probably use a class hierarchy, raw pointers between the
144 // objects, and dynamic type resolution instead of a union. On the other hand,
145 // ownership then becomes much more complicated; the obvious thing would be to
146 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
147 
148 struct StructInitializer;
149 struct IntFieldInfo {
150   SmallVector<const MCExpr *, 1> Values;
151 
152   IntFieldInfo() = default;
IntFieldInfo__anon60b61cd60111::IntFieldInfo153   IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon60b61cd60111::IntFieldInfo154   IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
155 };
156 struct RealFieldInfo {
157   SmallVector<APInt, 1> AsIntValues;
158 
159   RealFieldInfo() = default;
RealFieldInfo__anon60b61cd60111::RealFieldInfo160   RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon60b61cd60111::RealFieldInfo161   RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
162 };
163 struct StructFieldInfo {
164   std::vector<StructInitializer> Initializers;
165   StructInfo Structure;
166 
167   StructFieldInfo() = default;
168   StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
169 };
170 
171 class FieldInitializer {
172 public:
173   FieldType FT;
174   union {
175     IntFieldInfo IntInfo;
176     RealFieldInfo RealInfo;
177     StructFieldInfo StructInfo;
178   };
179 
180   ~FieldInitializer();
181   FieldInitializer(FieldType FT);
182 
183   FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184   FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185   FieldInitializer(std::vector<StructInitializer> &&Initializers,
186                    struct StructInfo Structure);
187 
188   FieldInitializer(const FieldInitializer &Initializer);
189   FieldInitializer(FieldInitializer &&Initializer);
190 
191   FieldInitializer &operator=(const FieldInitializer &Initializer);
192   FieldInitializer &operator=(FieldInitializer &&Initializer);
193 };
194 
195 struct StructInitializer {
196   std::vector<FieldInitializer> FieldInitializers;
197 };
198 
199 struct FieldInfo {
200   // Offset of the field within the containing STRUCT.
201   unsigned Offset = 0;
202 
203   // Total size of the field (= LengthOf * Type).
204   unsigned SizeOf = 0;
205 
206   // Number of elements in the field (1 if scalar, >1 if an array).
207   unsigned LengthOf = 0;
208 
209   // Size of a single entry in this field, in bytes ("type" in MASM standards).
210   unsigned Type = 0;
211 
212   FieldInitializer Contents;
213 
FieldInfo__anon60b61cd60111::FieldInfo214   FieldInfo(FieldType FT) : Contents(FT) {}
215 };
216 
StructFieldInfo(std::vector<StructInitializer> V,StructInfo S)217 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218                                  StructInfo S) {
219   Initializers = std::move(V);
220   Structure = S;
221 }
222 
StructInfo(StringRef StructName,bool Union,unsigned AlignmentValue)223 StructInfo::StructInfo(StringRef StructName, bool Union,
224                        unsigned AlignmentValue)
225     : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
226 
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)227 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228                                 unsigned FieldAlignmentSize) {
229   if (!FieldName.empty())
230     FieldsByName[FieldName.lower()] = Fields.size();
231   Fields.emplace_back(FT);
232   FieldInfo &Field = Fields.back();
233   Field.Offset =
234       llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235   if (!IsUnion) {
236     NextOffset = std::max(NextOffset, Field.Offset);
237   }
238   AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239   return Field;
240 }
241 
~FieldInitializer()242 FieldInitializer::~FieldInitializer() {
243   switch (FT) {
244   case FT_INTEGRAL:
245     IntInfo.~IntFieldInfo();
246     break;
247   case FT_REAL:
248     RealInfo.~RealFieldInfo();
249     break;
250   case FT_STRUCT:
251     StructInfo.~StructFieldInfo();
252     break;
253   }
254 }
255 
FieldInitializer(FieldType FT)256 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257   switch (FT) {
258   case FT_INTEGRAL:
259     new (&IntInfo) IntFieldInfo();
260     break;
261   case FT_REAL:
262     new (&RealInfo) RealFieldInfo();
263     break;
264   case FT_STRUCT:
265     new (&StructInfo) StructFieldInfo();
266     break;
267   }
268 }
269 
FieldInitializer(SmallVector<const MCExpr *,1> && Values)270 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271     : FT(FT_INTEGRAL) {
272   new (&IntInfo) IntFieldInfo(std::move(Values));
273 }
274 
FieldInitializer(SmallVector<APInt,1> && AsIntValues)275 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276     : FT(FT_REAL) {
277   new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
278 }
279 
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)280 FieldInitializer::FieldInitializer(
281     std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282     : FT(FT_STRUCT) {
283   new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
284 }
285 
/// Copy-construct: take the kind of \p Initializer and copy-construct the
/// matching union member in place.
FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
    : FT(Initializer.FT) {
  if (FT == FT_INTEGRAL)
    new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
  else if (FT == FT_REAL)
    new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
  else if (FT == FT_STRUCT)
    new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
}
300 
FieldInitializer(FieldInitializer && Initializer)301 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302     : FT(Initializer.FT) {
303   switch (FT) {
304   case FT_INTEGRAL:
305     new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306     break;
307   case FT_REAL:
308     new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309     break;
310   case FT_STRUCT:
311     new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312     break;
313   }
314 }
315 
316 FieldInitializer &
operator =(const FieldInitializer & Initializer)317 FieldInitializer::operator=(const FieldInitializer &Initializer) {
318   if (FT != Initializer.FT) {
319     switch (FT) {
320     case FT_INTEGRAL:
321       IntInfo.~IntFieldInfo();
322       break;
323     case FT_REAL:
324       RealInfo.~RealFieldInfo();
325       break;
326     case FT_STRUCT:
327       StructInfo.~StructFieldInfo();
328       break;
329     }
330   }
331   FT = Initializer.FT;
332   switch (FT) {
333   case FT_INTEGRAL:
334     IntInfo = Initializer.IntInfo;
335     break;
336   case FT_REAL:
337     RealInfo = Initializer.RealInfo;
338     break;
339   case FT_STRUCT:
340     StructInfo = Initializer.StructInfo;
341     break;
342   }
343   return *this;
344 }
345 
operator =(FieldInitializer && Initializer)346 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347   if (FT != Initializer.FT) {
348     switch (FT) {
349     case FT_INTEGRAL:
350       IntInfo.~IntFieldInfo();
351       break;
352     case FT_REAL:
353       RealInfo.~RealFieldInfo();
354       break;
355     case FT_STRUCT:
356       StructInfo.~StructFieldInfo();
357       break;
358     }
359   }
360   FT = Initializer.FT;
361   switch (FT) {
362   case FT_INTEGRAL:
363     IntInfo = Initializer.IntInfo;
364     break;
365   case FT_REAL:
366     RealInfo = Initializer.RealInfo;
367     break;
368   case FT_STRUCT:
369     StructInfo = Initializer.StructInfo;
370     break;
371   }
372   return *this;
373 }
374 
375 /// The concrete assembly parser instance.
376 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378 class MasmParser : public MCAsmParser {
379 private:
380   AsmLexer Lexer;
381   MCContext &Ctx;
382   MCStreamer &Out;
383   const MCAsmInfo &MAI;
384   SourceMgr &SrcMgr;
385   SourceMgr::DiagHandlerTy SavedDiagHandler;
386   void *SavedDiagContext;
387   std::unique_ptr<MCAsmParserExtension> PlatformParser;
388 
389   /// This is the current buffer index we're lexing from as managed by the
390   /// SourceMgr object.
391   unsigned CurBuffer;
392 
393   /// time of assembly
394   struct tm TM;
395 
396   BitVector EndStatementAtEOFStack;
397 
398   AsmCond TheCondState;
399   std::vector<AsmCond> TheCondStack;
400 
401   /// maps directive names to handler methods in parser
402   /// extensions. Extensions register themselves in this map by calling
403   /// addDirectiveHandler.
404   StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
405 
406   /// maps assembly-time variable names to variables.
407   struct Variable {
408     enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
409 
410     StringRef Name;
411     RedefinableKind Redefinable = REDEFINABLE;
412     bool IsText = false;
413     std::string TextValue;
414   };
415   StringMap<Variable> Variables;
416 
417   /// Stack of active struct definitions.
418   SmallVector<StructInfo, 1> StructInProgress;
419 
420   /// Maps struct tags to struct definitions.
421   StringMap<StructInfo> Structs;
422 
423   /// Maps data location names to types.
424   StringMap<AsmTypeInfo> KnownType;
425 
426   /// Stack of active macro instantiations.
427   std::vector<MacroInstantiation*> ActiveMacros;
428 
429   /// List of bodies of anonymous macros.
430   std::deque<MCAsmMacro> MacroLikeBodies;
431 
432   /// Keeps track of how many .macro's have been instantiated.
433   unsigned NumOfMacroInstantiations;
434 
435   /// The values from the last parsed cpp hash file line comment if any.
436   struct CppHashInfoTy {
437     StringRef Filename;
438     int64_t LineNumber;
439     SMLoc Loc;
440     unsigned Buf;
CppHashInfoTy__anon60b61cd60111::MasmParser::CppHashInfoTy441     CppHashInfoTy() : LineNumber(0), Buf(0) {}
442   };
443   CppHashInfoTy CppHashInfo;
444 
445   /// The filename from the first cpp hash file line comment, if any.
446   StringRef FirstCppHashFilename;
447 
448   /// List of forward directional labels for diagnosis at the end.
449   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
450 
  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
452   /// Defaults to 1U, meaning Intel.
453   unsigned AssemblerDialect = 1U;
454 
455   /// is Darwin compatibility enabled?
456   bool IsDarwin = false;
457 
458   /// Are we parsing ms-style inline assembly?
459   bool ParsingMSInlineAsm = false;
460 
461   /// Did we already inform the user about inconsistent MD5 usage?
462   bool ReportedInconsistentMD5 = false;
463 
464   // Current <...> expression depth.
465   unsigned AngleBracketDepth = 0U;
466 
467   // Number of locals defined.
468   uint16_t LocalCounter = 0;
469 
470 public:
471   MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472              const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473   MasmParser(const MasmParser &) = delete;
474   MasmParser &operator=(const MasmParser &) = delete;
475   ~MasmParser() override;
476 
477   bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
478 
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)479   void addDirectiveHandler(StringRef Directive,
480                            ExtensionDirectiveHandler Handler) override {
481     ExtensionDirectiveMap[Directive] = Handler;
482     if (!DirectiveKindMap.contains(Directive)) {
483       DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
484     }
485   }
486 
addAliasForDirective(StringRef Directive,StringRef Alias)487   void addAliasForDirective(StringRef Directive, StringRef Alias) override {
488     DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
489   }
490 
491   /// @name MCAsmParser Interface
492   /// {
493 
getSourceManager()494   SourceMgr &getSourceManager() override { return SrcMgr; }
getLexer()495   MCAsmLexer &getLexer() override { return Lexer; }
getContext()496   MCContext &getContext() override { return Ctx; }
getStreamer()497   MCStreamer &getStreamer() override { return Out; }
498 
getCVContext()499   CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
500 
getAssemblerDialect()501   unsigned getAssemblerDialect() override {
502     if (AssemblerDialect == ~0U)
503       return MAI.getAssemblerDialect();
504     else
505       return AssemblerDialect;
506   }
setAssemblerDialect(unsigned i)507   void setAssemblerDialect(unsigned i) override {
508     AssemblerDialect = i;
509   }
510 
511   void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
512   bool Warning(SMLoc L, const Twine &Msg,
513                SMRange Range = std::nullopt) override;
514   bool printError(SMLoc L, const Twine &Msg,
515                   SMRange Range = std::nullopt) override;
516 
517   enum ExpandKind { ExpandMacros, DoNotExpandMacros };
518   const AsmToken &Lex(ExpandKind ExpandNextToken);
Lex()519   const AsmToken &Lex() override { return Lex(ExpandMacros); }
520 
setParsingMSInlineAsm(bool V)521   void setParsingMSInlineAsm(bool V) override {
522     ParsingMSInlineAsm = V;
523     // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
524     // hex integer literals.
525     Lexer.setLexMasmIntegers(V);
526   }
isParsingMSInlineAsm()527   bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
528 
isParsingMasm() const529   bool isParsingMasm() const override { return true; }
530 
531   bool defineMacro(StringRef Name, StringRef Value) override;
532 
533   bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534   bool lookUpField(StringRef Base, StringRef Member,
535                    AsmFieldInfo &Info) const override;
536 
537   bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
538 
539   bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540                         unsigned &NumInputs,
541                         SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542                         SmallVectorImpl<std::string> &Constraints,
543                         SmallVectorImpl<std::string> &Clobbers,
544                         const MCInstrInfo *MII, const MCInstPrinter *IP,
545                         MCAsmParserSemaCallback &SI) override;
546 
547   bool parseExpression(const MCExpr *&Res);
548   bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550                         AsmTypeInfo *TypeInfo) override;
551   bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552   bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553                              SMLoc &EndLoc) override;
554   bool parseAbsoluteExpression(int64_t &Res) override;
555 
556   /// Parse a floating point expression using the float \p Semantics
557   /// and set \p Res to the value.
558   bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
559 
560   /// Parse an identifier or string (as a quoted identifier)
561   /// and set \p Res to the identifier contents.
562   enum IdentifierPositionKind { StandardPosition, StartOfStatement };
563   bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
parseIdentifier(StringRef & Res)564   bool parseIdentifier(StringRef &Res) override {
565     return parseIdentifier(Res, StandardPosition);
566   }
567   void eatToEndOfStatement() override;
568 
569   bool checkForValidSection() override;
570 
571   /// }
572 
573 private:
574   bool expandMacros();
575   const AsmToken peekTok(bool ShouldSkipSpace = true);
576 
577   bool parseStatement(ParseStatementInfo &Info,
578                       MCAsmParserSemaCallback *SI);
579   bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580   bool parseCppHashLineFilenameComment(SMLoc L);
581 
582   bool expandMacro(raw_svector_ostream &OS, StringRef Body,
583                    ArrayRef<MCAsmMacroParameter> Parameters,
584                    ArrayRef<MCAsmMacroArgument> A,
585                    const std::vector<std::string> &Locals, SMLoc L);
586 
587   /// Are we inside a macro instantiation?
isInsideMacroInstantiation()588   bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
589 
590   /// Handle entry to macro instantiation.
591   ///
592   /// \param M The macro.
593   /// \param NameLoc Instantiation location.
594   bool handleMacroEntry(
595       const MCAsmMacro *M, SMLoc NameLoc,
596       AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
597 
598   /// Handle invocation of macro function.
599   ///
600   /// \param M The macro.
601   /// \param NameLoc Invocation location.
602   bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
603 
604   /// Handle exit from macro instantiation.
605   void handleMacroExit();
606 
607   /// Extract AsmTokens for a macro argument.
608   bool
609   parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
610                      AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
611 
612   /// Parse all macro arguments for a given macro.
613   bool
614   parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
615                       AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
616 
617   void printMacroInstantiations();
618 
619   bool expandStatement(SMLoc Loc);
620 
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=std::nullopt) const621   void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622                     SMRange Range = std::nullopt) const {
623     ArrayRef<SMRange> Ranges(Range);
624     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
625   }
626   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
627 
628   bool lookUpField(const StructInfo &Structure, StringRef Member,
629                    AsmFieldInfo &Info) const;
630 
631   /// Should we emit DWARF describing this assembler source?  (Returns false if
632   /// the source has .file directives, which means we don't want to generate
633   /// info describing the assembler source itself.)
634   bool enabledGenDwarfForAssembly();
635 
636   /// Enter the specified file. This returns true on failure.
637   bool enterIncludeFile(const std::string &Filename);
638 
639   /// Reset the current lexer position to that given by \p Loc. The
640   /// current token is not set; clients should ensure Lex() is called
641   /// subsequently.
642   ///
643   /// \param InBuffer If not 0, should be the known buffer id that contains the
644   /// location.
645   void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646                  bool EndStatementAtEOF = true);
647 
648   /// Parse up to a token of kind \p EndTok and return the contents from the
649   /// current token up to (but not including) this token; the current token on
650   /// exit will be either this kind or EOF. Reads through instantiated macro
651   /// functions and text macros.
652   SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653   std::string parseStringTo(AsmToken::TokenKind EndTok);
654 
655   /// Parse up to the end of statement and return the contents from the current
656   /// token until the end of the statement; the current token on exit will be
657   /// either the EndOfStatement or EOF.
658   StringRef parseStringToEndOfStatement() override;
659 
660   bool parseTextItem(std::string &Data);
661 
662   unsigned getBinOpPrecedence(AsmToken::TokenKind K,
663                               MCBinaryExpr::Opcode &Kind);
664 
665   bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666   bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667   bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
668 
669   bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
670 
671   bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672   bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
673 
674   // Generic (target and platform independent) directive parsing.
675   enum DirectiveKind {
676     DK_NO_DIRECTIVE, // Placeholder
677     DK_HANDLER_DIRECTIVE,
678     DK_ASSIGN,
679     DK_EQU,
680     DK_TEXTEQU,
681     DK_ASCII,
682     DK_ASCIZ,
683     DK_STRING,
684     DK_BYTE,
685     DK_SBYTE,
686     DK_WORD,
687     DK_SWORD,
688     DK_DWORD,
689     DK_SDWORD,
690     DK_FWORD,
691     DK_QWORD,
692     DK_SQWORD,
693     DK_DB,
694     DK_DD,
695     DK_DF,
696     DK_DQ,
697     DK_DW,
698     DK_REAL4,
699     DK_REAL8,
700     DK_REAL10,
701     DK_ALIGN,
702     DK_EVEN,
703     DK_ORG,
704     DK_ENDR,
705     DK_EXTERN,
706     DK_PUBLIC,
707     DK_COMM,
708     DK_COMMENT,
709     DK_INCLUDE,
710     DK_REPEAT,
711     DK_WHILE,
712     DK_FOR,
713     DK_FORC,
714     DK_IF,
715     DK_IFE,
716     DK_IFB,
717     DK_IFNB,
718     DK_IFDEF,
719     DK_IFNDEF,
720     DK_IFDIF,
721     DK_IFDIFI,
722     DK_IFIDN,
723     DK_IFIDNI,
724     DK_ELSEIF,
725     DK_ELSEIFE,
726     DK_ELSEIFB,
727     DK_ELSEIFNB,
728     DK_ELSEIFDEF,
729     DK_ELSEIFNDEF,
730     DK_ELSEIFDIF,
731     DK_ELSEIFDIFI,
732     DK_ELSEIFIDN,
733     DK_ELSEIFIDNI,
734     DK_ELSE,
735     DK_ENDIF,
736     DK_FILE,
737     DK_LINE,
738     DK_LOC,
739     DK_STABS,
740     DK_CV_FILE,
741     DK_CV_FUNC_ID,
742     DK_CV_INLINE_SITE_ID,
743     DK_CV_LOC,
744     DK_CV_LINETABLE,
745     DK_CV_INLINE_LINETABLE,
746     DK_CV_DEF_RANGE,
747     DK_CV_STRINGTABLE,
748     DK_CV_STRING,
749     DK_CV_FILECHECKSUMS,
750     DK_CV_FILECHECKSUM_OFFSET,
751     DK_CV_FPO_DATA,
752     DK_CFI_SECTIONS,
753     DK_CFI_STARTPROC,
754     DK_CFI_ENDPROC,
755     DK_CFI_DEF_CFA,
756     DK_CFI_DEF_CFA_OFFSET,
757     DK_CFI_ADJUST_CFA_OFFSET,
758     DK_CFI_DEF_CFA_REGISTER,
759     DK_CFI_OFFSET,
760     DK_CFI_REL_OFFSET,
761     DK_CFI_PERSONALITY,
762     DK_CFI_LSDA,
763     DK_CFI_REMEMBER_STATE,
764     DK_CFI_RESTORE_STATE,
765     DK_CFI_SAME_VALUE,
766     DK_CFI_RESTORE,
767     DK_CFI_ESCAPE,
768     DK_CFI_RETURN_COLUMN,
769     DK_CFI_SIGNAL_FRAME,
770     DK_CFI_UNDEFINED,
771     DK_CFI_REGISTER,
772     DK_CFI_WINDOW_SAVE,
773     DK_CFI_B_KEY_FRAME,
774     DK_MACRO,
775     DK_EXITM,
776     DK_ENDM,
777     DK_PURGE,
778     DK_ERR,
779     DK_ERRB,
780     DK_ERRNB,
781     DK_ERRDEF,
782     DK_ERRNDEF,
783     DK_ERRDIF,
784     DK_ERRDIFI,
785     DK_ERRIDN,
786     DK_ERRIDNI,
787     DK_ERRE,
788     DK_ERRNZ,
789     DK_ECHO,
790     DK_STRUCT,
791     DK_UNION,
792     DK_ENDS,
793     DK_END,
794     DK_PUSHFRAME,
795     DK_PUSHREG,
796     DK_SAVEREG,
797     DK_SAVEXMM128,
798     DK_SETFRAME,
799     DK_RADIX,
800   };
801 
802   /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803   /// class.
804   StringMap<DirectiveKind> DirectiveKindMap;
805 
806   bool isMacroLikeDirective();
807 
808   // Codeview def_range type parsing.
809   enum CVDefRangeType {
810     CVDR_DEFRANGE = 0, // Placeholder
811     CVDR_DEFRANGE_REGISTER,
812     CVDR_DEFRANGE_FRAMEPOINTER_REL,
813     CVDR_DEFRANGE_SUBFIELD_REGISTER,
814     CVDR_DEFRANGE_REGISTER_REL
815   };
816 
817   /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818   /// def_range types parsed by this class.
819   StringMap<CVDefRangeType> CVDefRangeTypeMap;
820 
821   // Generic (target and platform independent) directive parsing.
822   enum BuiltinSymbol {
823     BI_NO_SYMBOL, // Placeholder
824     BI_DATE,
825     BI_TIME,
826     BI_VERSION,
827     BI_FILECUR,
828     BI_FILENAME,
829     BI_LINE,
830     BI_CURSEG,
831     BI_CPU,
832     BI_INTERFACE,
833     BI_CODE,
834     BI_DATA,
835     BI_FARDATA,
836     BI_WORDSIZE,
837     BI_CODESIZE,
838     BI_DATASIZE,
839     BI_MODEL,
840     BI_STACK,
841   };
842 
843   /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844   /// class.
845   StringMap<BuiltinSymbol> BuiltinSymbolMap;
846 
847   const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
848 
849   std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850                                                       SMLoc StartLoc);
851 
852   // ".ascii", ".asciz", ".string"
853   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
854 
855   // "byte", "word", ...
856   bool emitIntValue(const MCExpr *Value, unsigned Size);
857   bool parseScalarInitializer(unsigned Size,
858                               SmallVectorImpl<const MCExpr *> &Values,
859                               unsigned StringPadLength = 0);
860   bool parseScalarInstList(
861       unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
862       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
863   bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864   bool addIntegralField(StringRef Name, unsigned Size);
865   bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866   bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867                                 StringRef Name, SMLoc NameLoc);
868 
869   // "real4", "real8", "real10"
870   bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871   bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872   bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873                                size_t Size);
874   bool parseRealInstList(
875       const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
876       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
877   bool parseDirectiveNamedRealValue(StringRef TypeName,
878                                     const fltSemantics &Semantics,
879                                     unsigned Size, StringRef Name,
880                                     SMLoc NameLoc);
881 
882   bool parseOptionalAngleBracketOpen();
883   bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
884 
885   bool parseFieldInitializer(const FieldInfo &Field,
886                              FieldInitializer &Initializer);
887   bool parseFieldInitializer(const FieldInfo &Field,
888                              const IntFieldInfo &Contents,
889                              FieldInitializer &Initializer);
890   bool parseFieldInitializer(const FieldInfo &Field,
891                              const RealFieldInfo &Contents,
892                              FieldInitializer &Initializer);
893   bool parseFieldInitializer(const FieldInfo &Field,
894                              const StructFieldInfo &Contents,
895                              FieldInitializer &Initializer);
896 
897   bool parseStructInitializer(const StructInfo &Structure,
898                               StructInitializer &Initializer);
899   bool parseStructInstList(
900       const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
901       const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
902 
903   bool emitFieldValue(const FieldInfo &Field);
904   bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905   bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906   bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
907 
908   bool emitFieldInitializer(const FieldInfo &Field,
909                             const FieldInitializer &Initializer);
910   bool emitFieldInitializer(const FieldInfo &Field,
911                             const IntFieldInfo &Contents,
912                             const IntFieldInfo &Initializer);
913   bool emitFieldInitializer(const FieldInfo &Field,
914                             const RealFieldInfo &Contents,
915                             const RealFieldInfo &Initializer);
916   bool emitFieldInitializer(const FieldInfo &Field,
917                             const StructFieldInfo &Contents,
918                             const StructFieldInfo &Initializer);
919 
920   bool emitStructInitializer(const StructInfo &Structure,
921                              const StructInitializer &Initializer);
922 
923   // User-defined types (structs, unions):
924   bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925   bool addStructField(StringRef Name, const StructInfo &Structure);
926   bool parseDirectiveStructValue(const StructInfo &Structure,
927                                  StringRef Directive, SMLoc DirLoc);
928   bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929                                       StringRef Directive, SMLoc DirLoc,
930                                       StringRef Name);
931 
932   // "=", "equ", "textequ"
933   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934                             DirectiveKind DirKind, SMLoc NameLoc);
935 
936   bool parseDirectiveOrg(); // "org"
937 
938   bool emitAlignTo(int64_t Alignment);
939   bool parseDirectiveAlign();  // "align"
940   bool parseDirectiveEven();   // "even"
941 
942   // ".file", ".line", ".loc", ".stabs"
943   bool parseDirectiveFile(SMLoc DirectiveLoc);
944   bool parseDirectiveLine();
945   bool parseDirectiveLoc();
946   bool parseDirectiveStabs();
947 
948   // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949   // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950   bool parseDirectiveCVFile();
951   bool parseDirectiveCVFuncId();
952   bool parseDirectiveCVInlineSiteId();
953   bool parseDirectiveCVLoc();
954   bool parseDirectiveCVLinetable();
955   bool parseDirectiveCVInlineLinetable();
956   bool parseDirectiveCVDefRange();
957   bool parseDirectiveCVString();
958   bool parseDirectiveCVStringTable();
959   bool parseDirectiveCVFileChecksums();
960   bool parseDirectiveCVFileChecksumOffset();
961   bool parseDirectiveCVFPOData();
962 
963   // .cfi directives
964   bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965   bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
966   bool parseDirectiveCFISections();
967   bool parseDirectiveCFIStartProc();
968   bool parseDirectiveCFIEndProc();
969   bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
970   bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971   bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
972   bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973   bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974   bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975   bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976   bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
977   bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
978   bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979   bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980   bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
981   bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982   bool parseDirectiveCFISignalFrame();
983   bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
984 
985   // macro directives
986   bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987   bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988                                std::string &Value);
989   bool parseDirectiveEndMacro(StringRef Directive);
990   bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
991 
992   bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993                             StringRef Name, SMLoc NameLoc);
994   bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995   bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996   bool parseDirectiveNestedEnds();
997 
998   bool parseDirectiveExtern();
999 
1000   /// Parse a directive like ".globl" which accepts a single symbol (which
1001   /// should be a label or an external).
1002   bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1003 
1004   bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1005 
1006   bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1007 
1008   bool parseDirectiveInclude(); // "include"
1009 
1010   // "if" or "ife"
1011   bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012   // "ifb" or "ifnb", depending on ExpectBlank.
1013   bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014   // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015   // CaseInsensitive.
1016   bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017                            bool CaseInsensitive);
1018   // "ifdef" or "ifndef", depending on expect_defined
1019   bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020   // "elseif" or "elseife"
1021   bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022   // "elseifb" or "elseifnb", depending on ExpectBlank.
1023   bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024   // ".elseifdef" or ".elseifndef", depending on expect_defined
1025   bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026   // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027   // ExpectEqual and CaseInsensitive.
1028   bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029                                bool CaseInsensitive);
1030   bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
1031   bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"
1032   bool parseEscapedString(std::string &Data) override;
1033   bool parseAngleBracketString(std::string &Data) override;
1034 
1035   // Macro-like directives
1036   MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038                                 raw_svector_ostream &OS);
1039   void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040                                 SMLoc ExitLoc, raw_svector_ostream &OS);
1041   bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042   bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043   bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044   bool parseDirectiveWhile(SMLoc DirectiveLoc);
1045 
1046   // "_emit" or "__emit"
1047   bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048                             size_t Len);
1049 
1050   // "align"
1051   bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1052 
1053   // "end"
1054   bool parseDirectiveEnd(SMLoc DirectiveLoc);
1055 
1056   // ".err"
1057   bool parseDirectiveError(SMLoc DirectiveLoc);
1058   // ".errb" or ".errnb", depending on ExpectBlank.
1059   bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".errdef" or ".errndef", depending on ExpectDefined.
1061   bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062   // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063   // and CaseInsensitive.
1064   bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065                                 bool CaseInsensitive);
1066   // ".erre" or ".errnz", depending on ExpectZero.
1067   bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1068 
1069   // ".radix"
1070   bool parseDirectiveRadix(SMLoc DirectiveLoc);
1071 
1072   // "echo"
1073   bool parseDirectiveEcho(SMLoc DirectiveLoc);
1074 
1075   void initializeDirectiveKindMap();
1076   void initializeCVDefRangeTypeMap();
1077   void initializeBuiltinSymbolMap();
1078 };
1079 
1080 } // end anonymous namespace
1081 
1082 namespace llvm {
1083 
1084 extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
1085 
1086 extern MCAsmParserExtension *createCOFFMasmParser();
1087 
1088 } // end namespace llvm
1089 
1090 enum { DEFAULT_ADDRSPACE = 0 };
1091 
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)1092 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093                        const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094     : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095       CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096   HadError = false;
1097   // Save the old handler.
1098   SavedDiagHandler = SrcMgr.getDiagHandler();
1099   SavedDiagContext = SrcMgr.getDiagContext();
1100   // Set our own handler which calls the saved handler.
1101   SrcMgr.setDiagHandler(DiagHandler, this);
1102   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1103   EndStatementAtEOFStack.push_back(true);
1104 
1105   // Initialize the platform / file format parser.
1106   switch (Ctx.getObjectFileType()) {
1107   case MCContext::IsCOFF:
1108     PlatformParser.reset(createCOFFMasmParser());
1109     break;
1110   default:
1111     report_fatal_error("llvm-ml currently supports only COFF output.");
1112     break;
1113   }
1114 
1115   initializeDirectiveKindMap();
1116   PlatformParser->Initialize(*this);
1117   initializeCVDefRangeTypeMap();
1118   initializeBuiltinSymbolMap();
1119 
1120   NumOfMacroInstantiations = 0;
1121 }
1122 
~MasmParser()1123 MasmParser::~MasmParser() {
1124   assert((HadError || ActiveMacros.empty()) &&
1125          "Unexpected active macro instantiation!");
1126 
1127   // Restore the saved diagnostics handler and context for use during
1128   // finalization.
1129   SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1130 }
1131 
printMacroInstantiations()1132 void MasmParser::printMacroInstantiations() {
1133   // Print the active macro instantiation stack.
1134   for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135            it = ActiveMacros.rbegin(),
1136            ie = ActiveMacros.rend();
1137        it != ie; ++it)
1138     printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139                  "while in macro instantiation");
1140 }
1141 
Note(SMLoc L,const Twine & Msg,SMRange Range)1142 void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1143   printPendingErrors();
1144   printMessage(L, SourceMgr::DK_Note, Msg, Range);
1145   printMacroInstantiations();
1146 }
1147 
Warning(SMLoc L,const Twine & Msg,SMRange Range)1148 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149   if (getTargetParser().getTargetOptions().MCNoWarn)
1150     return false;
1151   if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152     return Error(L, Msg, Range);
1153   printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154   printMacroInstantiations();
1155   return false;
1156 }
1157 
printError(SMLoc L,const Twine & Msg,SMRange Range)1158 bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1159   HadError = true;
1160   printMessage(L, SourceMgr::DK_Error, Msg, Range);
1161   printMacroInstantiations();
1162   return true;
1163 }
1164 
enterIncludeFile(const std::string & Filename)1165 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166   std::string IncludedFile;
1167   unsigned NewBuf =
1168       SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169   if (!NewBuf)
1170     return true;
1171 
1172   CurBuffer = NewBuf;
1173   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174   EndStatementAtEOFStack.push_back(true);
1175   return false;
1176 }
1177 
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1178 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179                            bool EndStatementAtEOF) {
1180   CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182                   Loc.getPointer(), EndStatementAtEOF);
1183 }
1184 
expandMacros()1185 bool MasmParser::expandMacros() {
1186   const AsmToken &Tok = getTok();
1187   const std::string IDLower = Tok.getIdentifier().lower();
1188 
1189   const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190   if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191     // This is a macro function invocation; expand it in place.
1192     const SMLoc MacroLoc = Tok.getLoc();
1193     const StringRef MacroId = Tok.getIdentifier();
1194     Lexer.Lex();
1195     if (handleMacroInvocation(M, MacroLoc)) {
1196       Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197       Lexer.Lex();
1198     }
1199     return false;
1200   }
1201 
1202   std::optional<std::string> ExpandedValue;
1203   auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204   if (BuiltinIt != BuiltinSymbolMap.end()) {
1205     ExpandedValue =
1206         evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207   } else {
1208     auto VarIt = Variables.find(IDLower);
1209     if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210       ExpandedValue = VarIt->getValue().TextValue;
1211     }
1212   }
1213 
1214   if (!ExpandedValue)
1215     return true;
1216   std::unique_ptr<MemoryBuffer> Instantiation =
1217       MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1218 
1219   // Jump to the macro instantiation and prime the lexer.
1220   CurBuffer =
1221       SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223                   /*EndStatementAtEOF=*/false);
1224   EndStatementAtEOFStack.push_back(false);
1225   Lexer.Lex();
1226   return false;
1227 }
1228 
Lex(ExpandKind ExpandNextToken)1229 const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1230   if (Lexer.getTok().is(AsmToken::Error))
1231     Error(Lexer.getErrLoc(), Lexer.getErr());
1232 
1233   // if it's a end of statement with a comment in it
1234   if (getTok().is(AsmToken::EndOfStatement)) {
1235     // if this is a line comment output it.
1236     if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1237         getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1238       Out.addExplicitComment(Twine(getTok().getString()));
1239   }
1240 
1241   const AsmToken *tok = &Lexer.Lex();
1242   bool StartOfStatement = Lexer.isAtStartOfStatement();
1243 
1244   while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1245     if (StartOfStatement) {
1246       AsmToken NextTok;
1247       MutableArrayRef<AsmToken> Buf(NextTok);
1248       size_t ReadCount = Lexer.peekTokens(Buf);
1249       if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1250           (NextTok.getString().equals_insensitive("equ") ||
1251            NextTok.getString().equals_insensitive("textequ"))) {
1252         // This looks like an EQU or TEXTEQU directive; don't expand the
1253         // identifier, allowing for redefinitions.
1254         break;
1255       }
1256     }
1257     if (expandMacros())
1258       break;
1259   }
1260 
1261   // Parse comments here to be deferred until end of next statement.
1262   while (tok->is(AsmToken::Comment)) {
1263     if (MAI.preserveAsmComments())
1264       Out.addExplicitComment(Twine(tok->getString()));
1265     tok = &Lexer.Lex();
1266   }
1267 
1268   // Recognize and bypass line continuations.
1269   while (tok->is(AsmToken::BackSlash) &&
1270          peekTok().is(AsmToken::EndOfStatement)) {
1271     // Eat both the backslash and the end of statement.
1272     Lexer.Lex();
1273     tok = &Lexer.Lex();
1274   }
1275 
1276   if (tok->is(AsmToken::Eof)) {
1277     // If this is the end of an included file, pop the parent file off the
1278     // include stack.
1279     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1280     if (ParentIncludeLoc != SMLoc()) {
1281       EndStatementAtEOFStack.pop_back();
1282       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1283       return Lex();
1284     }
1285     EndStatementAtEOFStack.pop_back();
1286     assert(EndStatementAtEOFStack.empty());
1287   }
1288 
1289   return *tok;
1290 }
1291 
peekTok(bool ShouldSkipSpace)1292 const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1293   AsmToken Tok;
1294 
1295   MutableArrayRef<AsmToken> Buf(Tok);
1296   size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1297 
1298   if (ReadCount == 0) {
1299     // If this is the end of an included file, pop the parent file off the
1300     // include stack.
1301     SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1302     if (ParentIncludeLoc != SMLoc()) {
1303       EndStatementAtEOFStack.pop_back();
1304       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1305       return peekTok(ShouldSkipSpace);
1306     }
1307     EndStatementAtEOFStack.pop_back();
1308     assert(EndStatementAtEOFStack.empty());
1309   }
1310 
1311   assert(ReadCount == 1);
1312   return Tok;
1313 }
1314 
enabledGenDwarfForAssembly()1315 bool MasmParser::enabledGenDwarfForAssembly() {
1316   // Check whether the user specified -g.
1317   if (!getContext().getGenDwarfForAssembly())
1318     return false;
1319   // If we haven't encountered any .file directives (which would imply that
1320   // the assembler source was produced with debug info already) then emit one
1321   // describing the assembler source file itself.
1322   if (getContext().getGenDwarfFileNumber() == 0) {
1323     // Use the first #line directive for this, if any. It's preprocessed, so
1324     // there is no checksum, and of course no source directive.
1325     if (!FirstCppHashFilename.empty())
1326       getContext().setMCLineTableRootFile(
1327           /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1328           /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1329     const MCDwarfFile &RootFile =
1330         getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1331     getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1332         /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1333         RootFile.Checksum, RootFile.Source));
1334   }
1335   return true;
1336 }
1337 
Run(bool NoInitialTextSection,bool NoFinalize)1338 bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1339   // Create the initial section, if requested.
1340   if (!NoInitialTextSection)
1341     Out.initSections(false, getTargetParser().getSTI());
1342 
1343   // Prime the lexer.
1344   Lex();
1345 
1346   HadError = false;
1347   AsmCond StartingCondState = TheCondState;
1348   SmallVector<AsmRewrite, 4> AsmStrRewrites;
1349 
1350   // If we are generating dwarf for assembly source files save the initial text
1351   // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
1352   // emitting any actual debug info yet and haven't had a chance to parse any
1353   // embedded .file directives.)
1354   if (getContext().getGenDwarfForAssembly()) {
1355     MCSection *Sec = getStreamer().getCurrentSectionOnly();
1356     if (!Sec->getBeginSymbol()) {
1357       MCSymbol *SectionStartSym = getContext().createTempSymbol();
1358       getStreamer().emitLabel(SectionStartSym);
1359       Sec->setBeginSymbol(SectionStartSym);
1360     }
1361     bool InsertResult = getContext().addGenDwarfSection(Sec);
1362     assert(InsertResult && ".text section should not have debug info yet");
1363     (void)InsertResult;
1364   }
1365 
1366   getTargetParser().onBeginOfFile();
1367 
1368   // While we have input, parse each statement.
1369   while (Lexer.isNot(AsmToken::Eof) ||
1370          SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1371     // Skip through the EOF at the end of an inclusion.
1372     if (Lexer.is(AsmToken::Eof))
1373       Lex();
1374 
1375     ParseStatementInfo Info(&AsmStrRewrites);
1376     bool Parsed = parseStatement(Info, nullptr);
1377 
1378     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1379     // for printing ErrMsg via Lex() only if no (presumably better) parser error
1380     // exists.
1381     if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1382       Lex();
1383     }
1384 
1385     // parseStatement returned true so may need to emit an error.
1386     printPendingErrors();
1387 
1388     // Skipping to the next line if needed.
1389     if (Parsed && !getLexer().isAtStartOfStatement())
1390       eatToEndOfStatement();
1391   }
1392 
1393   getTargetParser().onEndOfFile();
1394   printPendingErrors();
1395 
1396   // All errors should have been emitted.
1397   assert(!hasPendingError() && "unexpected error from parseStatement");
1398 
1399   getTargetParser().flushPendingInstructions(getStreamer());
1400 
1401   if (TheCondState.TheCond != StartingCondState.TheCond ||
1402       TheCondState.Ignore != StartingCondState.Ignore)
1403     printError(getTok().getLoc(), "unmatched .ifs or .elses");
1404   // Check to see there are no empty DwarfFile slots.
1405   const auto &LineTables = getContext().getMCDwarfLineTables();
1406   if (!LineTables.empty()) {
1407     unsigned Index = 0;
1408     for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1409       if (File.Name.empty() && Index != 0)
1410         printError(getTok().getLoc(), "unassigned file number: " +
1411                                           Twine(Index) +
1412                                           " for .file directives");
1413       ++Index;
1414     }
1415   }
1416 
1417   // Check to see that all assembler local symbols were actually defined.
1418   // Targets that don't do subsections via symbols may not want this, though,
1419   // so conservatively exclude them. Only do this if we're finalizing, though,
1420   // as otherwise we won't necessarilly have seen everything yet.
1421   if (!NoFinalize) {
1422     if (MAI.hasSubsectionsViaSymbols()) {
1423       for (const auto &TableEntry : getContext().getSymbols()) {
1424         MCSymbol *Sym = TableEntry.getValue().Symbol;
1425         // Variable symbols may not be marked as defined, so check those
1426         // explicitly. If we know it's a variable, we have a definition for
1427         // the purposes of this check.
1428         if (Sym && Sym->isTemporary() && !Sym->isVariable() &&
1429             !Sym->isDefined())
1430           // FIXME: We would really like to refer back to where the symbol was
1431           // first referenced for a source location. We need to add something
1432           // to track that. Currently, we just point to the end of the file.
1433           printError(getTok().getLoc(), "assembler local symbol '" +
1434                                             Sym->getName() + "' not defined");
1435       }
1436     }
1437 
1438     // Temporary symbols like the ones for directional jumps don't go in the
1439     // symbol table. They also need to be diagnosed in all (final) cases.
1440     for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1441       if (std::get<2>(LocSym)->isUndefined()) {
1442         // Reset the state of any "# line file" directives we've seen to the
1443         // context as it was at the diagnostic site.
1444         CppHashInfo = std::get<1>(LocSym);
1445         printError(std::get<0>(LocSym), "directional label undefined");
1446       }
1447     }
1448   }
1449 
1450   // Finalize the output stream if there are no errors and if the client wants
1451   // us to.
1452   if (!HadError && !NoFinalize)
1453     Out.finish(Lexer.getLoc());
1454 
1455   return HadError || getContext().hadError();
1456 }
1457 
checkForValidSection()1458 bool MasmParser::checkForValidSection() {
1459   if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1460     Out.initSections(false, getTargetParser().getSTI());
1461     return Error(getTok().getLoc(),
1462                  "expected section directive before assembly directive");
1463   }
1464   return false;
1465 }
1466 
1467 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1468 void MasmParser::eatToEndOfStatement() {
1469   while (Lexer.isNot(AsmToken::EndOfStatement)) {
1470     if (Lexer.is(AsmToken::Eof)) {
1471       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1472       if (ParentIncludeLoc == SMLoc()) {
1473         break;
1474       }
1475 
1476       EndStatementAtEOFStack.pop_back();
1477       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1478     }
1479 
1480     Lexer.Lex();
1481   }
1482 
1483   // Eat EOL.
1484   if (Lexer.is(AsmToken::EndOfStatement))
1485     Lexer.Lex();
1486 }
1487 
1488 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1489 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1490   SmallVector<StringRef, 1> Refs;
1491   const char *Start = getTok().getLoc().getPointer();
1492   while (Lexer.isNot(EndTok)) {
1493     if (Lexer.is(AsmToken::Eof)) {
1494       SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1495       if (ParentIncludeLoc == SMLoc()) {
1496         break;
1497       }
1498       Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1499 
1500       EndStatementAtEOFStack.pop_back();
1501       jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1502       Lexer.Lex();
1503       Start = getTok().getLoc().getPointer();
1504     } else {
1505       Lexer.Lex();
1506     }
1507   }
1508   Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1509   return Refs;
1510 }
1511 
parseStringTo(AsmToken::TokenKind EndTok)1512 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1513   SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1514   std::string Str;
1515   for (StringRef S : Refs) {
1516     Str.append(S.str());
1517   }
1518   return Str;
1519 }
1520 
parseStringToEndOfStatement()1521 StringRef MasmParser::parseStringToEndOfStatement() {
1522   const char *Start = getTok().getLoc().getPointer();
1523 
1524   while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1525     Lexer.Lex();
1526 
1527   const char *End = getTok().getLoc().getPointer();
1528   return StringRef(Start, End - Start);
1529 }
1530 
1531 /// Parse a paren expression and return it.
1532 /// NOTE: This assumes the leading '(' has already been consumed.
1533 ///
1534 /// parenexpr ::= expr)
1535 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1536 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1537   if (parseExpression(Res))
1538     return true;
1539   EndLoc = Lexer.getTok().getEndLoc();
1540   return parseRParen();
1541 }
1542 
1543 /// Parse a bracket expression and return it.
1544 /// NOTE: This assumes the leading '[' has already been consumed.
1545 ///
1546 /// bracketexpr ::= expr]
1547 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1548 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1549   if (parseExpression(Res))
1550     return true;
1551   EndLoc = getTok().getEndLoc();
1552   if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1553     return true;
1554   return false;
1555 }
1556 
1557 /// Parse a primary expression and return it.
1558 ///  primaryexpr ::= (parenexpr
1559 ///  primaryexpr ::= symbol
1560 ///  primaryexpr ::= number
1561 ///  primaryexpr ::= '.'
1562 ///  primaryexpr ::= ~,+,-,'not' primaryexpr
1563 ///  primaryexpr ::= string
1564 ///          (a string is interpreted as a 64-bit number in big-endian base-256)
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc,AsmTypeInfo * TypeInfo)1565 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1566                                   AsmTypeInfo *TypeInfo) {
1567   SMLoc FirstTokenLoc = getLexer().getLoc();
1568   AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1569   switch (FirstTokenKind) {
1570   default:
1571     return TokError("unknown token in expression");
1572   // If we have an error assume that we've already handled it.
1573   case AsmToken::Error:
1574     return true;
1575   case AsmToken::Exclaim:
1576     Lex(); // Eat the operator.
1577     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1578       return true;
1579     Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1580     return false;
1581   case AsmToken::Dollar:
1582   case AsmToken::At:
1583   case AsmToken::Identifier: {
1584     StringRef Identifier;
1585     if (parseIdentifier(Identifier)) {
1586       // We may have failed but $ may be a valid token.
1587       if (getTok().is(AsmToken::Dollar)) {
1588         if (Lexer.getMAI().getDollarIsPC()) {
1589           Lex();
1590           // This is a '$' reference, which references the current PC.  Emit a
1591           // temporary label to the streamer and refer to it.
1592           MCSymbol *Sym = Ctx.createTempSymbol();
1593           Out.emitLabel(Sym);
1594           Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1595                                         getContext());
1596           EndLoc = FirstTokenLoc;
1597           return false;
1598         }
1599         return Error(FirstTokenLoc, "invalid token in expression");
1600       }
1601     }
1602     // Parse named bitwise negation.
1603     if (Identifier.equals_insensitive("not")) {
1604       if (parsePrimaryExpr(Res, EndLoc, nullptr))
1605         return true;
1606       Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1607       return false;
1608     }
1609     // Parse directional local label references.
1610     if (Identifier.equals_insensitive("@b") ||
1611         Identifier.equals_insensitive("@f")) {
1612       bool Before = Identifier.equals_insensitive("@b");
1613       MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1614       if (Before && Sym->isUndefined())
1615         return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1616       Res = MCSymbolRefExpr::create(Sym, getContext());
1617       return false;
1618     }
1619     // Parse symbol variant.
1620     std::pair<StringRef, StringRef> Split;
1621     if (!MAI.useParensForSymbolVariant()) {
1622       Split = Identifier.split('@');
1623     } else if (Lexer.is(AsmToken::LParen)) {
1624       Lex(); // eat '('.
1625       StringRef VName;
1626       parseIdentifier(VName);
1627       // eat ')'.
1628       if (parseToken(AsmToken::RParen,
1629                      "unexpected token in variant, expected ')'"))
1630         return true;
1631       Split = std::make_pair(Identifier, VName);
1632     }
1633 
1634     EndLoc = SMLoc::getFromPointer(Identifier.end());
1635 
1636     // This is a symbol reference.
1637     StringRef SymbolName = Identifier;
1638     if (SymbolName.empty())
1639       return Error(getLexer().getLoc(), "expected a symbol reference");
1640 
1641     MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1642 
1643     // Look up the symbol variant if used.
1644     if (!Split.second.empty()) {
1645       Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1646       if (Variant != MCSymbolRefExpr::VK_Invalid) {
1647         SymbolName = Split.first;
1648       } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1649         Variant = MCSymbolRefExpr::VK_None;
1650       } else {
1651         return Error(SMLoc::getFromPointer(Split.second.begin()),
1652                      "invalid variant '" + Split.second + "'");
1653       }
1654     }
1655 
1656     // Find the field offset if used.
1657     AsmFieldInfo Info;
1658     Split = SymbolName.split('.');
1659     if (Split.second.empty()) {
1660     } else {
1661       SymbolName = Split.first;
1662       if (lookUpField(SymbolName, Split.second, Info)) {
1663         std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1664         StringRef Base = BaseMember.first, Member = BaseMember.second;
1665         lookUpField(Base, Member, Info);
1666       } else if (Structs.count(SymbolName.lower())) {
1667         // This is actually a reference to a field offset.
1668         Res = MCConstantExpr::create(Info.Offset, getContext());
1669         return false;
1670       }
1671     }
1672 
1673     MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1674     if (!Sym) {
1675       // If this is a built-in numeric value, treat it as a constant.
1676       auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1677       const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1678                                        ? BI_NO_SYMBOL
1679                                        : BuiltinIt->getValue();
1680       if (Symbol != BI_NO_SYMBOL) {
1681         const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1682         if (Value) {
1683           Res = Value;
1684           return false;
1685         }
1686       }
1687 
1688       // Variables use case-insensitive symbol names; if this is a variable, we
1689       // find the symbol using its canonical name.
1690       auto VarIt = Variables.find(SymbolName.lower());
1691       if (VarIt != Variables.end())
1692         SymbolName = VarIt->second.Name;
1693       Sym = getContext().getOrCreateSymbol(SymbolName);
1694     }
1695 
1696     // If this is an absolute variable reference, substitute it now to preserve
1697     // semantics in the face of reassignment.
1698     if (Sym->isVariable()) {
1699       auto V = Sym->getVariableValue(/*SetUsed=*/false);
1700       bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1701       if (auto TV = dyn_cast<MCTargetExpr>(V))
1702         DoInline = TV->inlineAssignedExpr();
1703       if (DoInline) {
1704         if (Variant)
1705           return Error(EndLoc, "unexpected modifier on variable reference");
1706         Res = Sym->getVariableValue(/*SetUsed=*/false);
1707         return false;
1708       }
1709     }
1710 
1711     // Otherwise create a symbol ref.
1712     const MCExpr *SymRef =
1713         MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1714     if (Info.Offset) {
1715       Res = MCBinaryExpr::create(
1716           MCBinaryExpr::Add, SymRef,
1717           MCConstantExpr::create(Info.Offset, getContext()), getContext());
1718     } else {
1719       Res = SymRef;
1720     }
1721     if (TypeInfo) {
1722       if (Info.Type.Name.empty()) {
1723         auto TypeIt = KnownType.find(Identifier.lower());
1724         if (TypeIt != KnownType.end()) {
1725           Info.Type = TypeIt->second;
1726         }
1727       }
1728 
1729       *TypeInfo = Info.Type;
1730     }
1731     return false;
1732   }
1733   case AsmToken::BigNum:
1734     return TokError("literal value out of range for directive");
1735   case AsmToken::Integer: {
1736     int64_t IntVal = getTok().getIntVal();
1737     Res = MCConstantExpr::create(IntVal, getContext());
1738     EndLoc = Lexer.getTok().getEndLoc();
1739     Lex(); // Eat token.
1740     return false;
1741   }
1742   case AsmToken::String: {
1743     // MASM strings (used as constants) are interpreted as big-endian base-256.
1744     SMLoc ValueLoc = getTok().getLoc();
1745     std::string Value;
1746     if (parseEscapedString(Value))
1747       return true;
1748     if (Value.size() > 8)
1749       return Error(ValueLoc, "literal value out of range");
1750     uint64_t IntValue = 0;
1751     for (const unsigned char CharVal : Value)
1752       IntValue = (IntValue << 8) | CharVal;
1753     Res = MCConstantExpr::create(IntValue, getContext());
1754     return false;
1755   }
1756   case AsmToken::Real: {
1757     APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1758     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1759     Res = MCConstantExpr::create(IntVal, getContext());
1760     EndLoc = Lexer.getTok().getEndLoc();
1761     Lex(); // Eat token.
1762     return false;
1763   }
1764   case AsmToken::Dot: {
1765     // This is a '.' reference, which references the current PC.  Emit a
1766     // temporary label to the streamer and refer to it.
1767     MCSymbol *Sym = Ctx.createTempSymbol();
1768     Out.emitLabel(Sym);
1769     Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1770     EndLoc = Lexer.getTok().getEndLoc();
1771     Lex(); // Eat identifier.
1772     return false;
1773   }
1774   case AsmToken::LParen:
1775     Lex(); // Eat the '('.
1776     return parseParenExpr(Res, EndLoc);
1777   case AsmToken::LBrac:
1778     if (!PlatformParser->HasBracketExpressions())
1779       return TokError("brackets expression not supported on this target");
1780     Lex(); // Eat the '['.
1781     return parseBracketExpr(Res, EndLoc);
1782   case AsmToken::Minus:
1783     Lex(); // Eat the operator.
1784     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1785       return true;
1786     Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1787     return false;
1788   case AsmToken::Plus:
1789     Lex(); // Eat the operator.
1790     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1791       return true;
1792     Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1793     return false;
1794   case AsmToken::Tilde:
1795     Lex(); // Eat the operator.
1796     if (parsePrimaryExpr(Res, EndLoc, nullptr))
1797       return true;
1798     Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1799     return false;
1800   // MIPS unary expression operators. The lexer won't generate these tokens if
1801   // MCAsmInfo::HasMipsExpressions is false for the target.
1802   case AsmToken::PercentCall16:
1803   case AsmToken::PercentCall_Hi:
1804   case AsmToken::PercentCall_Lo:
1805   case AsmToken::PercentDtprel_Hi:
1806   case AsmToken::PercentDtprel_Lo:
1807   case AsmToken::PercentGot:
1808   case AsmToken::PercentGot_Disp:
1809   case AsmToken::PercentGot_Hi:
1810   case AsmToken::PercentGot_Lo:
1811   case AsmToken::PercentGot_Ofst:
1812   case AsmToken::PercentGot_Page:
1813   case AsmToken::PercentGottprel:
1814   case AsmToken::PercentGp_Rel:
1815   case AsmToken::PercentHi:
1816   case AsmToken::PercentHigher:
1817   case AsmToken::PercentHighest:
1818   case AsmToken::PercentLo:
1819   case AsmToken::PercentNeg:
1820   case AsmToken::PercentPcrel_Hi:
1821   case AsmToken::PercentPcrel_Lo:
1822   case AsmToken::PercentTlsgd:
1823   case AsmToken::PercentTlsldm:
1824   case AsmToken::PercentTprel_Hi:
1825   case AsmToken::PercentTprel_Lo:
1826     Lex(); // Eat the operator.
1827     if (Lexer.isNot(AsmToken::LParen))
1828       return TokError("expected '(' after operator");
1829     Lex(); // Eat the operator.
1830     if (parseExpression(Res, EndLoc))
1831       return true;
1832     if (parseRParen())
1833       return true;
1834     Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1835     return !Res;
1836   }
1837 }
1838 
parseExpression(const MCExpr * & Res)1839 bool MasmParser::parseExpression(const MCExpr *&Res) {
1840   SMLoc EndLoc;
1841   return parseExpression(Res, EndLoc);
1842 }
1843 
1844 /// This function checks if the next token is <string> type or arithmetic.
1845 /// string that begin with character '<' must end with character '>'.
1846 /// otherwise it is arithmetics.
1847 /// If the function returns a 'true' value,
1848 /// the End argument will be filled with the last location pointed to the '>'
1849 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1850 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1851   assert((StrLoc.getPointer() != nullptr) &&
1852          "Argument to the function cannot be a NULL value");
1853   const char *CharPtr = StrLoc.getPointer();
1854   while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1855          (*CharPtr != '\0')) {
1856     if (*CharPtr == '!')
1857       CharPtr++;
1858     CharPtr++;
1859   }
1860   if (*CharPtr == '>') {
1861     EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1862     return true;
1863   }
1864   return false;
1865 }
1866 
1867 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1868 static std::string angleBracketString(StringRef BracketContents) {
1869   std::string Res;
1870   for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1871     if (BracketContents[Pos] == '!')
1872       Pos++;
1873     Res += BracketContents[Pos];
1874   }
1875   return Res;
1876 }
1877 
1878 /// Parse an expression and return it.
1879 ///
1880 ///  expr ::= expr &&,|| expr               -> lowest.
1881 ///  expr ::= expr |,^,&,! expr
1882 ///  expr ::= expr ==,!=,<>,<,<=,>,>= expr
1883 ///  expr ::= expr <<,>> expr
1884 ///  expr ::= expr +,- expr
1885 ///  expr ::= expr *,/,% expr               -> highest.
1886 ///  expr ::= primaryexpr
1887 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1888 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1889   // Parse the expression.
1890   Res = nullptr;
1891   if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1892       parseBinOpRHS(1, Res, EndLoc))
1893     return true;
1894 
1895   // Try to constant fold it up front, if possible. Do not exploit
1896   // assembler here.
1897   int64_t Value;
1898   if (Res->evaluateAsAbsolute(Value))
1899     Res = MCConstantExpr::create(Value, getContext());
1900 
1901   return false;
1902 }
1903 
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1904 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1905   Res = nullptr;
1906   return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1907 }
1908 
parseParenExprOfDepth(unsigned ParenDepth,const MCExpr * & Res,SMLoc & EndLoc)1909 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1910                                        SMLoc &EndLoc) {
1911   if (parseParenExpr(Res, EndLoc))
1912     return true;
1913 
1914   for (; ParenDepth > 0; --ParenDepth) {
1915     if (parseBinOpRHS(1, Res, EndLoc))
1916       return true;
1917 
1918     // We don't Lex() the last RParen.
1919     // This is the same behavior as parseParenExpression().
1920     if (ParenDepth - 1 > 0) {
1921       EndLoc = getTok().getEndLoc();
1922       if (parseRParen())
1923         return true;
1924     }
1925   }
1926   return false;
1927 }
1928 
parseAbsoluteExpression(int64_t & Res)1929 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1930   const MCExpr *Expr;
1931 
1932   SMLoc StartLoc = Lexer.getLoc();
1933   if (parseExpression(Expr))
1934     return true;
1935 
1936   if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1937     return Error(StartLoc, "expected absolute expression");
1938 
1939   return false;
1940 }
1941 
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1942 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1943                                       MCBinaryExpr::Opcode &Kind,
1944                                       bool ShouldUseLogicalShr,
1945                                       bool EndExpressionAtGreater) {
1946   switch (K) {
1947   default:
1948     return 0; // not a binop.
1949 
1950   // Lowest Precedence: &&, ||
1951   case AsmToken::AmpAmp:
1952     Kind = MCBinaryExpr::LAnd;
1953     return 2;
1954   case AsmToken::PipePipe:
1955     Kind = MCBinaryExpr::LOr;
1956     return 1;
1957 
1958   // Low Precedence: ==, !=, <>, <, <=, >, >=
1959   case AsmToken::EqualEqual:
1960     Kind = MCBinaryExpr::EQ;
1961     return 3;
1962   case AsmToken::ExclaimEqual:
1963   case AsmToken::LessGreater:
1964     Kind = MCBinaryExpr::NE;
1965     return 3;
1966   case AsmToken::Less:
1967     Kind = MCBinaryExpr::LT;
1968     return 3;
1969   case AsmToken::LessEqual:
1970     Kind = MCBinaryExpr::LTE;
1971     return 3;
1972   case AsmToken::Greater:
1973     if (EndExpressionAtGreater)
1974       return 0;
1975     Kind = MCBinaryExpr::GT;
1976     return 3;
1977   case AsmToken::GreaterEqual:
1978     Kind = MCBinaryExpr::GTE;
1979     return 3;
1980 
1981   // Low Intermediate Precedence: +, -
1982   case AsmToken::Plus:
1983     Kind = MCBinaryExpr::Add;
1984     return 4;
1985   case AsmToken::Minus:
1986     Kind = MCBinaryExpr::Sub;
1987     return 4;
1988 
1989   // High Intermediate Precedence: |, &, ^
1990   case AsmToken::Pipe:
1991     Kind = MCBinaryExpr::Or;
1992     return 5;
1993   case AsmToken::Caret:
1994     Kind = MCBinaryExpr::Xor;
1995     return 5;
1996   case AsmToken::Amp:
1997     Kind = MCBinaryExpr::And;
1998     return 5;
1999 
2000   // Highest Precedence: *, /, %, <<, >>
2001   case AsmToken::Star:
2002     Kind = MCBinaryExpr::Mul;
2003     return 6;
2004   case AsmToken::Slash:
2005     Kind = MCBinaryExpr::Div;
2006     return 6;
2007   case AsmToken::Percent:
2008     Kind = MCBinaryExpr::Mod;
2009     return 6;
2010   case AsmToken::LessLess:
2011     Kind = MCBinaryExpr::Shl;
2012     return 6;
2013   case AsmToken::GreaterGreater:
2014     if (EndExpressionAtGreater)
2015       return 0;
2016     Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2017     return 6;
2018   }
2019 }
2020 
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)2021 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2022                                         MCBinaryExpr::Opcode &Kind) {
2023   bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2024   return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2025                                AngleBracketDepth > 0);
2026 }
2027 
2028 /// Parse all binary operators with precedence >= 'Precedence'.
2029 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)2030 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2031                                SMLoc &EndLoc) {
2032   SMLoc StartLoc = Lexer.getLoc();
2033   while (true) {
2034     AsmToken::TokenKind TokKind = Lexer.getKind();
2035     if (Lexer.getKind() == AsmToken::Identifier) {
2036       TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2037                     .CaseLower("and", AsmToken::Amp)
2038                     .CaseLower("not", AsmToken::Exclaim)
2039                     .CaseLower("or", AsmToken::Pipe)
2040                     .CaseLower("xor", AsmToken::Caret)
2041                     .CaseLower("shl", AsmToken::LessLess)
2042                     .CaseLower("shr", AsmToken::GreaterGreater)
2043                     .CaseLower("eq", AsmToken::EqualEqual)
2044                     .CaseLower("ne", AsmToken::ExclaimEqual)
2045                     .CaseLower("lt", AsmToken::Less)
2046                     .CaseLower("le", AsmToken::LessEqual)
2047                     .CaseLower("gt", AsmToken::Greater)
2048                     .CaseLower("ge", AsmToken::GreaterEqual)
2049                     .Default(TokKind);
2050     }
2051     MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2052     unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2053 
2054     // If the next token is lower precedence than we are allowed to eat, return
2055     // successfully with what we ate already.
2056     if (TokPrec < Precedence)
2057       return false;
2058 
2059     Lex();
2060 
2061     // Eat the next primary expression.
2062     const MCExpr *RHS;
2063     if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2064       return true;
2065 
2066     // If BinOp binds less tightly with RHS than the operator after RHS, let
2067     // the pending operator take RHS as its LHS.
2068     MCBinaryExpr::Opcode Dummy;
2069     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2070     if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2071       return true;
2072 
2073     // Merge LHS and RHS according to operator.
2074     Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2075   }
2076 }
2077 
2078 /// ParseStatement:
2079 ///   ::= % statement
2080 ///   ::= EndOfStatement
2081 ///   ::= Label* Directive ...Operands... EndOfStatement
2082 ///   ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)2083 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2084                                 MCAsmParserSemaCallback *SI) {
2085   assert(!hasPendingError() && "parseStatement started with pending error");
2086   // Eat initial spaces and comments.
2087   while (Lexer.is(AsmToken::Space))
2088     Lex();
2089   if (Lexer.is(AsmToken::EndOfStatement)) {
2090     // If this is a line comment we can drop it safely.
2091     if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2092         getTok().getString().front() == '\n')
2093       Out.addBlankLine();
2094     Lex();
2095     return false;
2096   }
2097 
2098   // If preceded by an expansion operator, first expand all text macros and
2099   // macro functions.
2100   if (getTok().is(AsmToken::Percent)) {
2101     SMLoc ExpansionLoc = getTok().getLoc();
2102     if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2103       return true;
2104   }
2105 
2106   // Statements always start with an identifier, unless we're dealing with a
2107   // processor directive (.386, .686, etc.) that lexes as a real.
2108   AsmToken ID = getTok();
2109   SMLoc IDLoc = ID.getLoc();
2110   StringRef IDVal;
2111   if (Lexer.is(AsmToken::HashDirective))
2112     return parseCppHashLineFilenameComment(IDLoc);
2113   if (Lexer.is(AsmToken::Dot)) {
2114     // Treat '.' as a valid identifier in this context.
2115     Lex();
2116     IDVal = ".";
2117   } else if (Lexer.is(AsmToken::Real)) {
2118     // Treat ".<number>" as a valid identifier in this context.
2119     IDVal = getTok().getString();
2120     Lex(); // always eat a token
2121     if (!IDVal.starts_with("."))
2122       return Error(IDLoc, "unexpected token at start of statement");
2123   } else if (parseIdentifier(IDVal, StartOfStatement)) {
2124     if (!TheCondState.Ignore) {
2125       Lex(); // always eat a token
2126       return Error(IDLoc, "unexpected token at start of statement");
2127     }
2128     IDVal = "";
2129   }
2130 
2131   // Handle conditional assembly here before checking for skipping.  We
2132   // have to do this so that .endif isn't skipped in a ".if 0" block for
2133   // example.
2134   StringMap<DirectiveKind>::const_iterator DirKindIt =
2135       DirectiveKindMap.find(IDVal.lower());
2136   DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2137                               ? DK_NO_DIRECTIVE
2138                               : DirKindIt->getValue();
2139   switch (DirKind) {
2140   default:
2141     break;
2142   case DK_IF:
2143   case DK_IFE:
2144     return parseDirectiveIf(IDLoc, DirKind);
2145   case DK_IFB:
2146     return parseDirectiveIfb(IDLoc, true);
2147   case DK_IFNB:
2148     return parseDirectiveIfb(IDLoc, false);
2149   case DK_IFDEF:
2150     return parseDirectiveIfdef(IDLoc, true);
2151   case DK_IFNDEF:
2152     return parseDirectiveIfdef(IDLoc, false);
2153   case DK_IFDIF:
2154     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2155                                /*CaseInsensitive=*/false);
2156   case DK_IFDIFI:
2157     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2158                                /*CaseInsensitive=*/true);
2159   case DK_IFIDN:
2160     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2161                                /*CaseInsensitive=*/false);
2162   case DK_IFIDNI:
2163     return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2164                                /*CaseInsensitive=*/true);
2165   case DK_ELSEIF:
2166   case DK_ELSEIFE:
2167     return parseDirectiveElseIf(IDLoc, DirKind);
2168   case DK_ELSEIFB:
2169     return parseDirectiveElseIfb(IDLoc, true);
2170   case DK_ELSEIFNB:
2171     return parseDirectiveElseIfb(IDLoc, false);
2172   case DK_ELSEIFDEF:
2173     return parseDirectiveElseIfdef(IDLoc, true);
2174   case DK_ELSEIFNDEF:
2175     return parseDirectiveElseIfdef(IDLoc, false);
2176   case DK_ELSEIFDIF:
2177     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2178                                    /*CaseInsensitive=*/false);
2179   case DK_ELSEIFDIFI:
2180     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2181                                    /*CaseInsensitive=*/true);
2182   case DK_ELSEIFIDN:
2183     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2184                                    /*CaseInsensitive=*/false);
2185   case DK_ELSEIFIDNI:
2186     return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2187                                    /*CaseInsensitive=*/true);
2188   case DK_ELSE:
2189     return parseDirectiveElse(IDLoc);
2190   case DK_ENDIF:
2191     return parseDirectiveEndIf(IDLoc);
2192   }
2193 
2194   // Ignore the statement if in the middle of inactive conditional
2195   // (e.g. ".if 0").
2196   if (TheCondState.Ignore) {
2197     eatToEndOfStatement();
2198     return false;
2199   }
2200 
2201   // FIXME: Recurse on local labels?
2202 
2203   // Check for a label.
2204   //   ::= identifier ':'
2205   //   ::= number ':'
2206   if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2207     if (checkForValidSection())
2208       return true;
2209 
2210     // identifier ':'   -> Label.
2211     Lex();
2212 
2213     // Diagnose attempt to use '.' as a label.
2214     if (IDVal == ".")
2215       return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2216 
2217     // Diagnose attempt to use a variable as a label.
2218     //
2219     // FIXME: Diagnostics. Note the location of the definition as a label.
2220     // FIXME: This doesn't diagnose assignment to a symbol which has been
2221     // implicitly marked as external.
2222     MCSymbol *Sym;
2223     if (ParsingMSInlineAsm && SI) {
2224       StringRef RewrittenLabel =
2225           SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2226       assert(!RewrittenLabel.empty() &&
2227              "We should have an internal name here.");
2228       Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2229                                      RewrittenLabel);
2230       IDVal = RewrittenLabel;
2231     }
2232     // Handle directional local labels
2233     if (IDVal == "@@") {
2234       Sym = Ctx.createDirectionalLocalSymbol(0);
2235     } else {
2236       Sym = getContext().getOrCreateSymbol(IDVal);
2237     }
2238 
2239     // End of Labels should be treated as end of line for lexing
2240     // purposes but that information is not available to the Lexer who
2241     // does not understand Labels. This may cause us to see a Hash
2242     // here instead of a preprocessor line comment.
2243     if (getTok().is(AsmToken::Hash)) {
2244       std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2245       Lexer.Lex();
2246       Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2247     }
2248 
2249     // Consume any end of statement token, if present, to avoid spurious
2250     // addBlankLine calls().
2251     if (getTok().is(AsmToken::EndOfStatement)) {
2252       Lex();
2253     }
2254 
2255     getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2256 
2257     // Emit the label.
2258     if (!getTargetParser().isParsingMSInlineAsm())
2259       Out.emitLabel(Sym, IDLoc);
2260 
2261     // If we are generating dwarf for assembly source files then gather the
2262     // info to make a dwarf label entry for this label if needed.
2263     if (enabledGenDwarfForAssembly())
2264       MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2265                                  IDLoc);
2266 
2267     getTargetParser().onLabelParsed(Sym);
2268 
2269     return false;
2270   }
2271 
2272   // If macros are enabled, check to see if this is a macro instantiation.
2273   if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2274     return handleMacroEntry(M, IDLoc);
2275   }
2276 
2277   // Otherwise, we have a normal instruction or directive.
2278 
2279   if (DirKind != DK_NO_DIRECTIVE) {
2280     // There are several entities interested in parsing directives:
2281     //
2282     // 1. Asm parser extensions. For example, platform-specific parsers
2283     //    (like the ELF parser) register themselves as extensions.
2284     // 2. The target-specific assembly parser. Some directives are target
2285     //    specific or may potentially behave differently on certain targets.
2286     // 3. The generic directive parser implemented by this class. These are
2287     //    all the directives that behave in a target and platform independent
2288     //    manner, or at least have a default behavior that's shared between
2289     //    all targets and platforms.
2290 
2291     getTargetParser().flushPendingInstructions(getStreamer());
2292 
2293     // Special-case handling of structure-end directives at higher priority,
2294     // since ENDS is overloaded as a segment-end directive.
2295     if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2296         getTok().is(AsmToken::EndOfStatement)) {
2297       return parseDirectiveNestedEnds();
2298     }
2299 
2300     // First, check the extension directive map to see if any extension has
2301     // registered itself to parse this directive.
2302     std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2303         ExtensionDirectiveMap.lookup(IDVal.lower());
2304     if (Handler.first)
2305       return (*Handler.second)(Handler.first, IDVal, IDLoc);
2306 
2307     // Next, let the target-specific assembly parser try.
2308     if (ID.isNot(AsmToken::Identifier))
2309       return false;
2310 
2311     ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2312     assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2313            "Should only return Failure iff there was an error");
2314     if (TPDirectiveReturn.isFailure())
2315       return true;
2316     if (TPDirectiveReturn.isSuccess())
2317       return false;
2318 
2319     // Finally, if no one else is interested in this directive, it must be
2320     // generic and familiar to this class.
2321     switch (DirKind) {
2322     default:
2323       break;
2324     case DK_ASCII:
2325       return parseDirectiveAscii(IDVal, false);
2326     case DK_ASCIZ:
2327     case DK_STRING:
2328       return parseDirectiveAscii(IDVal, true);
2329     case DK_BYTE:
2330     case DK_SBYTE:
2331     case DK_DB:
2332       return parseDirectiveValue(IDVal, 1);
2333     case DK_WORD:
2334     case DK_SWORD:
2335     case DK_DW:
2336       return parseDirectiveValue(IDVal, 2);
2337     case DK_DWORD:
2338     case DK_SDWORD:
2339     case DK_DD:
2340       return parseDirectiveValue(IDVal, 4);
2341     case DK_FWORD:
2342     case DK_DF:
2343       return parseDirectiveValue(IDVal, 6);
2344     case DK_QWORD:
2345     case DK_SQWORD:
2346     case DK_DQ:
2347       return parseDirectiveValue(IDVal, 8);
2348     case DK_REAL4:
2349       return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2350     case DK_REAL8:
2351       return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2352     case DK_REAL10:
2353       return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2354     case DK_STRUCT:
2355     case DK_UNION:
2356       return parseDirectiveNestedStruct(IDVal, DirKind);
2357     case DK_ENDS:
2358       return parseDirectiveNestedEnds();
2359     case DK_ALIGN:
2360       return parseDirectiveAlign();
2361     case DK_EVEN:
2362       return parseDirectiveEven();
2363     case DK_ORG:
2364       return parseDirectiveOrg();
2365     case DK_EXTERN:
2366       return parseDirectiveExtern();
2367     case DK_PUBLIC:
2368       return parseDirectiveSymbolAttribute(MCSA_Global);
2369     case DK_COMM:
2370       return parseDirectiveComm(/*IsLocal=*/false);
2371     case DK_COMMENT:
2372       return parseDirectiveComment(IDLoc);
2373     case DK_INCLUDE:
2374       return parseDirectiveInclude();
2375     case DK_REPEAT:
2376       return parseDirectiveRepeat(IDLoc, IDVal);
2377     case DK_WHILE:
2378       return parseDirectiveWhile(IDLoc);
2379     case DK_FOR:
2380       return parseDirectiveFor(IDLoc, IDVal);
2381     case DK_FORC:
2382       return parseDirectiveForc(IDLoc, IDVal);
2383     case DK_FILE:
2384       return parseDirectiveFile(IDLoc);
2385     case DK_LINE:
2386       return parseDirectiveLine();
2387     case DK_LOC:
2388       return parseDirectiveLoc();
2389     case DK_STABS:
2390       return parseDirectiveStabs();
2391     case DK_CV_FILE:
2392       return parseDirectiveCVFile();
2393     case DK_CV_FUNC_ID:
2394       return parseDirectiveCVFuncId();
2395     case DK_CV_INLINE_SITE_ID:
2396       return parseDirectiveCVInlineSiteId();
2397     case DK_CV_LOC:
2398       return parseDirectiveCVLoc();
2399     case DK_CV_LINETABLE:
2400       return parseDirectiveCVLinetable();
2401     case DK_CV_INLINE_LINETABLE:
2402       return parseDirectiveCVInlineLinetable();
2403     case DK_CV_DEF_RANGE:
2404       return parseDirectiveCVDefRange();
2405     case DK_CV_STRING:
2406       return parseDirectiveCVString();
2407     case DK_CV_STRINGTABLE:
2408       return parseDirectiveCVStringTable();
2409     case DK_CV_FILECHECKSUMS:
2410       return parseDirectiveCVFileChecksums();
2411     case DK_CV_FILECHECKSUM_OFFSET:
2412       return parseDirectiveCVFileChecksumOffset();
2413     case DK_CV_FPO_DATA:
2414       return parseDirectiveCVFPOData();
2415     case DK_CFI_SECTIONS:
2416       return parseDirectiveCFISections();
2417     case DK_CFI_STARTPROC:
2418       return parseDirectiveCFIStartProc();
2419     case DK_CFI_ENDPROC:
2420       return parseDirectiveCFIEndProc();
2421     case DK_CFI_DEF_CFA:
2422       return parseDirectiveCFIDefCfa(IDLoc);
2423     case DK_CFI_DEF_CFA_OFFSET:
2424       return parseDirectiveCFIDefCfaOffset(IDLoc);
2425     case DK_CFI_ADJUST_CFA_OFFSET:
2426       return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2427     case DK_CFI_DEF_CFA_REGISTER:
2428       return parseDirectiveCFIDefCfaRegister(IDLoc);
2429     case DK_CFI_OFFSET:
2430       return parseDirectiveCFIOffset(IDLoc);
2431     case DK_CFI_REL_OFFSET:
2432       return parseDirectiveCFIRelOffset(IDLoc);
2433     case DK_CFI_PERSONALITY:
2434       return parseDirectiveCFIPersonalityOrLsda(true);
2435     case DK_CFI_LSDA:
2436       return parseDirectiveCFIPersonalityOrLsda(false);
2437     case DK_CFI_REMEMBER_STATE:
2438       return parseDirectiveCFIRememberState(IDLoc);
2439     case DK_CFI_RESTORE_STATE:
2440       return parseDirectiveCFIRestoreState(IDLoc);
2441     case DK_CFI_SAME_VALUE:
2442       return parseDirectiveCFISameValue(IDLoc);
2443     case DK_CFI_RESTORE:
2444       return parseDirectiveCFIRestore(IDLoc);
2445     case DK_CFI_ESCAPE:
2446       return parseDirectiveCFIEscape(IDLoc);
2447     case DK_CFI_RETURN_COLUMN:
2448       return parseDirectiveCFIReturnColumn(IDLoc);
2449     case DK_CFI_SIGNAL_FRAME:
2450       return parseDirectiveCFISignalFrame();
2451     case DK_CFI_UNDEFINED:
2452       return parseDirectiveCFIUndefined(IDLoc);
2453     case DK_CFI_REGISTER:
2454       return parseDirectiveCFIRegister(IDLoc);
2455     case DK_CFI_WINDOW_SAVE:
2456       return parseDirectiveCFIWindowSave(IDLoc);
2457     case DK_EXITM:
2458       Info.ExitValue = "";
2459       return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2460     case DK_ENDM:
2461       Info.ExitValue = "";
2462       return parseDirectiveEndMacro(IDVal);
2463     case DK_PURGE:
2464       return parseDirectivePurgeMacro(IDLoc);
2465     case DK_END:
2466       return parseDirectiveEnd(IDLoc);
2467     case DK_ERR:
2468       return parseDirectiveError(IDLoc);
2469     case DK_ERRB:
2470       return parseDirectiveErrorIfb(IDLoc, true);
2471     case DK_ERRNB:
2472       return parseDirectiveErrorIfb(IDLoc, false);
2473     case DK_ERRDEF:
2474       return parseDirectiveErrorIfdef(IDLoc, true);
2475     case DK_ERRNDEF:
2476       return parseDirectiveErrorIfdef(IDLoc, false);
2477     case DK_ERRDIF:
2478       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2479                                       /*CaseInsensitive=*/false);
2480     case DK_ERRDIFI:
2481       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2482                                       /*CaseInsensitive=*/true);
2483     case DK_ERRIDN:
2484       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2485                                       /*CaseInsensitive=*/false);
2486     case DK_ERRIDNI:
2487       return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2488                                       /*CaseInsensitive=*/true);
2489     case DK_ERRE:
2490       return parseDirectiveErrorIfe(IDLoc, true);
2491     case DK_ERRNZ:
2492       return parseDirectiveErrorIfe(IDLoc, false);
2493     case DK_RADIX:
2494       return parseDirectiveRadix(IDLoc);
2495     case DK_ECHO:
2496       return parseDirectiveEcho(IDLoc);
2497     }
2498 
2499     return Error(IDLoc, "unknown directive");
2500   }
2501 
2502   // We also check if this is allocating memory with user-defined type.
2503   auto IDIt = Structs.find(IDVal.lower());
2504   if (IDIt != Structs.end())
2505     return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2506                                      IDLoc);
2507 
2508   // Non-conditional Microsoft directives sometimes follow their first argument.
2509   const AsmToken nextTok = getTok();
2510   const StringRef nextVal = nextTok.getString();
2511   const SMLoc nextLoc = nextTok.getLoc();
2512 
2513   const AsmToken afterNextTok = peekTok();
2514 
2515   // There are several entities interested in parsing infix directives:
2516   //
2517   // 1. Asm parser extensions. For example, platform-specific parsers
2518   //    (like the ELF parser) register themselves as extensions.
2519   // 2. The generic directive parser implemented by this class. These are
2520   //    all the directives that behave in a target and platform independent
2521   //    manner, or at least have a default behavior that's shared between
2522   //    all targets and platforms.
2523 
2524   getTargetParser().flushPendingInstructions(getStreamer());
2525 
2526   // Special-case handling of structure-end directives at higher priority, since
2527   // ENDS is overloaded as a segment-end directive.
2528   if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2529     Lex();
2530     return parseDirectiveEnds(IDVal, IDLoc);
2531   }
2532 
2533   // First, check the extension directive map to see if any extension has
2534   // registered itself to parse this directive.
2535   std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2536       ExtensionDirectiveMap.lookup(nextVal.lower());
2537   if (Handler.first) {
2538     Lex();
2539     Lexer.UnLex(ID);
2540     return (*Handler.second)(Handler.first, nextVal, nextLoc);
2541   }
2542 
2543   // If no one else is interested in this directive, it must be
2544   // generic and familiar to this class.
2545   DirKindIt = DirectiveKindMap.find(nextVal.lower());
2546   DirKind = (DirKindIt == DirectiveKindMap.end())
2547                 ? DK_NO_DIRECTIVE
2548                 : DirKindIt->getValue();
2549   switch (DirKind) {
2550   default:
2551     break;
2552   case DK_ASSIGN:
2553   case DK_EQU:
2554   case DK_TEXTEQU:
2555     Lex();
2556     return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2557   case DK_BYTE:
2558     if (afterNextTok.is(AsmToken::Identifier) &&
2559         afterNextTok.getString().equals_insensitive("ptr")) {
2560       // Size directive; part of an instruction.
2561       break;
2562     }
2563     [[fallthrough]];
2564   case DK_SBYTE:
2565   case DK_DB:
2566     Lex();
2567     return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2568   case DK_WORD:
2569     if (afterNextTok.is(AsmToken::Identifier) &&
2570         afterNextTok.getString().equals_insensitive("ptr")) {
2571       // Size directive; part of an instruction.
2572       break;
2573     }
2574     [[fallthrough]];
2575   case DK_SWORD:
2576   case DK_DW:
2577     Lex();
2578     return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2579   case DK_DWORD:
2580     if (afterNextTok.is(AsmToken::Identifier) &&
2581         afterNextTok.getString().equals_insensitive("ptr")) {
2582       // Size directive; part of an instruction.
2583       break;
2584     }
2585     [[fallthrough]];
2586   case DK_SDWORD:
2587   case DK_DD:
2588     Lex();
2589     return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2590   case DK_FWORD:
2591     if (afterNextTok.is(AsmToken::Identifier) &&
2592         afterNextTok.getString().equals_insensitive("ptr")) {
2593       // Size directive; part of an instruction.
2594       break;
2595     }
2596     [[fallthrough]];
2597   case DK_DF:
2598     Lex();
2599     return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2600   case DK_QWORD:
2601     if (afterNextTok.is(AsmToken::Identifier) &&
2602         afterNextTok.getString().equals_insensitive("ptr")) {
2603       // Size directive; part of an instruction.
2604       break;
2605     }
2606     [[fallthrough]];
2607   case DK_SQWORD:
2608   case DK_DQ:
2609     Lex();
2610     return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2611   case DK_REAL4:
2612     Lex();
2613     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2614                                         IDVal, IDLoc);
2615   case DK_REAL8:
2616     Lex();
2617     return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2618                                         IDVal, IDLoc);
2619   case DK_REAL10:
2620     Lex();
2621     return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2622                                         10, IDVal, IDLoc);
2623   case DK_STRUCT:
2624   case DK_UNION:
2625     Lex();
2626     return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2627   case DK_ENDS:
2628     Lex();
2629     return parseDirectiveEnds(IDVal, IDLoc);
2630   case DK_MACRO:
2631     Lex();
2632     return parseDirectiveMacro(IDVal, IDLoc);
2633   }
2634 
2635   // Finally, we check if this is allocating a variable with user-defined type.
2636   auto NextIt = Structs.find(nextVal.lower());
2637   if (NextIt != Structs.end()) {
2638     Lex();
2639     return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2640                                           nextVal, nextLoc, IDVal);
2641   }
2642 
2643   // __asm _emit or __asm __emit
2644   if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2645                              IDVal == "_EMIT" || IDVal == "__EMIT"))
2646     return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2647 
2648   // __asm align
2649   if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2650     return parseDirectiveMSAlign(IDLoc, Info);
2651 
2652   if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2653     Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2654   if (checkForValidSection())
2655     return true;
2656 
2657   // Canonicalize the opcode to lower case.
2658   std::string OpcodeStr = IDVal.lower();
2659   ParseInstructionInfo IInfo(Info.AsmRewrites);
2660   bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2661                                                           Info.ParsedOperands);
2662   Info.ParseError = ParseHadError;
2663 
2664   // Dump the parsed representation, if requested.
2665   if (getShowParsedOperands()) {
2666     SmallString<256> Str;
2667     raw_svector_ostream OS(Str);
2668     OS << "parsed instruction: [";
2669     for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2670       if (i != 0)
2671         OS << ", ";
2672       Info.ParsedOperands[i]->print(OS);
2673     }
2674     OS << "]";
2675 
2676     printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2677   }
2678 
2679   // Fail even if ParseInstruction erroneously returns false.
2680   if (hasPendingError() || ParseHadError)
2681     return true;
2682 
2683   // If we are generating dwarf for the current section then generate a .loc
2684   // directive for the instruction.
2685   if (!ParseHadError && enabledGenDwarfForAssembly() &&
2686       getContext().getGenDwarfSectionSyms().count(
2687           getStreamer().getCurrentSectionOnly())) {
2688     unsigned Line;
2689     if (ActiveMacros.empty())
2690       Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2691     else
2692       Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2693                                    ActiveMacros.front()->ExitBuffer);
2694 
2695     // If we previously parsed a cpp hash file line comment then make sure the
2696     // current Dwarf File is for the CppHashFilename if not then emit the
2697     // Dwarf File table for it and adjust the line number for the .loc.
2698     if (!CppHashInfo.Filename.empty()) {
2699       unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2700           0, StringRef(), CppHashInfo.Filename);
2701       getContext().setGenDwarfFileNumber(FileNumber);
2702 
2703       unsigned CppHashLocLineNo =
2704         SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2705       Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2706     }
2707 
2708     getStreamer().emitDwarfLocDirective(
2709         getContext().getGenDwarfFileNumber(), Line, 0,
2710         DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2711         StringRef());
2712   }
2713 
2714   // If parsing succeeded, match the instruction.
2715   if (!ParseHadError) {
2716     uint64_t ErrorInfo;
2717     if (getTargetParser().MatchAndEmitInstruction(
2718             IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2719             getTargetParser().isParsingMSInlineAsm()))
2720       return true;
2721   }
2722   return false;
2723 }
2724 
2725 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2726 bool MasmParser::parseCurlyBlockScope(
2727     SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2728   // Identify curly brace marking block start/end.
2729   if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2730     return false;
2731 
2732   SMLoc StartLoc = Lexer.getLoc();
2733   Lex(); // Eat the brace.
2734   if (Lexer.is(AsmToken::EndOfStatement))
2735     Lex(); // Eat EndOfStatement following the brace.
2736 
2737   // Erase the block start/end brace from the output asm string.
2738   AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2739                                                   StartLoc.getPointer());
2740   return true;
2741 }
2742 
2743 /// parseCppHashLineFilenameComment as this:
2744 ///   ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2745 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2746   Lex(); // Eat the hash token.
2747   // Lexer only ever emits HashDirective if it fully formed if it's
2748   // done the checking already so this is an internal error.
2749   assert(getTok().is(AsmToken::Integer) &&
2750          "Lexing Cpp line comment: Expected Integer");
2751   int64_t LineNumber = getTok().getIntVal();
2752   Lex();
2753   assert(getTok().is(AsmToken::String) &&
2754          "Lexing Cpp line comment: Expected String");
2755   StringRef Filename = getTok().getString();
2756   Lex();
2757 
2758   // Get rid of the enclosing quotes.
2759   Filename = Filename.substr(1, Filename.size() - 2);
2760 
2761   // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2762   // and possibly DWARF file info.
2763   CppHashInfo.Loc = L;
2764   CppHashInfo.Filename = Filename;
2765   CppHashInfo.LineNumber = LineNumber;
2766   CppHashInfo.Buf = CurBuffer;
2767   if (FirstCppHashFilename.empty())
2768     FirstCppHashFilename = Filename;
2769   return false;
2770 }
2771 
2772 /// will use the last parsed cpp hash line filename comment
2773 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2774 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2775   const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2776   raw_ostream &OS = errs();
2777 
2778   const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2779   SMLoc DiagLoc = Diag.getLoc();
2780   unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2781   unsigned CppHashBuf =
2782       Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2783 
2784   // Like SourceMgr::printMessage() we need to print the include stack if any
2785   // before printing the message.
2786   unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2787   if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2788       DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2789     SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2790     DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2791   }
2792 
2793   // If we have not parsed a cpp hash line filename comment or the source
2794   // manager changed or buffer changed (like in a nested include) then just
2795   // print the normal diagnostic using its Filename and LineNo.
2796   if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2797       DiagBuf != CppHashBuf) {
2798     if (Parser->SavedDiagHandler)
2799       Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2800     else
2801       Diag.print(nullptr, OS);
2802     return;
2803   }
2804 
2805   // Use the CppHashFilename and calculate a line number based on the
2806   // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2807   // for the diagnostic.
2808   const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2809 
2810   int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2811   int CppHashLocLineNo =
2812       Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2813   int LineNo =
2814       Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2815 
2816   SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2817                        Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2818                        Diag.getLineContents(), Diag.getRanges());
2819 
2820   if (Parser->SavedDiagHandler)
2821     Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2822   else
2823     NewDiag.print(nullptr, OS);
2824 }
2825 
2826 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2827 // not accept '.'.
isMacroParameterChar(char C)2828 static bool isMacroParameterChar(char C) {
2829   return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2830 }
2831 
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2832 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2833                              ArrayRef<MCAsmMacroParameter> Parameters,
2834                              ArrayRef<MCAsmMacroArgument> A,
2835                              const std::vector<std::string> &Locals, SMLoc L) {
2836   unsigned NParameters = Parameters.size();
2837   if (NParameters != A.size())
2838     return Error(L, "Wrong number of arguments");
2839   StringMap<std::string> LocalSymbols;
2840   std::string Name;
2841   Name.reserve(6);
2842   for (StringRef Local : Locals) {
2843     raw_string_ostream LocalName(Name);
2844     LocalName << "??"
2845               << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2846     LocalSymbols.insert({Local, Name});
2847     Name.clear();
2848   }
2849 
2850   std::optional<char> CurrentQuote;
2851   while (!Body.empty()) {
2852     // Scan for the next substitution.
2853     std::size_t End = Body.size(), Pos = 0;
2854     std::size_t IdentifierPos = End;
2855     for (; Pos != End; ++Pos) {
2856       // Find the next possible macro parameter, including preceding a '&'
2857       // inside quotes.
2858       if (Body[Pos] == '&')
2859         break;
2860       if (isMacroParameterChar(Body[Pos])) {
2861         if (!CurrentQuote)
2862           break;
2863         if (IdentifierPos == End)
2864           IdentifierPos = Pos;
2865       } else {
2866         IdentifierPos = End;
2867       }
2868 
2869       // Track quotation status
2870       if (!CurrentQuote) {
2871         if (Body[Pos] == '\'' || Body[Pos] == '"')
2872           CurrentQuote = Body[Pos];
2873       } else if (Body[Pos] == CurrentQuote) {
2874         if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2875           // Escaped quote, and quotes aren't identifier chars; skip
2876           ++Pos;
2877           continue;
2878         } else {
2879           CurrentQuote.reset();
2880         }
2881       }
2882     }
2883     if (IdentifierPos != End) {
2884       // We've recognized an identifier before an apostrophe inside quotes;
2885       // check once to see if we can expand it.
2886       Pos = IdentifierPos;
2887       IdentifierPos = End;
2888     }
2889 
2890     // Add the prefix.
2891     OS << Body.slice(0, Pos);
2892 
2893     // Check if we reached the end.
2894     if (Pos == End)
2895       break;
2896 
2897     unsigned I = Pos;
2898     bool InitialAmpersand = (Body[I] == '&');
2899     if (InitialAmpersand) {
2900       ++I;
2901       ++Pos;
2902     }
2903     while (I < End && isMacroParameterChar(Body[I]))
2904       ++I;
2905 
2906     const char *Begin = Body.data() + Pos;
2907     StringRef Argument(Begin, I - Pos);
2908     const std::string ArgumentLower = Argument.lower();
2909     unsigned Index = 0;
2910 
2911     for (; Index < NParameters; ++Index)
2912       if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2913         break;
2914 
2915     if (Index == NParameters) {
2916       if (InitialAmpersand)
2917         OS << '&';
2918       auto it = LocalSymbols.find(ArgumentLower);
2919       if (it != LocalSymbols.end())
2920         OS << it->second;
2921       else
2922         OS << Argument;
2923       Pos = I;
2924     } else {
2925       for (const AsmToken &Token : A[Index]) {
2926         // In MASM, you can write '%expr'.
2927         // The prefix '%' evaluates the expression 'expr'
2928         // and uses the result as a string (e.g. replace %(1+2) with the
2929         // string "3").
2930         // Here, we identify the integer token which is the result of the
2931         // absolute expression evaluation and replace it with its string
2932         // representation.
2933         if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2934           // Emit an integer value to the buffer.
2935           OS << Token.getIntVal();
2936         else
2937           OS << Token.getString();
2938       }
2939 
2940       Pos += Argument.size();
2941       if (Pos < End && Body[Pos] == '&') {
2942         ++Pos;
2943       }
2944     }
2945     // Update the scan point.
2946     Body = Body.substr(Pos);
2947   }
2948 
2949   return false;
2950 }
2951 
isOperator(AsmToken::TokenKind kind)2952 static bool isOperator(AsmToken::TokenKind kind) {
2953   switch (kind) {
2954   default:
2955     return false;
2956   case AsmToken::Plus:
2957   case AsmToken::Minus:
2958   case AsmToken::Tilde:
2959   case AsmToken::Slash:
2960   case AsmToken::Star:
2961   case AsmToken::Dot:
2962   case AsmToken::Equal:
2963   case AsmToken::EqualEqual:
2964   case AsmToken::Pipe:
2965   case AsmToken::PipePipe:
2966   case AsmToken::Caret:
2967   case AsmToken::Amp:
2968   case AsmToken::AmpAmp:
2969   case AsmToken::Exclaim:
2970   case AsmToken::ExclaimEqual:
2971   case AsmToken::Less:
2972   case AsmToken::LessEqual:
2973   case AsmToken::LessLess:
2974   case AsmToken::LessGreater:
2975   case AsmToken::Greater:
2976   case AsmToken::GreaterEqual:
2977   case AsmToken::GreaterGreater:
2978     return true;
2979   }
2980 }
2981 
2982 namespace {
2983 
2984 class AsmLexerSkipSpaceRAII {
2985 public:
AsmLexerSkipSpaceRAII(AsmLexer & Lexer,bool SkipSpace)2986   AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2987     Lexer.setSkipSpace(SkipSpace);
2988   }
2989 
~AsmLexerSkipSpaceRAII()2990   ~AsmLexerSkipSpaceRAII() {
2991     Lexer.setSkipSpace(true);
2992   }
2993 
2994 private:
2995   AsmLexer &Lexer;
2996 };
2997 
2998 } // end anonymous namespace
2999 
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)3000 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3001                                     MCAsmMacroArgument &MA,
3002                                     AsmToken::TokenKind EndTok) {
3003   if (MP && MP->Vararg) {
3004     if (Lexer.isNot(EndTok)) {
3005       SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3006       for (StringRef S : Str) {
3007         MA.emplace_back(AsmToken::String, S);
3008       }
3009     }
3010     return false;
3011   }
3012 
3013   SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3014   if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3015     const char *StrChar = StrLoc.getPointer() + 1;
3016     const char *EndChar = EndLoc.getPointer() - 1;
3017     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3018     /// Eat from '<' to '>'.
3019     Lex();
3020     MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3021     return false;
3022   }
3023 
3024   unsigned ParenLevel = 0;
3025 
3026   // Darwin doesn't use spaces to delmit arguments.
3027   AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3028 
3029   bool SpaceEaten;
3030 
3031   while (true) {
3032     SpaceEaten = false;
3033     if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3034       return TokError("unexpected token");
3035 
3036     if (ParenLevel == 0) {
3037       if (Lexer.is(AsmToken::Comma))
3038         break;
3039 
3040       if (Lexer.is(AsmToken::Space)) {
3041         SpaceEaten = true;
3042         Lex(); // Eat spaces.
3043       }
3044 
3045       // Spaces can delimit parameters, but could also be part an expression.
3046       // If the token after a space is an operator, add the token and the next
3047       // one into this argument
3048       if (!IsDarwin) {
3049         if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3050           MA.push_back(getTok());
3051           Lex();
3052 
3053           // Whitespace after an operator can be ignored.
3054           if (Lexer.is(AsmToken::Space))
3055             Lex();
3056 
3057           continue;
3058         }
3059       }
3060       if (SpaceEaten)
3061         break;
3062     }
3063 
3064     // handleMacroEntry relies on not advancing the lexer here
3065     // to be able to fill in the remaining default parameter values
3066     if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3067       break;
3068 
3069     // Adjust the current parentheses level.
3070     if (Lexer.is(AsmToken::LParen))
3071       ++ParenLevel;
3072     else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3073       --ParenLevel;
3074 
3075     // Append the token to the current argument list.
3076     MA.push_back(getTok());
3077     Lex();
3078   }
3079 
3080   if (ParenLevel != 0)
3081     return TokError("unbalanced parentheses in argument");
3082 
3083   if (MA.empty() && MP) {
3084     if (MP->Required) {
3085       return TokError("missing value for required parameter '" + MP->Name +
3086                       "'");
3087     } else {
3088       MA = MP->Value;
3089     }
3090   }
3091   return false;
3092 }
3093 
3094 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)3095 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3096                                      MCAsmMacroArguments &A,
3097                                      AsmToken::TokenKind EndTok) {
3098   const unsigned NParameters = M ? M->Parameters.size() : 0;
3099   bool NamedParametersFound = false;
3100   SmallVector<SMLoc, 4> FALocs;
3101 
3102   A.resize(NParameters);
3103   FALocs.resize(NParameters);
3104 
3105   // Parse two kinds of macro invocations:
3106   // - macros defined without any parameters accept an arbitrary number of them
3107   // - macros defined with parameters accept at most that many of them
3108   for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3109        ++Parameter) {
3110     SMLoc IDLoc = Lexer.getLoc();
3111     MCAsmMacroParameter FA;
3112 
3113     if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3114       if (parseIdentifier(FA.Name))
3115         return Error(IDLoc, "invalid argument identifier for formal argument");
3116 
3117       if (Lexer.isNot(AsmToken::Equal))
3118         return TokError("expected '=' after formal parameter identifier");
3119 
3120       Lex();
3121 
3122       NamedParametersFound = true;
3123     }
3124 
3125     if (NamedParametersFound && FA.Name.empty())
3126       return Error(IDLoc, "cannot mix positional and keyword arguments");
3127 
3128     unsigned PI = Parameter;
3129     if (!FA.Name.empty()) {
3130       assert(M && "expected macro to be defined");
3131       unsigned FAI = 0;
3132       for (FAI = 0; FAI < NParameters; ++FAI)
3133         if (M->Parameters[FAI].Name == FA.Name)
3134           break;
3135 
3136       if (FAI >= NParameters) {
3137         return Error(IDLoc, "parameter named '" + FA.Name +
3138                                 "' does not exist for macro '" + M->Name + "'");
3139       }
3140       PI = FAI;
3141     }
3142     const MCAsmMacroParameter *MP = nullptr;
3143     if (M && PI < NParameters)
3144       MP = &M->Parameters[PI];
3145 
3146     SMLoc StrLoc = Lexer.getLoc();
3147     SMLoc EndLoc;
3148     if (Lexer.is(AsmToken::Percent)) {
3149       const MCExpr *AbsoluteExp;
3150       int64_t Value;
3151       /// Eat '%'.
3152       Lex();
3153       if (parseExpression(AbsoluteExp, EndLoc))
3154         return false;
3155       if (!AbsoluteExp->evaluateAsAbsolute(Value,
3156                                            getStreamer().getAssemblerPtr()))
3157         return Error(StrLoc, "expected absolute expression");
3158       const char *StrChar = StrLoc.getPointer();
3159       const char *EndChar = EndLoc.getPointer();
3160       AsmToken newToken(AsmToken::Integer,
3161                         StringRef(StrChar, EndChar - StrChar), Value);
3162       FA.Value.push_back(newToken);
3163     } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3164       if (M)
3165         return addErrorSuffix(" in '" + M->Name + "' macro");
3166       else
3167         return true;
3168     }
3169 
3170     if (!FA.Value.empty()) {
3171       if (A.size() <= PI)
3172         A.resize(PI + 1);
3173       A[PI] = FA.Value;
3174 
3175       if (FALocs.size() <= PI)
3176         FALocs.resize(PI + 1);
3177 
3178       FALocs[PI] = Lexer.getLoc();
3179     }
3180 
3181     // At the end of the statement, fill in remaining arguments that have
3182     // default values. If there aren't any, then the next argument is
3183     // required but missing
3184     if (Lexer.is(EndTok)) {
3185       bool Failure = false;
3186       for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3187         if (A[FAI].empty()) {
3188           if (M->Parameters[FAI].Required) {
3189             Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3190                   "missing value for required parameter "
3191                   "'" +
3192                       M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3193             Failure = true;
3194           }
3195 
3196           if (!M->Parameters[FAI].Value.empty())
3197             A[FAI] = M->Parameters[FAI].Value;
3198         }
3199       }
3200       return Failure;
3201     }
3202 
3203     if (Lexer.is(AsmToken::Comma))
3204       Lex();
3205   }
3206 
3207   return TokError("too many positional arguments");
3208 }
3209 
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)3210 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3211                                   AsmToken::TokenKind ArgumentEndTok) {
3212   // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3213   // eliminate this, although we should protect against infinite loops.
3214   unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3215   if (ActiveMacros.size() == MaxNestingDepth) {
3216     std::ostringstream MaxNestingDepthError;
3217     MaxNestingDepthError << "macros cannot be nested more than "
3218                          << MaxNestingDepth << " levels deep."
3219                          << " Use -asm-macro-max-nesting-depth to increase "
3220                             "this limit.";
3221     return TokError(MaxNestingDepthError.str());
3222   }
3223 
3224   MCAsmMacroArguments A;
3225   if (parseMacroArguments(M, A, ArgumentEndTok))
3226     return true;
3227 
3228   // Macro instantiation is lexical, unfortunately. We construct a new buffer
3229   // to hold the macro body with substitutions.
3230   SmallString<256> Buf;
3231   StringRef Body = M->Body;
3232   raw_svector_ostream OS(Buf);
3233 
3234   if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3235     return true;
3236 
3237   // We include the endm in the buffer as our cue to exit the macro
3238   // instantiation.
3239   OS << "endm\n";
3240 
3241   std::unique_ptr<MemoryBuffer> Instantiation =
3242       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3243 
3244   // Create the macro instantiation object and add to the current macro
3245   // instantiation stack.
3246   MacroInstantiation *MI = new MacroInstantiation{
3247       NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3248   ActiveMacros.push_back(MI);
3249 
3250   ++NumOfMacroInstantiations;
3251 
3252   // Jump to the macro instantiation and prime the lexer.
3253   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3254   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3255   EndStatementAtEOFStack.push_back(true);
3256   Lex();
3257 
3258   return false;
3259 }
3260 
handleMacroExit()3261 void MasmParser::handleMacroExit() {
3262   // Jump to the token we should return to, and consume it.
3263   EndStatementAtEOFStack.pop_back();
3264   jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3265             EndStatementAtEOFStack.back());
3266   Lex();
3267 
3268   // Pop the instantiation entry.
3269   delete ActiveMacros.back();
3270   ActiveMacros.pop_back();
3271 }
3272 
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)3273 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3274   if (!M->IsFunction)
3275     return Error(NameLoc, "cannot invoke macro procedure as function");
3276 
3277   if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3278                                        "' requires arguments in parentheses") ||
3279       handleMacroEntry(M, NameLoc, AsmToken::RParen))
3280     return true;
3281 
3282   // Parse all statements in the macro, retrieving the exit value when it ends.
3283   std::string ExitValue;
3284   SmallVector<AsmRewrite, 4> AsmStrRewrites;
3285   while (Lexer.isNot(AsmToken::Eof)) {
3286     ParseStatementInfo Info(&AsmStrRewrites);
3287     bool Parsed = parseStatement(Info, nullptr);
3288 
3289     if (!Parsed && Info.ExitValue) {
3290       ExitValue = std::move(*Info.ExitValue);
3291       break;
3292     }
3293 
3294     // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3295     // for printing ErrMsg via Lex() only if no (presumably better) parser error
3296     // exists.
3297     if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3298       Lex();
3299     }
3300 
3301     // parseStatement returned true so may need to emit an error.
3302     printPendingErrors();
3303 
3304     // Skipping to the next line if needed.
3305     if (Parsed && !getLexer().isAtStartOfStatement())
3306       eatToEndOfStatement();
3307   }
3308 
3309   // Consume the right-parenthesis on the other side of the arguments.
3310   if (parseRParen())
3311     return true;
3312 
3313   // Exit values may require lexing, unfortunately. We construct a new buffer to
3314   // hold the exit value.
3315   std::unique_ptr<MemoryBuffer> MacroValue =
3316       MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3317 
3318   // Jump from this location to the instantiated exit value, and prime the
3319   // lexer.
3320   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3321   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3322                   /*EndStatementAtEOF=*/false);
3323   EndStatementAtEOFStack.push_back(false);
3324   Lex();
3325 
3326   return false;
3327 }
3328 
3329 /// parseIdentifier:
3330 ///   ::= identifier
3331 ///   ::= string
parseIdentifier(StringRef & Res,IdentifierPositionKind Position)3332 bool MasmParser::parseIdentifier(StringRef &Res,
3333                                  IdentifierPositionKind Position) {
3334   // The assembler has relaxed rules for accepting identifiers, in particular we
3335   // allow things like '.globl $foo' and '.def @feat.00', which would normally
3336   // be separate tokens. At this level, we have already lexed so we cannot
3337   // (currently) handle this as a context dependent token, instead we detect
3338   // adjacent tokens and return the combined identifier.
3339   if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3340     SMLoc PrefixLoc = getLexer().getLoc();
3341 
3342     // Consume the prefix character, and check for a following identifier.
3343 
3344     AsmToken nextTok = peekTok(false);
3345 
3346     if (nextTok.isNot(AsmToken::Identifier))
3347       return true;
3348 
3349     // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3350     if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3351       return true;
3352 
3353     // eat $ or @
3354     Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3355     // Construct the joined identifier and consume the token.
3356     Res =
3357         StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3358     Lex(); // Parser Lex to maintain invariants.
3359     return false;
3360   }
3361 
3362   if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3363     return true;
3364 
3365   Res = getTok().getIdentifier();
3366 
3367   // Consume the identifier token - but if parsing certain directives, avoid
3368   // lexical expansion of the next token.
3369   ExpandKind ExpandNextToken = ExpandMacros;
3370   if (Position == StartOfStatement &&
3371       StringSwitch<bool>(Res)
3372           .CaseLower("echo", true)
3373           .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3374           .Default(false)) {
3375     ExpandNextToken = DoNotExpandMacros;
3376   }
3377   Lex(ExpandNextToken);
3378 
3379   return false;
3380 }
3381 
3382 /// parseDirectiveEquate:
3383 ///  ::= name "=" expression
3384 ///    | name "equ" expression    (not redefinable)
3385 ///    | name "equ" text-list
3386 ///    | name "textequ" text-list (redefinability unspecified)
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind,SMLoc NameLoc)3387 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3388                                       DirectiveKind DirKind, SMLoc NameLoc) {
3389   auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3390   if (BuiltinIt != BuiltinSymbolMap.end())
3391     return Error(NameLoc, "cannot redefine a built-in symbol");
3392 
3393   Variable &Var = Variables[Name.lower()];
3394   if (Var.Name.empty()) {
3395     Var.Name = Name;
3396   }
3397 
3398   SMLoc StartLoc = Lexer.getLoc();
3399   if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3400     // "equ" and "textequ" both allow text expressions.
3401     std::string Value;
3402     std::string TextItem;
3403     if (!parseTextItem(TextItem)) {
3404       Value += TextItem;
3405 
3406       // Accept a text-list, not just one text-item.
3407       auto parseItem = [&]() -> bool {
3408         if (parseTextItem(TextItem))
3409           return TokError("expected text item");
3410         Value += TextItem;
3411         return false;
3412       };
3413       if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3414         return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3415 
3416       if (!Var.IsText || Var.TextValue != Value) {
3417         switch (Var.Redefinable) {
3418         case Variable::NOT_REDEFINABLE:
3419           return Error(getTok().getLoc(), "invalid variable redefinition");
3420         case Variable::WARN_ON_REDEFINITION:
3421           if (Warning(NameLoc, "redefining '" + Name +
3422                                    "', already defined on the command line")) {
3423             return true;
3424           }
3425           break;
3426         default:
3427           break;
3428         }
3429       }
3430       Var.IsText = true;
3431       Var.TextValue = Value;
3432       Var.Redefinable = Variable::REDEFINABLE;
3433 
3434       return false;
3435     }
3436   }
3437   if (DirKind == DK_TEXTEQU)
3438     return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3439 
3440   // Parse as expression assignment.
3441   const MCExpr *Expr;
3442   SMLoc EndLoc;
3443   if (parseExpression(Expr, EndLoc))
3444     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3445   StringRef ExprAsString = StringRef(
3446       StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3447 
3448   int64_t Value;
3449   if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3450     if (DirKind == DK_ASSIGN)
3451       return Error(
3452           StartLoc,
3453           "expected absolute expression; not all symbols have known values",
3454           {StartLoc, EndLoc});
3455 
3456     // Not an absolute expression; define as a text replacement.
3457     if (!Var.IsText || Var.TextValue != ExprAsString) {
3458       switch (Var.Redefinable) {
3459       case Variable::NOT_REDEFINABLE:
3460         return Error(getTok().getLoc(), "invalid variable redefinition");
3461       case Variable::WARN_ON_REDEFINITION:
3462         if (Warning(NameLoc, "redefining '" + Name +
3463                                  "', already defined on the command line")) {
3464           return true;
3465         }
3466         break;
3467       default:
3468         break;
3469       }
3470     }
3471 
3472     Var.IsText = true;
3473     Var.TextValue = ExprAsString.str();
3474     Var.Redefinable = Variable::REDEFINABLE;
3475 
3476     return false;
3477   }
3478 
3479   MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3480 
3481   const MCConstantExpr *PrevValue =
3482       Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3483                               Sym->getVariableValue(/*SetUsed=*/false))
3484                         : nullptr;
3485   if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3486     switch (Var.Redefinable) {
3487     case Variable::NOT_REDEFINABLE:
3488       return Error(getTok().getLoc(), "invalid variable redefinition");
3489     case Variable::WARN_ON_REDEFINITION:
3490       if (Warning(NameLoc, "redefining '" + Name +
3491                                "', already defined on the command line")) {
3492         return true;
3493       }
3494       break;
3495     default:
3496       break;
3497     }
3498   }
3499 
3500   Var.IsText = false;
3501   Var.TextValue.clear();
3502   Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3503                                            : Variable::NOT_REDEFINABLE;
3504 
3505   Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3506   Sym->setVariableValue(Expr);
3507   Sym->setExternal(false);
3508 
3509   return false;
3510 }
3511 
parseEscapedString(std::string & Data)3512 bool MasmParser::parseEscapedString(std::string &Data) {
3513   if (check(getTok().isNot(AsmToken::String), "expected string"))
3514     return true;
3515 
3516   Data = "";
3517   char Quote = getTok().getString().front();
3518   StringRef Str = getTok().getStringContents();
3519   Data.reserve(Str.size());
3520   for (size_t i = 0, e = Str.size(); i != e; ++i) {
3521     Data.push_back(Str[i]);
3522     if (Str[i] == Quote) {
3523       // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3524       // If we're escaping the string's trailing delimiter, we're definitely
3525       // missing a quotation mark.
3526       if (i + 1 == Str.size())
3527         return Error(getTok().getLoc(), "missing quotation mark in string");
3528       if (Str[i + 1] == Quote)
3529         ++i;
3530     }
3531   }
3532 
3533   Lex();
3534   return false;
3535 }
3536 
parseAngleBracketString(std::string & Data)3537 bool MasmParser::parseAngleBracketString(std::string &Data) {
3538   SMLoc EndLoc, StartLoc = getTok().getLoc();
3539   if (isAngleBracketString(StartLoc, EndLoc)) {
3540     const char *StartChar = StartLoc.getPointer() + 1;
3541     const char *EndChar = EndLoc.getPointer() - 1;
3542     jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3543     // Eat from '<' to '>'.
3544     Lex();
3545 
3546     Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3547     return false;
3548   }
3549   return true;
3550 }
3551 
3552 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3553 bool MasmParser::parseTextItem(std::string &Data) {
3554   switch (getTok().getKind()) {
3555   default:
3556     return true;
3557   case AsmToken::Percent: {
3558     int64_t Res;
3559     if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3560       return true;
3561     Data = std::to_string(Res);
3562     return false;
3563   }
3564   case AsmToken::Less:
3565   case AsmToken::LessEqual:
3566   case AsmToken::LessLess:
3567   case AsmToken::LessGreater:
3568     return parseAngleBracketString(Data);
3569   case AsmToken::Identifier: {
3570     // This must be a text macro; we need to expand it accordingly.
3571     StringRef ID;
3572     SMLoc StartLoc = getTok().getLoc();
3573     if (parseIdentifier(ID))
3574       return true;
3575     Data = ID.str();
3576 
3577     bool Expanded = false;
3578     while (true) {
3579       // Try to resolve as a built-in text macro
3580       auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3581       if (BuiltinIt != BuiltinSymbolMap.end()) {
3582         std::optional<std::string> BuiltinText =
3583             evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3584         if (!BuiltinText) {
3585           // Not a text macro; break without substituting
3586           break;
3587         }
3588         Data = std::move(*BuiltinText);
3589         ID = StringRef(Data);
3590         Expanded = true;
3591         continue;
3592       }
3593 
3594       // Try to resolve as a variable text macro
3595       auto VarIt = Variables.find(ID.lower());
3596       if (VarIt != Variables.end()) {
3597         const Variable &Var = VarIt->getValue();
3598         if (!Var.IsText) {
3599           // Not a text macro; break without substituting
3600           break;
3601         }
3602         Data = Var.TextValue;
3603         ID = StringRef(Data);
3604         Expanded = true;
3605         continue;
3606       }
3607 
3608       break;
3609     }
3610 
3611     if (!Expanded) {
3612       // Not a text macro; not usable in TextItem context. Since we haven't used
3613       // the token, put it back for better error recovery.
3614       getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3615       return true;
3616     }
3617     return false;
3618   }
3619   }
3620   llvm_unreachable("unhandled token kind");
3621 }
3622 
3623 /// parseDirectiveAscii:
3624 ///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3625 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3626   auto parseOp = [&]() -> bool {
3627     std::string Data;
3628     if (checkForValidSection() || parseEscapedString(Data))
3629       return true;
3630     getStreamer().emitBytes(Data);
3631     if (ZeroTerminated)
3632       getStreamer().emitBytes(StringRef("\0", 1));
3633     return false;
3634   };
3635 
3636   if (parseMany(parseOp))
3637     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3638   return false;
3639 }
3640 
emitIntValue(const MCExpr * Value,unsigned Size)3641 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3642   // Special case constant expressions to match code generator.
3643   if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3644     assert(Size <= 8 && "Invalid size");
3645     int64_t IntValue = MCE->getValue();
3646     if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3647       return Error(MCE->getLoc(), "out of range literal value");
3648     getStreamer().emitIntValue(IntValue, Size);
3649   } else {
3650     const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3651     if (MSE && MSE->getSymbol().getName() == "?") {
3652       // ? initializer; treat as 0.
3653       getStreamer().emitIntValue(0, Size);
3654     } else {
3655       getStreamer().emitValue(Value, Size, Value->getLoc());
3656     }
3657   }
3658   return false;
3659 }
3660 
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3661 bool MasmParser::parseScalarInitializer(unsigned Size,
3662                                         SmallVectorImpl<const MCExpr *> &Values,
3663                                         unsigned StringPadLength) {
3664   if (Size == 1 && getTok().is(AsmToken::String)) {
3665     std::string Value;
3666     if (parseEscapedString(Value))
3667       return true;
3668     // Treat each character as an initializer.
3669     for (const unsigned char CharVal : Value)
3670       Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3671 
3672     // Pad the string with spaces to the specified length.
3673     for (size_t i = Value.size(); i < StringPadLength; ++i)
3674       Values.push_back(MCConstantExpr::create(' ', getContext()));
3675   } else {
3676     const MCExpr *Value;
3677     if (parseExpression(Value))
3678       return true;
3679     if (getTok().is(AsmToken::Identifier) &&
3680         getTok().getString().equals_insensitive("dup")) {
3681       Lex(); // Eat 'dup'.
3682       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3683       if (!MCE)
3684         return Error(Value->getLoc(),
3685                      "cannot repeat value a non-constant number of times");
3686       const int64_t Repetitions = MCE->getValue();
3687       if (Repetitions < 0)
3688         return Error(Value->getLoc(),
3689                      "cannot repeat value a negative number of times");
3690 
3691       SmallVector<const MCExpr *, 1> DuplicatedValues;
3692       if (parseToken(AsmToken::LParen,
3693                      "parentheses required for 'dup' contents") ||
3694           parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3695         return true;
3696 
3697       for (int i = 0; i < Repetitions; ++i)
3698         Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3699     } else {
3700       Values.push_back(Value);
3701     }
3702   }
3703   return false;
3704 }
3705 
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3706 bool MasmParser::parseScalarInstList(unsigned Size,
3707                                      SmallVectorImpl<const MCExpr *> &Values,
3708                                      const AsmToken::TokenKind EndToken) {
3709   while (getTok().isNot(EndToken) &&
3710          (EndToken != AsmToken::Greater ||
3711           getTok().isNot(AsmToken::GreaterGreater))) {
3712     parseScalarInitializer(Size, Values);
3713 
3714     // If we see a comma, continue, and allow line continuation.
3715     if (!parseOptionalToken(AsmToken::Comma))
3716       break;
3717     parseOptionalToken(AsmToken::EndOfStatement);
3718   }
3719   return false;
3720 }
3721 
emitIntegralValues(unsigned Size,unsigned * Count)3722 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3723   SmallVector<const MCExpr *, 1> Values;
3724   if (checkForValidSection() || parseScalarInstList(Size, Values))
3725     return true;
3726 
3727   for (const auto *Value : Values) {
3728     emitIntValue(Value, Size);
3729   }
3730   if (Count)
3731     *Count = Values.size();
3732   return false;
3733 }
3734 
3735 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3736 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3737   StructInfo &Struct = StructInProgress.back();
3738   FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3739   IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3740 
3741   Field.Type = Size;
3742 
3743   if (parseScalarInstList(Size, IntInfo.Values))
3744     return true;
3745 
3746   Field.SizeOf = Field.Type * IntInfo.Values.size();
3747   Field.LengthOf = IntInfo.Values.size();
3748   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3749   if (!Struct.IsUnion) {
3750     Struct.NextOffset = FieldEnd;
3751   }
3752   Struct.Size = std::max(Struct.Size, FieldEnd);
3753   return false;
3754 }
3755 
3756 /// parseDirectiveValue
3757 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3758 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3759   if (StructInProgress.empty()) {
3760     // Initialize data value.
3761     if (emitIntegralValues(Size))
3762       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3763   } else if (addIntegralField("", Size)) {
3764     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3765   }
3766 
3767   return false;
3768 }
3769 
3770 /// parseDirectiveNamedValue
3771 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3772 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3773                                           StringRef Name, SMLoc NameLoc) {
3774   if (StructInProgress.empty()) {
3775     // Initialize named data value.
3776     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3777     getStreamer().emitLabel(Sym);
3778     unsigned Count;
3779     if (emitIntegralValues(Size, &Count))
3780       return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3781 
3782     AsmTypeInfo Type;
3783     Type.Name = TypeName;
3784     Type.Size = Size * Count;
3785     Type.ElementSize = Size;
3786     Type.Length = Count;
3787     KnownType[Name.lower()] = Type;
3788   } else if (addIntegralField(Name, Size)) {
3789     return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3790   }
3791 
3792   return false;
3793 }
3794 
parseHexOcta(MasmParser & Asm,uint64_t & hi,uint64_t & lo)3795 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3796   if (Asm.getTok().isNot(AsmToken::Integer) &&
3797       Asm.getTok().isNot(AsmToken::BigNum))
3798     return Asm.TokError("unknown token in expression");
3799   SMLoc ExprLoc = Asm.getTok().getLoc();
3800   APInt IntValue = Asm.getTok().getAPIntVal();
3801   Asm.Lex();
3802   if (!IntValue.isIntN(128))
3803     return Asm.Error(ExprLoc, "out of range literal value");
3804   if (!IntValue.isIntN(64)) {
3805     hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3806     lo = IntValue.getLoBits(64).getZExtValue();
3807   } else {
3808     hi = 0;
3809     lo = IntValue.getZExtValue();
3810   }
3811   return false;
3812 }
3813 
parseRealValue(const fltSemantics & Semantics,APInt & Res)3814 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3815   // We don't truly support arithmetic on floating point expressions, so we
3816   // have to manually parse unary prefixes.
3817   bool IsNeg = false;
3818   SMLoc SignLoc;
3819   if (getLexer().is(AsmToken::Minus)) {
3820     SignLoc = getLexer().getLoc();
3821     Lexer.Lex();
3822     IsNeg = true;
3823   } else if (getLexer().is(AsmToken::Plus)) {
3824     SignLoc = getLexer().getLoc();
3825     Lexer.Lex();
3826   }
3827 
3828   if (Lexer.is(AsmToken::Error))
3829     return TokError(Lexer.getErr());
3830   if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3831       Lexer.isNot(AsmToken::Identifier))
3832     return TokError("unexpected token in directive");
3833 
3834   // Convert to an APFloat.
3835   APFloat Value(Semantics);
3836   StringRef IDVal = getTok().getString();
3837   if (getLexer().is(AsmToken::Identifier)) {
3838     if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3839       Value = APFloat::getInf(Semantics);
3840     else if (IDVal.equals_insensitive("nan"))
3841       Value = APFloat::getNaN(Semantics, false, ~0);
3842     else if (IDVal.equals_insensitive("?"))
3843       Value = APFloat::getZero(Semantics);
3844     else
3845       return TokError("invalid floating point literal");
3846   } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3847     // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3848     // To match ML64.exe, ignore the initial sign.
3849     unsigned SizeInBits = Value.getSizeInBits(Semantics);
3850     if (SizeInBits != (IDVal.size() << 2))
3851       return TokError("invalid floating point literal");
3852 
3853     // Consume the numeric token.
3854     Lex();
3855 
3856     Res = APInt(SizeInBits, IDVal, 16);
3857     if (SignLoc.isValid())
3858       return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3859     return false;
3860   } else if (errorToBool(
3861                  Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3862                      .takeError())) {
3863     return TokError("invalid floating point literal");
3864   }
3865   if (IsNeg)
3866     Value.changeSign();
3867 
3868   // Consume the numeric token.
3869   Lex();
3870 
3871   Res = Value.bitcastToAPInt();
3872 
3873   return false;
3874 }
3875 
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3876 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3877                                    SmallVectorImpl<APInt> &ValuesAsInt,
3878                                    const AsmToken::TokenKind EndToken) {
3879   while (getTok().isNot(EndToken) ||
3880          (EndToken == AsmToken::Greater &&
3881           getTok().isNot(AsmToken::GreaterGreater))) {
3882     const AsmToken NextTok = peekTok();
3883     if (NextTok.is(AsmToken::Identifier) &&
3884         NextTok.getString().equals_insensitive("dup")) {
3885       const MCExpr *Value;
3886       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3887         return true;
3888       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3889       if (!MCE)
3890         return Error(Value->getLoc(),
3891                      "cannot repeat value a non-constant number of times");
3892       const int64_t Repetitions = MCE->getValue();
3893       if (Repetitions < 0)
3894         return Error(Value->getLoc(),
3895                      "cannot repeat value a negative number of times");
3896 
3897       SmallVector<APInt, 1> DuplicatedValues;
3898       if (parseToken(AsmToken::LParen,
3899                      "parentheses required for 'dup' contents") ||
3900           parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3901         return true;
3902 
3903       for (int i = 0; i < Repetitions; ++i)
3904         ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3905     } else {
3906       APInt AsInt;
3907       if (parseRealValue(Semantics, AsInt))
3908         return true;
3909       ValuesAsInt.push_back(AsInt);
3910     }
3911 
3912     // Continue if we see a comma. (Also, allow line continuation.)
3913     if (!parseOptionalToken(AsmToken::Comma))
3914       break;
3915     parseOptionalToken(AsmToken::EndOfStatement);
3916   }
3917 
3918   return false;
3919 }
3920 
3921 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3922 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3923                                 unsigned *Count) {
3924   if (checkForValidSection())
3925     return true;
3926 
3927   SmallVector<APInt, 1> ValuesAsInt;
3928   if (parseRealInstList(Semantics, ValuesAsInt))
3929     return true;
3930 
3931   for (const APInt &AsInt : ValuesAsInt) {
3932     getStreamer().emitIntValue(AsInt);
3933   }
3934   if (Count)
3935     *Count = ValuesAsInt.size();
3936   return false;
3937 }
3938 
3939 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3940 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3941                               size_t Size) {
3942   StructInfo &Struct = StructInProgress.back();
3943   FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3944   RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3945 
3946   Field.SizeOf = 0;
3947 
3948   if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3949     return true;
3950 
3951   Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3952   Field.LengthOf = RealInfo.AsIntValues.size();
3953   Field.SizeOf = Field.Type * Field.LengthOf;
3954 
3955   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3956   if (!Struct.IsUnion) {
3957     Struct.NextOffset = FieldEnd;
3958   }
3959   Struct.Size = std::max(Struct.Size, FieldEnd);
3960   return false;
3961 }
3962 
3963 /// parseDirectiveRealValue
3964 ///  ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3965 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3966                                          const fltSemantics &Semantics,
3967                                          size_t Size) {
3968   if (StructInProgress.empty()) {
3969     // Initialize data value.
3970     if (emitRealValues(Semantics))
3971       return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3972   } else if (addRealField("", Semantics, Size)) {
3973     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3974   }
3975   return false;
3976 }
3977 
3978 /// parseDirectiveNamedRealValue
3979 ///  ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3980 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3981                                               const fltSemantics &Semantics,
3982                                               unsigned Size, StringRef Name,
3983                                               SMLoc NameLoc) {
3984   if (StructInProgress.empty()) {
3985     // Initialize named data value.
3986     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3987     getStreamer().emitLabel(Sym);
3988     unsigned Count;
3989     if (emitRealValues(Semantics, &Count))
3990       return addErrorSuffix(" in '" + TypeName + "' directive");
3991 
3992     AsmTypeInfo Type;
3993     Type.Name = TypeName;
3994     Type.Size = Size * Count;
3995     Type.ElementSize = Size;
3996     Type.Length = Count;
3997     KnownType[Name.lower()] = Type;
3998   } else if (addRealField(Name, Semantics, Size)) {
3999     return addErrorSuffix(" in '" + TypeName + "' directive");
4000   }
4001   return false;
4002 }
4003 
parseOptionalAngleBracketOpen()4004 bool MasmParser::parseOptionalAngleBracketOpen() {
4005   const AsmToken Tok = getTok();
4006   if (parseOptionalToken(AsmToken::LessLess)) {
4007     AngleBracketDepth++;
4008     Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4009     return true;
4010   } else if (parseOptionalToken(AsmToken::LessGreater)) {
4011     AngleBracketDepth++;
4012     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4013     return true;
4014   } else if (parseOptionalToken(AsmToken::Less)) {
4015     AngleBracketDepth++;
4016     return true;
4017   }
4018 
4019   return false;
4020 }
4021 
parseAngleBracketClose(const Twine & Msg)4022 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4023   const AsmToken Tok = getTok();
4024   if (parseOptionalToken(AsmToken::GreaterGreater)) {
4025     Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4026   } else if (parseToken(AsmToken::Greater, Msg)) {
4027     return true;
4028   }
4029   AngleBracketDepth--;
4030   return false;
4031 }
4032 
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)4033 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4034                                        const IntFieldInfo &Contents,
4035                                        FieldInitializer &Initializer) {
4036   SMLoc Loc = getTok().getLoc();
4037 
4038   SmallVector<const MCExpr *, 1> Values;
4039   if (parseOptionalToken(AsmToken::LCurly)) {
4040     if (Field.LengthOf == 1 && Field.Type > 1)
4041       return Error(Loc, "Cannot initialize scalar field with array value");
4042     if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4043         parseToken(AsmToken::RCurly))
4044       return true;
4045   } else if (parseOptionalAngleBracketOpen()) {
4046     if (Field.LengthOf == 1 && Field.Type > 1)
4047       return Error(Loc, "Cannot initialize scalar field with array value");
4048     if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4049         parseAngleBracketClose())
4050       return true;
4051   } else if (Field.LengthOf > 1 && Field.Type > 1) {
4052     return Error(Loc, "Cannot initialize array field with scalar value");
4053   } else if (parseScalarInitializer(Field.Type, Values,
4054                                     /*StringPadLength=*/Field.LengthOf)) {
4055     return true;
4056   }
4057 
4058   if (Values.size() > Field.LengthOf) {
4059     return Error(Loc, "Initializer too long for field; expected at most " +
4060                           std::to_string(Field.LengthOf) + " elements, got " +
4061                           std::to_string(Values.size()));
4062   }
4063   // Default-initialize all remaining values.
4064   Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4065 
4066   Initializer = FieldInitializer(std::move(Values));
4067   return false;
4068 }
4069 
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)4070 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4071                                        const RealFieldInfo &Contents,
4072                                        FieldInitializer &Initializer) {
4073   const fltSemantics *Semantics;
4074   switch (Field.Type) {
4075   case 4:
4076     Semantics = &APFloat::IEEEsingle();
4077     break;
4078   case 8:
4079     Semantics = &APFloat::IEEEdouble();
4080     break;
4081   case 10:
4082     Semantics = &APFloat::x87DoubleExtended();
4083     break;
4084   default:
4085     llvm_unreachable("unknown real field type");
4086   }
4087 
4088   SMLoc Loc = getTok().getLoc();
4089 
4090   SmallVector<APInt, 1> AsIntValues;
4091   if (parseOptionalToken(AsmToken::LCurly)) {
4092     if (Field.LengthOf == 1)
4093       return Error(Loc, "Cannot initialize scalar field with array value");
4094     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4095         parseToken(AsmToken::RCurly))
4096       return true;
4097   } else if (parseOptionalAngleBracketOpen()) {
4098     if (Field.LengthOf == 1)
4099       return Error(Loc, "Cannot initialize scalar field with array value");
4100     if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4101         parseAngleBracketClose())
4102       return true;
4103   } else if (Field.LengthOf > 1) {
4104     return Error(Loc, "Cannot initialize array field with scalar value");
4105   } else {
4106     AsIntValues.emplace_back();
4107     if (parseRealValue(*Semantics, AsIntValues.back()))
4108       return true;
4109   }
4110 
4111   if (AsIntValues.size() > Field.LengthOf) {
4112     return Error(Loc, "Initializer too long for field; expected at most " +
4113                           std::to_string(Field.LengthOf) + " elements, got " +
4114                           std::to_string(AsIntValues.size()));
4115   }
4116   // Default-initialize all remaining values.
4117   AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4118                      Contents.AsIntValues.end());
4119 
4120   Initializer = FieldInitializer(std::move(AsIntValues));
4121   return false;
4122 }
4123 
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)4124 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4125                                        const StructFieldInfo &Contents,
4126                                        FieldInitializer &Initializer) {
4127   SMLoc Loc = getTok().getLoc();
4128 
4129   std::vector<StructInitializer> Initializers;
4130   if (Field.LengthOf > 1) {
4131     if (parseOptionalToken(AsmToken::LCurly)) {
4132       if (parseStructInstList(Contents.Structure, Initializers,
4133                               AsmToken::RCurly) ||
4134           parseToken(AsmToken::RCurly))
4135         return true;
4136     } else if (parseOptionalAngleBracketOpen()) {
4137       if (parseStructInstList(Contents.Structure, Initializers,
4138                               AsmToken::Greater) ||
4139           parseAngleBracketClose())
4140         return true;
4141     } else {
4142       return Error(Loc, "Cannot initialize array field with scalar value");
4143     }
4144   } else {
4145     Initializers.emplace_back();
4146     if (parseStructInitializer(Contents.Structure, Initializers.back()))
4147       return true;
4148   }
4149 
4150   if (Initializers.size() > Field.LengthOf) {
4151     return Error(Loc, "Initializer too long for field; expected at most " +
4152                           std::to_string(Field.LengthOf) + " elements, got " +
4153                           std::to_string(Initializers.size()));
4154   }
4155   // Default-initialize all remaining values.
4156   Initializers.insert(Initializers.end(),
4157                       Contents.Initializers.begin() + Initializers.size(),
4158                       Contents.Initializers.end());
4159 
4160   Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4161   return false;
4162 }
4163 
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)4164 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4165                                        FieldInitializer &Initializer) {
4166   switch (Field.Contents.FT) {
4167   case FT_INTEGRAL:
4168     return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4169   case FT_REAL:
4170     return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4171   case FT_STRUCT:
4172     return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4173   }
4174   llvm_unreachable("Unhandled FieldType enum");
4175 }
4176 
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)4177 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4178                                         StructInitializer &Initializer) {
4179   const AsmToken FirstToken = getTok();
4180 
4181   std::optional<AsmToken::TokenKind> EndToken;
4182   if (parseOptionalToken(AsmToken::LCurly)) {
4183     EndToken = AsmToken::RCurly;
4184   } else if (parseOptionalAngleBracketOpen()) {
4185     EndToken = AsmToken::Greater;
4186     AngleBracketDepth++;
4187   } else if (FirstToken.is(AsmToken::Identifier) &&
4188              FirstToken.getString() == "?") {
4189     // ? initializer; leave EndToken uninitialized to treat as empty.
4190     if (parseToken(AsmToken::Identifier))
4191       return true;
4192   } else {
4193     return Error(FirstToken.getLoc(), "Expected struct initializer");
4194   }
4195 
4196   auto &FieldInitializers = Initializer.FieldInitializers;
4197   size_t FieldIndex = 0;
4198   if (EndToken) {
4199     // Initialize all fields with given initializers.
4200     while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4201       const FieldInfo &Field = Structure.Fields[FieldIndex++];
4202       if (parseOptionalToken(AsmToken::Comma)) {
4203         // Empty initializer; use the default and continue. (Also, allow line
4204         // continuation.)
4205         FieldInitializers.push_back(Field.Contents);
4206         parseOptionalToken(AsmToken::EndOfStatement);
4207         continue;
4208       }
4209       FieldInitializers.emplace_back(Field.Contents.FT);
4210       if (parseFieldInitializer(Field, FieldInitializers.back()))
4211         return true;
4212 
4213       // Continue if we see a comma. (Also, allow line continuation.)
4214       SMLoc CommaLoc = getTok().getLoc();
4215       if (!parseOptionalToken(AsmToken::Comma))
4216         break;
4217       if (FieldIndex == Structure.Fields.size())
4218         return Error(CommaLoc, "'" + Structure.Name +
4219                                    "' initializer initializes too many fields");
4220       parseOptionalToken(AsmToken::EndOfStatement);
4221     }
4222   }
4223   // Default-initialize all remaining fields.
4224   for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4225     FieldInitializers.push_back(Field.Contents);
4226 
4227   if (EndToken) {
4228     if (*EndToken == AsmToken::Greater)
4229       return parseAngleBracketClose();
4230 
4231     return parseToken(*EndToken);
4232   }
4233 
4234   return false;
4235 }
4236 
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)4237 bool MasmParser::parseStructInstList(
4238     const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4239     const AsmToken::TokenKind EndToken) {
4240   while (getTok().isNot(EndToken) ||
4241          (EndToken == AsmToken::Greater &&
4242           getTok().isNot(AsmToken::GreaterGreater))) {
4243     const AsmToken NextTok = peekTok();
4244     if (NextTok.is(AsmToken::Identifier) &&
4245         NextTok.getString().equals_insensitive("dup")) {
4246       const MCExpr *Value;
4247       if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4248         return true;
4249       const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4250       if (!MCE)
4251         return Error(Value->getLoc(),
4252                      "cannot repeat value a non-constant number of times");
4253       const int64_t Repetitions = MCE->getValue();
4254       if (Repetitions < 0)
4255         return Error(Value->getLoc(),
4256                      "cannot repeat value a negative number of times");
4257 
4258       std::vector<StructInitializer> DuplicatedValues;
4259       if (parseToken(AsmToken::LParen,
4260                      "parentheses required for 'dup' contents") ||
4261           parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4262         return true;
4263 
4264       for (int i = 0; i < Repetitions; ++i)
4265         llvm::append_range(Initializers, DuplicatedValues);
4266     } else {
4267       Initializers.emplace_back();
4268       if (parseStructInitializer(Structure, Initializers.back()))
4269         return true;
4270     }
4271 
4272     // Continue if we see a comma. (Also, allow line continuation.)
4273     if (!parseOptionalToken(AsmToken::Comma))
4274       break;
4275     parseOptionalToken(AsmToken::EndOfStatement);
4276   }
4277 
4278   return false;
4279 }
4280 
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)4281 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4282                                 const IntFieldInfo &Contents) {
4283   // Default-initialize all values.
4284   for (const MCExpr *Value : Contents.Values) {
4285     if (emitIntValue(Value, Field.Type))
4286       return true;
4287   }
4288   return false;
4289 }
4290 
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)4291 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4292                                 const RealFieldInfo &Contents) {
4293   for (const APInt &AsInt : Contents.AsIntValues) {
4294     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4295                                AsInt.getBitWidth() / 8);
4296   }
4297   return false;
4298 }
4299 
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)4300 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4301                                 const StructFieldInfo &Contents) {
4302   for (const auto &Initializer : Contents.Initializers) {
4303     size_t Index = 0, Offset = 0;
4304     for (const auto &SubField : Contents.Structure.Fields) {
4305       getStreamer().emitZeros(SubField.Offset - Offset);
4306       Offset = SubField.Offset + SubField.SizeOf;
4307       emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4308     }
4309   }
4310   return false;
4311 }
4312 
emitFieldValue(const FieldInfo & Field)4313 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4314   switch (Field.Contents.FT) {
4315   case FT_INTEGRAL:
4316     return emitFieldValue(Field, Field.Contents.IntInfo);
4317   case FT_REAL:
4318     return emitFieldValue(Field, Field.Contents.RealInfo);
4319   case FT_STRUCT:
4320     return emitFieldValue(Field, Field.Contents.StructInfo);
4321   }
4322   llvm_unreachable("Unhandled FieldType enum");
4323 }
4324 
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)4325 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4326                                       const IntFieldInfo &Contents,
4327                                       const IntFieldInfo &Initializer) {
4328   for (const auto &Value : Initializer.Values) {
4329     if (emitIntValue(Value, Field.Type))
4330       return true;
4331   }
4332   // Default-initialize all remaining values.
4333   for (const auto &Value :
4334            llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4335     if (emitIntValue(Value, Field.Type))
4336       return true;
4337   }
4338   return false;
4339 }
4340 
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)4341 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4342                                       const RealFieldInfo &Contents,
4343                                       const RealFieldInfo &Initializer) {
4344   for (const auto &AsInt : Initializer.AsIntValues) {
4345     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4346                                AsInt.getBitWidth() / 8);
4347   }
4348   // Default-initialize all remaining values.
4349   for (const auto &AsInt :
4350        llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4351     getStreamer().emitIntValue(AsInt.getLimitedValue(),
4352                                AsInt.getBitWidth() / 8);
4353   }
4354   return false;
4355 }
4356 
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)4357 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4358                                       const StructFieldInfo &Contents,
4359                                       const StructFieldInfo &Initializer) {
4360   for (const auto &Init : Initializer.Initializers) {
4361     if (emitStructInitializer(Contents.Structure, Init))
4362       return true;
4363   }
4364   // Default-initialize all remaining values.
4365   for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4366                                            Initializer.Initializers.size())) {
4367     if (emitStructInitializer(Contents.Structure, Init))
4368       return true;
4369   }
4370   return false;
4371 }
4372 
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)4373 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4374                                       const FieldInitializer &Initializer) {
4375   switch (Field.Contents.FT) {
4376   case FT_INTEGRAL:
4377     return emitFieldInitializer(Field, Field.Contents.IntInfo,
4378                                 Initializer.IntInfo);
4379   case FT_REAL:
4380     return emitFieldInitializer(Field, Field.Contents.RealInfo,
4381                                 Initializer.RealInfo);
4382   case FT_STRUCT:
4383     return emitFieldInitializer(Field, Field.Contents.StructInfo,
4384                                 Initializer.StructInfo);
4385   }
4386   llvm_unreachable("Unhandled FieldType enum");
4387 }
4388 
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)4389 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4390                                        const StructInitializer &Initializer) {
4391   if (!Structure.Initializable)
4392     return Error(getLexer().getLoc(),
4393                  "cannot initialize a value of type '" + Structure.Name +
4394                      "'; 'org' was used in the type's declaration");
4395   size_t Index = 0, Offset = 0;
4396   for (const auto &Init : Initializer.FieldInitializers) {
4397     const auto &Field = Structure.Fields[Index++];
4398     getStreamer().emitZeros(Field.Offset - Offset);
4399     Offset = Field.Offset + Field.SizeOf;
4400     if (emitFieldInitializer(Field, Init))
4401       return true;
4402   }
4403   // Default-initialize all remaining fields.
4404   for (const auto &Field : llvm::drop_begin(
4405            Structure.Fields, Initializer.FieldInitializers.size())) {
4406     getStreamer().emitZeros(Field.Offset - Offset);
4407     Offset = Field.Offset + Field.SizeOf;
4408     if (emitFieldValue(Field))
4409       return true;
4410   }
4411   // Add final padding.
4412   if (Offset != Structure.Size)
4413     getStreamer().emitZeros(Structure.Size - Offset);
4414   return false;
4415 }
4416 
4417 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)4418 bool MasmParser::emitStructValues(const StructInfo &Structure,
4419                                   unsigned *Count) {
4420   std::vector<StructInitializer> Initializers;
4421   if (parseStructInstList(Structure, Initializers))
4422     return true;
4423 
4424   for (const auto &Initializer : Initializers) {
4425     if (emitStructInitializer(Structure, Initializer))
4426       return true;
4427   }
4428 
4429   if (Count)
4430     *Count = Initializers.size();
4431   return false;
4432 }
4433 
4434 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)4435 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4436   StructInfo &OwningStruct = StructInProgress.back();
4437   FieldInfo &Field =
4438       OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4439   StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4440 
4441   StructInfo.Structure = Structure;
4442   Field.Type = Structure.Size;
4443 
4444   if (parseStructInstList(Structure, StructInfo.Initializers))
4445     return true;
4446 
4447   Field.LengthOf = StructInfo.Initializers.size();
4448   Field.SizeOf = Field.Type * Field.LengthOf;
4449 
4450   const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4451   if (!OwningStruct.IsUnion) {
4452     OwningStruct.NextOffset = FieldEnd;
4453   }
4454   OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4455 
4456   return false;
4457 }
4458 
4459 /// parseDirectiveStructValue
4460 ///  ::= struct-id (<struct-initializer> | {struct-initializer})
4461 ///                [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)4462 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4463                                            StringRef Directive, SMLoc DirLoc) {
4464   if (StructInProgress.empty()) {
4465     if (emitStructValues(Structure))
4466       return true;
4467   } else if (addStructField("", Structure)) {
4468     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4469   }
4470 
4471   return false;
4472 }
4473 
4474 /// parseDirectiveNamedValue
4475 ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4476 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4477                                                 StringRef Directive,
4478                                                 SMLoc DirLoc, StringRef Name) {
4479   if (StructInProgress.empty()) {
4480     // Initialize named data value.
4481     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4482     getStreamer().emitLabel(Sym);
4483     unsigned Count;
4484     if (emitStructValues(Structure, &Count))
4485       return true;
4486     AsmTypeInfo Type;
4487     Type.Name = Structure.Name;
4488     Type.Size = Structure.Size * Count;
4489     Type.ElementSize = Structure.Size;
4490     Type.Length = Count;
4491     KnownType[Name.lower()] = Type;
4492   } else if (addStructField(Name, Structure)) {
4493     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4494   }
4495 
4496   return false;
4497 }
4498 
4499 /// parseDirectiveStruct
4500 ///  ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4501 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4502 ///      <name> ENDS
4503 ////// dataDir = data declaration
4504 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4505 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4506                                       DirectiveKind DirKind, StringRef Name,
4507                                       SMLoc NameLoc) {
4508   // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4509   // anyway, so all field accesses must be qualified.
4510   AsmToken NextTok = getTok();
4511   int64_t AlignmentValue = 1;
4512   if (NextTok.isNot(AsmToken::Comma) &&
4513       NextTok.isNot(AsmToken::EndOfStatement) &&
4514       parseAbsoluteExpression(AlignmentValue)) {
4515     return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4516                           "' directive");
4517   }
4518   if (!isPowerOf2_64(AlignmentValue)) {
4519     return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4520                                        std::to_string(AlignmentValue));
4521   }
4522 
4523   StringRef Qualifier;
4524   SMLoc QualifierLoc;
4525   if (parseOptionalToken(AsmToken::Comma)) {
4526     QualifierLoc = getTok().getLoc();
4527     if (parseIdentifier(Qualifier))
4528       return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4529     if (!Qualifier.equals_insensitive("nonunique"))
4530       return Error(QualifierLoc, "Unrecognized qualifier for '" +
4531                                      Twine(Directive) +
4532                                      "' directive; expected none or NONUNIQUE");
4533   }
4534 
4535   if (parseEOL())
4536     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4537 
4538   StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4539   return false;
4540 }
4541 
4542 /// parseDirectiveNestedStruct
4543 ///  ::= (STRUC | STRUCT | UNION) [name]
4544 ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4545 ///      ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4546 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4547                                             DirectiveKind DirKind) {
4548   if (StructInProgress.empty())
4549     return TokError("missing name in top-level '" + Twine(Directive) +
4550                     "' directive");
4551 
4552   StringRef Name;
4553   if (getTok().is(AsmToken::Identifier)) {
4554     Name = getTok().getIdentifier();
4555     parseToken(AsmToken::Identifier);
4556   }
4557   if (parseEOL())
4558     return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4559 
4560   // Reserve space to ensure Alignment doesn't get invalidated when
4561   // StructInProgress grows.
4562   StructInProgress.reserve(StructInProgress.size() + 1);
4563   StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4564                                 StructInProgress.back().Alignment);
4565   return false;
4566 }
4567 
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4568 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4569   if (StructInProgress.empty())
4570     return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4571   if (StructInProgress.size() > 1)
4572     return Error(NameLoc, "unexpected name in nested ENDS directive");
4573   if (StructInProgress.back().Name.compare_insensitive(Name))
4574     return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4575                               StructInProgress.back().Name + "'");
4576   StructInfo Structure = StructInProgress.pop_back_val();
4577   // Pad to make the structure's size divisible by the smaller of its alignment
4578   // and the size of its largest field.
4579   Structure.Size = llvm::alignTo(
4580       Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4581   Structs[Name.lower()] = Structure;
4582 
4583   if (parseEOL())
4584     return addErrorSuffix(" in ENDS directive");
4585 
4586   return false;
4587 }
4588 
parseDirectiveNestedEnds()4589 bool MasmParser::parseDirectiveNestedEnds() {
4590   if (StructInProgress.empty())
4591     return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4592   if (StructInProgress.size() == 1)
4593     return TokError("missing name in top-level ENDS directive");
4594 
4595   if (parseEOL())
4596     return addErrorSuffix(" in nested ENDS directive");
4597 
4598   StructInfo Structure = StructInProgress.pop_back_val();
4599   // Pad to make the structure's size divisible by its alignment.
4600   Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4601 
4602   StructInfo &ParentStruct = StructInProgress.back();
4603   if (Structure.Name.empty()) {
4604     // Anonymous substructures' fields are addressed as if they belong to the
4605     // parent structure - so we transfer them to the parent here.
4606     const size_t OldFields = ParentStruct.Fields.size();
4607     ParentStruct.Fields.insert(
4608         ParentStruct.Fields.end(),
4609         std::make_move_iterator(Structure.Fields.begin()),
4610         std::make_move_iterator(Structure.Fields.end()));
4611     for (const auto &FieldByName : Structure.FieldsByName) {
4612       ParentStruct.FieldsByName[FieldByName.getKey()] =
4613           FieldByName.getValue() + OldFields;
4614     }
4615 
4616     unsigned FirstFieldOffset = 0;
4617     if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4618       FirstFieldOffset = llvm::alignTo(
4619           ParentStruct.NextOffset,
4620           std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4621     }
4622 
4623     if (ParentStruct.IsUnion) {
4624       ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4625     } else {
4626       for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4627         Field.Offset += FirstFieldOffset;
4628 
4629       const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4630       if (!ParentStruct.IsUnion) {
4631         ParentStruct.NextOffset = StructureEnd;
4632       }
4633       ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4634     }
4635   } else {
4636     FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4637                                              Structure.AlignmentSize);
4638     StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4639     Field.Type = Structure.Size;
4640     Field.LengthOf = 1;
4641     Field.SizeOf = Structure.Size;
4642 
4643     const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4644     if (!ParentStruct.IsUnion) {
4645       ParentStruct.NextOffset = StructureEnd;
4646     }
4647     ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4648 
4649     StructInfo.Structure = Structure;
4650     StructInfo.Initializers.emplace_back();
4651     auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4652     for (const auto &SubField : Structure.Fields) {
4653       FieldInitializers.push_back(SubField.Contents);
4654     }
4655   }
4656 
4657   return false;
4658 }
4659 
4660 /// parseDirectiveOrg
4661 ///  ::= org expression
parseDirectiveOrg()4662 bool MasmParser::parseDirectiveOrg() {
4663   const MCExpr *Offset;
4664   SMLoc OffsetLoc = Lexer.getLoc();
4665   if (checkForValidSection() || parseExpression(Offset))
4666     return true;
4667   if (parseEOL())
4668     return addErrorSuffix(" in 'org' directive");
4669 
4670   if (StructInProgress.empty()) {
4671     // Not in a struct; change the offset for the next instruction or data
4672     if (checkForValidSection())
4673       return addErrorSuffix(" in 'org' directive");
4674 
4675     getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4676   } else {
4677     // Offset the next field of this struct
4678     StructInfo &Structure = StructInProgress.back();
4679     int64_t OffsetRes;
4680     if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4681       return Error(OffsetLoc,
4682                    "expected absolute expression in 'org' directive");
4683     if (OffsetRes < 0)
4684       return Error(
4685           OffsetLoc,
4686           "expected non-negative value in struct's 'org' directive; was " +
4687               std::to_string(OffsetRes));
4688     Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4689 
4690     // ORG-affected structures cannot be initialized
4691     Structure.Initializable = false;
4692   }
4693 
4694   return false;
4695 }
4696 
emitAlignTo(int64_t Alignment)4697 bool MasmParser::emitAlignTo(int64_t Alignment) {
4698   if (StructInProgress.empty()) {
4699     // Not in a struct; align the next instruction or data
4700     if (checkForValidSection())
4701       return true;
4702 
4703     // Check whether we should use optimal code alignment for this align
4704     // directive.
4705     const MCSection *Section = getStreamer().getCurrentSectionOnly();
4706     assert(Section && "must have section to emit alignment");
4707     if (Section->useCodeAlign()) {
4708       getStreamer().emitCodeAlignment(Align(Alignment),
4709                                       &getTargetParser().getSTI(),
4710                                       /*MaxBytesToEmit=*/0);
4711     } else {
4712       // FIXME: Target specific behavior about how the "extra" bytes are filled.
4713       getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4714                                          /*ValueSize=*/1,
4715                                          /*MaxBytesToEmit=*/0);
4716     }
4717   } else {
4718     // Align the next field of this struct
4719     StructInfo &Structure = StructInProgress.back();
4720     Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4721   }
4722 
4723   return false;
4724 }
4725 
4726 /// parseDirectiveAlign
4727 ///  ::= align expression
parseDirectiveAlign()4728 bool MasmParser::parseDirectiveAlign() {
4729   SMLoc AlignmentLoc = getLexer().getLoc();
4730   int64_t Alignment;
4731 
4732   // Ignore empty 'align' directives.
4733   if (getTok().is(AsmToken::EndOfStatement)) {
4734     return Warning(AlignmentLoc,
4735                    "align directive with no operand is ignored") &&
4736            parseEOL();
4737   }
4738   if (parseAbsoluteExpression(Alignment) || parseEOL())
4739     return addErrorSuffix(" in align directive");
4740 
4741   // Always emit an alignment here even if we throw an error.
4742   bool ReturnVal = false;
4743 
4744   // Reject alignments that aren't either a power of two or zero, for ML.exe
4745   // compatibility. Alignment of zero is silently rounded up to one.
4746   if (Alignment == 0)
4747     Alignment = 1;
4748   if (!isPowerOf2_64(Alignment))
4749     ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4750                                          std::to_string(Alignment));
4751 
4752   if (emitAlignTo(Alignment))
4753     ReturnVal |= addErrorSuffix(" in align directive");
4754 
4755   return ReturnVal;
4756 }
4757 
4758 /// parseDirectiveEven
4759 ///  ::= even
parseDirectiveEven()4760 bool MasmParser::parseDirectiveEven() {
4761   if (parseEOL() || emitAlignTo(2))
4762     return addErrorSuffix(" in even directive");
4763 
4764   return false;
4765 }
4766 
4767 /// parseDirectiveFile
4768 /// ::= .file filename
4769 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
parseDirectiveFile(SMLoc DirectiveLoc)4770 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4771   // FIXME: I'm not sure what this is.
4772   int64_t FileNumber = -1;
4773   if (getLexer().is(AsmToken::Integer)) {
4774     FileNumber = getTok().getIntVal();
4775     Lex();
4776 
4777     if (FileNumber < 0)
4778       return TokError("negative file number");
4779   }
4780 
4781   std::string Path;
4782 
4783   // Usually the directory and filename together, otherwise just the directory.
4784   // Allow the strings to have escaped octal character sequence.
4785   if (check(getTok().isNot(AsmToken::String),
4786             "unexpected token in '.file' directive") ||
4787       parseEscapedString(Path))
4788     return true;
4789 
4790   StringRef Directory;
4791   StringRef Filename;
4792   std::string FilenameData;
4793   if (getLexer().is(AsmToken::String)) {
4794     if (check(FileNumber == -1,
4795               "explicit path specified, but no file number") ||
4796         parseEscapedString(FilenameData))
4797       return true;
4798     Filename = FilenameData;
4799     Directory = Path;
4800   } else {
4801     Filename = Path;
4802   }
4803 
4804   uint64_t MD5Hi, MD5Lo;
4805   bool HasMD5 = false;
4806 
4807   std::optional<StringRef> Source;
4808   bool HasSource = false;
4809   std::string SourceString;
4810 
4811   while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4812     StringRef Keyword;
4813     if (check(getTok().isNot(AsmToken::Identifier),
4814               "unexpected token in '.file' directive") ||
4815         parseIdentifier(Keyword))
4816       return true;
4817     if (Keyword == "md5") {
4818       HasMD5 = true;
4819       if (check(FileNumber == -1,
4820                 "MD5 checksum specified, but no file number") ||
4821           parseHexOcta(*this, MD5Hi, MD5Lo))
4822         return true;
4823     } else if (Keyword == "source") {
4824       HasSource = true;
4825       if (check(FileNumber == -1,
4826                 "source specified, but no file number") ||
4827           check(getTok().isNot(AsmToken::String),
4828                 "unexpected token in '.file' directive") ||
4829           parseEscapedString(SourceString))
4830         return true;
4831     } else {
4832       return TokError("unexpected token in '.file' directive");
4833     }
4834   }
4835 
4836   if (FileNumber == -1) {
4837     // Ignore the directive if there is no number and the target doesn't support
4838     // numberless .file directives. This allows some portability of assembler
4839     // between different object file formats.
4840     if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4841       getStreamer().emitFileDirective(Filename);
4842   } else {
4843     // In case there is a -g option as well as debug info from directive .file,
4844     // we turn off the -g option, directly use the existing debug info instead.
4845     // Throw away any implicit file table for the assembler source.
4846     if (Ctx.getGenDwarfForAssembly()) {
4847       Ctx.getMCDwarfLineTable(0).resetFileTable();
4848       Ctx.setGenDwarfForAssembly(false);
4849     }
4850 
4851     std::optional<MD5::MD5Result> CKMem;
4852     if (HasMD5) {
4853       MD5::MD5Result Sum;
4854       for (unsigned i = 0; i != 8; ++i) {
4855         Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4856         Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4857       }
4858       CKMem = Sum;
4859     }
4860     if (HasSource) {
4861       char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4862       memcpy(SourceBuf, SourceString.data(), SourceString.size());
4863       Source = StringRef(SourceBuf, SourceString.size());
4864     }
4865     if (FileNumber == 0) {
4866       if (Ctx.getDwarfVersion() < 5)
4867         return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4868       getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4869     } else {
4870       Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4871           FileNumber, Directory, Filename, CKMem, Source);
4872       if (!FileNumOrErr)
4873         return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4874     }
4875     // Alert the user if there are some .file directives with MD5 and some not.
4876     // But only do that once.
4877     if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4878       ReportedInconsistentMD5 = true;
4879       return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4880     }
4881   }
4882 
4883   return false;
4884 }
4885 
4886 /// parseDirectiveLine
4887 /// ::= .line [number]
parseDirectiveLine()4888 bool MasmParser::parseDirectiveLine() {
4889   int64_t LineNumber;
4890   if (getLexer().is(AsmToken::Integer)) {
4891     if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4892       return true;
4893     (void)LineNumber;
4894     // FIXME: Do something with the .line.
4895   }
4896   if (parseEOL())
4897     return true;
4898 
4899   return false;
4900 }
4901 
4902 /// parseDirectiveLoc
4903 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4904 ///                                [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4905 /// The first number is a file number, must have been previously assigned with
4906 /// a .file directive, the second number is the line number and optionally the
4907 /// third number is a column position (zero if not specified).  The remaining
4908 /// optional items are .loc sub-directives.
parseDirectiveLoc()4909 bool MasmParser::parseDirectiveLoc() {
4910   int64_t FileNumber = 0, LineNumber = 0;
4911   SMLoc Loc = getTok().getLoc();
4912   if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4913       check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4914             "file number less than one in '.loc' directive") ||
4915       check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4916             "unassigned file number in '.loc' directive"))
4917     return true;
4918 
4919   // optional
4920   if (getLexer().is(AsmToken::Integer)) {
4921     LineNumber = getTok().getIntVal();
4922     if (LineNumber < 0)
4923       return TokError("line number less than zero in '.loc' directive");
4924     Lex();
4925   }
4926 
4927   int64_t ColumnPos = 0;
4928   if (getLexer().is(AsmToken::Integer)) {
4929     ColumnPos = getTok().getIntVal();
4930     if (ColumnPos < 0)
4931       return TokError("column position less than zero in '.loc' directive");
4932     Lex();
4933   }
4934 
4935   auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4936   unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4937   unsigned Isa = 0;
4938   int64_t Discriminator = 0;
4939 
4940   auto parseLocOp = [&]() -> bool {
4941     StringRef Name;
4942     SMLoc Loc = getTok().getLoc();
4943     if (parseIdentifier(Name))
4944       return TokError("unexpected token in '.loc' directive");
4945 
4946     if (Name == "basic_block")
4947       Flags |= DWARF2_FLAG_BASIC_BLOCK;
4948     else if (Name == "prologue_end")
4949       Flags |= DWARF2_FLAG_PROLOGUE_END;
4950     else if (Name == "epilogue_begin")
4951       Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4952     else if (Name == "is_stmt") {
4953       Loc = getTok().getLoc();
4954       const MCExpr *Value;
4955       if (parseExpression(Value))
4956         return true;
4957       // The expression must be the constant 0 or 1.
4958       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4959         int Value = MCE->getValue();
4960         if (Value == 0)
4961           Flags &= ~DWARF2_FLAG_IS_STMT;
4962         else if (Value == 1)
4963           Flags |= DWARF2_FLAG_IS_STMT;
4964         else
4965           return Error(Loc, "is_stmt value not 0 or 1");
4966       } else {
4967         return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4968       }
4969     } else if (Name == "isa") {
4970       Loc = getTok().getLoc();
4971       const MCExpr *Value;
4972       if (parseExpression(Value))
4973         return true;
4974       // The expression must be a constant greater or equal to 0.
4975       if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4976         int Value = MCE->getValue();
4977         if (Value < 0)
4978           return Error(Loc, "isa number less than zero");
4979         Isa = Value;
4980       } else {
4981         return Error(Loc, "isa number not a constant value");
4982       }
4983     } else if (Name == "discriminator") {
4984       if (parseAbsoluteExpression(Discriminator))
4985         return true;
4986     } else {
4987       return Error(Loc, "unknown sub-directive in '.loc' directive");
4988     }
4989     return false;
4990   };
4991 
4992   if (parseMany(parseLocOp, false /*hasComma*/))
4993     return true;
4994 
4995   getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4996                                       Isa, Discriminator, StringRef());
4997 
4998   return false;
4999 }
5000 
5001 /// parseDirectiveStabs
5002 /// ::= .stabs string, number, number, number
parseDirectiveStabs()5003 bool MasmParser::parseDirectiveStabs() {
5004   return TokError("unsupported directive '.stabs'");
5005 }
5006 
5007 /// parseDirectiveCVFile
5008 /// ::= .cv_file number filename [checksum] [checksumkind]
parseDirectiveCVFile()5009 bool MasmParser::parseDirectiveCVFile() {
5010   SMLoc FileNumberLoc = getTok().getLoc();
5011   int64_t FileNumber;
5012   std::string Filename;
5013   std::string Checksum;
5014   int64_t ChecksumKind = 0;
5015 
5016   if (parseIntToken(FileNumber,
5017                     "expected file number in '.cv_file' directive") ||
5018       check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5019       check(getTok().isNot(AsmToken::String),
5020             "unexpected token in '.cv_file' directive") ||
5021       parseEscapedString(Filename))
5022     return true;
5023   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5024     if (check(getTok().isNot(AsmToken::String),
5025               "unexpected token in '.cv_file' directive") ||
5026         parseEscapedString(Checksum) ||
5027         parseIntToken(ChecksumKind,
5028                       "expected checksum kind in '.cv_file' directive") ||
5029         parseEOL())
5030       return true;
5031   }
5032 
5033   Checksum = fromHex(Checksum);
5034   void *CKMem = Ctx.allocate(Checksum.size(), 1);
5035   memcpy(CKMem, Checksum.data(), Checksum.size());
5036   ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5037                                     Checksum.size());
5038 
5039   if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5040                                          static_cast<uint8_t>(ChecksumKind)))
5041     return Error(FileNumberLoc, "file number already allocated");
5042 
5043   return false;
5044 }
5045 
parseCVFunctionId(int64_t & FunctionId,StringRef DirectiveName)5046 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5047                                    StringRef DirectiveName) {
5048   SMLoc Loc;
5049   return parseTokenLoc(Loc) ||
5050          parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5051                                        "' directive") ||
5052          check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5053                "expected function id within range [0, UINT_MAX)");
5054 }
5055 
parseCVFileId(int64_t & FileNumber,StringRef DirectiveName)5056 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5057   SMLoc Loc;
5058   return parseTokenLoc(Loc) ||
5059          parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5060                                        "' directive") ||
5061          check(FileNumber < 1, Loc, "file number less than one in '" +
5062                                         DirectiveName + "' directive") ||
5063          check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5064                "unassigned file number in '" + DirectiveName + "' directive");
5065 }
5066 
5067 /// parseDirectiveCVFuncId
5068 /// ::= .cv_func_id FunctionId
5069 ///
5070 /// Introduces a function ID that can be used with .cv_loc.
parseDirectiveCVFuncId()5071 bool MasmParser::parseDirectiveCVFuncId() {
5072   SMLoc FunctionIdLoc = getTok().getLoc();
5073   int64_t FunctionId;
5074 
5075   if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5076     return true;
5077 
5078   if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5079     return Error(FunctionIdLoc, "function id already allocated");
5080 
5081   return false;
5082 }
5083 
5084 /// parseDirectiveCVInlineSiteId
5085 /// ::= .cv_inline_site_id FunctionId
5086 ///         "within" IAFunc
5087 ///         "inlined_at" IAFile IALine [IACol]
5088 ///
5089 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5090 /// at" source location information for use in the line table of the caller,
5091 /// whether the caller is a real function or another inlined call site.
parseDirectiveCVInlineSiteId()5092 bool MasmParser::parseDirectiveCVInlineSiteId() {
5093   SMLoc FunctionIdLoc = getTok().getLoc();
5094   int64_t FunctionId;
5095   int64_t IAFunc;
5096   int64_t IAFile;
5097   int64_t IALine;
5098   int64_t IACol = 0;
5099 
5100   // FunctionId
5101   if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5102     return true;
5103 
5104   // "within"
5105   if (check((getLexer().isNot(AsmToken::Identifier) ||
5106              getTok().getIdentifier() != "within"),
5107             "expected 'within' identifier in '.cv_inline_site_id' directive"))
5108     return true;
5109   Lex();
5110 
5111   // IAFunc
5112   if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5113     return true;
5114 
5115   // "inlined_at"
5116   if (check((getLexer().isNot(AsmToken::Identifier) ||
5117              getTok().getIdentifier() != "inlined_at"),
5118             "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5119             "directive") )
5120     return true;
5121   Lex();
5122 
5123   // IAFile IALine
5124   if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5125       parseIntToken(IALine, "expected line number after 'inlined_at'"))
5126     return true;
5127 
5128   // [IACol]
5129   if (getLexer().is(AsmToken::Integer)) {
5130     IACol = getTok().getIntVal();
5131     Lex();
5132   }
5133 
5134   if (parseEOL())
5135     return true;
5136 
5137   if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5138                                                  IALine, IACol, FunctionIdLoc))
5139     return Error(FunctionIdLoc, "function id already allocated");
5140 
5141   return false;
5142 }
5143 
5144 /// parseDirectiveCVLoc
5145 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5146 ///                                [is_stmt VALUE]
5147 /// The first number is a file number, must have been previously assigned with
5148 /// a .file directive, the second number is the line number and optionally the
5149 /// third number is a column position (zero if not specified).  The remaining
5150 /// optional items are .loc sub-directives.
parseDirectiveCVLoc()5151 bool MasmParser::parseDirectiveCVLoc() {
5152   SMLoc DirectiveLoc = getTok().getLoc();
5153   int64_t FunctionId, FileNumber;
5154   if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5155       parseCVFileId(FileNumber, ".cv_loc"))
5156     return true;
5157 
5158   int64_t LineNumber = 0;
5159   if (getLexer().is(AsmToken::Integer)) {
5160     LineNumber = getTok().getIntVal();
5161     if (LineNumber < 0)
5162       return TokError("line number less than zero in '.cv_loc' directive");
5163     Lex();
5164   }
5165 
5166   int64_t ColumnPos = 0;
5167   if (getLexer().is(AsmToken::Integer)) {
5168     ColumnPos = getTok().getIntVal();
5169     if (ColumnPos < 0)
5170       return TokError("column position less than zero in '.cv_loc' directive");
5171     Lex();
5172   }
5173 
5174   bool PrologueEnd = false;
5175   uint64_t IsStmt = 0;
5176 
5177   auto parseOp = [&]() -> bool {
5178     StringRef Name;
5179     SMLoc Loc = getTok().getLoc();
5180     if (parseIdentifier(Name))
5181       return TokError("unexpected token in '.cv_loc' directive");
5182     if (Name == "prologue_end")
5183       PrologueEnd = true;
5184     else if (Name == "is_stmt") {
5185       Loc = getTok().getLoc();
5186       const MCExpr *Value;
5187       if (parseExpression(Value))
5188         return true;
5189       // The expression must be the constant 0 or 1.
5190       IsStmt = ~0ULL;
5191       if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5192         IsStmt = MCE->getValue();
5193 
5194       if (IsStmt > 1)
5195         return Error(Loc, "is_stmt value not 0 or 1");
5196     } else {
5197       return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5198     }
5199     return false;
5200   };
5201 
5202   if (parseMany(parseOp, false /*hasComma*/))
5203     return true;
5204 
5205   getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5206                                    ColumnPos, PrologueEnd, IsStmt, StringRef(),
5207                                    DirectiveLoc);
5208   return false;
5209 }
5210 
5211 /// parseDirectiveCVLinetable
5212 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
parseDirectiveCVLinetable()5213 bool MasmParser::parseDirectiveCVLinetable() {
5214   int64_t FunctionId;
5215   StringRef FnStartName, FnEndName;
5216   SMLoc Loc = getTok().getLoc();
5217   if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5218       parseToken(AsmToken::Comma,
5219                  "unexpected token in '.cv_linetable' directive") ||
5220       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5221                                   "expected identifier in directive") ||
5222       parseToken(AsmToken::Comma,
5223                  "unexpected token in '.cv_linetable' directive") ||
5224       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5225                                   "expected identifier in directive"))
5226     return true;
5227 
5228   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5229   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5230 
5231   getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5232   return false;
5233 }
5234 
5235 /// parseDirectiveCVInlineLinetable
5236 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
parseDirectiveCVInlineLinetable()5237 bool MasmParser::parseDirectiveCVInlineLinetable() {
5238   int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5239   StringRef FnStartName, FnEndName;
5240   SMLoc Loc = getTok().getLoc();
5241   if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5242       parseTokenLoc(Loc) ||
5243       parseIntToken(
5244           SourceFileId,
5245           "expected SourceField in '.cv_inline_linetable' directive") ||
5246       check(SourceFileId <= 0, Loc,
5247             "File id less than zero in '.cv_inline_linetable' directive") ||
5248       parseTokenLoc(Loc) ||
5249       parseIntToken(
5250           SourceLineNum,
5251           "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5252       check(SourceLineNum < 0, Loc,
5253             "Line number less than zero in '.cv_inline_linetable' directive") ||
5254       parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5255                                   "expected identifier in directive") ||
5256       parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5257                                   "expected identifier in directive"))
5258     return true;
5259 
5260   if (parseEOL())
5261     return true;
5262 
5263   MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5264   MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5265   getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5266                                                SourceLineNum, FnStartSym,
5267                                                FnEndSym);
5268   return false;
5269 }
5270 
initializeCVDefRangeTypeMap()5271 void MasmParser::initializeCVDefRangeTypeMap() {
5272   CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5273   CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5274   CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5275   CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5276 }
5277 
5278 /// parseDirectiveCVDefRange
5279 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
parseDirectiveCVDefRange()5280 bool MasmParser::parseDirectiveCVDefRange() {
5281   SMLoc Loc;
5282   std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5283   while (getLexer().is(AsmToken::Identifier)) {
5284     Loc = getLexer().getLoc();
5285     StringRef GapStartName;
5286     if (parseIdentifier(GapStartName))
5287       return Error(Loc, "expected identifier in directive");
5288     MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5289 
5290     Loc = getLexer().getLoc();
5291     StringRef GapEndName;
5292     if (parseIdentifier(GapEndName))
5293       return Error(Loc, "expected identifier in directive");
5294     MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5295 
5296     Ranges.push_back({GapStartSym, GapEndSym});
5297   }
5298 
5299   StringRef CVDefRangeTypeStr;
5300   if (parseToken(
5301           AsmToken::Comma,
5302           "expected comma before def_range type in .cv_def_range directive") ||
5303       parseIdentifier(CVDefRangeTypeStr))
5304     return Error(Loc, "expected def_range type in directive");
5305 
5306   StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5307       CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5308   CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5309                                 ? CVDR_DEFRANGE
5310                                 : CVTypeIt->getValue();
5311   switch (CVDRType) {
5312   case CVDR_DEFRANGE_REGISTER: {
5313     int64_t DRRegister;
5314     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5315                                     ".cv_def_range directive") ||
5316         parseAbsoluteExpression(DRRegister))
5317       return Error(Loc, "expected register number");
5318 
5319     codeview::DefRangeRegisterHeader DRHdr;
5320     DRHdr.Register = DRRegister;
5321     DRHdr.MayHaveNoName = 0;
5322     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5323     break;
5324   }
5325   case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5326     int64_t DROffset;
5327     if (parseToken(AsmToken::Comma,
5328                    "expected comma before offset in .cv_def_range directive") ||
5329         parseAbsoluteExpression(DROffset))
5330       return Error(Loc, "expected offset value");
5331 
5332     codeview::DefRangeFramePointerRelHeader DRHdr;
5333     DRHdr.Offset = DROffset;
5334     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5335     break;
5336   }
5337   case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5338     int64_t DRRegister;
5339     int64_t DROffsetInParent;
5340     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5341                                     ".cv_def_range directive") ||
5342         parseAbsoluteExpression(DRRegister))
5343       return Error(Loc, "expected register number");
5344     if (parseToken(AsmToken::Comma,
5345                    "expected comma before offset in .cv_def_range directive") ||
5346         parseAbsoluteExpression(DROffsetInParent))
5347       return Error(Loc, "expected offset value");
5348 
5349     codeview::DefRangeSubfieldRegisterHeader DRHdr;
5350     DRHdr.Register = DRRegister;
5351     DRHdr.MayHaveNoName = 0;
5352     DRHdr.OffsetInParent = DROffsetInParent;
5353     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5354     break;
5355   }
5356   case CVDR_DEFRANGE_REGISTER_REL: {
5357     int64_t DRRegister;
5358     int64_t DRFlags;
5359     int64_t DRBasePointerOffset;
5360     if (parseToken(AsmToken::Comma, "expected comma before register number in "
5361                                     ".cv_def_range directive") ||
5362         parseAbsoluteExpression(DRRegister))
5363       return Error(Loc, "expected register value");
5364     if (parseToken(
5365             AsmToken::Comma,
5366             "expected comma before flag value in .cv_def_range directive") ||
5367         parseAbsoluteExpression(DRFlags))
5368       return Error(Loc, "expected flag value");
5369     if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5370                                     "in .cv_def_range directive") ||
5371         parseAbsoluteExpression(DRBasePointerOffset))
5372       return Error(Loc, "expected base pointer offset value");
5373 
5374     codeview::DefRangeRegisterRelHeader DRHdr;
5375     DRHdr.Register = DRRegister;
5376     DRHdr.Flags = DRFlags;
5377     DRHdr.BasePointerOffset = DRBasePointerOffset;
5378     getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5379     break;
5380   }
5381   default:
5382     return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5383   }
5384   return true;
5385 }
5386 
5387 /// parseDirectiveCVString
5388 /// ::= .cv_stringtable "string"
parseDirectiveCVString()5389 bool MasmParser::parseDirectiveCVString() {
5390   std::string Data;
5391   if (checkForValidSection() || parseEscapedString(Data))
5392     return addErrorSuffix(" in '.cv_string' directive");
5393 
5394   // Put the string in the table and emit the offset.
5395   std::pair<StringRef, unsigned> Insertion =
5396       getCVContext().addToStringTable(Data);
5397   getStreamer().emitIntValue(Insertion.second, 4);
5398   return false;
5399 }
5400 
5401 /// parseDirectiveCVStringTable
5402 /// ::= .cv_stringtable
parseDirectiveCVStringTable()5403 bool MasmParser::parseDirectiveCVStringTable() {
5404   getStreamer().emitCVStringTableDirective();
5405   return false;
5406 }
5407 
5408 /// parseDirectiveCVFileChecksums
5409 /// ::= .cv_filechecksums
parseDirectiveCVFileChecksums()5410 bool MasmParser::parseDirectiveCVFileChecksums() {
5411   getStreamer().emitCVFileChecksumsDirective();
5412   return false;
5413 }
5414 
5415 /// parseDirectiveCVFileChecksumOffset
5416 /// ::= .cv_filechecksumoffset fileno
parseDirectiveCVFileChecksumOffset()5417 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5418   int64_t FileNo;
5419   if (parseIntToken(FileNo, "expected identifier in directive"))
5420     return true;
5421   if (parseEOL())
5422     return true;
5423   getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5424   return false;
5425 }
5426 
5427 /// parseDirectiveCVFPOData
5428 /// ::= .cv_fpo_data procsym
parseDirectiveCVFPOData()5429 bool MasmParser::parseDirectiveCVFPOData() {
5430   SMLoc DirLoc = getLexer().getLoc();
5431   StringRef ProcName;
5432   if (parseIdentifier(ProcName))
5433     return TokError("expected symbol name");
5434   if (parseEOL("unexpected tokens"))
5435     return addErrorSuffix(" in '.cv_fpo_data' directive");
5436   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5437   getStreamer().emitCVFPOData(ProcSym, DirLoc);
5438   return false;
5439 }
5440 
5441 /// parseDirectiveCFISections
5442 /// ::= .cfi_sections section [, section]
parseDirectiveCFISections()5443 bool MasmParser::parseDirectiveCFISections() {
5444   StringRef Name;
5445   bool EH = false;
5446   bool Debug = false;
5447 
5448   if (parseIdentifier(Name))
5449     return TokError("Expected an identifier");
5450 
5451   if (Name == ".eh_frame")
5452     EH = true;
5453   else if (Name == ".debug_frame")
5454     Debug = true;
5455 
5456   if (getLexer().is(AsmToken::Comma)) {
5457     Lex();
5458 
5459     if (parseIdentifier(Name))
5460       return TokError("Expected an identifier");
5461 
5462     if (Name == ".eh_frame")
5463       EH = true;
5464     else if (Name == ".debug_frame")
5465       Debug = true;
5466   }
5467 
5468   getStreamer().emitCFISections(EH, Debug);
5469   return false;
5470 }
5471 
5472 /// parseDirectiveCFIStartProc
5473 /// ::= .cfi_startproc [simple]
parseDirectiveCFIStartProc()5474 bool MasmParser::parseDirectiveCFIStartProc() {
5475   StringRef Simple;
5476   if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5477     if (check(parseIdentifier(Simple) || Simple != "simple",
5478               "unexpected token") ||
5479         parseEOL())
5480       return addErrorSuffix(" in '.cfi_startproc' directive");
5481   }
5482 
5483   // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5484   // being produced if this directive is emitted as part of preprocessor macro
5485   // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5486   // Tools like llvm-mc on the other hand are not affected by it, and report
5487   // correct context information.
5488   getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5489   return false;
5490 }
5491 
5492 /// parseDirectiveCFIEndProc
5493 /// ::= .cfi_endproc
parseDirectiveCFIEndProc()5494 bool MasmParser::parseDirectiveCFIEndProc() {
5495   getStreamer().emitCFIEndProc();
5496   return false;
5497 }
5498 
5499 /// parse register name or number.
parseRegisterOrRegisterNumber(int64_t & Register,SMLoc DirectiveLoc)5500 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5501                                                SMLoc DirectiveLoc) {
5502   MCRegister RegNo;
5503 
5504   if (getLexer().isNot(AsmToken::Integer)) {
5505     if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5506       return true;
5507     Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5508   } else
5509     return parseAbsoluteExpression(Register);
5510 
5511   return false;
5512 }
5513 
5514 /// parseDirectiveCFIDefCfa
5515 /// ::= .cfi_def_cfa register,  offset
parseDirectiveCFIDefCfa(SMLoc DirectiveLoc)5516 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5517   int64_t Register = 0, Offset = 0;
5518   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5519       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5520       parseAbsoluteExpression(Offset))
5521     return true;
5522 
5523   getStreamer().emitCFIDefCfa(Register, Offset);
5524   return false;
5525 }
5526 
5527 /// parseDirectiveCFIDefCfaOffset
5528 /// ::= .cfi_def_cfa_offset offset
parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc)5529 bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
5530   int64_t Offset = 0;
5531   if (parseAbsoluteExpression(Offset))
5532     return true;
5533 
5534   getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
5535   return false;
5536 }
5537 
5538 /// parseDirectiveCFIRegister
5539 /// ::= .cfi_register register, register
parseDirectiveCFIRegister(SMLoc DirectiveLoc)5540 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5541   int64_t Register1 = 0, Register2 = 0;
5542   if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5543       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5544       parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5545     return true;
5546 
5547   getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
5548   return false;
5549 }
5550 
5551 /// parseDirectiveCFIWindowSave
5552 /// ::= .cfi_window_save
parseDirectiveCFIWindowSave(SMLoc DirectiveLoc)5553 bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
5554   getStreamer().emitCFIWindowSave(DirectiveLoc);
5555   return false;
5556 }
5557 
5558 /// parseDirectiveCFIAdjustCfaOffset
5559 /// ::= .cfi_adjust_cfa_offset adjustment
parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc)5560 bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
5561   int64_t Adjustment = 0;
5562   if (parseAbsoluteExpression(Adjustment))
5563     return true;
5564 
5565   getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
5566   return false;
5567 }
5568 
5569 /// parseDirectiveCFIDefCfaRegister
5570 /// ::= .cfi_def_cfa_register register
parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc)5571 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5572   int64_t Register = 0;
5573   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5574     return true;
5575 
5576   getStreamer().emitCFIDefCfaRegister(Register);
5577   return false;
5578 }
5579 
5580 /// parseDirectiveCFIOffset
5581 /// ::= .cfi_offset register, offset
parseDirectiveCFIOffset(SMLoc DirectiveLoc)5582 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5583   int64_t Register = 0;
5584   int64_t Offset = 0;
5585 
5586   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5587       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5588       parseAbsoluteExpression(Offset))
5589     return true;
5590 
5591   getStreamer().emitCFIOffset(Register, Offset);
5592   return false;
5593 }
5594 
5595 /// parseDirectiveCFIRelOffset
5596 /// ::= .cfi_rel_offset register, offset
parseDirectiveCFIRelOffset(SMLoc DirectiveLoc)5597 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5598   int64_t Register = 0, Offset = 0;
5599 
5600   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5601       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5602       parseAbsoluteExpression(Offset))
5603     return true;
5604 
5605   getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
5606   return false;
5607 }
5608 
isValidEncoding(int64_t Encoding)5609 static bool isValidEncoding(int64_t Encoding) {
5610   if (Encoding & ~0xff)
5611     return false;
5612 
5613   if (Encoding == dwarf::DW_EH_PE_omit)
5614     return true;
5615 
5616   const unsigned Format = Encoding & 0xf;
5617   if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5618       Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5619       Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5620       Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5621     return false;
5622 
5623   const unsigned Application = Encoding & 0x70;
5624   if (Application != dwarf::DW_EH_PE_absptr &&
5625       Application != dwarf::DW_EH_PE_pcrel)
5626     return false;
5627 
5628   return true;
5629 }
5630 
5631 /// parseDirectiveCFIPersonalityOrLsda
5632 /// IsPersonality true for cfi_personality, false for cfi_lsda
5633 /// ::= .cfi_personality encoding, [symbol_name]
5634 /// ::= .cfi_lsda encoding, [symbol_name]
parseDirectiveCFIPersonalityOrLsda(bool IsPersonality)5635 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5636   int64_t Encoding = 0;
5637   if (parseAbsoluteExpression(Encoding))
5638     return true;
5639   if (Encoding == dwarf::DW_EH_PE_omit)
5640     return false;
5641 
5642   StringRef Name;
5643   if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5644       parseToken(AsmToken::Comma, "unexpected token in directive") ||
5645       check(parseIdentifier(Name), "expected identifier in directive"))
5646     return true;
5647 
5648   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5649 
5650   if (IsPersonality)
5651     getStreamer().emitCFIPersonality(Sym, Encoding);
5652   else
5653     getStreamer().emitCFILsda(Sym, Encoding);
5654   return false;
5655 }
5656 
5657 /// parseDirectiveCFIRememberState
5658 /// ::= .cfi_remember_state
parseDirectiveCFIRememberState(SMLoc DirectiveLoc)5659 bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
5660   getStreamer().emitCFIRememberState(DirectiveLoc);
5661   return false;
5662 }
5663 
5664 /// parseDirectiveCFIRestoreState
5665 /// ::= .cfi_remember_state
parseDirectiveCFIRestoreState(SMLoc DirectiveLoc)5666 bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
5667   getStreamer().emitCFIRestoreState(DirectiveLoc);
5668   return false;
5669 }
5670 
5671 /// parseDirectiveCFISameValue
5672 /// ::= .cfi_same_value register
parseDirectiveCFISameValue(SMLoc DirectiveLoc)5673 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5674   int64_t Register = 0;
5675 
5676   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5677     return true;
5678 
5679   getStreamer().emitCFISameValue(Register, DirectiveLoc);
5680   return false;
5681 }
5682 
5683 /// parseDirectiveCFIRestore
5684 /// ::= .cfi_restore register
parseDirectiveCFIRestore(SMLoc DirectiveLoc)5685 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5686   int64_t Register = 0;
5687   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5688     return true;
5689 
5690   getStreamer().emitCFIRestore(Register);
5691   return false;
5692 }
5693 
5694 /// parseDirectiveCFIEscape
5695 /// ::= .cfi_escape expression[,...]
parseDirectiveCFIEscape(SMLoc DirectiveLoc)5696 bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
5697   std::string Values;
5698   int64_t CurrValue;
5699   if (parseAbsoluteExpression(CurrValue))
5700     return true;
5701 
5702   Values.push_back((uint8_t)CurrValue);
5703 
5704   while (getLexer().is(AsmToken::Comma)) {
5705     Lex();
5706 
5707     if (parseAbsoluteExpression(CurrValue))
5708       return true;
5709 
5710     Values.push_back((uint8_t)CurrValue);
5711   }
5712 
5713   getStreamer().emitCFIEscape(Values, DirectiveLoc);
5714   return false;
5715 }
5716 
5717 /// parseDirectiveCFIReturnColumn
5718 /// ::= .cfi_return_column register
parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc)5719 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5720   int64_t Register = 0;
5721   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5722     return true;
5723   getStreamer().emitCFIReturnColumn(Register);
5724   return false;
5725 }
5726 
5727 /// parseDirectiveCFISignalFrame
5728 /// ::= .cfi_signal_frame
parseDirectiveCFISignalFrame()5729 bool MasmParser::parseDirectiveCFISignalFrame() {
5730   if (parseEOL())
5731     return true;
5732 
5733   getStreamer().emitCFISignalFrame();
5734   return false;
5735 }
5736 
5737 /// parseDirectiveCFIUndefined
5738 /// ::= .cfi_undefined register
parseDirectiveCFIUndefined(SMLoc DirectiveLoc)5739 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5740   int64_t Register = 0;
5741 
5742   if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5743     return true;
5744 
5745   getStreamer().emitCFIUndefined(Register);
5746   return false;
5747 }
5748 
5749 /// parseDirectiveMacro
5750 /// ::= name macro [parameters]
5751 ///     ["LOCAL" identifiers]
5752 ///   parameters ::= parameter [, parameter]*
5753 ///   parameter ::= name ":" qualifier
5754 ///   qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)5755 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5756   MCAsmMacroParameters Parameters;
5757   while (getLexer().isNot(AsmToken::EndOfStatement)) {
5758     if (!Parameters.empty() && Parameters.back().Vararg)
5759       return Error(Lexer.getLoc(),
5760                    "Vararg parameter '" + Parameters.back().Name +
5761                        "' should be last in the list of parameters");
5762 
5763     MCAsmMacroParameter Parameter;
5764     if (parseIdentifier(Parameter.Name))
5765       return TokError("expected identifier in 'macro' directive");
5766 
5767     // Emit an error if two (or more) named parameters share the same name.
5768     for (const MCAsmMacroParameter& CurrParam : Parameters)
5769       if (CurrParam.Name.equals_insensitive(Parameter.Name))
5770         return TokError("macro '" + Name + "' has multiple parameters"
5771                         " named '" + Parameter.Name + "'");
5772 
5773     if (Lexer.is(AsmToken::Colon)) {
5774       Lex();  // consume ':'
5775 
5776       if (parseOptionalToken(AsmToken::Equal)) {
5777         // Default value
5778         SMLoc ParamLoc;
5779 
5780         ParamLoc = Lexer.getLoc();
5781         if (parseMacroArgument(nullptr, Parameter.Value))
5782           return true;
5783       } else {
5784         SMLoc QualLoc;
5785         StringRef Qualifier;
5786 
5787         QualLoc = Lexer.getLoc();
5788         if (parseIdentifier(Qualifier))
5789           return Error(QualLoc, "missing parameter qualifier for "
5790                                 "'" +
5791                                     Parameter.Name + "' in macro '" + Name +
5792                                     "'");
5793 
5794         if (Qualifier.equals_insensitive("req"))
5795           Parameter.Required = true;
5796         else if (Qualifier.equals_insensitive("vararg"))
5797           Parameter.Vararg = true;
5798         else
5799           return Error(QualLoc,
5800                        Qualifier + " is not a valid parameter qualifier for '" +
5801                            Parameter.Name + "' in macro '" + Name + "'");
5802       }
5803     }
5804 
5805     Parameters.push_back(std::move(Parameter));
5806 
5807     if (getLexer().is(AsmToken::Comma))
5808       Lex();
5809   }
5810 
5811   // Eat just the end of statement.
5812   Lexer.Lex();
5813 
5814   std::vector<std::string> Locals;
5815   if (getTok().is(AsmToken::Identifier) &&
5816       getTok().getIdentifier().equals_insensitive("local")) {
5817     Lex(); // Eat the LOCAL directive.
5818 
5819     StringRef ID;
5820     while (true) {
5821       if (parseIdentifier(ID))
5822         return true;
5823       Locals.push_back(ID.lower());
5824 
5825       // If we see a comma, continue (and allow line continuation).
5826       if (!parseOptionalToken(AsmToken::Comma))
5827         break;
5828       parseOptionalToken(AsmToken::EndOfStatement);
5829     }
5830   }
5831 
5832   // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5833   AsmToken EndToken, StartToken = getTok();
5834   unsigned MacroDepth = 0;
5835   bool IsMacroFunction = false;
5836   // Lex the macro definition.
5837   while (true) {
5838     // Ignore Lexing errors in macros.
5839     while (Lexer.is(AsmToken::Error)) {
5840       Lexer.Lex();
5841     }
5842 
5843     // Check whether we have reached the end of the file.
5844     if (getLexer().is(AsmToken::Eof))
5845       return Error(NameLoc, "no matching 'endm' in definition");
5846 
5847     // Otherwise, check whether we have reached the 'endm'... and determine if
5848     // this is a macro function.
5849     if (getLexer().is(AsmToken::Identifier)) {
5850       if (getTok().getIdentifier().equals_insensitive("endm")) {
5851         if (MacroDepth == 0) { // Outermost macro.
5852           EndToken = getTok();
5853           Lexer.Lex();
5854           if (getLexer().isNot(AsmToken::EndOfStatement))
5855             return TokError("unexpected token in '" + EndToken.getIdentifier() +
5856                             "' directive");
5857           break;
5858         } else {
5859           // Otherwise we just found the end of an inner macro.
5860           --MacroDepth;
5861         }
5862       } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5863         if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5864           IsMacroFunction = true;
5865         }
5866       } else if (isMacroLikeDirective()) {
5867         // We allow nested macros. Those aren't instantiated until the
5868         // outermost macro is expanded so just ignore them for now.
5869         ++MacroDepth;
5870       }
5871     }
5872 
5873     // Otherwise, scan til the end of the statement.
5874     eatToEndOfStatement();
5875   }
5876 
5877   if (getContext().lookupMacro(Name.lower())) {
5878     return Error(NameLoc, "macro '" + Name + "' is already defined");
5879   }
5880 
5881   const char *BodyStart = StartToken.getLoc().getPointer();
5882   const char *BodyEnd = EndToken.getLoc().getPointer();
5883   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5884   MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5885                    IsMacroFunction);
5886   DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5887                   Macro.dump());
5888   getContext().defineMacro(Name.lower(), std::move(Macro));
5889   return false;
5890 }
5891 
5892 /// parseDirectiveExitMacro
5893 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)5894 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5895                                          StringRef Directive,
5896                                          std::string &Value) {
5897   SMLoc EndLoc = getTok().getLoc();
5898   if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5899     return Error(EndLoc,
5900                  "unable to parse text item in '" + Directive + "' directive");
5901   eatToEndOfStatement();
5902 
5903   if (!isInsideMacroInstantiation())
5904     return TokError("unexpected '" + Directive + "' in file, "
5905                                                  "no current macro definition");
5906 
5907   // Exit all conditionals that are active in the current macro.
5908   while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5909     TheCondState = TheCondStack.back();
5910     TheCondStack.pop_back();
5911   }
5912 
5913   handleMacroExit();
5914   return false;
5915 }
5916 
5917 /// parseDirectiveEndMacro
5918 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)5919 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5920   if (getLexer().isNot(AsmToken::EndOfStatement))
5921     return TokError("unexpected token in '" + Directive + "' directive");
5922 
5923   // If we are inside a macro instantiation, terminate the current
5924   // instantiation.
5925   if (isInsideMacroInstantiation()) {
5926     handleMacroExit();
5927     return false;
5928   }
5929 
5930   // Otherwise, this .endmacro is a stray entry in the file; well formed
5931   // .endmacro directives are handled during the macro definition parsing.
5932   return TokError("unexpected '" + Directive + "' in file, "
5933                                                "no current macro definition");
5934 }
5935 
5936 /// parseDirectivePurgeMacro
5937 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)5938 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5939   StringRef Name;
5940   while (true) {
5941     SMLoc NameLoc;
5942     if (parseTokenLoc(NameLoc) ||
5943         check(parseIdentifier(Name), NameLoc,
5944               "expected identifier in 'purge' directive"))
5945       return true;
5946 
5947     DEBUG_WITH_TYPE("asm-macros", dbgs()
5948                                       << "Un-defining macro: " << Name << "\n");
5949     if (!getContext().lookupMacro(Name.lower()))
5950       return Error(NameLoc, "macro '" + Name + "' is not defined");
5951     getContext().undefineMacro(Name.lower());
5952 
5953     if (!parseOptionalToken(AsmToken::Comma))
5954       break;
5955     parseOptionalToken(AsmToken::EndOfStatement);
5956   }
5957 
5958   return false;
5959 }
5960 
parseDirectiveExtern()5961 bool MasmParser::parseDirectiveExtern() {
5962   // .extern is the default - but we still need to take any provided type info.
5963   auto parseOp = [&]() -> bool {
5964     StringRef Name;
5965     SMLoc NameLoc = getTok().getLoc();
5966     if (parseIdentifier(Name))
5967       return Error(NameLoc, "expected name");
5968     if (parseToken(AsmToken::Colon))
5969       return true;
5970 
5971     StringRef TypeName;
5972     SMLoc TypeLoc = getTok().getLoc();
5973     if (parseIdentifier(TypeName))
5974       return Error(TypeLoc, "expected type");
5975     if (!TypeName.equals_insensitive("proc")) {
5976       AsmTypeInfo Type;
5977       if (lookUpType(TypeName, Type))
5978         return Error(TypeLoc, "unrecognized type");
5979       KnownType[Name.lower()] = Type;
5980     }
5981 
5982     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5983     Sym->setExternal(true);
5984     getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
5985 
5986     return false;
5987   };
5988 
5989   if (parseMany(parseOp))
5990     return addErrorSuffix(" in directive 'extern'");
5991   return false;
5992 }
5993 
5994 /// parseDirectiveSymbolAttribute
5995 ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)5996 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5997   auto parseOp = [&]() -> bool {
5998     StringRef Name;
5999     SMLoc Loc = getTok().getLoc();
6000     if (parseIdentifier(Name))
6001       return Error(Loc, "expected identifier");
6002     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6003 
6004     // Assembler local symbols don't make any sense here. Complain loudly.
6005     if (Sym->isTemporary())
6006       return Error(Loc, "non-local symbol required");
6007 
6008     if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6009       return Error(Loc, "unable to emit symbol attribute");
6010     return false;
6011   };
6012 
6013   if (parseMany(parseOp))
6014     return addErrorSuffix(" in directive");
6015   return false;
6016 }
6017 
6018 /// parseDirectiveComm
6019 ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)6020 bool MasmParser::parseDirectiveComm(bool IsLocal) {
6021   if (checkForValidSection())
6022     return true;
6023 
6024   SMLoc IDLoc = getLexer().getLoc();
6025   StringRef Name;
6026   if (parseIdentifier(Name))
6027     return TokError("expected identifier in directive");
6028 
6029   // Handle the identifier as the key symbol.
6030   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6031 
6032   if (getLexer().isNot(AsmToken::Comma))
6033     return TokError("unexpected token in directive");
6034   Lex();
6035 
6036   int64_t Size;
6037   SMLoc SizeLoc = getLexer().getLoc();
6038   if (parseAbsoluteExpression(Size))
6039     return true;
6040 
6041   int64_t Pow2Alignment = 0;
6042   SMLoc Pow2AlignmentLoc;
6043   if (getLexer().is(AsmToken::Comma)) {
6044     Lex();
6045     Pow2AlignmentLoc = getLexer().getLoc();
6046     if (parseAbsoluteExpression(Pow2Alignment))
6047       return true;
6048 
6049     LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6050     if (IsLocal && LCOMM == LCOMM::NoAlignment)
6051       return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6052 
6053     // If this target takes alignments in bytes (not log) validate and convert.
6054     if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6055         (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6056       if (!isPowerOf2_64(Pow2Alignment))
6057         return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6058       Pow2Alignment = Log2_64(Pow2Alignment);
6059     }
6060   }
6061 
6062   if (parseEOL())
6063     return true;
6064 
6065   // NOTE: a size of zero for a .comm should create a undefined symbol
6066   // but a size of .lcomm creates a bss symbol of size zero.
6067   if (Size < 0)
6068     return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6069                           "be less than zero");
6070 
6071   // NOTE: The alignment in the directive is a power of 2 value, the assembler
6072   // may internally end up wanting an alignment in bytes.
6073   // FIXME: Diagnose overflow.
6074   if (Pow2Alignment < 0)
6075     return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6076                                    "alignment, can't be less than zero");
6077 
6078   Sym->redefineIfPossible();
6079   if (!Sym->isUndefined())
6080     return Error(IDLoc, "invalid symbol redefinition");
6081 
6082   // Create the Symbol as a common or local common with Size and Pow2Alignment.
6083   if (IsLocal) {
6084     getStreamer().emitLocalCommonSymbol(Sym, Size,
6085                                         Align(1ULL << Pow2Alignment));
6086     return false;
6087   }
6088 
6089   getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
6090   return false;
6091 }
6092 
6093 /// parseDirectiveComment
6094 ///  ::= comment delimiter [[text]]
6095 ///              [[text]]
6096 ///              [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)6097 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6098   std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6099   size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6100   assert(DelimiterEnd != std::string::npos);
6101   StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6102   if (Delimiter.empty())
6103     return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6104   do {
6105     if (getTok().is(AsmToken::Eof))
6106       return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6107     Lex();  // eat end of statement
6108   } while (
6109       !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6110   return parseEOL();
6111 }
6112 
6113 /// parseDirectiveInclude
6114 ///  ::= include <filename>
6115 ///    | include filename
parseDirectiveInclude()6116 bool MasmParser::parseDirectiveInclude() {
6117   // Allow the strings to have escaped octal character sequence.
6118   std::string Filename;
6119   SMLoc IncludeLoc = getTok().getLoc();
6120 
6121   if (parseAngleBracketString(Filename))
6122     Filename = parseStringTo(AsmToken::EndOfStatement);
6123   if (check(Filename.empty(), "missing filename in 'include' directive") ||
6124       check(getTok().isNot(AsmToken::EndOfStatement),
6125             "unexpected token in 'include' directive") ||
6126       // Attempt to switch the lexer to the included file before consuming the
6127       // end of statement to avoid losing it when we switch.
6128       check(enterIncludeFile(Filename), IncludeLoc,
6129             "Could not find include file '" + Filename + "'"))
6130     return true;
6131 
6132   return false;
6133 }
6134 
6135 /// parseDirectiveIf
6136 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6137 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6138   TheCondStack.push_back(TheCondState);
6139   TheCondState.TheCond = AsmCond::IfCond;
6140   if (TheCondState.Ignore) {
6141     eatToEndOfStatement();
6142   } else {
6143     int64_t ExprValue;
6144     if (parseAbsoluteExpression(ExprValue) || parseEOL())
6145       return true;
6146 
6147     switch (DirKind) {
6148     default:
6149       llvm_unreachable("unsupported directive");
6150     case DK_IF:
6151       break;
6152     case DK_IFE:
6153       ExprValue = ExprValue == 0;
6154       break;
6155     }
6156 
6157     TheCondState.CondMet = ExprValue;
6158     TheCondState.Ignore = !TheCondState.CondMet;
6159   }
6160 
6161   return false;
6162 }
6163 
6164 /// parseDirectiveIfb
6165 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)6166 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6167   TheCondStack.push_back(TheCondState);
6168   TheCondState.TheCond = AsmCond::IfCond;
6169 
6170   if (TheCondState.Ignore) {
6171     eatToEndOfStatement();
6172   } else {
6173     std::string Str;
6174     if (parseTextItem(Str))
6175       return TokError("expected text item parameter for 'ifb' directive");
6176 
6177     if (parseEOL())
6178       return true;
6179 
6180     TheCondState.CondMet = ExpectBlank == Str.empty();
6181     TheCondState.Ignore = !TheCondState.CondMet;
6182   }
6183 
6184   return false;
6185 }
6186 
6187 /// parseDirectiveIfidn
6188 ///   ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6189 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6190                                      bool CaseInsensitive) {
6191   std::string String1, String2;
6192 
6193   if (parseTextItem(String1)) {
6194     if (ExpectEqual)
6195       return TokError("expected text item parameter for 'ifidn' directive");
6196     return TokError("expected text item parameter for 'ifdif' directive");
6197   }
6198 
6199   if (Lexer.isNot(AsmToken::Comma)) {
6200     if (ExpectEqual)
6201       return TokError(
6202           "expected comma after first string for 'ifidn' directive");
6203     return TokError("expected comma after first string for 'ifdif' directive");
6204   }
6205   Lex();
6206 
6207   if (parseTextItem(String2)) {
6208     if (ExpectEqual)
6209       return TokError("expected text item parameter for 'ifidn' directive");
6210     return TokError("expected text item parameter for 'ifdif' directive");
6211   }
6212 
6213   TheCondStack.push_back(TheCondState);
6214   TheCondState.TheCond = AsmCond::IfCond;
6215   if (CaseInsensitive)
6216     TheCondState.CondMet =
6217         ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6218   else
6219     TheCondState.CondMet = ExpectEqual == (String1 == String2);
6220   TheCondState.Ignore = !TheCondState.CondMet;
6221 
6222   return false;
6223 }
6224 
6225 /// parseDirectiveIfdef
6226 /// ::= ifdef symbol
6227 ///   | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)6228 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6229   TheCondStack.push_back(TheCondState);
6230   TheCondState.TheCond = AsmCond::IfCond;
6231 
6232   if (TheCondState.Ignore) {
6233     eatToEndOfStatement();
6234   } else {
6235     bool is_defined = false;
6236     MCRegister Reg;
6237     SMLoc StartLoc, EndLoc;
6238     is_defined =
6239         getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6240     if (!is_defined) {
6241       StringRef Name;
6242       if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6243           parseEOL())
6244         return true;
6245 
6246       if (BuiltinSymbolMap.contains(Name.lower())) {
6247         is_defined = true;
6248       } else if (Variables.contains(Name.lower())) {
6249         is_defined = true;
6250       } else {
6251         MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6252         is_defined = (Sym && !Sym->isUndefined(false));
6253       }
6254     }
6255 
6256     TheCondState.CondMet = (is_defined == expect_defined);
6257     TheCondState.Ignore = !TheCondState.CondMet;
6258   }
6259 
6260   return false;
6261 }
6262 
6263 /// parseDirectiveElseIf
6264 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6265 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6266                                       DirectiveKind DirKind) {
6267   if (TheCondState.TheCond != AsmCond::IfCond &&
6268       TheCondState.TheCond != AsmCond::ElseIfCond)
6269     return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6270                                " .if or  an .elseif");
6271   TheCondState.TheCond = AsmCond::ElseIfCond;
6272 
6273   bool LastIgnoreState = false;
6274   if (!TheCondStack.empty())
6275     LastIgnoreState = TheCondStack.back().Ignore;
6276   if (LastIgnoreState || TheCondState.CondMet) {
6277     TheCondState.Ignore = true;
6278     eatToEndOfStatement();
6279   } else {
6280     int64_t ExprValue;
6281     if (parseAbsoluteExpression(ExprValue))
6282       return true;
6283 
6284     if (parseEOL())
6285       return true;
6286 
6287     switch (DirKind) {
6288     default:
6289       llvm_unreachable("unsupported directive");
6290     case DK_ELSEIF:
6291       break;
6292     case DK_ELSEIFE:
6293       ExprValue = ExprValue == 0;
6294       break;
6295     }
6296 
6297     TheCondState.CondMet = ExprValue;
6298     TheCondState.Ignore = !TheCondState.CondMet;
6299   }
6300 
6301   return false;
6302 }
6303 
6304 /// parseDirectiveElseIfb
6305 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)6306 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6307   if (TheCondState.TheCond != AsmCond::IfCond &&
6308       TheCondState.TheCond != AsmCond::ElseIfCond)
6309     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6310                                " if or an elseif");
6311   TheCondState.TheCond = AsmCond::ElseIfCond;
6312 
6313   bool LastIgnoreState = false;
6314   if (!TheCondStack.empty())
6315     LastIgnoreState = TheCondStack.back().Ignore;
6316   if (LastIgnoreState || TheCondState.CondMet) {
6317     TheCondState.Ignore = true;
6318     eatToEndOfStatement();
6319   } else {
6320     std::string Str;
6321     if (parseTextItem(Str)) {
6322       if (ExpectBlank)
6323         return TokError("expected text item parameter for 'elseifb' directive");
6324       return TokError("expected text item parameter for 'elseifnb' directive");
6325     }
6326 
6327     if (parseEOL())
6328       return true;
6329 
6330     TheCondState.CondMet = ExpectBlank == Str.empty();
6331     TheCondState.Ignore = !TheCondState.CondMet;
6332   }
6333 
6334   return false;
6335 }
6336 
6337 /// parseDirectiveElseIfdef
6338 /// ::= elseifdef symbol
6339 ///   | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)6340 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6341                                          bool expect_defined) {
6342   if (TheCondState.TheCond != AsmCond::IfCond &&
6343       TheCondState.TheCond != AsmCond::ElseIfCond)
6344     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6345                                " if or an elseif");
6346   TheCondState.TheCond = AsmCond::ElseIfCond;
6347 
6348   bool LastIgnoreState = false;
6349   if (!TheCondStack.empty())
6350     LastIgnoreState = TheCondStack.back().Ignore;
6351   if (LastIgnoreState || TheCondState.CondMet) {
6352     TheCondState.Ignore = true;
6353     eatToEndOfStatement();
6354   } else {
6355     bool is_defined = false;
6356     MCRegister Reg;
6357     SMLoc StartLoc, EndLoc;
6358     is_defined =
6359         getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6360     if (!is_defined) {
6361       StringRef Name;
6362       if (check(parseIdentifier(Name),
6363                 "expected identifier after 'elseifdef'") ||
6364           parseEOL())
6365         return true;
6366 
6367       if (BuiltinSymbolMap.contains(Name.lower())) {
6368         is_defined = true;
6369       } else if (Variables.contains(Name.lower())) {
6370         is_defined = true;
6371       } else {
6372         MCSymbol *Sym = getContext().lookupSymbol(Name);
6373         is_defined = (Sym && !Sym->isUndefined(false));
6374       }
6375     }
6376 
6377     TheCondState.CondMet = (is_defined == expect_defined);
6378     TheCondState.Ignore = !TheCondState.CondMet;
6379   }
6380 
6381   return false;
6382 }
6383 
6384 /// parseDirectiveElseIfidn
6385 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6386 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6387                                          bool CaseInsensitive) {
6388   if (TheCondState.TheCond != AsmCond::IfCond &&
6389       TheCondState.TheCond != AsmCond::ElseIfCond)
6390     return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6391                                " if or an elseif");
6392   TheCondState.TheCond = AsmCond::ElseIfCond;
6393 
6394   bool LastIgnoreState = false;
6395   if (!TheCondStack.empty())
6396     LastIgnoreState = TheCondStack.back().Ignore;
6397   if (LastIgnoreState || TheCondState.CondMet) {
6398     TheCondState.Ignore = true;
6399     eatToEndOfStatement();
6400   } else {
6401     std::string String1, String2;
6402 
6403     if (parseTextItem(String1)) {
6404       if (ExpectEqual)
6405         return TokError(
6406             "expected text item parameter for 'elseifidn' directive");
6407       return TokError("expected text item parameter for 'elseifdif' directive");
6408     }
6409 
6410     if (Lexer.isNot(AsmToken::Comma)) {
6411       if (ExpectEqual)
6412         return TokError(
6413             "expected comma after first string for 'elseifidn' directive");
6414       return TokError(
6415           "expected comma after first string for 'elseifdif' directive");
6416     }
6417     Lex();
6418 
6419     if (parseTextItem(String2)) {
6420       if (ExpectEqual)
6421         return TokError(
6422             "expected text item parameter for 'elseifidn' directive");
6423       return TokError("expected text item parameter for 'elseifdif' directive");
6424     }
6425 
6426     if (CaseInsensitive)
6427       TheCondState.CondMet =
6428           ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6429     else
6430       TheCondState.CondMet = ExpectEqual == (String1 == String2);
6431     TheCondState.Ignore = !TheCondState.CondMet;
6432   }
6433 
6434   return false;
6435 }
6436 
6437 /// parseDirectiveElse
6438 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)6439 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6440   if (parseEOL())
6441     return true;
6442 
6443   if (TheCondState.TheCond != AsmCond::IfCond &&
6444       TheCondState.TheCond != AsmCond::ElseIfCond)
6445     return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6446                                " or an elseif");
6447   TheCondState.TheCond = AsmCond::ElseCond;
6448   bool LastIgnoreState = false;
6449   if (!TheCondStack.empty())
6450     LastIgnoreState = TheCondStack.back().Ignore;
6451   if (LastIgnoreState || TheCondState.CondMet)
6452     TheCondState.Ignore = true;
6453   else
6454     TheCondState.Ignore = false;
6455 
6456   return false;
6457 }
6458 
6459 /// parseDirectiveEnd
6460 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)6461 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6462   if (parseEOL())
6463     return true;
6464 
6465   while (Lexer.isNot(AsmToken::Eof))
6466     Lexer.Lex();
6467 
6468   return false;
6469 }
6470 
6471 /// parseDirectiveError
6472 ///   ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)6473 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6474   if (!TheCondStack.empty()) {
6475     if (TheCondStack.back().Ignore) {
6476       eatToEndOfStatement();
6477       return false;
6478     }
6479   }
6480 
6481   std::string Message = ".err directive invoked in source file";
6482   if (Lexer.isNot(AsmToken::EndOfStatement))
6483     Message = parseStringTo(AsmToken::EndOfStatement);
6484   Lex();
6485 
6486   return Error(DirectiveLoc, Message);
6487 }
6488 
6489 /// parseDirectiveErrorIfb
6490 ///   ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)6491 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6492   if (!TheCondStack.empty()) {
6493     if (TheCondStack.back().Ignore) {
6494       eatToEndOfStatement();
6495       return false;
6496     }
6497   }
6498 
6499   std::string Text;
6500   if (parseTextItem(Text))
6501     return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6502 
6503   std::string Message = ".errb directive invoked in source file";
6504   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6505     if (parseToken(AsmToken::Comma))
6506       return addErrorSuffix(" in '.errb' directive");
6507     Message = parseStringTo(AsmToken::EndOfStatement);
6508   }
6509   Lex();
6510 
6511   if (Text.empty() == ExpectBlank)
6512     return Error(DirectiveLoc, Message);
6513   return false;
6514 }
6515 
6516 /// parseDirectiveErrorIfdef
6517 ///   ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)6518 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6519                                           bool ExpectDefined) {
6520   if (!TheCondStack.empty()) {
6521     if (TheCondStack.back().Ignore) {
6522       eatToEndOfStatement();
6523       return false;
6524     }
6525   }
6526 
6527   bool IsDefined = false;
6528   MCRegister Reg;
6529   SMLoc StartLoc, EndLoc;
6530   IsDefined =
6531       getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6532   if (!IsDefined) {
6533     StringRef Name;
6534     if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6535       return true;
6536 
6537     if (BuiltinSymbolMap.contains(Name.lower())) {
6538       IsDefined = true;
6539     } else if (Variables.contains(Name.lower())) {
6540       IsDefined = true;
6541     } else {
6542       MCSymbol *Sym = getContext().lookupSymbol(Name);
6543       IsDefined = (Sym && !Sym->isUndefined(false));
6544     }
6545   }
6546 
6547   std::string Message = ".errdef directive invoked in source file";
6548   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6549     if (parseToken(AsmToken::Comma))
6550       return addErrorSuffix(" in '.errdef' directive");
6551     Message = parseStringTo(AsmToken::EndOfStatement);
6552   }
6553   Lex();
6554 
6555   if (IsDefined == ExpectDefined)
6556     return Error(DirectiveLoc, Message);
6557   return false;
6558 }
6559 
6560 /// parseDirectiveErrorIfidn
6561 ///   ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6562 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6563                                           bool CaseInsensitive) {
6564   if (!TheCondStack.empty()) {
6565     if (TheCondStack.back().Ignore) {
6566       eatToEndOfStatement();
6567       return false;
6568     }
6569   }
6570 
6571   std::string String1, String2;
6572 
6573   if (parseTextItem(String1)) {
6574     if (ExpectEqual)
6575       return TokError("expected string parameter for '.erridn' directive");
6576     return TokError("expected string parameter for '.errdif' directive");
6577   }
6578 
6579   if (Lexer.isNot(AsmToken::Comma)) {
6580     if (ExpectEqual)
6581       return TokError(
6582           "expected comma after first string for '.erridn' directive");
6583     return TokError(
6584         "expected comma after first string for '.errdif' directive");
6585   }
6586   Lex();
6587 
6588   if (parseTextItem(String2)) {
6589     if (ExpectEqual)
6590       return TokError("expected string parameter for '.erridn' directive");
6591     return TokError("expected string parameter for '.errdif' directive");
6592   }
6593 
6594   std::string Message;
6595   if (ExpectEqual)
6596     Message = ".erridn directive invoked in source file";
6597   else
6598     Message = ".errdif directive invoked in source file";
6599   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6600     if (parseToken(AsmToken::Comma))
6601       return addErrorSuffix(" in '.erridn' directive");
6602     Message = parseStringTo(AsmToken::EndOfStatement);
6603   }
6604   Lex();
6605 
6606   if (CaseInsensitive)
6607     TheCondState.CondMet =
6608         ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6609   else
6610     TheCondState.CondMet = ExpectEqual == (String1 == String2);
6611   TheCondState.Ignore = !TheCondState.CondMet;
6612 
6613   if ((CaseInsensitive &&
6614        ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6615       (ExpectEqual == (String1 == String2)))
6616     return Error(DirectiveLoc, Message);
6617   return false;
6618 }
6619 
6620 /// parseDirectiveErrorIfe
6621 ///   ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)6622 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6623   if (!TheCondStack.empty()) {
6624     if (TheCondStack.back().Ignore) {
6625       eatToEndOfStatement();
6626       return false;
6627     }
6628   }
6629 
6630   int64_t ExprValue;
6631   if (parseAbsoluteExpression(ExprValue))
6632     return addErrorSuffix(" in '.erre' directive");
6633 
6634   std::string Message = ".erre directive invoked in source file";
6635   if (Lexer.isNot(AsmToken::EndOfStatement)) {
6636     if (parseToken(AsmToken::Comma))
6637       return addErrorSuffix(" in '.erre' directive");
6638     Message = parseStringTo(AsmToken::EndOfStatement);
6639   }
6640   Lex();
6641 
6642   if ((ExprValue == 0) == ExpectZero)
6643     return Error(DirectiveLoc, Message);
6644   return false;
6645 }
6646 
6647 /// parseDirectiveEndIf
6648 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)6649 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6650   if (parseEOL())
6651     return true;
6652 
6653   if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6654     return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6655                                "an .if or .else");
6656   if (!TheCondStack.empty()) {
6657     TheCondState = TheCondStack.back();
6658     TheCondStack.pop_back();
6659   }
6660 
6661   return false;
6662 }
6663 
initializeDirectiveKindMap()6664 void MasmParser::initializeDirectiveKindMap() {
6665   DirectiveKindMap["="] = DK_ASSIGN;
6666   DirectiveKindMap["equ"] = DK_EQU;
6667   DirectiveKindMap["textequ"] = DK_TEXTEQU;
6668   // DirectiveKindMap[".ascii"] = DK_ASCII;
6669   // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6670   // DirectiveKindMap[".string"] = DK_STRING;
6671   DirectiveKindMap["byte"] = DK_BYTE;
6672   DirectiveKindMap["sbyte"] = DK_SBYTE;
6673   DirectiveKindMap["word"] = DK_WORD;
6674   DirectiveKindMap["sword"] = DK_SWORD;
6675   DirectiveKindMap["dword"] = DK_DWORD;
6676   DirectiveKindMap["sdword"] = DK_SDWORD;
6677   DirectiveKindMap["fword"] = DK_FWORD;
6678   DirectiveKindMap["qword"] = DK_QWORD;
6679   DirectiveKindMap["sqword"] = DK_SQWORD;
6680   DirectiveKindMap["real4"] = DK_REAL4;
6681   DirectiveKindMap["real8"] = DK_REAL8;
6682   DirectiveKindMap["real10"] = DK_REAL10;
6683   DirectiveKindMap["align"] = DK_ALIGN;
6684   DirectiveKindMap["even"] = DK_EVEN;
6685   DirectiveKindMap["org"] = DK_ORG;
6686   DirectiveKindMap["extern"] = DK_EXTERN;
6687   DirectiveKindMap["extrn"] = DK_EXTERN;
6688   DirectiveKindMap["public"] = DK_PUBLIC;
6689   // DirectiveKindMap[".comm"] = DK_COMM;
6690   DirectiveKindMap["comment"] = DK_COMMENT;
6691   DirectiveKindMap["include"] = DK_INCLUDE;
6692   DirectiveKindMap["repeat"] = DK_REPEAT;
6693   DirectiveKindMap["rept"] = DK_REPEAT;
6694   DirectiveKindMap["while"] = DK_WHILE;
6695   DirectiveKindMap["for"] = DK_FOR;
6696   DirectiveKindMap["irp"] = DK_FOR;
6697   DirectiveKindMap["forc"] = DK_FORC;
6698   DirectiveKindMap["irpc"] = DK_FORC;
6699   DirectiveKindMap["if"] = DK_IF;
6700   DirectiveKindMap["ife"] = DK_IFE;
6701   DirectiveKindMap["ifb"] = DK_IFB;
6702   DirectiveKindMap["ifnb"] = DK_IFNB;
6703   DirectiveKindMap["ifdef"] = DK_IFDEF;
6704   DirectiveKindMap["ifndef"] = DK_IFNDEF;
6705   DirectiveKindMap["ifdif"] = DK_IFDIF;
6706   DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6707   DirectiveKindMap["ifidn"] = DK_IFIDN;
6708   DirectiveKindMap["ifidni"] = DK_IFIDNI;
6709   DirectiveKindMap["elseif"] = DK_ELSEIF;
6710   DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6711   DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6712   DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6713   DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6714   DirectiveKindMap["else"] = DK_ELSE;
6715   DirectiveKindMap["end"] = DK_END;
6716   DirectiveKindMap["endif"] = DK_ENDIF;
6717   // DirectiveKindMap[".file"] = DK_FILE;
6718   // DirectiveKindMap[".line"] = DK_LINE;
6719   // DirectiveKindMap[".loc"] = DK_LOC;
6720   // DirectiveKindMap[".stabs"] = DK_STABS;
6721   // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6722   // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6723   // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6724   // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6725   // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6726   // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6727   // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6728   // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6729   // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6730   // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6731   // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6732   // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6733   // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6734   // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6735   // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6736   // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6737   // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6738   // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6739   // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6740   // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6741   // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6742   // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6743   // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6744   // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6745   // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6746   // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6747   // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6748   // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6749   // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6750   // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6751   // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6752   // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6753   // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6754   // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6755   DirectiveKindMap["macro"] = DK_MACRO;
6756   DirectiveKindMap["exitm"] = DK_EXITM;
6757   DirectiveKindMap["endm"] = DK_ENDM;
6758   DirectiveKindMap["purge"] = DK_PURGE;
6759   DirectiveKindMap[".err"] = DK_ERR;
6760   DirectiveKindMap[".errb"] = DK_ERRB;
6761   DirectiveKindMap[".errnb"] = DK_ERRNB;
6762   DirectiveKindMap[".errdef"] = DK_ERRDEF;
6763   DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6764   DirectiveKindMap[".errdif"] = DK_ERRDIF;
6765   DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6766   DirectiveKindMap[".erridn"] = DK_ERRIDN;
6767   DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6768   DirectiveKindMap[".erre"] = DK_ERRE;
6769   DirectiveKindMap[".errnz"] = DK_ERRNZ;
6770   DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6771   DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6772   DirectiveKindMap[".savereg"] = DK_SAVEREG;
6773   DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6774   DirectiveKindMap[".setframe"] = DK_SETFRAME;
6775   DirectiveKindMap[".radix"] = DK_RADIX;
6776   DirectiveKindMap["db"] = DK_DB;
6777   DirectiveKindMap["dd"] = DK_DD;
6778   DirectiveKindMap["df"] = DK_DF;
6779   DirectiveKindMap["dq"] = DK_DQ;
6780   DirectiveKindMap["dw"] = DK_DW;
6781   DirectiveKindMap["echo"] = DK_ECHO;
6782   DirectiveKindMap["struc"] = DK_STRUCT;
6783   DirectiveKindMap["struct"] = DK_STRUCT;
6784   DirectiveKindMap["union"] = DK_UNION;
6785   DirectiveKindMap["ends"] = DK_ENDS;
6786 }
6787 
isMacroLikeDirective()6788 bool MasmParser::isMacroLikeDirective() {
6789   if (getLexer().is(AsmToken::Identifier)) {
6790     bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6791                            .CasesLower("repeat", "rept", true)
6792                            .CaseLower("while", true)
6793                            .CasesLower("for", "irp", true)
6794                            .CasesLower("forc", "irpc", true)
6795                            .Default(false);
6796     if (IsMacroLike)
6797       return true;
6798   }
6799   if (peekTok().is(AsmToken::Identifier) &&
6800       peekTok().getIdentifier().equals_insensitive("macro"))
6801     return true;
6802 
6803   return false;
6804 }
6805 
parseMacroLikeBody(SMLoc DirectiveLoc)6806 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6807   AsmToken EndToken, StartToken = getTok();
6808 
6809   unsigned NestLevel = 0;
6810   while (true) {
6811     // Check whether we have reached the end of the file.
6812     if (getLexer().is(AsmToken::Eof)) {
6813       printError(DirectiveLoc, "no matching 'endm' in definition");
6814       return nullptr;
6815     }
6816 
6817     if (isMacroLikeDirective())
6818       ++NestLevel;
6819 
6820     // Otherwise, check whether we have reached the endm.
6821     if (Lexer.is(AsmToken::Identifier) &&
6822         getTok().getIdentifier().equals_insensitive("endm")) {
6823       if (NestLevel == 0) {
6824         EndToken = getTok();
6825         Lex();
6826         if (Lexer.isNot(AsmToken::EndOfStatement)) {
6827           printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6828           return nullptr;
6829         }
6830         break;
6831       }
6832       --NestLevel;
6833     }
6834 
6835     // Otherwise, scan till the end of the statement.
6836     eatToEndOfStatement();
6837   }
6838 
6839   const char *BodyStart = StartToken.getLoc().getPointer();
6840   const char *BodyEnd = EndToken.getLoc().getPointer();
6841   StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6842 
6843   // We Are Anonymous.
6844   MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6845   return &MacroLikeBodies.back();
6846 }
6847 
expandStatement(SMLoc Loc)6848 bool MasmParser::expandStatement(SMLoc Loc) {
6849   std::string Body = parseStringTo(AsmToken::EndOfStatement);
6850   SMLoc EndLoc = getTok().getLoc();
6851 
6852   MCAsmMacroParameters Parameters;
6853   MCAsmMacroArguments Arguments;
6854 
6855   StringMap<std::string> BuiltinValues;
6856   for (const auto &S : BuiltinSymbolMap) {
6857     const BuiltinSymbol &Sym = S.getValue();
6858     if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6859       BuiltinValues[S.getKey().lower()] = std::move(*Text);
6860     }
6861   }
6862   for (const auto &B : BuiltinValues) {
6863     MCAsmMacroParameter P;
6864     MCAsmMacroArgument A;
6865     P.Name = B.getKey();
6866     P.Required = true;
6867     A.push_back(AsmToken(AsmToken::String, B.getValue()));
6868 
6869     Parameters.push_back(std::move(P));
6870     Arguments.push_back(std::move(A));
6871   }
6872 
6873   for (const auto &V : Variables) {
6874     const Variable &Var = V.getValue();
6875     if (Var.IsText) {
6876       MCAsmMacroParameter P;
6877       MCAsmMacroArgument A;
6878       P.Name = Var.Name;
6879       P.Required = true;
6880       A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6881 
6882       Parameters.push_back(std::move(P));
6883       Arguments.push_back(std::move(A));
6884     }
6885   }
6886   MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6887   MCAsmMacro M = MacroLikeBodies.back();
6888 
6889   // Expand the statement in a new buffer.
6890   SmallString<80> Buf;
6891   raw_svector_ostream OS(Buf);
6892   if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6893     return true;
6894   std::unique_ptr<MemoryBuffer> Expansion =
6895       MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6896 
6897   // Jump to the expanded statement and prime the lexer.
6898   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6899   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6900   EndStatementAtEOFStack.push_back(false);
6901   Lex();
6902   return false;
6903 }
6904 
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)6905 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6906                                           raw_svector_ostream &OS) {
6907   instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6908 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)6909 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6910                                           SMLoc ExitLoc,
6911                                           raw_svector_ostream &OS) {
6912   OS << "endm\n";
6913 
6914   std::unique_ptr<MemoryBuffer> Instantiation =
6915       MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6916 
6917   // Create the macro instantiation object and add to the current macro
6918   // instantiation stack.
6919   MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6920                                                   ExitLoc, TheCondStack.size()};
6921   ActiveMacros.push_back(MI);
6922 
6923   // Jump to the macro instantiation and prime the lexer.
6924   CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6925   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6926   EndStatementAtEOFStack.push_back(true);
6927   Lex();
6928 }
6929 
6930 /// parseDirectiveRepeat
6931 ///   ::= ("repeat" | "rept") count
6932 ///       body
6933 ///     endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)6934 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6935   const MCExpr *CountExpr;
6936   SMLoc CountLoc = getTok().getLoc();
6937   if (parseExpression(CountExpr))
6938     return true;
6939 
6940   int64_t Count;
6941   if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6942     return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6943   }
6944 
6945   if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
6946     return true;
6947 
6948   // Lex the repeat definition.
6949   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6950   if (!M)
6951     return true;
6952 
6953   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6954   // to hold the macro body with substitutions.
6955   SmallString<256> Buf;
6956   raw_svector_ostream OS(Buf);
6957   while (Count--) {
6958     if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6959                     getTok().getLoc()))
6960       return true;
6961   }
6962   instantiateMacroLikeBody(M, DirectiveLoc, OS);
6963 
6964   return false;
6965 }
6966 
6967 /// parseDirectiveWhile
6968 /// ::= "while" expression
6969 ///       body
6970 ///     endm
parseDirectiveWhile(SMLoc DirectiveLoc)6971 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6972   const MCExpr *CondExpr;
6973   SMLoc CondLoc = getTok().getLoc();
6974   if (parseExpression(CondExpr))
6975     return true;
6976 
6977   // Lex the repeat definition.
6978   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6979   if (!M)
6980     return true;
6981 
6982   // Macro instantiation is lexical, unfortunately. We construct a new buffer
6983   // to hold the macro body with substitutions.
6984   SmallString<256> Buf;
6985   raw_svector_ostream OS(Buf);
6986   int64_t Condition;
6987   if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6988     return Error(CondLoc, "expected absolute expression in 'while' directive");
6989   if (Condition) {
6990     // Instantiate the macro, then resume at this directive to recheck the
6991     // condition.
6992     if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6993                     getTok().getLoc()))
6994       return true;
6995     instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6996   }
6997 
6998   return false;
6999 }
7000 
7001 /// parseDirectiveFor
7002 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7003 ///       body
7004 ///     endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)7005 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7006   MCAsmMacroParameter Parameter;
7007   MCAsmMacroArguments A;
7008   if (check(parseIdentifier(Parameter.Name),
7009             "expected identifier in '" + Dir + "' directive"))
7010     return true;
7011 
7012   // Parse optional qualifier (default value, or "req")
7013   if (parseOptionalToken(AsmToken::Colon)) {
7014     if (parseOptionalToken(AsmToken::Equal)) {
7015       // Default value
7016       SMLoc ParamLoc;
7017 
7018       ParamLoc = Lexer.getLoc();
7019       if (parseMacroArgument(nullptr, Parameter.Value))
7020         return true;
7021     } else {
7022       SMLoc QualLoc;
7023       StringRef Qualifier;
7024 
7025       QualLoc = Lexer.getLoc();
7026       if (parseIdentifier(Qualifier))
7027         return Error(QualLoc, "missing parameter qualifier for "
7028                               "'" +
7029                                   Parameter.Name + "' in '" + Dir +
7030                                   "' directive");
7031 
7032       if (Qualifier.equals_insensitive("req"))
7033         Parameter.Required = true;
7034       else
7035         return Error(QualLoc,
7036                      Qualifier + " is not a valid parameter qualifier for '" +
7037                          Parameter.Name + "' in '" + Dir + "' directive");
7038     }
7039   }
7040 
7041   if (parseToken(AsmToken::Comma,
7042                  "expected comma in '" + Dir + "' directive") ||
7043       parseToken(AsmToken::Less,
7044                  "values in '" + Dir +
7045                      "' directive must be enclosed in angle brackets"))
7046     return true;
7047 
7048   while (true) {
7049     A.emplace_back();
7050     if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7051       return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7052 
7053     // If we see a comma, continue, and allow line continuation.
7054     if (!parseOptionalToken(AsmToken::Comma))
7055       break;
7056     parseOptionalToken(AsmToken::EndOfStatement);
7057   }
7058 
7059   if (parseToken(AsmToken::Greater,
7060                  "values in '" + Dir +
7061                      "' directive must be enclosed in angle brackets") ||
7062       parseEOL())
7063     return true;
7064 
7065   // Lex the for definition.
7066   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7067   if (!M)
7068     return true;
7069 
7070   // Macro instantiation is lexical, unfortunately. We construct a new buffer
7071   // to hold the macro body with substitutions.
7072   SmallString<256> Buf;
7073   raw_svector_ostream OS(Buf);
7074 
7075   for (const MCAsmMacroArgument &Arg : A) {
7076     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7077       return true;
7078   }
7079 
7080   instantiateMacroLikeBody(M, DirectiveLoc, OS);
7081 
7082   return false;
7083 }
7084 
7085 /// parseDirectiveForc
7086 /// ::= ("forc" | "irpc") symbol, <string>
7087 ///       body
7088 ///     endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)7089 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7090   MCAsmMacroParameter Parameter;
7091 
7092   std::string Argument;
7093   if (check(parseIdentifier(Parameter.Name),
7094             "expected identifier in '" + Directive + "' directive") ||
7095       parseToken(AsmToken::Comma,
7096                  "expected comma in '" + Directive + "' directive"))
7097     return true;
7098   if (parseAngleBracketString(Argument)) {
7099     // Match ml64.exe; treat all characters to end of statement as a string,
7100     // ignoring comment markers, then discard anything following a space (using
7101     // the C locale).
7102     Argument = parseStringTo(AsmToken::EndOfStatement);
7103     if (getTok().is(AsmToken::EndOfStatement))
7104       Argument += getTok().getString();
7105     size_t End = 0;
7106     for (; End < Argument.size(); ++End) {
7107       if (isSpace(Argument[End]))
7108         break;
7109     }
7110     Argument.resize(End);
7111   }
7112   if (parseEOL())
7113     return true;
7114 
7115   // Lex the irpc definition.
7116   MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7117   if (!M)
7118     return true;
7119 
7120   // Macro instantiation is lexical, unfortunately. We construct a new buffer
7121   // to hold the macro body with substitutions.
7122   SmallString<256> Buf;
7123   raw_svector_ostream OS(Buf);
7124 
7125   StringRef Values(Argument);
7126   for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7127     MCAsmMacroArgument Arg;
7128     Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
7129 
7130     if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7131       return true;
7132   }
7133 
7134   instantiateMacroLikeBody(M, DirectiveLoc, OS);
7135 
7136   return false;
7137 }
7138 
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)7139 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7140                                       size_t Len) {
7141   const MCExpr *Value;
7142   SMLoc ExprLoc = getLexer().getLoc();
7143   if (parseExpression(Value))
7144     return true;
7145   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7146   if (!MCE)
7147     return Error(ExprLoc, "unexpected expression in _emit");
7148   uint64_t IntValue = MCE->getValue();
7149   if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7150     return Error(ExprLoc, "literal value out of range for directive");
7151 
7152   Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7153   return false;
7154 }
7155 
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)7156 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7157   const MCExpr *Value;
7158   SMLoc ExprLoc = getLexer().getLoc();
7159   if (parseExpression(Value))
7160     return true;
7161   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7162   if (!MCE)
7163     return Error(ExprLoc, "unexpected expression in align");
7164   uint64_t IntValue = MCE->getValue();
7165   if (!isPowerOf2_64(IntValue))
7166     return Error(ExprLoc, "literal value not a power of two greater then zero");
7167 
7168   Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7169   return false;
7170 }
7171 
parseDirectiveRadix(SMLoc DirectiveLoc)7172 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7173   const SMLoc Loc = getLexer().getLoc();
7174   std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7175   StringRef RadixString = StringRef(RadixStringRaw).trim();
7176   unsigned Radix;
7177   if (RadixString.getAsInteger(10, Radix)) {
7178     return Error(Loc,
7179                  "radix must be a decimal number in the range 2 to 16; was " +
7180                      RadixString);
7181   }
7182   if (Radix < 2 || Radix > 16)
7183     return Error(Loc, "radix must be in the range 2 to 16; was " +
7184                           std::to_string(Radix));
7185   getLexer().setMasmDefaultRadix(Radix);
7186   return false;
7187 }
7188 
7189 /// parseDirectiveEcho
7190 ///   ::= "echo" message
parseDirectiveEcho(SMLoc DirectiveLoc)7191 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7192   std::string Message = parseStringTo(AsmToken::EndOfStatement);
7193   llvm::outs() << Message;
7194   if (!StringRef(Message).ends_with("\n"))
7195     llvm::outs() << '\n';
7196   return false;
7197 }
7198 
7199 // We are comparing pointers, but the pointers are relative to a single string.
7200 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)7201 static int rewritesSort(const AsmRewrite *AsmRewriteA,
7202                         const AsmRewrite *AsmRewriteB) {
7203   if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7204     return -1;
7205   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7206     return 1;
7207 
7208   // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7209   // rewrite to the same location.  Make sure the SizeDirective rewrite is
7210   // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
7211   // ensures the sort algorithm is stable.
7212   if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7213       AsmRewritePrecedence[AsmRewriteB->Kind])
7214     return -1;
7215 
7216   if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7217       AsmRewritePrecedence[AsmRewriteB->Kind])
7218     return 1;
7219   llvm_unreachable("Unstable rewrite sort.");
7220 }
7221 
defineMacro(StringRef Name,StringRef Value)7222 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7223   Variable &Var = Variables[Name.lower()];
7224   if (Var.Name.empty()) {
7225     Var.Name = Name;
7226   } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7227     return Error(SMLoc(), "invalid variable redefinition");
7228   } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7229              Warning(SMLoc(), "redefining '" + Name +
7230                                   "', already defined on the command line")) {
7231     return true;
7232   }
7233   Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7234   Var.IsText = true;
7235   Var.TextValue = Value.str();
7236   return false;
7237 }
7238 
lookUpField(StringRef Name,AsmFieldInfo & Info) const7239 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7240   const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7241   const StringRef Base = BaseMember.first, Member = BaseMember.second;
7242   return lookUpField(Base, Member, Info);
7243 }
7244 
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const7245 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7246                              AsmFieldInfo &Info) const {
7247   if (Base.empty())
7248     return true;
7249 
7250   AsmFieldInfo BaseInfo;
7251   if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7252     Base = BaseInfo.Type.Name;
7253 
7254   auto StructIt = Structs.find(Base.lower());
7255   auto TypeIt = KnownType.find(Base.lower());
7256   if (TypeIt != KnownType.end()) {
7257     StructIt = Structs.find(TypeIt->second.Name.lower());
7258   }
7259   if (StructIt != Structs.end())
7260     return lookUpField(StructIt->second, Member, Info);
7261 
7262   return true;
7263 }
7264 
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const7265 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7266                              AsmFieldInfo &Info) const {
7267   if (Member.empty()) {
7268     Info.Type.Name = Structure.Name;
7269     Info.Type.Size = Structure.Size;
7270     Info.Type.ElementSize = Structure.Size;
7271     Info.Type.Length = 1;
7272     return false;
7273   }
7274 
7275   std::pair<StringRef, StringRef> Split = Member.split('.');
7276   const StringRef FieldName = Split.first, FieldMember = Split.second;
7277 
7278   auto StructIt = Structs.find(FieldName.lower());
7279   if (StructIt != Structs.end())
7280     return lookUpField(StructIt->second, FieldMember, Info);
7281 
7282   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7283   if (FieldIt == Structure.FieldsByName.end())
7284     return true;
7285 
7286   const FieldInfo &Field = Structure.Fields[FieldIt->second];
7287   if (FieldMember.empty()) {
7288     Info.Offset += Field.Offset;
7289     Info.Type.Size = Field.SizeOf;
7290     Info.Type.ElementSize = Field.Type;
7291     Info.Type.Length = Field.LengthOf;
7292     if (Field.Contents.FT == FT_STRUCT)
7293       Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7294     else
7295       Info.Type.Name = "";
7296     return false;
7297   }
7298 
7299   if (Field.Contents.FT != FT_STRUCT)
7300     return true;
7301   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7302 
7303   if (lookUpField(StructInfo.Structure, FieldMember, Info))
7304     return true;
7305 
7306   Info.Offset += Field.Offset;
7307   return false;
7308 }
7309 
lookUpType(StringRef Name,AsmTypeInfo & Info) const7310 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7311   unsigned Size = StringSwitch<unsigned>(Name)
7312                       .CasesLower("byte", "db", "sbyte", 1)
7313                       .CasesLower("word", "dw", "sword", 2)
7314                       .CasesLower("dword", "dd", "sdword", 4)
7315                       .CasesLower("fword", "df", 6)
7316                       .CasesLower("qword", "dq", "sqword", 8)
7317                       .CaseLower("real4", 4)
7318                       .CaseLower("real8", 8)
7319                       .CaseLower("real10", 10)
7320                       .Default(0);
7321   if (Size) {
7322     Info.Name = Name;
7323     Info.ElementSize = Size;
7324     Info.Length = 1;
7325     Info.Size = Size;
7326     return false;
7327   }
7328 
7329   auto StructIt = Structs.find(Name.lower());
7330   if (StructIt != Structs.end()) {
7331     const StructInfo &Structure = StructIt->second;
7332     Info.Name = Name;
7333     Info.ElementSize = Structure.Size;
7334     Info.Length = 1;
7335     Info.Size = Structure.Size;
7336     return false;
7337   }
7338 
7339   return true;
7340 }
7341 
/// Parse the main source buffer as an MS-style inline assembly block and
/// produce the rewritten assembly string together with its operand lists.
///
/// The buffer is parsed statement by statement. For every statement that
/// yields an opcode, the parsed operands are classified into clobbered
/// registers, outputs and inputs, and string rewrites are recorded. Once all
/// statements are parsed, the recorded rewrites are sorted and applied to the
/// original buffer text to build the final string.
///
/// \param [out] AsmString   Receives the rewritten assembly string.
/// \param [out] NumOutputs  Receives the number of output operands.
/// \param [out] NumInputs   Receives the number of input operands.
/// \param [out] OpDecls     Receives one (declaration, needs-address-of) pair
///                          per operand, outputs first and then inputs.
/// \param [out] Constraints Receives one constraint string per operand, in the
///                          same order as \p OpDecls.
/// \param [out] Clobbers    Receives the printed names of the unique clobbered
///                          registers.
/// \param MII Used to obtain the instruction description for each opcode.
/// \param IP  Used to print register names for \p Clobbers.
/// \param SI  Forwarded to parseStatement().
///
/// \returns true if a statement failed to parse (pending errors are printed
/// first), false otherwise.
bool MasmParser::parseMSInlineAsm(
    std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
    SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
    SmallVectorImpl<std::string> &Constraints,
    SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
    const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
  // Inputs and outputs are collected separately and merged (outputs first)
  // once the whole block has been parsed.
  SmallVector<void *, 4> InputDecls;
  SmallVector<void *, 4> OutputDecls;
  SmallVector<bool, 4> InputDeclsAddressOf;
  SmallVector<bool, 4> OutputDeclsAddressOf;
  SmallVector<std::string, 4> InputConstraints;
  SmallVector<std::string, 4> OutputConstraints;
  SmallVector<unsigned, 4> ClobberRegs;

  // Every textual substitution to apply to the original assembly string.
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // Prime the lexer.
  Lex();

  // While we have input, parse each statement.
  // InputIdx/OutputIdx are the operand numbers used for the $N placeholders
  // when the rewritten string is emitted further down.
  unsigned InputIdx = 0;
  unsigned OutputIdx = 0;
  while (getLexer().isNot(AsmToken::Eof)) {
    // Parse curly braces marking block start/end.
    if (parseCurlyBlockScope(AsmStrRewrites))
      continue;

    ParseStatementInfo Info(&AsmStrRewrites);
    bool StatementErr = parseStatement(Info, &SI);

    if (StatementErr || Info.ParseError) {
      // Emit pending errors if any exist.
      printPendingErrors();
      return true;
    }

    // No pending error should exist here.
    assert(!hasPendingError() && "unexpected error from parseStatement");

    // NOTE(review): an opcode of ~0U presumably means the statement did not
    // produce an instruction (e.g. a directive or label) -- confirm.
    if (Info.Opcode == ~0U)
      continue;

    const MCInstrDesc &Desc = MII->get(Info.Opcode);

    // Build the list of clobbers, outputs and inputs.
    // NOTE(review): iteration starts at 1; operand 0 is presumably the
    // mnemonic token -- confirm against the target parser.
    for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
      MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];

      // Register operand.
      if (Operand.isReg() && !Operand.needAddressOf() &&
          !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
        unsigned NumDefs = Desc.getNumDefs();
        // Clobber.
        if (NumDefs && Operand.getMCOperandNum() < NumDefs)
          ClobberRegs.push_back(Operand.getReg());
        continue;
      }

      // Expr/Input or Output.
      // Operands without a symbol name or without a declaration are not
      // inputs/outputs and need no rewrite.
      StringRef SymName = Operand.getSymName();
      if (SymName.empty())
        continue;

      void *OpDecl = Operand.getOpDecl();
      if (!OpDecl)
        continue;

      StringRef Constraint = Operand.getConstraint();
      if (Operand.isImm()) {
        // Offset as immediate.
        if (Operand.isOffsetOfLocal())
          Constraint = "r";
        else
          Constraint = "i";
      }

      // Only the first operand of an instruction that may store is treated as
      // an output; everything else is an input.
      bool isOutput = (i == 1) && Desc.mayStore();
      SMLoc Start = SMLoc::getFromPointer(SymName.data());
      if (isOutput) {
        // Outputs are placed before inputs in the merged operand list, so each
        // output shifts the number of the first input placeholder by one.
        ++InputIdx;
        OutputDecls.push_back(OpDecl);
        OutputDeclsAddressOf.push_back(Operand.needAddressOf());
        OutputConstraints.push_back(("=" + Constraint).str());
        AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
      } else {
        InputDecls.push_back(OpDecl);
        InputDeclsAddressOf.push_back(Operand.needAddressOf());
        InputConstraints.push_back(Constraint.str());
        // Branch targets get their own rewrite kind, which is emitted below
        // with the ${N:P} form instead of plain $N.
        if (Desc.operands()[i - 1].isBranchTarget())
          AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
        else
          AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
      }
    }

    // Consider implicit defs to be clobbers.  Think of cpuid and push.
    llvm::append_range(ClobberRegs, Desc.implicit_defs());
  }

  // Set the number of Outputs and Inputs.
  NumOutputs = OutputDecls.size();
  NumInputs = InputDecls.size();

  // Set the unique clobbers.
  // Sort first so that duplicates become adjacent and can be removed.
  array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
  ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
  Clobbers.assign(ClobberRegs.size(), std::string());
  for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
    // Print each register name directly into its slot of Clobbers.
    raw_string_ostream OS(Clobbers[I]);
    IP->printRegName(OS, ClobberRegs[I]);
  }

  // Merge the various outputs and inputs.  Output are expected first.
  if (NumOutputs || NumInputs) {
    unsigned NumExprs = NumOutputs + NumInputs;
    OpDecls.resize(NumExprs);
    Constraints.resize(NumExprs);
    for (unsigned i = 0; i < NumOutputs; ++i) {
      OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
      Constraints[i] = OutputConstraints[i];
    }
    for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
      OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
      Constraints[j] = InputConstraints[i];
    }
  }

  // Build the IR assembly string.
  // AsmStart tracks how much of the original text has been consumed; text
  // between rewrites is copied through verbatim.
  std::string AsmStringIR;
  raw_string_ostream OS(AsmStringIR);
  StringRef ASMString =
      SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
  const char *AsmStart = ASMString.begin();
  const char *AsmEnd = ASMString.end();
  array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
  for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
    const AsmRewrite &AR = *I;
    // Check if this has already been covered by another rewrite...
    if (AR.Done)
      continue;
    AsmRewriteKind Kind = AR.Kind;

    const char *Loc = AR.Loc.getPointer();
    assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");

    // Emit everything up to the immediate/expression.
    if (unsigned Len = Loc - AsmStart)
      OS << StringRef(AsmStart, Len);

    // Skip the original expression.
    if (Kind == AOK_Skip) {
      AsmStart = Loc + AR.Len;
      continue;
    }

    // Extra characters of the original text to drop beyond AR.Len; only the
    // AOK_Align case below sets this.
    unsigned AdditionalSkip = 0;
    // Rewrite expressions in $N notation.
    switch (Kind) {
    default:
      break;
    case AOK_IntelExpr:
      assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
      if (AR.IntelExp.NeedBracs)
        OS << "[";
      if (AR.IntelExp.hasBaseReg())
        OS << AR.IntelExp.BaseReg;
      if (AR.IntelExp.hasIndexReg())
        OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
           << AR.IntelExp.IndexReg;
      if (AR.IntelExp.Scale > 1)
        OS << " * $$" << AR.IntelExp.Scale;
      if (AR.IntelExp.hasOffset()) {
        if (AR.IntelExp.hasRegs())
          OS << " + ";
        // Fuse this rewrite with a rewrite of the offset name, if present.
        StringRef OffsetName = AR.IntelExp.OffsetName;
        SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
        size_t OffsetLen = OffsetName.size();
        // Search from the current rewrite onwards for an input rewrite that
        // covers exactly the offset name.
        auto rewrite_it = std::find_if(
            I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
              return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
                     (FusingAR.Kind == AOK_Input ||
                      FusingAR.Kind == AOK_CallInput);
            });
        if (rewrite_it == AsmStrRewrites.end()) {
          OS << "offset " << OffsetName;
        } else if (rewrite_it->Kind == AOK_CallInput) {
          // Emit the placeholder here and mark the fused rewrite as done so
          // the outer loop skips it.
          OS << "${" << InputIdx++ << ":P}";
          rewrite_it->Done = true;
        } else {
          OS << '$' << InputIdx++;
          rewrite_it->Done = true;
        }
      }
      if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
        OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
      if (AR.IntelExp.NeedBracs)
        OS << "]";
      break;
    case AOK_Label:
      OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
      break;
    case AOK_Input:
      OS << '$' << InputIdx++;
      break;
    case AOK_CallInput:
      OS << "${" << InputIdx++ << ":P}";
      break;
    case AOK_Output:
      OS << '$' << OutputIdx++;
      break;
    case AOK_SizeDirective:
      // AR.Val holds the operand size in bits; unknown sizes emit nothing.
      switch (AR.Val) {
      default: break;
      case 8:  OS << "byte ptr "; break;
      case 16: OS << "word ptr "; break;
      case 32: OS << "dword ptr "; break;
      case 64: OS << "qword ptr "; break;
      case 80: OS << "xword ptr "; break;
      case 128: OS << "xmmword ptr "; break;
      case 256: OS << "ymmword ptr "; break;
      }
      break;
    case AOK_Emit:
      OS << ".byte";
      break;
    case AOK_Align: {
      // MS alignment directives are measured in bytes. If the native assembler
      // measures alignment in bytes, we can pass it straight through.
      OS << ".align";
      if (getContext().getAsmInfo()->getAlignmentIsInBytes())
        break;

      // Alignment is in log2 form, so print that instead and skip the original
      // immediate.
      unsigned Val = AR.Val;
      OS << ' ' << Val;
      assert(Val < 10 && "Expected alignment less then 2^10.");
      // The original immediate 2^Val has one, two or three decimal digits;
      // skip those digits plus one more character.
      // NOTE(review): the extra character is presumably the separating space
      // between the directive and its operand -- confirm.
      AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
      break;
    }
    case AOK_EVEN:
      OS << ".even";
      break;
    case AOK_EndOfStatement:
      OS << "\n\t";
      break;
    }

    // Skip the original expression.
    AsmStart = Loc + AR.Len + AdditionalSkip;
  }

  // Emit the remainder of the asm string.
  if (AsmStart != AsmEnd)
    OS << StringRef(AsmStart, AsmEnd - AsmStart);

  AsmString = OS.str();
  return false;
}
7602 
initializeBuiltinSymbolMap()7603 void MasmParser::initializeBuiltinSymbolMap() {
7604   // Numeric built-ins (supported in all versions)
7605   BuiltinSymbolMap["@version"] = BI_VERSION;
7606   BuiltinSymbolMap["@line"] = BI_LINE;
7607 
7608   // Text built-ins (supported in all versions)
7609   BuiltinSymbolMap["@date"] = BI_DATE;
7610   BuiltinSymbolMap["@time"] = BI_TIME;
7611   BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7612   BuiltinSymbolMap["@filename"] = BI_FILENAME;
7613   BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7614 
7615   // Some built-ins exist only for MASM32 (32-bit x86)
7616   if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7617       Triple::x86) {
7618     // Numeric built-ins
7619     // BuiltinSymbolMap["@cpu"] = BI_CPU;
7620     // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7621     // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7622     // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7623     // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7624     // BuiltinSymbolMap["@model"] = BI_MODEL;
7625 
7626     // Text built-ins
7627     // BuiltinSymbolMap["@code"] = BI_CODE;
7628     // BuiltinSymbolMap["@data"] = BI_DATA;
7629     // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7630     // BuiltinSymbolMap["@stack"] = BI_STACK;
7631   }
7632 }
7633 
evaluateBuiltinValue(BuiltinSymbol Symbol,SMLoc StartLoc)7634 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7635                                                SMLoc StartLoc) {
7636   switch (Symbol) {
7637   default:
7638     return nullptr;
7639   case BI_VERSION:
7640     // Match a recent version of ML.EXE.
7641     return MCConstantExpr::create(1427, getContext());
7642   case BI_LINE: {
7643     int64_t Line;
7644     if (ActiveMacros.empty())
7645       Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7646     else
7647       Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7648                                    ActiveMacros.front()->ExitBuffer);
7649     return MCConstantExpr::create(Line, getContext());
7650   }
7651   }
7652   llvm_unreachable("unhandled built-in symbol");
7653 }
7654 
7655 std::optional<std::string>
evaluateBuiltinTextMacro(BuiltinSymbol Symbol,SMLoc StartLoc)7656 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7657   switch (Symbol) {
7658   default:
7659     return {};
7660   case BI_DATE: {
7661     // Current local date, formatted MM/DD/YY
7662     char TmpBuffer[sizeof("mm/dd/yy")];
7663     const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7664     return std::string(TmpBuffer, Len);
7665   }
7666   case BI_TIME: {
7667     // Current local time, formatted HH:MM:SS (24-hour clock)
7668     char TmpBuffer[sizeof("hh:mm:ss")];
7669     const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7670     return std::string(TmpBuffer, Len);
7671   }
7672   case BI_FILECUR:
7673     return SrcMgr
7674         .getMemoryBuffer(
7675             ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7676         ->getBufferIdentifier()
7677         .str();
7678   case BI_FILENAME:
7679     return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7680                                ->getBufferIdentifier())
7681         .upper();
7682   case BI_CURSEG:
7683     return getStreamer().getCurrentSectionOnly()->getName().str();
7684   }
7685   llvm_unreachable("unhandled built-in symbol");
7686 }
7687 
7688 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)7689 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7690                                       MCStreamer &Out, const MCAsmInfo &MAI,
7691                                       struct tm TM, unsigned CB) {
7692   return new MasmParser(SM, C, Out, MAI, TM, CB);
7693 }
7694