xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MCParser/MasmParser.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- MasmParser.cpp - Parser for Assembly Files -------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This class implements the parser for assembly files.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "llvm/ADT/APFloat.h"
14  #include "llvm/ADT/APInt.h"
15  #include "llvm/ADT/ArrayRef.h"
16  #include "llvm/ADT/BitVector.h"
17  #include "llvm/ADT/STLExtras.h"
18  #include "llvm/ADT/SmallString.h"
19  #include "llvm/ADT/SmallVector.h"
20  #include "llvm/ADT/StringExtras.h"
21  #include "llvm/ADT/StringMap.h"
22  #include "llvm/ADT/StringRef.h"
23  #include "llvm/ADT/StringSwitch.h"
24  #include "llvm/ADT/Twine.h"
25  #include "llvm/BinaryFormat/Dwarf.h"
26  #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27  #include "llvm/MC/MCAsmInfo.h"
28  #include "llvm/MC/MCCodeView.h"
29  #include "llvm/MC/MCContext.h"
30  #include "llvm/MC/MCDirectives.h"
31  #include "llvm/MC/MCDwarf.h"
32  #include "llvm/MC/MCExpr.h"
33  #include "llvm/MC/MCInstPrinter.h"
34  #include "llvm/MC/MCInstrDesc.h"
35  #include "llvm/MC/MCInstrInfo.h"
36  #include "llvm/MC/MCParser/AsmCond.h"
37  #include "llvm/MC/MCParser/AsmLexer.h"
38  #include "llvm/MC/MCParser/MCAsmLexer.h"
39  #include "llvm/MC/MCParser/MCAsmParser.h"
40  #include "llvm/MC/MCParser/MCAsmParserExtension.h"
41  #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
42  #include "llvm/MC/MCParser/MCTargetAsmParser.h"
43  #include "llvm/MC/MCRegisterInfo.h"
44  #include "llvm/MC/MCSection.h"
45  #include "llvm/MC/MCStreamer.h"
46  #include "llvm/MC/MCSubtargetInfo.h"
47  #include "llvm/MC/MCSymbol.h"
48  #include "llvm/MC/MCTargetOptions.h"
49  #include "llvm/Support/Casting.h"
50  #include "llvm/Support/CommandLine.h"
51  #include "llvm/Support/ErrorHandling.h"
52  #include "llvm/Support/Format.h"
53  #include "llvm/Support/MD5.h"
54  #include "llvm/Support/MathExtras.h"
55  #include "llvm/Support/MemoryBuffer.h"
56  #include "llvm/Support/Path.h"
57  #include "llvm/Support/SMLoc.h"
58  #include "llvm/Support/SourceMgr.h"
59  #include "llvm/Support/raw_ostream.h"
60  #include <algorithm>
61  #include <cassert>
62  #include <climits>
63  #include <cstddef>
64  #include <cstdint>
65  #include <ctime>
66  #include <deque>
67  #include <memory>
68  #include <optional>
69  #include <sstream>
70  #include <string>
71  #include <tuple>
72  #include <utility>
73  #include <vector>
74  
75  using namespace llvm;
76  
77  namespace {
78  
79  /// Helper types for tracking macro definitions.
80  typedef std::vector<AsmToken> MCAsmMacroArgument;
81  typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
82  
83  /// Helper class for storing information about an active macro instantiation.
84  struct MacroInstantiation {
85    /// The location of the instantiation.
86    SMLoc InstantiationLoc;
87  
88    /// The buffer where parsing should resume upon instantiation completion.
89    unsigned ExitBuffer;
90  
91    /// The location where parsing should resume upon instantiation completion.
92    SMLoc ExitLoc;
93  
94    /// The depth of TheCondStack at the start of the instantiation.
95    size_t CondStackDepth;
96  };
97  
98  struct ParseStatementInfo {
99    /// The parsed operands from the last parsed statement.
100    SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
101  
102    /// The opcode from the last parsed instruction.
103    unsigned Opcode = ~0U;
104  
105    /// Was there an error parsing the inline assembly?
106    bool ParseError = false;
107  
108    /// The value associated with a macro exit.
109    std::optional<std::string> ExitValue;
110  
111    SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
112  
113    ParseStatementInfo() = delete;
ParseStatementInfo__anon60b61cd60111::ParseStatementInfo114    ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115        : AsmRewrites(rewrites) {}
116  };
117  
/// Kind of data a struct field holds; selects the initializer representation.
enum FieldType {
  /// Initializer is an integer expression, stored as an MCExpr.
  FT_INTEGRAL,
  /// Initializer is a real number, stored as an APInt.
  FT_REAL,
  /// Initializer is a struct initializer, stored recursively.
  FT_STRUCT
};
123  
124  struct FieldInfo;
125  struct StructInfo {
126    StringRef Name;
127    bool IsUnion = false;
128    bool Initializable = true;
129    unsigned Alignment = 0;
130    unsigned AlignmentSize = 0;
131    unsigned NextOffset = 0;
132    unsigned Size = 0;
133    std::vector<FieldInfo> Fields;
134    StringMap<size_t> FieldsByName;
135  
136    FieldInfo &addField(StringRef FieldName, FieldType FT,
137                        unsigned FieldAlignmentSize);
138  
139    StructInfo() = default;
140    StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
141  };
142  
143  // FIXME: This should probably use a class hierarchy, raw pointers between the
144  // objects, and dynamic type resolution instead of a union. On the other hand,
145  // ownership then becomes much more complicated; the obvious thing would be to
146  // use BumpPtrAllocator, but the lack of a destructor makes that messy.
147  
148  struct StructInitializer;
149  struct IntFieldInfo {
150    SmallVector<const MCExpr *, 1> Values;
151  
152    IntFieldInfo() = default;
IntFieldInfo__anon60b61cd60111::IntFieldInfo153    IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon60b61cd60111::IntFieldInfo154    IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
155  };
156  struct RealFieldInfo {
157    SmallVector<APInt, 1> AsIntValues;
158  
159    RealFieldInfo() = default;
RealFieldInfo__anon60b61cd60111::RealFieldInfo160    RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon60b61cd60111::RealFieldInfo161    RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
162  };
163  struct StructFieldInfo {
164    std::vector<StructInitializer> Initializers;
165    StructInfo Structure;
166  
167    StructFieldInfo() = default;
168    StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
169  };
170  
171  class FieldInitializer {
172  public:
173    FieldType FT;
174    union {
175      IntFieldInfo IntInfo;
176      RealFieldInfo RealInfo;
177      StructFieldInfo StructInfo;
178    };
179  
180    ~FieldInitializer();
181    FieldInitializer(FieldType FT);
182  
183    FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
184    FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
185    FieldInitializer(std::vector<StructInitializer> &&Initializers,
186                     struct StructInfo Structure);
187  
188    FieldInitializer(const FieldInitializer &Initializer);
189    FieldInitializer(FieldInitializer &&Initializer);
190  
191    FieldInitializer &operator=(const FieldInitializer &Initializer);
192    FieldInitializer &operator=(FieldInitializer &&Initializer);
193  };
194  
195  struct StructInitializer {
196    std::vector<FieldInitializer> FieldInitializers;
197  };
198  
199  struct FieldInfo {
200    // Offset of the field within the containing STRUCT.
201    unsigned Offset = 0;
202  
203    // Total size of the field (= LengthOf * Type).
204    unsigned SizeOf = 0;
205  
206    // Number of elements in the field (1 if scalar, >1 if an array).
207    unsigned LengthOf = 0;
208  
209    // Size of a single entry in this field, in bytes ("type" in MASM standards).
210    unsigned Type = 0;
211  
212    FieldInitializer Contents;
213  
FieldInfo__anon60b61cd60111::FieldInfo214    FieldInfo(FieldType FT) : Contents(FT) {}
215  };
216  
StructFieldInfo(std::vector<StructInitializer> V,StructInfo S)217  StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218                                   StructInfo S) {
219    Initializers = std::move(V);
220    Structure = S;
221  }
222  
StructInfo(StringRef StructName,bool Union,unsigned AlignmentValue)223  StructInfo::StructInfo(StringRef StructName, bool Union,
224                         unsigned AlignmentValue)
225      : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
226  
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)227  FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228                                  unsigned FieldAlignmentSize) {
229    if (!FieldName.empty())
230      FieldsByName[FieldName.lower()] = Fields.size();
231    Fields.emplace_back(FT);
232    FieldInfo &Field = Fields.back();
233    Field.Offset =
234        llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235    if (!IsUnion) {
236      NextOffset = std::max(NextOffset, Field.Offset);
237    }
238    AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239    return Field;
240  }
241  
~FieldInitializer()242  FieldInitializer::~FieldInitializer() {
243    switch (FT) {
244    case FT_INTEGRAL:
245      IntInfo.~IntFieldInfo();
246      break;
247    case FT_REAL:
248      RealInfo.~RealFieldInfo();
249      break;
250    case FT_STRUCT:
251      StructInfo.~StructFieldInfo();
252      break;
253    }
254  }
255  
FieldInitializer(FieldType FT)256  FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
257    switch (FT) {
258    case FT_INTEGRAL:
259      new (&IntInfo) IntFieldInfo();
260      break;
261    case FT_REAL:
262      new (&RealInfo) RealFieldInfo();
263      break;
264    case FT_STRUCT:
265      new (&StructInfo) StructFieldInfo();
266      break;
267    }
268  }
269  
FieldInitializer(SmallVector<const MCExpr *,1> && Values)270  FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
271      : FT(FT_INTEGRAL) {
272    new (&IntInfo) IntFieldInfo(std::move(Values));
273  }
274  
FieldInitializer(SmallVector<APInt,1> && AsIntValues)275  FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
276      : FT(FT_REAL) {
277    new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
278  }
279  
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)280  FieldInitializer::FieldInitializer(
281      std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282      : FT(FT_STRUCT) {
283    new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
284  }
285  
/// Copy-construct: adopt the source's field type and copy-construct the
/// corresponding union member in place.
FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
    : FT(Initializer.FT) {
  if (FT == FT_INTEGRAL)
    new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
  else if (FT == FT_REAL)
    new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
  else if (FT == FT_STRUCT)
    new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
}
300  
FieldInitializer(FieldInitializer && Initializer)301  FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302      : FT(Initializer.FT) {
303    switch (FT) {
304    case FT_INTEGRAL:
305      new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306      break;
307    case FT_REAL:
308      new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309      break;
310    case FT_STRUCT:
311      new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312      break;
313    }
314  }
315  
316  FieldInitializer &
operator =(const FieldInitializer & Initializer)317  FieldInitializer::operator=(const FieldInitializer &Initializer) {
318    if (FT != Initializer.FT) {
319      switch (FT) {
320      case FT_INTEGRAL:
321        IntInfo.~IntFieldInfo();
322        break;
323      case FT_REAL:
324        RealInfo.~RealFieldInfo();
325        break;
326      case FT_STRUCT:
327        StructInfo.~StructFieldInfo();
328        break;
329      }
330    }
331    FT = Initializer.FT;
332    switch (FT) {
333    case FT_INTEGRAL:
334      IntInfo = Initializer.IntInfo;
335      break;
336    case FT_REAL:
337      RealInfo = Initializer.RealInfo;
338      break;
339    case FT_STRUCT:
340      StructInfo = Initializer.StructInfo;
341      break;
342    }
343    return *this;
344  }
345  
operator =(FieldInitializer && Initializer)346  FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347    if (FT != Initializer.FT) {
348      switch (FT) {
349      case FT_INTEGRAL:
350        IntInfo.~IntFieldInfo();
351        break;
352      case FT_REAL:
353        RealInfo.~RealFieldInfo();
354        break;
355      case FT_STRUCT:
356        StructInfo.~StructFieldInfo();
357        break;
358      }
359    }
360    FT = Initializer.FT;
361    switch (FT) {
362    case FT_INTEGRAL:
363      IntInfo = Initializer.IntInfo;
364      break;
365    case FT_REAL:
366      RealInfo = Initializer.RealInfo;
367      break;
368    case FT_STRUCT:
369      StructInfo = Initializer.StructInfo;
370      break;
371    }
372    return *this;
373  }
374  
375  /// The concrete assembly parser instance.
376  // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377  // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378  class MasmParser : public MCAsmParser {
379  private:
380    AsmLexer Lexer;
381    MCContext &Ctx;
382    MCStreamer &Out;
383    const MCAsmInfo &MAI;
384    SourceMgr &SrcMgr;
385    SourceMgr::DiagHandlerTy SavedDiagHandler;
386    void *SavedDiagContext;
387    std::unique_ptr<MCAsmParserExtension> PlatformParser;
388  
389    /// This is the current buffer index we're lexing from as managed by the
390    /// SourceMgr object.
391    unsigned CurBuffer;
392  
393    /// time of assembly
394    struct tm TM;
395  
396    BitVector EndStatementAtEOFStack;
397  
398    AsmCond TheCondState;
399    std::vector<AsmCond> TheCondStack;
400  
401    /// maps directive names to handler methods in parser
402    /// extensions. Extensions register themselves in this map by calling
403    /// addDirectiveHandler.
404    StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
405  
406    /// maps assembly-time variable names to variables.
407    struct Variable {
408      enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
409  
410      StringRef Name;
411      RedefinableKind Redefinable = REDEFINABLE;
412      bool IsText = false;
413      std::string TextValue;
414    };
415    StringMap<Variable> Variables;
416  
417    /// Stack of active struct definitions.
418    SmallVector<StructInfo, 1> StructInProgress;
419  
420    /// Maps struct tags to struct definitions.
421    StringMap<StructInfo> Structs;
422  
423    /// Maps data location names to types.
424    StringMap<AsmTypeInfo> KnownType;
425  
426    /// Stack of active macro instantiations.
427    std::vector<MacroInstantiation*> ActiveMacros;
428  
429    /// List of bodies of anonymous macros.
430    std::deque<MCAsmMacro> MacroLikeBodies;
431  
432    /// Keeps track of how many .macro's have been instantiated.
433    unsigned NumOfMacroInstantiations;
434  
435    /// The values from the last parsed cpp hash file line comment if any.
436    struct CppHashInfoTy {
437      StringRef Filename;
438      int64_t LineNumber;
439      SMLoc Loc;
440      unsigned Buf;
CppHashInfoTy__anon60b61cd60111::MasmParser::CppHashInfoTy441      CppHashInfoTy() : LineNumber(0), Buf(0) {}
442    };
443    CppHashInfoTy CppHashInfo;
444  
445    /// The filename from the first cpp hash file line comment, if any.
446    StringRef FirstCppHashFilename;
447  
448    /// List of forward directional labels for diagnosis at the end.
449    SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
450  
451    /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
452    /// Defaults to 1U, meaning Intel.
453    unsigned AssemblerDialect = 1U;
454  
455    /// is Darwin compatibility enabled?
456    bool IsDarwin = false;
457  
458    /// Are we parsing ms-style inline assembly?
459    bool ParsingMSInlineAsm = false;
460  
461    /// Did we already inform the user about inconsistent MD5 usage?
462    bool ReportedInconsistentMD5 = false;
463  
464    // Current <...> expression depth.
465    unsigned AngleBracketDepth = 0U;
466  
467    // Number of locals defined.
468    uint16_t LocalCounter = 0;
469  
470  public:
471    MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472               const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473    MasmParser(const MasmParser &) = delete;
474    MasmParser &operator=(const MasmParser &) = delete;
475    ~MasmParser() override;
476  
477    bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
478  
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)479    void addDirectiveHandler(StringRef Directive,
480                             ExtensionDirectiveHandler Handler) override {
481      ExtensionDirectiveMap[Directive] = Handler;
482      if (!DirectiveKindMap.contains(Directive)) {
483        DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
484      }
485    }
486  
addAliasForDirective(StringRef Directive,StringRef Alias)487    void addAliasForDirective(StringRef Directive, StringRef Alias) override {
488      DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
489    }
490  
491    /// @name MCAsmParser Interface
492    /// {
493  
getSourceManager()494    SourceMgr &getSourceManager() override { return SrcMgr; }
getLexer()495    MCAsmLexer &getLexer() override { return Lexer; }
getContext()496    MCContext &getContext() override { return Ctx; }
getStreamer()497    MCStreamer &getStreamer() override { return Out; }
498  
getCVContext()499    CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
500  
getAssemblerDialect()501    unsigned getAssemblerDialect() override {
502      if (AssemblerDialect == ~0U)
503        return MAI.getAssemblerDialect();
504      else
505        return AssemblerDialect;
506    }
setAssemblerDialect(unsigned i)507    void setAssemblerDialect(unsigned i) override {
508      AssemblerDialect = i;
509    }
510  
511    void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
512    bool Warning(SMLoc L, const Twine &Msg,
513                 SMRange Range = std::nullopt) override;
514    bool printError(SMLoc L, const Twine &Msg,
515                    SMRange Range = std::nullopt) override;
516  
517    enum ExpandKind { ExpandMacros, DoNotExpandMacros };
518    const AsmToken &Lex(ExpandKind ExpandNextToken);
Lex()519    const AsmToken &Lex() override { return Lex(ExpandMacros); }
520  
setParsingMSInlineAsm(bool V)521    void setParsingMSInlineAsm(bool V) override {
522      ParsingMSInlineAsm = V;
523      // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
524      // hex integer literals.
525      Lexer.setLexMasmIntegers(V);
526    }
isParsingMSInlineAsm()527    bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
528  
isParsingMasm() const529    bool isParsingMasm() const override { return true; }
530  
531    bool defineMacro(StringRef Name, StringRef Value) override;
532  
533    bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534    bool lookUpField(StringRef Base, StringRef Member,
535                     AsmFieldInfo &Info) const override;
536  
537    bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
538  
539    bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540                          unsigned &NumInputs,
541                          SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542                          SmallVectorImpl<std::string> &Constraints,
543                          SmallVectorImpl<std::string> &Clobbers,
544                          const MCInstrInfo *MII, const MCInstPrinter *IP,
545                          MCAsmParserSemaCallback &SI) override;
546  
547    bool parseExpression(const MCExpr *&Res);
548    bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549    bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550                          AsmTypeInfo *TypeInfo) override;
551    bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552    bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553                               SMLoc &EndLoc) override;
554    bool parseAbsoluteExpression(int64_t &Res) override;
555  
556    /// Parse a floating point expression using the float \p Semantics
557    /// and set \p Res to the value.
558    bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
559  
560    /// Parse an identifier or string (as a quoted identifier)
561    /// and set \p Res to the identifier contents.
562    enum IdentifierPositionKind { StandardPosition, StartOfStatement };
563    bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
parseIdentifier(StringRef & Res)564    bool parseIdentifier(StringRef &Res) override {
565      return parseIdentifier(Res, StandardPosition);
566    }
567    void eatToEndOfStatement() override;
568  
569    bool checkForValidSection() override;
570  
571    /// }
572  
573  private:
574    bool expandMacros();
575    const AsmToken peekTok(bool ShouldSkipSpace = true);
576  
577    bool parseStatement(ParseStatementInfo &Info,
578                        MCAsmParserSemaCallback *SI);
579    bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580    bool parseCppHashLineFilenameComment(SMLoc L);
581  
582    bool expandMacro(raw_svector_ostream &OS, StringRef Body,
583                     ArrayRef<MCAsmMacroParameter> Parameters,
584                     ArrayRef<MCAsmMacroArgument> A,
585                     const std::vector<std::string> &Locals, SMLoc L);
586  
587    /// Are we inside a macro instantiation?
isInsideMacroInstantiation()588    bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
589  
590    /// Handle entry to macro instantiation.
591    ///
592    /// \param M The macro.
593    /// \param NameLoc Instantiation location.
594    bool handleMacroEntry(
595        const MCAsmMacro *M, SMLoc NameLoc,
596        AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
597  
598    /// Handle invocation of macro function.
599    ///
600    /// \param M The macro.
601    /// \param NameLoc Invocation location.
602    bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
603  
604    /// Handle exit from macro instantiation.
605    void handleMacroExit();
606  
607    /// Extract AsmTokens for a macro argument.
608    bool
609    parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
610                       AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
611  
612    /// Parse all macro arguments for a given macro.
613    bool
614    parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
615                        AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
616  
617    void printMacroInstantiations();
618  
619    bool expandStatement(SMLoc Loc);
620  
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=std::nullopt) const621    void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622                      SMRange Range = std::nullopt) const {
623      ArrayRef<SMRange> Ranges(Range);
624      SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
625    }
626    static void DiagHandler(const SMDiagnostic &Diag, void *Context);
627  
628    bool lookUpField(const StructInfo &Structure, StringRef Member,
629                     AsmFieldInfo &Info) const;
630  
631    /// Should we emit DWARF describing this assembler source?  (Returns false if
632    /// the source has .file directives, which means we don't want to generate
633    /// info describing the assembler source itself.)
634    bool enabledGenDwarfForAssembly();
635  
636    /// Enter the specified file. This returns true on failure.
637    bool enterIncludeFile(const std::string &Filename);
638  
639    /// Reset the current lexer position to that given by \p Loc. The
640    /// current token is not set; clients should ensure Lex() is called
641    /// subsequently.
642    ///
643    /// \param InBuffer If not 0, should be the known buffer id that contains the
644    /// location.
645    void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646                   bool EndStatementAtEOF = true);
647  
648    /// Parse up to a token of kind \p EndTok and return the contents from the
649    /// current token up to (but not including) this token; the current token on
650    /// exit will be either this kind or EOF. Reads through instantiated macro
651    /// functions and text macros.
652    SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653    std::string parseStringTo(AsmToken::TokenKind EndTok);
654  
655    /// Parse up to the end of statement and return the contents from the current
656    /// token until the end of the statement; the current token on exit will be
657    /// either the EndOfStatement or EOF.
658    StringRef parseStringToEndOfStatement() override;
659  
660    bool parseTextItem(std::string &Data);
661  
662    unsigned getBinOpPrecedence(AsmToken::TokenKind K,
663                                MCBinaryExpr::Opcode &Kind);
664  
665    bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666    bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667    bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
668  
669    bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
670  
671    bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672    bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
673  
674    // Generic (target and platform independent) directive parsing.
675    enum DirectiveKind {
676      DK_NO_DIRECTIVE, // Placeholder
677      DK_HANDLER_DIRECTIVE,
678      DK_ASSIGN,
679      DK_EQU,
680      DK_TEXTEQU,
681      DK_ASCII,
682      DK_ASCIZ,
683      DK_STRING,
684      DK_BYTE,
685      DK_SBYTE,
686      DK_WORD,
687      DK_SWORD,
688      DK_DWORD,
689      DK_SDWORD,
690      DK_FWORD,
691      DK_QWORD,
692      DK_SQWORD,
693      DK_DB,
694      DK_DD,
695      DK_DF,
696      DK_DQ,
697      DK_DW,
698      DK_REAL4,
699      DK_REAL8,
700      DK_REAL10,
701      DK_ALIGN,
702      DK_EVEN,
703      DK_ORG,
704      DK_ENDR,
705      DK_EXTERN,
706      DK_PUBLIC,
707      DK_COMM,
708      DK_COMMENT,
709      DK_INCLUDE,
710      DK_REPEAT,
711      DK_WHILE,
712      DK_FOR,
713      DK_FORC,
714      DK_IF,
715      DK_IFE,
716      DK_IFB,
717      DK_IFNB,
718      DK_IFDEF,
719      DK_IFNDEF,
720      DK_IFDIF,
721      DK_IFDIFI,
722      DK_IFIDN,
723      DK_IFIDNI,
724      DK_ELSEIF,
725      DK_ELSEIFE,
726      DK_ELSEIFB,
727      DK_ELSEIFNB,
728      DK_ELSEIFDEF,
729      DK_ELSEIFNDEF,
730      DK_ELSEIFDIF,
731      DK_ELSEIFDIFI,
732      DK_ELSEIFIDN,
733      DK_ELSEIFIDNI,
734      DK_ELSE,
735      DK_ENDIF,
736      DK_FILE,
737      DK_LINE,
738      DK_LOC,
739      DK_STABS,
740      DK_CV_FILE,
741      DK_CV_FUNC_ID,
742      DK_CV_INLINE_SITE_ID,
743      DK_CV_LOC,
744      DK_CV_LINETABLE,
745      DK_CV_INLINE_LINETABLE,
746      DK_CV_DEF_RANGE,
747      DK_CV_STRINGTABLE,
748      DK_CV_STRING,
749      DK_CV_FILECHECKSUMS,
750      DK_CV_FILECHECKSUM_OFFSET,
751      DK_CV_FPO_DATA,
752      DK_CFI_SECTIONS,
753      DK_CFI_STARTPROC,
754      DK_CFI_ENDPROC,
755      DK_CFI_DEF_CFA,
756      DK_CFI_DEF_CFA_OFFSET,
757      DK_CFI_ADJUST_CFA_OFFSET,
758      DK_CFI_DEF_CFA_REGISTER,
759      DK_CFI_OFFSET,
760      DK_CFI_REL_OFFSET,
761      DK_CFI_PERSONALITY,
762      DK_CFI_LSDA,
763      DK_CFI_REMEMBER_STATE,
764      DK_CFI_RESTORE_STATE,
765      DK_CFI_SAME_VALUE,
766      DK_CFI_RESTORE,
767      DK_CFI_ESCAPE,
768      DK_CFI_RETURN_COLUMN,
769      DK_CFI_SIGNAL_FRAME,
770      DK_CFI_UNDEFINED,
771      DK_CFI_REGISTER,
772      DK_CFI_WINDOW_SAVE,
773      DK_CFI_B_KEY_FRAME,
774      DK_MACRO,
775      DK_EXITM,
776      DK_ENDM,
777      DK_PURGE,
778      DK_ERR,
779      DK_ERRB,
780      DK_ERRNB,
781      DK_ERRDEF,
782      DK_ERRNDEF,
783      DK_ERRDIF,
784      DK_ERRDIFI,
785      DK_ERRIDN,
786      DK_ERRIDNI,
787      DK_ERRE,
788      DK_ERRNZ,
789      DK_ECHO,
790      DK_STRUCT,
791      DK_UNION,
792      DK_ENDS,
793      DK_END,
794      DK_PUSHFRAME,
795      DK_PUSHREG,
796      DK_SAVEREG,
797      DK_SAVEXMM128,
798      DK_SETFRAME,
799      DK_RADIX,
800    };
801  
802    /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803    /// class.
804    StringMap<DirectiveKind> DirectiveKindMap;
805  
806    bool isMacroLikeDirective();
807  
808    // Codeview def_range type parsing.
809    enum CVDefRangeType {
810      CVDR_DEFRANGE = 0, // Placeholder
811      CVDR_DEFRANGE_REGISTER,
812      CVDR_DEFRANGE_FRAMEPOINTER_REL,
813      CVDR_DEFRANGE_SUBFIELD_REGISTER,
814      CVDR_DEFRANGE_REGISTER_REL
815    };
816  
817    /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818    /// def_range types parsed by this class.
819    StringMap<CVDefRangeType> CVDefRangeTypeMap;
820  
821    // Builtin symbols recognized by this parser.
822    enum BuiltinSymbol {
823      BI_NO_SYMBOL, // Placeholder
824      BI_DATE,
825      BI_TIME,
826      BI_VERSION,
827      BI_FILECUR,
828      BI_FILENAME,
829      BI_LINE,
830      BI_CURSEG,
831      BI_CPU,
832      BI_INTERFACE,
833      BI_CODE,
834      BI_DATA,
835      BI_FARDATA,
836      BI_WORDSIZE,
837      BI_CODESIZE,
838      BI_DATASIZE,
839      BI_MODEL,
840      BI_STACK,
841    };
842  
843    /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844    /// class.
845    StringMap<BuiltinSymbol> BuiltinSymbolMap;
846  
847    const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
848  
849    std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850                                                        SMLoc StartLoc);
851  
852    // ".ascii", ".asciz", ".string"
853    bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
854  
855    // "byte", "word", ...
856    bool emitIntValue(const MCExpr *Value, unsigned Size);
857    bool parseScalarInitializer(unsigned Size,
858                                SmallVectorImpl<const MCExpr *> &Values,
859                                unsigned StringPadLength = 0);
860    bool parseScalarInstList(
861        unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
862        const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
863    bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864    bool addIntegralField(StringRef Name, unsigned Size);
865    bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866    bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867                                  StringRef Name, SMLoc NameLoc);
868  
869    // "real4", "real8", "real10"
870    bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871    bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872    bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873                                 size_t Size);
874    bool parseRealInstList(
875        const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
876        const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
877    bool parseDirectiveNamedRealValue(StringRef TypeName,
878                                      const fltSemantics &Semantics,
879                                      unsigned Size, StringRef Name,
880                                      SMLoc NameLoc);
881  
882    bool parseOptionalAngleBracketOpen();
883    bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
884  
885    bool parseFieldInitializer(const FieldInfo &Field,
886                               FieldInitializer &Initializer);
887    bool parseFieldInitializer(const FieldInfo &Field,
888                               const IntFieldInfo &Contents,
889                               FieldInitializer &Initializer);
890    bool parseFieldInitializer(const FieldInfo &Field,
891                               const RealFieldInfo &Contents,
892                               FieldInitializer &Initializer);
893    bool parseFieldInitializer(const FieldInfo &Field,
894                               const StructFieldInfo &Contents,
895                               FieldInitializer &Initializer);
896  
897    bool parseStructInitializer(const StructInfo &Structure,
898                                StructInitializer &Initializer);
899    bool parseStructInstList(
900        const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
901        const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
902  
903    bool emitFieldValue(const FieldInfo &Field);
904    bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905    bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906    bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
907  
908    bool emitFieldInitializer(const FieldInfo &Field,
909                              const FieldInitializer &Initializer);
910    bool emitFieldInitializer(const FieldInfo &Field,
911                              const IntFieldInfo &Contents,
912                              const IntFieldInfo &Initializer);
913    bool emitFieldInitializer(const FieldInfo &Field,
914                              const RealFieldInfo &Contents,
915                              const RealFieldInfo &Initializer);
916    bool emitFieldInitializer(const FieldInfo &Field,
917                              const StructFieldInfo &Contents,
918                              const StructFieldInfo &Initializer);
919  
920    bool emitStructInitializer(const StructInfo &Structure,
921                               const StructInitializer &Initializer);
922  
923    // User-defined types (structs, unions):
924    bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925    bool addStructField(StringRef Name, const StructInfo &Structure);
926    bool parseDirectiveStructValue(const StructInfo &Structure,
927                                   StringRef Directive, SMLoc DirLoc);
928    bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929                                        StringRef Directive, SMLoc DirLoc,
930                                        StringRef Name);
931  
932    // "=", "equ", "textequ"
933    bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934                              DirectiveKind DirKind, SMLoc NameLoc);
935  
936    bool parseDirectiveOrg(); // "org"
937  
938    bool emitAlignTo(int64_t Alignment);
939    bool parseDirectiveAlign();  // "align"
940    bool parseDirectiveEven();   // "even"
941  
942    // ".file", ".line", ".loc", ".stabs"
943    bool parseDirectiveFile(SMLoc DirectiveLoc);
944    bool parseDirectiveLine();
945    bool parseDirectiveLoc();
946    bool parseDirectiveStabs();
947  
948    // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949    // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950    bool parseDirectiveCVFile();
951    bool parseDirectiveCVFuncId();
952    bool parseDirectiveCVInlineSiteId();
953    bool parseDirectiveCVLoc();
954    bool parseDirectiveCVLinetable();
955    bool parseDirectiveCVInlineLinetable();
956    bool parseDirectiveCVDefRange();
957    bool parseDirectiveCVString();
958    bool parseDirectiveCVStringTable();
959    bool parseDirectiveCVFileChecksums();
960    bool parseDirectiveCVFileChecksumOffset();
961    bool parseDirectiveCVFPOData();
962  
963    // .cfi directives
964    bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965    bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
966    bool parseDirectiveCFISections();
967    bool parseDirectiveCFIStartProc();
968    bool parseDirectiveCFIEndProc();
969    bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
970    bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971    bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
972    bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973    bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974    bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975    bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976    bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
977    bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
978    bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979    bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980    bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
981    bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982    bool parseDirectiveCFISignalFrame();
983    bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
984  
985    // macro directives
986    bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987    bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988                                 std::string &Value);
989    bool parseDirectiveEndMacro(StringRef Directive);
990    bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
991  
992    bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993                              StringRef Name, SMLoc NameLoc);
994    bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995    bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996    bool parseDirectiveNestedEnds();
997  
998    bool parseDirectiveExtern();
999  
1000    /// Parse a directive like ".globl" which accepts a single symbol (which
1001    /// should be a label or an external).
1002    bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1003  
1004    bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1005  
1006    bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1007  
1008    bool parseDirectiveInclude(); // "include"
1009  
1010    // "if" or "ife"
1011    bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012    // "ifb" or "ifnb", depending on ExpectBlank.
1013    bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014    // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015    // CaseInsensitive.
1016    bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017                             bool CaseInsensitive);
1018    // "ifdef" or "ifndef", depending on expect_defined
1019    bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020    // "elseif" or "elseife"
1021    bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022    // "elseifb" or "elseifnb", depending on ExpectBlank.
1023    bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024    // ".elseifdef" or ".elseifndef", depending on expect_defined
1025    bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026    // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027    // ExpectEqual and CaseInsensitive.
1028    bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029                                 bool CaseInsensitive);
1030    bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
1031    bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"
1032    bool parseEscapedString(std::string &Data) override;
1033    bool parseAngleBracketString(std::string &Data) override;
1034  
1035    // Macro-like directives
1036    MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037    void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038                                  raw_svector_ostream &OS);
1039    void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040                                  SMLoc ExitLoc, raw_svector_ostream &OS);
1041    bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042    bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043    bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044    bool parseDirectiveWhile(SMLoc DirectiveLoc);
1045  
1046    // "_emit" or "__emit"
1047    bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048                              size_t Len);
1049  
1050    // "align"
1051    bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1052  
1053    // "end"
1054    bool parseDirectiveEnd(SMLoc DirectiveLoc);
1055  
1056    // ".err"
1057    bool parseDirectiveError(SMLoc DirectiveLoc);
1058    // ".errb" or ".errnb", depending on ExpectBlank.
1059    bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1060    // ".errdef" or ".errndef", depending on ExpectDefined.
1061    bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062    // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063    // and CaseInsensitive.
1064    bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065                                  bool CaseInsensitive);
1066    // ".erre" or ".errnz", depending on ExpectZero.
1067    bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1068  
1069    // ".radix"
1070    bool parseDirectiveRadix(SMLoc DirectiveLoc);
1071  
1072    // "echo"
1073    bool parseDirectiveEcho(SMLoc DirectiveLoc);
1074  
1075    void initializeDirectiveKindMap();
1076    void initializeCVDefRangeTypeMap();
1077    void initializeBuiltinSymbolMap();
1078  };
1079  
1080  } // end anonymous namespace
1081  
1082  namespace llvm {
1083  
1084  extern cl::opt<unsigned> AsmMacroMaxNestingDepth;
1085  
1086  extern MCAsmParserExtension *createCOFFMasmParser();
1087  
1088  } // end namespace llvm
1089  
1090  enum { DEFAULT_ADDRSPACE = 0 };
1091  
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)1092  MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093                         const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094      : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095        CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096    HadError = false;
1097    // Save the old handler.
1098    SavedDiagHandler = SrcMgr.getDiagHandler();
1099    SavedDiagContext = SrcMgr.getDiagContext();
1100    // Set our own handler which calls the saved handler.
1101    SrcMgr.setDiagHandler(DiagHandler, this);
1102    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1103    EndStatementAtEOFStack.push_back(true);
1104  
1105    // Initialize the platform / file format parser.
1106    switch (Ctx.getObjectFileType()) {
1107    case MCContext::IsCOFF:
1108      PlatformParser.reset(createCOFFMasmParser());
1109      break;
1110    default:
1111      report_fatal_error("llvm-ml currently supports only COFF output.");
1112      break;
1113    }
1114  
1115    initializeDirectiveKindMap();
1116    PlatformParser->Initialize(*this);
1117    initializeCVDefRangeTypeMap();
1118    initializeBuiltinSymbolMap();
1119  
1120    NumOfMacroInstantiations = 0;
1121  }
1122  
~MasmParser()1123  MasmParser::~MasmParser() {
1124    assert((HadError || ActiveMacros.empty()) &&
1125           "Unexpected active macro instantiation!");
1126  
1127    // Restore the saved diagnostics handler and context for use during
1128    // finalization.
1129    SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
1130  }
1131  
printMacroInstantiations()1132  void MasmParser::printMacroInstantiations() {
1133    // Print the active macro instantiation stack.
1134    for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135             it = ActiveMacros.rbegin(),
1136             ie = ActiveMacros.rend();
1137         it != ie; ++it)
1138      printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139                   "while in macro instantiation");
1140  }
1141  
Note(SMLoc L,const Twine & Msg,SMRange Range)1142  void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
1143    printPendingErrors();
1144    printMessage(L, SourceMgr::DK_Note, Msg, Range);
1145    printMacroInstantiations();
1146  }
1147  
Warning(SMLoc L,const Twine & Msg,SMRange Range)1148  bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149    if (getTargetParser().getTargetOptions().MCNoWarn)
1150      return false;
1151    if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152      return Error(L, Msg, Range);
1153    printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154    printMacroInstantiations();
1155    return false;
1156  }
1157  
printError(SMLoc L,const Twine & Msg,SMRange Range)1158  bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
1159    HadError = true;
1160    printMessage(L, SourceMgr::DK_Error, Msg, Range);
1161    printMacroInstantiations();
1162    return true;
1163  }
1164  
enterIncludeFile(const std::string & Filename)1165  bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166    std::string IncludedFile;
1167    unsigned NewBuf =
1168        SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169    if (!NewBuf)
1170      return true;
1171  
1172    CurBuffer = NewBuf;
1173    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174    EndStatementAtEOFStack.push_back(true);
1175    return false;
1176  }
1177  
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1178  void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179                             bool EndStatementAtEOF) {
1180    CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182                    Loc.getPointer(), EndStatementAtEOF);
1183  }
1184  
expandMacros()1185  bool MasmParser::expandMacros() {
1186    const AsmToken &Tok = getTok();
1187    const std::string IDLower = Tok.getIdentifier().lower();
1188  
1189    const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190    if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191      // This is a macro function invocation; expand it in place.
1192      const SMLoc MacroLoc = Tok.getLoc();
1193      const StringRef MacroId = Tok.getIdentifier();
1194      Lexer.Lex();
1195      if (handleMacroInvocation(M, MacroLoc)) {
1196        Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197        Lexer.Lex();
1198      }
1199      return false;
1200    }
1201  
1202    std::optional<std::string> ExpandedValue;
1203    auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204    if (BuiltinIt != BuiltinSymbolMap.end()) {
1205      ExpandedValue =
1206          evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207    } else {
1208      auto VarIt = Variables.find(IDLower);
1209      if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210        ExpandedValue = VarIt->getValue().TextValue;
1211      }
1212    }
1213  
1214    if (!ExpandedValue)
1215      return true;
1216    std::unique_ptr<MemoryBuffer> Instantiation =
1217        MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1218  
1219    // Jump to the macro instantiation and prime the lexer.
1220    CurBuffer =
1221        SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223                    /*EndStatementAtEOF=*/false);
1224    EndStatementAtEOFStack.push_back(false);
1225    Lexer.Lex();
1226    return false;
1227  }
1228  
Lex(ExpandKind ExpandNextToken)1229  const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
1230    if (Lexer.getTok().is(AsmToken::Error))
1231      Error(Lexer.getErrLoc(), Lexer.getErr());
1232  
1233    // if it's a end of statement with a comment in it
1234    if (getTok().is(AsmToken::EndOfStatement)) {
1235      // if this is a line comment output it.
1236      if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
1237          getTok().getString().front() != '\r' && MAI.preserveAsmComments())
1238        Out.addExplicitComment(Twine(getTok().getString()));
1239    }
1240  
1241    const AsmToken *tok = &Lexer.Lex();
1242    bool StartOfStatement = Lexer.isAtStartOfStatement();
1243  
1244    while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
1245      if (StartOfStatement) {
1246        AsmToken NextTok;
1247        MutableArrayRef<AsmToken> Buf(NextTok);
1248        size_t ReadCount = Lexer.peekTokens(Buf);
1249        if (ReadCount && NextTok.is(AsmToken::Identifier) &&
1250            (NextTok.getString().equals_insensitive("equ") ||
1251             NextTok.getString().equals_insensitive("textequ"))) {
1252          // This looks like an EQU or TEXTEQU directive; don't expand the
1253          // identifier, allowing for redefinitions.
1254          break;
1255        }
1256      }
1257      if (expandMacros())
1258        break;
1259    }
1260  
1261    // Parse comments here to be deferred until end of next statement.
1262    while (tok->is(AsmToken::Comment)) {
1263      if (MAI.preserveAsmComments())
1264        Out.addExplicitComment(Twine(tok->getString()));
1265      tok = &Lexer.Lex();
1266    }
1267  
1268    // Recognize and bypass line continuations.
1269    while (tok->is(AsmToken::BackSlash) &&
1270           peekTok().is(AsmToken::EndOfStatement)) {
1271      // Eat both the backslash and the end of statement.
1272      Lexer.Lex();
1273      tok = &Lexer.Lex();
1274    }
1275  
1276    if (tok->is(AsmToken::Eof)) {
1277      // If this is the end of an included file, pop the parent file off the
1278      // include stack.
1279      SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1280      if (ParentIncludeLoc != SMLoc()) {
1281        EndStatementAtEOFStack.pop_back();
1282        jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1283        return Lex();
1284      }
1285      EndStatementAtEOFStack.pop_back();
1286      assert(EndStatementAtEOFStack.empty());
1287    }
1288  
1289    return *tok;
1290  }
1291  
peekTok(bool ShouldSkipSpace)1292  const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
1293    AsmToken Tok;
1294  
1295    MutableArrayRef<AsmToken> Buf(Tok);
1296    size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);
1297  
1298    if (ReadCount == 0) {
1299      // If this is the end of an included file, pop the parent file off the
1300      // include stack.
1301      SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1302      if (ParentIncludeLoc != SMLoc()) {
1303        EndStatementAtEOFStack.pop_back();
1304        jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1305        return peekTok(ShouldSkipSpace);
1306      }
1307      EndStatementAtEOFStack.pop_back();
1308      assert(EndStatementAtEOFStack.empty());
1309    }
1310  
1311    assert(ReadCount == 1);
1312    return Tok;
1313  }
1314  
enabledGenDwarfForAssembly()1315  bool MasmParser::enabledGenDwarfForAssembly() {
1316    // Check whether the user specified -g.
1317    if (!getContext().getGenDwarfForAssembly())
1318      return false;
1319    // If we haven't encountered any .file directives (which would imply that
1320    // the assembler source was produced with debug info already) then emit one
1321    // describing the assembler source file itself.
1322    if (getContext().getGenDwarfFileNumber() == 0) {
1323      // Use the first #line directive for this, if any. It's preprocessed, so
1324      // there is no checksum, and of course no source directive.
1325      if (!FirstCppHashFilename.empty())
1326        getContext().setMCLineTableRootFile(
1327            /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1328            /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1329      const MCDwarfFile &RootFile =
1330          getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1331      getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1332          /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1333          RootFile.Checksum, RootFile.Source));
1334    }
1335    return true;
1336  }
1337  
Run(bool NoInitialTextSection,bool NoFinalize)1338  bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
1339    // Create the initial section, if requested.
1340    if (!NoInitialTextSection)
1341      Out.initSections(false, getTargetParser().getSTI());
1342  
1343    // Prime the lexer.
1344    Lex();
1345  
1346    HadError = false;
1347    AsmCond StartingCondState = TheCondState;
1348    SmallVector<AsmRewrite, 4> AsmStrRewrites;
1349  
1350    // If we are generating dwarf for assembly source files save the initial text
1351    // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
1352    // emitting any actual debug info yet and haven't had a chance to parse any
1353    // embedded .file directives.)
1354    if (getContext().getGenDwarfForAssembly()) {
1355      MCSection *Sec = getStreamer().getCurrentSectionOnly();
1356      if (!Sec->getBeginSymbol()) {
1357        MCSymbol *SectionStartSym = getContext().createTempSymbol();
1358        getStreamer().emitLabel(SectionStartSym);
1359        Sec->setBeginSymbol(SectionStartSym);
1360      }
1361      bool InsertResult = getContext().addGenDwarfSection(Sec);
1362      assert(InsertResult && ".text section should not have debug info yet");
1363      (void)InsertResult;
1364    }
1365  
1366    getTargetParser().onBeginOfFile();
1367  
1368    // While we have input, parse each statement.
1369    while (Lexer.isNot(AsmToken::Eof) ||
1370           SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
1371      // Skip through the EOF at the end of an inclusion.
1372      if (Lexer.is(AsmToken::Eof))
1373        Lex();
1374  
1375      ParseStatementInfo Info(&AsmStrRewrites);
1376      bool Parsed = parseStatement(Info, nullptr);
1377  
1378      // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
1379      // for printing ErrMsg via Lex() only if no (presumably better) parser error
1380      // exists.
1381      if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
1382        Lex();
1383      }
1384  
1385      // parseStatement returned true so may need to emit an error.
1386      printPendingErrors();
1387  
1388      // Skipping to the next line if needed.
1389      if (Parsed && !getLexer().isAtStartOfStatement())
1390        eatToEndOfStatement();
1391    }
1392  
1393    getTargetParser().onEndOfFile();
1394    printPendingErrors();
1395  
1396    // All errors should have been emitted.
1397    assert(!hasPendingError() && "unexpected error from parseStatement");
1398  
1399    getTargetParser().flushPendingInstructions(getStreamer());
1400  
1401    if (TheCondState.TheCond != StartingCondState.TheCond ||
1402        TheCondState.Ignore != StartingCondState.Ignore)
1403      printError(getTok().getLoc(), "unmatched .ifs or .elses");
1404    // Check to see there are no empty DwarfFile slots.
1405    const auto &LineTables = getContext().getMCDwarfLineTables();
1406    if (!LineTables.empty()) {
1407      unsigned Index = 0;
1408      for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
1409        if (File.Name.empty() && Index != 0)
1410          printError(getTok().getLoc(), "unassigned file number: " +
1411                                            Twine(Index) +
1412                                            " for .file directives");
1413        ++Index;
1414      }
1415    }
1416  
1417    // Check to see that all assembler local symbols were actually defined.
1418    // Targets that don't do subsections via symbols may not want this, though,
1419    // so conservatively exclude them. Only do this if we're finalizing, though,
1420    // as otherwise we won't necessarilly have seen everything yet.
1421    if (!NoFinalize) {
1422      if (MAI.hasSubsectionsViaSymbols()) {
1423        for (const auto &TableEntry : getContext().getSymbols()) {
1424          MCSymbol *Sym = TableEntry.getValue().Symbol;
1425          // Variable symbols may not be marked as defined, so check those
1426          // explicitly. If we know it's a variable, we have a definition for
1427          // the purposes of this check.
1428          if (Sym && Sym->isTemporary() && !Sym->isVariable() &&
1429              !Sym->isDefined())
1430            // FIXME: We would really like to refer back to where the symbol was
1431            // first referenced for a source location. We need to add something
1432            // to track that. Currently, we just point to the end of the file.
1433            printError(getTok().getLoc(), "assembler local symbol '" +
1434                                              Sym->getName() + "' not defined");
1435        }
1436      }
1437  
1438      // Temporary symbols like the ones for directional jumps don't go in the
1439      // symbol table. They also need to be diagnosed in all (final) cases.
1440      for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
1441        if (std::get<2>(LocSym)->isUndefined()) {
1442          // Reset the state of any "# line file" directives we've seen to the
1443          // context as it was at the diagnostic site.
1444          CppHashInfo = std::get<1>(LocSym);
1445          printError(std::get<0>(LocSym), "directional label undefined");
1446        }
1447      }
1448    }
1449  
1450    // Finalize the output stream if there are no errors and if the client wants
1451    // us to.
1452    if (!HadError && !NoFinalize)
1453      Out.finish(Lexer.getLoc());
1454  
1455    return HadError || getContext().hadError();
1456  }
1457  
checkForValidSection()1458  bool MasmParser::checkForValidSection() {
1459    if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1460      Out.initSections(false, getTargetParser().getSTI());
1461      return Error(getTok().getLoc(),
1462                   "expected section directive before assembly directive");
1463    }
1464    return false;
1465  }
1466  
1467  /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1468  void MasmParser::eatToEndOfStatement() {
1469    while (Lexer.isNot(AsmToken::EndOfStatement)) {
1470      if (Lexer.is(AsmToken::Eof)) {
1471        SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1472        if (ParentIncludeLoc == SMLoc()) {
1473          break;
1474        }
1475  
1476        EndStatementAtEOFStack.pop_back();
1477        jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1478      }
1479  
1480      Lexer.Lex();
1481    }
1482  
1483    // Eat EOL.
1484    if (Lexer.is(AsmToken::EndOfStatement))
1485      Lexer.Lex();
1486  }
1487  
1488  SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1489  MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1490    SmallVector<StringRef, 1> Refs;
1491    const char *Start = getTok().getLoc().getPointer();
1492    while (Lexer.isNot(EndTok)) {
1493      if (Lexer.is(AsmToken::Eof)) {
1494        SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1495        if (ParentIncludeLoc == SMLoc()) {
1496          break;
1497        }
1498        Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1499  
1500        EndStatementAtEOFStack.pop_back();
1501        jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1502        Lexer.Lex();
1503        Start = getTok().getLoc().getPointer();
1504      } else {
1505        Lexer.Lex();
1506      }
1507    }
1508    Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1509    return Refs;
1510  }
1511  
parseStringTo(AsmToken::TokenKind EndTok)1512  std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1513    SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1514    std::string Str;
1515    for (StringRef S : Refs) {
1516      Str.append(S.str());
1517    }
1518    return Str;
1519  }
1520  
parseStringToEndOfStatement()1521  StringRef MasmParser::parseStringToEndOfStatement() {
1522    const char *Start = getTok().getLoc().getPointer();
1523  
1524    while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1525      Lexer.Lex();
1526  
1527    const char *End = getTok().getLoc().getPointer();
1528    return StringRef(Start, End - Start);
1529  }
1530  
1531  /// Parse a paren expression and return it.
1532  /// NOTE: This assumes the leading '(' has already been consumed.
1533  ///
1534  /// parenexpr ::= expr)
1535  ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1536  bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1537    if (parseExpression(Res))
1538      return true;
1539    EndLoc = Lexer.getTok().getEndLoc();
1540    return parseRParen();
1541  }
1542  
1543  /// Parse a bracket expression and return it.
1544  /// NOTE: This assumes the leading '[' has already been consumed.
1545  ///
1546  /// bracketexpr ::= expr]
1547  ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1548  bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1549    if (parseExpression(Res))
1550      return true;
1551    EndLoc = getTok().getEndLoc();
1552    if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1553      return true;
1554    return false;
1555  }
1556  
1557  /// Parse a primary expression and return it.
1558  ///  primaryexpr ::= (parenexpr
1559  ///  primaryexpr ::= symbol
1560  ///  primaryexpr ::= number
1561  ///  primaryexpr ::= '.'
1562  ///  primaryexpr ::= ~,+,-,'not' primaryexpr
1563  ///  primaryexpr ::= string
1564  ///          (a string is interpreted as a 64-bit number in big-endian base-256)
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc,AsmTypeInfo * TypeInfo)1565  bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
1566                                    AsmTypeInfo *TypeInfo) {
1567    SMLoc FirstTokenLoc = getLexer().getLoc();
1568    AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
1569    switch (FirstTokenKind) {
1570    default:
1571      return TokError("unknown token in expression");
1572    // If we have an error assume that we've already handled it.
1573    case AsmToken::Error:
1574      return true;
1575    case AsmToken::Exclaim:
1576      Lex(); // Eat the operator.
1577      if (parsePrimaryExpr(Res, EndLoc, nullptr))
1578        return true;
1579      Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
1580      return false;
1581    case AsmToken::Dollar:
1582    case AsmToken::At:
1583    case AsmToken::Identifier: {
1584      StringRef Identifier;
1585      if (parseIdentifier(Identifier)) {
1586        // We may have failed but $ may be a valid token.
1587        if (getTok().is(AsmToken::Dollar)) {
1588          if (Lexer.getMAI().getDollarIsPC()) {
1589            Lex();
1590            // This is a '$' reference, which references the current PC.  Emit a
1591            // temporary label to the streamer and refer to it.
1592            MCSymbol *Sym = Ctx.createTempSymbol();
1593            Out.emitLabel(Sym);
1594            Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
1595                                          getContext());
1596            EndLoc = FirstTokenLoc;
1597            return false;
1598          }
1599          return Error(FirstTokenLoc, "invalid token in expression");
1600        }
1601      }
1602      // Parse named bitwise negation.
1603      if (Identifier.equals_insensitive("not")) {
1604        if (parsePrimaryExpr(Res, EndLoc, nullptr))
1605          return true;
1606        Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1607        return false;
1608      }
1609      // Parse directional local label references.
1610      if (Identifier.equals_insensitive("@b") ||
1611          Identifier.equals_insensitive("@f")) {
1612        bool Before = Identifier.equals_insensitive("@b");
1613        MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
1614        if (Before && Sym->isUndefined())
1615          return Error(FirstTokenLoc, "Expected @@ label before @B reference");
1616        Res = MCSymbolRefExpr::create(Sym, getContext());
1617        return false;
1618      }
1619      // Parse symbol variant.
1620      std::pair<StringRef, StringRef> Split;
1621      if (!MAI.useParensForSymbolVariant()) {
1622        Split = Identifier.split('@');
1623      } else if (Lexer.is(AsmToken::LParen)) {
1624        Lex(); // eat '('.
1625        StringRef VName;
1626        parseIdentifier(VName);
1627        // eat ')'.
1628        if (parseToken(AsmToken::RParen,
1629                       "unexpected token in variant, expected ')'"))
1630          return true;
1631        Split = std::make_pair(Identifier, VName);
1632      }
1633  
1634      EndLoc = SMLoc::getFromPointer(Identifier.end());
1635  
1636      // This is a symbol reference.
1637      StringRef SymbolName = Identifier;
1638      if (SymbolName.empty())
1639        return Error(getLexer().getLoc(), "expected a symbol reference");
1640  
1641      MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1642  
1643      // Look up the symbol variant if used.
1644      if (!Split.second.empty()) {
1645        Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
1646        if (Variant != MCSymbolRefExpr::VK_Invalid) {
1647          SymbolName = Split.first;
1648        } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
1649          Variant = MCSymbolRefExpr::VK_None;
1650        } else {
1651          return Error(SMLoc::getFromPointer(Split.second.begin()),
1652                       "invalid variant '" + Split.second + "'");
1653        }
1654      }
1655  
1656      // Find the field offset if used.
1657      AsmFieldInfo Info;
1658      Split = SymbolName.split('.');
1659      if (Split.second.empty()) {
1660      } else {
1661        SymbolName = Split.first;
1662        if (lookUpField(SymbolName, Split.second, Info)) {
1663          std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
1664          StringRef Base = BaseMember.first, Member = BaseMember.second;
1665          lookUpField(Base, Member, Info);
1666        } else if (Structs.count(SymbolName.lower())) {
1667          // This is actually a reference to a field offset.
1668          Res = MCConstantExpr::create(Info.Offset, getContext());
1669          return false;
1670        }
1671      }
1672  
1673      MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
1674      if (!Sym) {
1675        // If this is a built-in numeric value, treat it as a constant.
1676        auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
1677        const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
1678                                         ? BI_NO_SYMBOL
1679                                         : BuiltinIt->getValue();
1680        if (Symbol != BI_NO_SYMBOL) {
1681          const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
1682          if (Value) {
1683            Res = Value;
1684            return false;
1685          }
1686        }
1687  
1688        // Variables use case-insensitive symbol names; if this is a variable, we
1689        // find the symbol using its canonical name.
1690        auto VarIt = Variables.find(SymbolName.lower());
1691        if (VarIt != Variables.end())
1692          SymbolName = VarIt->second.Name;
1693        Sym = getContext().getOrCreateSymbol(SymbolName);
1694      }
1695  
1696      // If this is an absolute variable reference, substitute it now to preserve
1697      // semantics in the face of reassignment.
1698      if (Sym->isVariable()) {
1699        auto V = Sym->getVariableValue(/*SetUsed=*/false);
1700        bool DoInline = isa<MCConstantExpr>(V) && !Variant;
1701        if (auto TV = dyn_cast<MCTargetExpr>(V))
1702          DoInline = TV->inlineAssignedExpr();
1703        if (DoInline) {
1704          if (Variant)
1705            return Error(EndLoc, "unexpected modifier on variable reference");
1706          Res = Sym->getVariableValue(/*SetUsed=*/false);
1707          return false;
1708        }
1709      }
1710  
1711      // Otherwise create a symbol ref.
1712      const MCExpr *SymRef =
1713          MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
1714      if (Info.Offset) {
1715        Res = MCBinaryExpr::create(
1716            MCBinaryExpr::Add, SymRef,
1717            MCConstantExpr::create(Info.Offset, getContext()), getContext());
1718      } else {
1719        Res = SymRef;
1720      }
1721      if (TypeInfo) {
1722        if (Info.Type.Name.empty()) {
1723          auto TypeIt = KnownType.find(Identifier.lower());
1724          if (TypeIt != KnownType.end()) {
1725            Info.Type = TypeIt->second;
1726          }
1727        }
1728  
1729        *TypeInfo = Info.Type;
1730      }
1731      return false;
1732    }
1733    case AsmToken::BigNum:
1734      return TokError("literal value out of range for directive");
1735    case AsmToken::Integer: {
1736      int64_t IntVal = getTok().getIntVal();
1737      Res = MCConstantExpr::create(IntVal, getContext());
1738      EndLoc = Lexer.getTok().getEndLoc();
1739      Lex(); // Eat token.
1740      return false;
1741    }
1742    case AsmToken::String: {
1743      // MASM strings (used as constants) are interpreted as big-endian base-256.
1744      SMLoc ValueLoc = getTok().getLoc();
1745      std::string Value;
1746      if (parseEscapedString(Value))
1747        return true;
1748      if (Value.size() > 8)
1749        return Error(ValueLoc, "literal value out of range");
1750      uint64_t IntValue = 0;
1751      for (const unsigned char CharVal : Value)
1752        IntValue = (IntValue << 8) | CharVal;
1753      Res = MCConstantExpr::create(IntValue, getContext());
1754      return false;
1755    }
1756    case AsmToken::Real: {
1757      APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
1758      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
1759      Res = MCConstantExpr::create(IntVal, getContext());
1760      EndLoc = Lexer.getTok().getEndLoc();
1761      Lex(); // Eat token.
1762      return false;
1763    }
1764    case AsmToken::Dot: {
1765      // This is a '.' reference, which references the current PC.  Emit a
1766      // temporary label to the streamer and refer to it.
1767      MCSymbol *Sym = Ctx.createTempSymbol();
1768      Out.emitLabel(Sym);
1769      Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
1770      EndLoc = Lexer.getTok().getEndLoc();
1771      Lex(); // Eat identifier.
1772      return false;
1773    }
1774    case AsmToken::LParen:
1775      Lex(); // Eat the '('.
1776      return parseParenExpr(Res, EndLoc);
1777    case AsmToken::LBrac:
1778      if (!PlatformParser->HasBracketExpressions())
1779        return TokError("brackets expression not supported on this target");
1780      Lex(); // Eat the '['.
1781      return parseBracketExpr(Res, EndLoc);
1782    case AsmToken::Minus:
1783      Lex(); // Eat the operator.
1784      if (parsePrimaryExpr(Res, EndLoc, nullptr))
1785        return true;
1786      Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
1787      return false;
1788    case AsmToken::Plus:
1789      Lex(); // Eat the operator.
1790      if (parsePrimaryExpr(Res, EndLoc, nullptr))
1791        return true;
1792      Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
1793      return false;
1794    case AsmToken::Tilde:
1795      Lex(); // Eat the operator.
1796      if (parsePrimaryExpr(Res, EndLoc, nullptr))
1797        return true;
1798      Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
1799      return false;
1800    // MIPS unary expression operators. The lexer won't generate these tokens if
1801    // MCAsmInfo::HasMipsExpressions is false for the target.
1802    case AsmToken::PercentCall16:
1803    case AsmToken::PercentCall_Hi:
1804    case AsmToken::PercentCall_Lo:
1805    case AsmToken::PercentDtprel_Hi:
1806    case AsmToken::PercentDtprel_Lo:
1807    case AsmToken::PercentGot:
1808    case AsmToken::PercentGot_Disp:
1809    case AsmToken::PercentGot_Hi:
1810    case AsmToken::PercentGot_Lo:
1811    case AsmToken::PercentGot_Ofst:
1812    case AsmToken::PercentGot_Page:
1813    case AsmToken::PercentGottprel:
1814    case AsmToken::PercentGp_Rel:
1815    case AsmToken::PercentHi:
1816    case AsmToken::PercentHigher:
1817    case AsmToken::PercentHighest:
1818    case AsmToken::PercentLo:
1819    case AsmToken::PercentNeg:
1820    case AsmToken::PercentPcrel_Hi:
1821    case AsmToken::PercentPcrel_Lo:
1822    case AsmToken::PercentTlsgd:
1823    case AsmToken::PercentTlsldm:
1824    case AsmToken::PercentTprel_Hi:
1825    case AsmToken::PercentTprel_Lo:
1826      Lex(); // Eat the operator.
1827      if (Lexer.isNot(AsmToken::LParen))
1828        return TokError("expected '(' after operator");
1829      Lex(); // Eat the operator.
1830      if (parseExpression(Res, EndLoc))
1831        return true;
1832      if (parseRParen())
1833        return true;
1834      Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
1835      return !Res;
1836    }
1837  }
1838  
parseExpression(const MCExpr * & Res)1839  bool MasmParser::parseExpression(const MCExpr *&Res) {
1840    SMLoc EndLoc;
1841    return parseExpression(Res, EndLoc);
1842  }
1843  
1844  /// This function checks if the next token is <string> type or arithmetic.
1845  /// string that begin with character '<' must end with character '>'.
1846  /// otherwise it is arithmetics.
1847  /// If the function returns a 'true' value,
1848  /// the End argument will be filled with the last location pointed to the '>'
1849  /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1850  static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1851    assert((StrLoc.getPointer() != nullptr) &&
1852           "Argument to the function cannot be a NULL value");
1853    const char *CharPtr = StrLoc.getPointer();
1854    while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1855           (*CharPtr != '\0')) {
1856      if (*CharPtr == '!')
1857        CharPtr++;
1858      CharPtr++;
1859    }
1860    if (*CharPtr == '>') {
1861      EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1862      return true;
1863    }
1864    return false;
1865  }
1866  
1867  /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1868  static std::string angleBracketString(StringRef BracketContents) {
1869    std::string Res;
1870    for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1871      if (BracketContents[Pos] == '!')
1872        Pos++;
1873      Res += BracketContents[Pos];
1874    }
1875    return Res;
1876  }
1877  
1878  /// Parse an expression and return it.
1879  ///
1880  ///  expr ::= expr &&,|| expr               -> lowest.
1881  ///  expr ::= expr |,^,&,! expr
1882  ///  expr ::= expr ==,!=,<>,<,<=,>,>= expr
1883  ///  expr ::= expr <<,>> expr
1884  ///  expr ::= expr +,- expr
1885  ///  expr ::= expr *,/,% expr               -> highest.
1886  ///  expr ::= primaryexpr
1887  ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1888  bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1889    // Parse the expression.
1890    Res = nullptr;
1891    if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1892        parseBinOpRHS(1, Res, EndLoc))
1893      return true;
1894  
1895    // Try to constant fold it up front, if possible. Do not exploit
1896    // assembler here.
1897    int64_t Value;
1898    if (Res->evaluateAsAbsolute(Value))
1899      Res = MCConstantExpr::create(Value, getContext());
1900  
1901    return false;
1902  }
1903  
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1904  bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1905    Res = nullptr;
1906    return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1907  }
1908  
parseParenExprOfDepth(unsigned ParenDepth,const MCExpr * & Res,SMLoc & EndLoc)1909  bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1910                                         SMLoc &EndLoc) {
1911    if (parseParenExpr(Res, EndLoc))
1912      return true;
1913  
1914    for (; ParenDepth > 0; --ParenDepth) {
1915      if (parseBinOpRHS(1, Res, EndLoc))
1916        return true;
1917  
1918      // We don't Lex() the last RParen.
1919      // This is the same behavior as parseParenExpression().
1920      if (ParenDepth - 1 > 0) {
1921        EndLoc = getTok().getEndLoc();
1922        if (parseRParen())
1923          return true;
1924      }
1925    }
1926    return false;
1927  }
1928  
parseAbsoluteExpression(int64_t & Res)1929  bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1930    const MCExpr *Expr;
1931  
1932    SMLoc StartLoc = Lexer.getLoc();
1933    if (parseExpression(Expr))
1934      return true;
1935  
1936    if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1937      return Error(StartLoc, "expected absolute expression");
1938  
1939    return false;
1940  }
1941  
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1942  static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1943                                        MCBinaryExpr::Opcode &Kind,
1944                                        bool ShouldUseLogicalShr,
1945                                        bool EndExpressionAtGreater) {
1946    switch (K) {
1947    default:
1948      return 0; // not a binop.
1949  
1950    // Lowest Precedence: &&, ||
1951    case AsmToken::AmpAmp:
1952      Kind = MCBinaryExpr::LAnd;
1953      return 2;
1954    case AsmToken::PipePipe:
1955      Kind = MCBinaryExpr::LOr;
1956      return 1;
1957  
1958    // Low Precedence: ==, !=, <>, <, <=, >, >=
1959    case AsmToken::EqualEqual:
1960      Kind = MCBinaryExpr::EQ;
1961      return 3;
1962    case AsmToken::ExclaimEqual:
1963    case AsmToken::LessGreater:
1964      Kind = MCBinaryExpr::NE;
1965      return 3;
1966    case AsmToken::Less:
1967      Kind = MCBinaryExpr::LT;
1968      return 3;
1969    case AsmToken::LessEqual:
1970      Kind = MCBinaryExpr::LTE;
1971      return 3;
1972    case AsmToken::Greater:
1973      if (EndExpressionAtGreater)
1974        return 0;
1975      Kind = MCBinaryExpr::GT;
1976      return 3;
1977    case AsmToken::GreaterEqual:
1978      Kind = MCBinaryExpr::GTE;
1979      return 3;
1980  
1981    // Low Intermediate Precedence: +, -
1982    case AsmToken::Plus:
1983      Kind = MCBinaryExpr::Add;
1984      return 4;
1985    case AsmToken::Minus:
1986      Kind = MCBinaryExpr::Sub;
1987      return 4;
1988  
1989    // High Intermediate Precedence: |, &, ^
1990    case AsmToken::Pipe:
1991      Kind = MCBinaryExpr::Or;
1992      return 5;
1993    case AsmToken::Caret:
1994      Kind = MCBinaryExpr::Xor;
1995      return 5;
1996    case AsmToken::Amp:
1997      Kind = MCBinaryExpr::And;
1998      return 5;
1999  
2000    // Highest Precedence: *, /, %, <<, >>
2001    case AsmToken::Star:
2002      Kind = MCBinaryExpr::Mul;
2003      return 6;
2004    case AsmToken::Slash:
2005      Kind = MCBinaryExpr::Div;
2006      return 6;
2007    case AsmToken::Percent:
2008      Kind = MCBinaryExpr::Mod;
2009      return 6;
2010    case AsmToken::LessLess:
2011      Kind = MCBinaryExpr::Shl;
2012      return 6;
2013    case AsmToken::GreaterGreater:
2014      if (EndExpressionAtGreater)
2015        return 0;
2016      Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2017      return 6;
2018    }
2019  }
2020  
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)2021  unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2022                                          MCBinaryExpr::Opcode &Kind) {
2023    bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2024    return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2025                                 AngleBracketDepth > 0);
2026  }
2027  
2028  /// Parse all binary operators with precedence >= 'Precedence'.
2029  /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)2030  bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2031                                 SMLoc &EndLoc) {
2032    SMLoc StartLoc = Lexer.getLoc();
2033    while (true) {
2034      AsmToken::TokenKind TokKind = Lexer.getKind();
2035      if (Lexer.getKind() == AsmToken::Identifier) {
2036        TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2037                      .CaseLower("and", AsmToken::Amp)
2038                      .CaseLower("not", AsmToken::Exclaim)
2039                      .CaseLower("or", AsmToken::Pipe)
2040                      .CaseLower("xor", AsmToken::Caret)
2041                      .CaseLower("shl", AsmToken::LessLess)
2042                      .CaseLower("shr", AsmToken::GreaterGreater)
2043                      .CaseLower("eq", AsmToken::EqualEqual)
2044                      .CaseLower("ne", AsmToken::ExclaimEqual)
2045                      .CaseLower("lt", AsmToken::Less)
2046                      .CaseLower("le", AsmToken::LessEqual)
2047                      .CaseLower("gt", AsmToken::Greater)
2048                      .CaseLower("ge", AsmToken::GreaterEqual)
2049                      .Default(TokKind);
2050      }
2051      MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2052      unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2053  
2054      // If the next token is lower precedence than we are allowed to eat, return
2055      // successfully with what we ate already.
2056      if (TokPrec < Precedence)
2057        return false;
2058  
2059      Lex();
2060  
2061      // Eat the next primary expression.
2062      const MCExpr *RHS;
2063      if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2064        return true;
2065  
2066      // If BinOp binds less tightly with RHS than the operator after RHS, let
2067      // the pending operator take RHS as its LHS.
2068      MCBinaryExpr::Opcode Dummy;
2069      unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2070      if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2071        return true;
2072  
2073      // Merge LHS and RHS according to operator.
2074      Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2075    }
2076  }
2077  
2078  /// ParseStatement:
2079  ///   ::= % statement
2080  ///   ::= EndOfStatement
2081  ///   ::= Label* Directive ...Operands... EndOfStatement
2082  ///   ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)2083  bool MasmParser::parseStatement(ParseStatementInfo &Info,
2084                                  MCAsmParserSemaCallback *SI) {
2085    assert(!hasPendingError() && "parseStatement started with pending error");
2086    // Eat initial spaces and comments.
2087    while (Lexer.is(AsmToken::Space))
2088      Lex();
2089    if (Lexer.is(AsmToken::EndOfStatement)) {
2090      // If this is a line comment we can drop it safely.
2091      if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2092          getTok().getString().front() == '\n')
2093        Out.addBlankLine();
2094      Lex();
2095      return false;
2096    }
2097  
2098    // If preceded by an expansion operator, first expand all text macros and
2099    // macro functions.
2100    if (getTok().is(AsmToken::Percent)) {
2101      SMLoc ExpansionLoc = getTok().getLoc();
2102      if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2103        return true;
2104    }
2105  
2106    // Statements always start with an identifier, unless we're dealing with a
2107    // processor directive (.386, .686, etc.) that lexes as a real.
2108    AsmToken ID = getTok();
2109    SMLoc IDLoc = ID.getLoc();
2110    StringRef IDVal;
2111    if (Lexer.is(AsmToken::HashDirective))
2112      return parseCppHashLineFilenameComment(IDLoc);
2113    if (Lexer.is(AsmToken::Dot)) {
2114      // Treat '.' as a valid identifier in this context.
2115      Lex();
2116      IDVal = ".";
2117    } else if (Lexer.is(AsmToken::Real)) {
2118      // Treat ".<number>" as a valid identifier in this context.
2119      IDVal = getTok().getString();
2120      Lex(); // always eat a token
2121      if (!IDVal.starts_with("."))
2122        return Error(IDLoc, "unexpected token at start of statement");
2123    } else if (parseIdentifier(IDVal, StartOfStatement)) {
2124      if (!TheCondState.Ignore) {
2125        Lex(); // always eat a token
2126        return Error(IDLoc, "unexpected token at start of statement");
2127      }
2128      IDVal = "";
2129    }
2130  
2131    // Handle conditional assembly here before checking for skipping.  We
2132    // have to do this so that .endif isn't skipped in a ".if 0" block for
2133    // example.
2134    StringMap<DirectiveKind>::const_iterator DirKindIt =
2135        DirectiveKindMap.find(IDVal.lower());
2136    DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2137                                ? DK_NO_DIRECTIVE
2138                                : DirKindIt->getValue();
2139    switch (DirKind) {
2140    default:
2141      break;
2142    case DK_IF:
2143    case DK_IFE:
2144      return parseDirectiveIf(IDLoc, DirKind);
2145    case DK_IFB:
2146      return parseDirectiveIfb(IDLoc, true);
2147    case DK_IFNB:
2148      return parseDirectiveIfb(IDLoc, false);
2149    case DK_IFDEF:
2150      return parseDirectiveIfdef(IDLoc, true);
2151    case DK_IFNDEF:
2152      return parseDirectiveIfdef(IDLoc, false);
2153    case DK_IFDIF:
2154      return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2155                                 /*CaseInsensitive=*/false);
2156    case DK_IFDIFI:
2157      return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2158                                 /*CaseInsensitive=*/true);
2159    case DK_IFIDN:
2160      return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2161                                 /*CaseInsensitive=*/false);
2162    case DK_IFIDNI:
2163      return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2164                                 /*CaseInsensitive=*/true);
2165    case DK_ELSEIF:
2166    case DK_ELSEIFE:
2167      return parseDirectiveElseIf(IDLoc, DirKind);
2168    case DK_ELSEIFB:
2169      return parseDirectiveElseIfb(IDLoc, true);
2170    case DK_ELSEIFNB:
2171      return parseDirectiveElseIfb(IDLoc, false);
2172    case DK_ELSEIFDEF:
2173      return parseDirectiveElseIfdef(IDLoc, true);
2174    case DK_ELSEIFNDEF:
2175      return parseDirectiveElseIfdef(IDLoc, false);
2176    case DK_ELSEIFDIF:
2177      return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2178                                     /*CaseInsensitive=*/false);
2179    case DK_ELSEIFDIFI:
2180      return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2181                                     /*CaseInsensitive=*/true);
2182    case DK_ELSEIFIDN:
2183      return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2184                                     /*CaseInsensitive=*/false);
2185    case DK_ELSEIFIDNI:
2186      return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2187                                     /*CaseInsensitive=*/true);
2188    case DK_ELSE:
2189      return parseDirectiveElse(IDLoc);
2190    case DK_ENDIF:
2191      return parseDirectiveEndIf(IDLoc);
2192    }
2193  
2194    // Ignore the statement if in the middle of inactive conditional
2195    // (e.g. ".if 0").
2196    if (TheCondState.Ignore) {
2197      eatToEndOfStatement();
2198      return false;
2199    }
2200  
2201    // FIXME: Recurse on local labels?
2202  
2203    // Check for a label.
2204    //   ::= identifier ':'
2205    //   ::= number ':'
2206    if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2207      if (checkForValidSection())
2208        return true;
2209  
2210      // identifier ':'   -> Label.
2211      Lex();
2212  
2213      // Diagnose attempt to use '.' as a label.
2214      if (IDVal == ".")
2215        return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2216  
2217      // Diagnose attempt to use a variable as a label.
2218      //
2219      // FIXME: Diagnostics. Note the location of the definition as a label.
2220      // FIXME: This doesn't diagnose assignment to a symbol which has been
2221      // implicitly marked as external.
2222      MCSymbol *Sym;
2223      if (ParsingMSInlineAsm && SI) {
2224        StringRef RewrittenLabel =
2225            SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2226        assert(!RewrittenLabel.empty() &&
2227               "We should have an internal name here.");
2228        Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2229                                       RewrittenLabel);
2230        IDVal = RewrittenLabel;
2231      }
2232      // Handle directional local labels
2233      if (IDVal == "@@") {
2234        Sym = Ctx.createDirectionalLocalSymbol(0);
2235      } else {
2236        Sym = getContext().getOrCreateSymbol(IDVal);
2237      }
2238  
2239      // End of Labels should be treated as end of line for lexing
2240      // purposes but that information is not available to the Lexer who
2241      // does not understand Labels. This may cause us to see a Hash
2242      // here instead of a preprocessor line comment.
2243      if (getTok().is(AsmToken::Hash)) {
2244        std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2245        Lexer.Lex();
2246        Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2247      }
2248  
2249      // Consume any end of statement token, if present, to avoid spurious
2250      // addBlankLine calls().
2251      if (getTok().is(AsmToken::EndOfStatement)) {
2252        Lex();
2253      }
2254  
2255      getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2256  
2257      // Emit the label.
2258      if (!getTargetParser().isParsingMSInlineAsm())
2259        Out.emitLabel(Sym, IDLoc);
2260  
2261      // If we are generating dwarf for assembly source files then gather the
2262      // info to make a dwarf label entry for this label if needed.
2263      if (enabledGenDwarfForAssembly())
2264        MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2265                                   IDLoc);
2266  
2267      getTargetParser().onLabelParsed(Sym);
2268  
2269      return false;
2270    }
2271  
2272    // If macros are enabled, check to see if this is a macro instantiation.
2273    if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2274      return handleMacroEntry(M, IDLoc);
2275    }
2276  
2277    // Otherwise, we have a normal instruction or directive.
2278  
2279    if (DirKind != DK_NO_DIRECTIVE) {
2280      // There are several entities interested in parsing directives:
2281      //
2282      // 1. Asm parser extensions. For example, platform-specific parsers
2283      //    (like the ELF parser) register themselves as extensions.
2284      // 2. The target-specific assembly parser. Some directives are target
2285      //    specific or may potentially behave differently on certain targets.
2286      // 3. The generic directive parser implemented by this class. These are
2287      //    all the directives that behave in a target and platform independent
2288      //    manner, or at least have a default behavior that's shared between
2289      //    all targets and platforms.
2290  
2291      getTargetParser().flushPendingInstructions(getStreamer());
2292  
2293      // Special-case handling of structure-end directives at higher priority,
2294      // since ENDS is overloaded as a segment-end directive.
2295      if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2296          getTok().is(AsmToken::EndOfStatement)) {
2297        return parseDirectiveNestedEnds();
2298      }
2299  
2300      // First, check the extension directive map to see if any extension has
2301      // registered itself to parse this directive.
2302      std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2303          ExtensionDirectiveMap.lookup(IDVal.lower());
2304      if (Handler.first)
2305        return (*Handler.second)(Handler.first, IDVal, IDLoc);
2306  
2307      // Next, let the target-specific assembly parser try.
2308      if (ID.isNot(AsmToken::Identifier))
2309        return false;
2310  
2311      ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2312      assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2313             "Should only return Failure iff there was an error");
2314      if (TPDirectiveReturn.isFailure())
2315        return true;
2316      if (TPDirectiveReturn.isSuccess())
2317        return false;
2318  
2319      // Finally, if no one else is interested in this directive, it must be
2320      // generic and familiar to this class.
2321      switch (DirKind) {
2322      default:
2323        break;
2324      case DK_ASCII:
2325        return parseDirectiveAscii(IDVal, false);
2326      case DK_ASCIZ:
2327      case DK_STRING:
2328        return parseDirectiveAscii(IDVal, true);
2329      case DK_BYTE:
2330      case DK_SBYTE:
2331      case DK_DB:
2332        return parseDirectiveValue(IDVal, 1);
2333      case DK_WORD:
2334      case DK_SWORD:
2335      case DK_DW:
2336        return parseDirectiveValue(IDVal, 2);
2337      case DK_DWORD:
2338      case DK_SDWORD:
2339      case DK_DD:
2340        return parseDirectiveValue(IDVal, 4);
2341      case DK_FWORD:
2342      case DK_DF:
2343        return parseDirectiveValue(IDVal, 6);
2344      case DK_QWORD:
2345      case DK_SQWORD:
2346      case DK_DQ:
2347        return parseDirectiveValue(IDVal, 8);
2348      case DK_REAL4:
2349        return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2350      case DK_REAL8:
2351        return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2352      case DK_REAL10:
2353        return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2354      case DK_STRUCT:
2355      case DK_UNION:
2356        return parseDirectiveNestedStruct(IDVal, DirKind);
2357      case DK_ENDS:
2358        return parseDirectiveNestedEnds();
2359      case DK_ALIGN:
2360        return parseDirectiveAlign();
2361      case DK_EVEN:
2362        return parseDirectiveEven();
2363      case DK_ORG:
2364        return parseDirectiveOrg();
2365      case DK_EXTERN:
2366        return parseDirectiveExtern();
2367      case DK_PUBLIC:
2368        return parseDirectiveSymbolAttribute(MCSA_Global);
2369      case DK_COMM:
2370        return parseDirectiveComm(/*IsLocal=*/false);
2371      case DK_COMMENT:
2372        return parseDirectiveComment(IDLoc);
2373      case DK_INCLUDE:
2374        return parseDirectiveInclude();
2375      case DK_REPEAT:
2376        return parseDirectiveRepeat(IDLoc, IDVal);
2377      case DK_WHILE:
2378        return parseDirectiveWhile(IDLoc);
2379      case DK_FOR:
2380        return parseDirectiveFor(IDLoc, IDVal);
2381      case DK_FORC:
2382        return parseDirectiveForc(IDLoc, IDVal);
2383      case DK_FILE:
2384        return parseDirectiveFile(IDLoc);
2385      case DK_LINE:
2386        return parseDirectiveLine();
2387      case DK_LOC:
2388        return parseDirectiveLoc();
2389      case DK_STABS:
2390        return parseDirectiveStabs();
2391      case DK_CV_FILE:
2392        return parseDirectiveCVFile();
2393      case DK_CV_FUNC_ID:
2394        return parseDirectiveCVFuncId();
2395      case DK_CV_INLINE_SITE_ID:
2396        return parseDirectiveCVInlineSiteId();
2397      case DK_CV_LOC:
2398        return parseDirectiveCVLoc();
2399      case DK_CV_LINETABLE:
2400        return parseDirectiveCVLinetable();
2401      case DK_CV_INLINE_LINETABLE:
2402        return parseDirectiveCVInlineLinetable();
2403      case DK_CV_DEF_RANGE:
2404        return parseDirectiveCVDefRange();
2405      case DK_CV_STRING:
2406        return parseDirectiveCVString();
2407      case DK_CV_STRINGTABLE:
2408        return parseDirectiveCVStringTable();
2409      case DK_CV_FILECHECKSUMS:
2410        return parseDirectiveCVFileChecksums();
2411      case DK_CV_FILECHECKSUM_OFFSET:
2412        return parseDirectiveCVFileChecksumOffset();
2413      case DK_CV_FPO_DATA:
2414        return parseDirectiveCVFPOData();
2415      case DK_CFI_SECTIONS:
2416        return parseDirectiveCFISections();
2417      case DK_CFI_STARTPROC:
2418        return parseDirectiveCFIStartProc();
2419      case DK_CFI_ENDPROC:
2420        return parseDirectiveCFIEndProc();
2421      case DK_CFI_DEF_CFA:
2422        return parseDirectiveCFIDefCfa(IDLoc);
2423      case DK_CFI_DEF_CFA_OFFSET:
2424        return parseDirectiveCFIDefCfaOffset(IDLoc);
2425      case DK_CFI_ADJUST_CFA_OFFSET:
2426        return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2427      case DK_CFI_DEF_CFA_REGISTER:
2428        return parseDirectiveCFIDefCfaRegister(IDLoc);
2429      case DK_CFI_OFFSET:
2430        return parseDirectiveCFIOffset(IDLoc);
2431      case DK_CFI_REL_OFFSET:
2432        return parseDirectiveCFIRelOffset(IDLoc);
2433      case DK_CFI_PERSONALITY:
2434        return parseDirectiveCFIPersonalityOrLsda(true);
2435      case DK_CFI_LSDA:
2436        return parseDirectiveCFIPersonalityOrLsda(false);
2437      case DK_CFI_REMEMBER_STATE:
2438        return parseDirectiveCFIRememberState(IDLoc);
2439      case DK_CFI_RESTORE_STATE:
2440        return parseDirectiveCFIRestoreState(IDLoc);
2441      case DK_CFI_SAME_VALUE:
2442        return parseDirectiveCFISameValue(IDLoc);
2443      case DK_CFI_RESTORE:
2444        return parseDirectiveCFIRestore(IDLoc);
2445      case DK_CFI_ESCAPE:
2446        return parseDirectiveCFIEscape(IDLoc);
2447      case DK_CFI_RETURN_COLUMN:
2448        return parseDirectiveCFIReturnColumn(IDLoc);
2449      case DK_CFI_SIGNAL_FRAME:
2450        return parseDirectiveCFISignalFrame();
2451      case DK_CFI_UNDEFINED:
2452        return parseDirectiveCFIUndefined(IDLoc);
2453      case DK_CFI_REGISTER:
2454        return parseDirectiveCFIRegister(IDLoc);
2455      case DK_CFI_WINDOW_SAVE:
2456        return parseDirectiveCFIWindowSave(IDLoc);
2457      case DK_EXITM:
2458        Info.ExitValue = "";
2459        return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2460      case DK_ENDM:
2461        Info.ExitValue = "";
2462        return parseDirectiveEndMacro(IDVal);
2463      case DK_PURGE:
2464        return parseDirectivePurgeMacro(IDLoc);
2465      case DK_END:
2466        return parseDirectiveEnd(IDLoc);
2467      case DK_ERR:
2468        return parseDirectiveError(IDLoc);
2469      case DK_ERRB:
2470        return parseDirectiveErrorIfb(IDLoc, true);
2471      case DK_ERRNB:
2472        return parseDirectiveErrorIfb(IDLoc, false);
2473      case DK_ERRDEF:
2474        return parseDirectiveErrorIfdef(IDLoc, true);
2475      case DK_ERRNDEF:
2476        return parseDirectiveErrorIfdef(IDLoc, false);
2477      case DK_ERRDIF:
2478        return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2479                                        /*CaseInsensitive=*/false);
2480      case DK_ERRDIFI:
2481        return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2482                                        /*CaseInsensitive=*/true);
2483      case DK_ERRIDN:
2484        return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2485                                        /*CaseInsensitive=*/false);
2486      case DK_ERRIDNI:
2487        return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2488                                        /*CaseInsensitive=*/true);
2489      case DK_ERRE:
2490        return parseDirectiveErrorIfe(IDLoc, true);
2491      case DK_ERRNZ:
2492        return parseDirectiveErrorIfe(IDLoc, false);
2493      case DK_RADIX:
2494        return parseDirectiveRadix(IDLoc);
2495      case DK_ECHO:
2496        return parseDirectiveEcho(IDLoc);
2497      }
2498  
2499      return Error(IDLoc, "unknown directive");
2500    }
2501  
2502    // We also check if this is allocating memory with user-defined type.
2503    auto IDIt = Structs.find(IDVal.lower());
2504    if (IDIt != Structs.end())
2505      return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2506                                       IDLoc);
2507  
2508    // Non-conditional Microsoft directives sometimes follow their first argument.
2509    const AsmToken nextTok = getTok();
2510    const StringRef nextVal = nextTok.getString();
2511    const SMLoc nextLoc = nextTok.getLoc();
2512  
2513    const AsmToken afterNextTok = peekTok();
2514  
2515    // There are several entities interested in parsing infix directives:
2516    //
2517    // 1. Asm parser extensions. For example, platform-specific parsers
2518    //    (like the ELF parser) register themselves as extensions.
2519    // 2. The generic directive parser implemented by this class. These are
2520    //    all the directives that behave in a target and platform independent
2521    //    manner, or at least have a default behavior that's shared between
2522    //    all targets and platforms.
2523  
2524    getTargetParser().flushPendingInstructions(getStreamer());
2525  
2526    // Special-case handling of structure-end directives at higher priority, since
2527    // ENDS is overloaded as a segment-end directive.
2528    if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2529      Lex();
2530      return parseDirectiveEnds(IDVal, IDLoc);
2531    }
2532  
2533    // First, check the extension directive map to see if any extension has
2534    // registered itself to parse this directive.
2535    std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2536        ExtensionDirectiveMap.lookup(nextVal.lower());
2537    if (Handler.first) {
2538      Lex();
2539      Lexer.UnLex(ID);
2540      return (*Handler.second)(Handler.first, nextVal, nextLoc);
2541    }
2542  
2543    // If no one else is interested in this directive, it must be
2544    // generic and familiar to this class.
2545    DirKindIt = DirectiveKindMap.find(nextVal.lower());
2546    DirKind = (DirKindIt == DirectiveKindMap.end())
2547                  ? DK_NO_DIRECTIVE
2548                  : DirKindIt->getValue();
2549    switch (DirKind) {
2550    default:
2551      break;
2552    case DK_ASSIGN:
2553    case DK_EQU:
2554    case DK_TEXTEQU:
2555      Lex();
2556      return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2557    case DK_BYTE:
2558      if (afterNextTok.is(AsmToken::Identifier) &&
2559          afterNextTok.getString().equals_insensitive("ptr")) {
2560        // Size directive; part of an instruction.
2561        break;
2562      }
2563      [[fallthrough]];
2564    case DK_SBYTE:
2565    case DK_DB:
2566      Lex();
2567      return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2568    case DK_WORD:
2569      if (afterNextTok.is(AsmToken::Identifier) &&
2570          afterNextTok.getString().equals_insensitive("ptr")) {
2571        // Size directive; part of an instruction.
2572        break;
2573      }
2574      [[fallthrough]];
2575    case DK_SWORD:
2576    case DK_DW:
2577      Lex();
2578      return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2579    case DK_DWORD:
2580      if (afterNextTok.is(AsmToken::Identifier) &&
2581          afterNextTok.getString().equals_insensitive("ptr")) {
2582        // Size directive; part of an instruction.
2583        break;
2584      }
2585      [[fallthrough]];
2586    case DK_SDWORD:
2587    case DK_DD:
2588      Lex();
2589      return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2590    case DK_FWORD:
2591      if (afterNextTok.is(AsmToken::Identifier) &&
2592          afterNextTok.getString().equals_insensitive("ptr")) {
2593        // Size directive; part of an instruction.
2594        break;
2595      }
2596      [[fallthrough]];
2597    case DK_DF:
2598      Lex();
2599      return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2600    case DK_QWORD:
2601      if (afterNextTok.is(AsmToken::Identifier) &&
2602          afterNextTok.getString().equals_insensitive("ptr")) {
2603        // Size directive; part of an instruction.
2604        break;
2605      }
2606      [[fallthrough]];
2607    case DK_SQWORD:
2608    case DK_DQ:
2609      Lex();
2610      return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2611    case DK_REAL4:
2612      Lex();
2613      return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2614                                          IDVal, IDLoc);
2615    case DK_REAL8:
2616      Lex();
2617      return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2618                                          IDVal, IDLoc);
2619    case DK_REAL10:
2620      Lex();
2621      return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2622                                          10, IDVal, IDLoc);
2623    case DK_STRUCT:
2624    case DK_UNION:
2625      Lex();
2626      return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2627    case DK_ENDS:
2628      Lex();
2629      return parseDirectiveEnds(IDVal, IDLoc);
2630    case DK_MACRO:
2631      Lex();
2632      return parseDirectiveMacro(IDVal, IDLoc);
2633    }
2634  
2635    // Finally, we check if this is allocating a variable with user-defined type.
2636    auto NextIt = Structs.find(nextVal.lower());
2637    if (NextIt != Structs.end()) {
2638      Lex();
2639      return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2640                                            nextVal, nextLoc, IDVal);
2641    }
2642  
2643    // __asm _emit or __asm __emit
2644    if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2645                               IDVal == "_EMIT" || IDVal == "__EMIT"))
2646      return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2647  
2648    // __asm align
2649    if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2650      return parseDirectiveMSAlign(IDLoc, Info);
2651  
2652    if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2653      Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2654    if (checkForValidSection())
2655      return true;
2656  
2657    // Canonicalize the opcode to lower case.
2658    std::string OpcodeStr = IDVal.lower();
2659    ParseInstructionInfo IInfo(Info.AsmRewrites);
2660    bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2661                                                            Info.ParsedOperands);
2662    Info.ParseError = ParseHadError;
2663  
2664    // Dump the parsed representation, if requested.
2665    if (getShowParsedOperands()) {
2666      SmallString<256> Str;
2667      raw_svector_ostream OS(Str);
2668      OS << "parsed instruction: [";
2669      for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2670        if (i != 0)
2671          OS << ", ";
2672        Info.ParsedOperands[i]->print(OS);
2673      }
2674      OS << "]";
2675  
2676      printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2677    }
2678  
2679    // Fail even if ParseInstruction erroneously returns false.
2680    if (hasPendingError() || ParseHadError)
2681      return true;
2682  
2683    // If we are generating dwarf for the current section then generate a .loc
2684    // directive for the instruction.
2685    if (!ParseHadError && enabledGenDwarfForAssembly() &&
2686        getContext().getGenDwarfSectionSyms().count(
2687            getStreamer().getCurrentSectionOnly())) {
2688      unsigned Line;
2689      if (ActiveMacros.empty())
2690        Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2691      else
2692        Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2693                                     ActiveMacros.front()->ExitBuffer);
2694  
2695      // If we previously parsed a cpp hash file line comment then make sure the
2696      // current Dwarf File is for the CppHashFilename if not then emit the
2697      // Dwarf File table for it and adjust the line number for the .loc.
2698      if (!CppHashInfo.Filename.empty()) {
2699        unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2700            0, StringRef(), CppHashInfo.Filename);
2701        getContext().setGenDwarfFileNumber(FileNumber);
2702  
2703        unsigned CppHashLocLineNo =
2704          SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2705        Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2706      }
2707  
2708      getStreamer().emitDwarfLocDirective(
2709          getContext().getGenDwarfFileNumber(), Line, 0,
2710          DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2711          StringRef());
2712    }
2713  
2714    // If parsing succeeded, match the instruction.
2715    if (!ParseHadError) {
2716      uint64_t ErrorInfo;
2717      if (getTargetParser().MatchAndEmitInstruction(
2718              IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2719              getTargetParser().isParsingMSInlineAsm()))
2720        return true;
2721    }
2722    return false;
2723  }
2724  
2725  // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2726  bool MasmParser::parseCurlyBlockScope(
2727      SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2728    // Identify curly brace marking block start/end.
2729    if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2730      return false;
2731  
2732    SMLoc StartLoc = Lexer.getLoc();
2733    Lex(); // Eat the brace.
2734    if (Lexer.is(AsmToken::EndOfStatement))
2735      Lex(); // Eat EndOfStatement following the brace.
2736  
2737    // Erase the block start/end brace from the output asm string.
2738    AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2739                                                    StartLoc.getPointer());
2740    return true;
2741  }
2742  
2743  /// parseCppHashLineFilenameComment as this:
2744  ///   ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2745  bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2746    Lex(); // Eat the hash token.
2747    // Lexer only ever emits HashDirective if it fully formed if it's
2748    // done the checking already so this is an internal error.
2749    assert(getTok().is(AsmToken::Integer) &&
2750           "Lexing Cpp line comment: Expected Integer");
2751    int64_t LineNumber = getTok().getIntVal();
2752    Lex();
2753    assert(getTok().is(AsmToken::String) &&
2754           "Lexing Cpp line comment: Expected String");
2755    StringRef Filename = getTok().getString();
2756    Lex();
2757  
2758    // Get rid of the enclosing quotes.
2759    Filename = Filename.substr(1, Filename.size() - 2);
2760  
2761    // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2762    // and possibly DWARF file info.
2763    CppHashInfo.Loc = L;
2764    CppHashInfo.Filename = Filename;
2765    CppHashInfo.LineNumber = LineNumber;
2766    CppHashInfo.Buf = CurBuffer;
2767    if (FirstCppHashFilename.empty())
2768      FirstCppHashFilename = Filename;
2769    return false;
2770  }
2771  
2772  /// will use the last parsed cpp hash line filename comment
2773  /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2774  void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2775    const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2776    raw_ostream &OS = errs();
2777  
2778    const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2779    SMLoc DiagLoc = Diag.getLoc();
2780    unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2781    unsigned CppHashBuf =
2782        Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2783  
2784    // Like SourceMgr::printMessage() we need to print the include stack if any
2785    // before printing the message.
2786    unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2787    if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2788        DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2789      SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2790      DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2791    }
2792  
2793    // If we have not parsed a cpp hash line filename comment or the source
2794    // manager changed or buffer changed (like in a nested include) then just
2795    // print the normal diagnostic using its Filename and LineNo.
2796    if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2797        DiagBuf != CppHashBuf) {
2798      if (Parser->SavedDiagHandler)
2799        Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2800      else
2801        Diag.print(nullptr, OS);
2802      return;
2803    }
2804  
2805    // Use the CppHashFilename and calculate a line number based on the
2806    // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2807    // for the diagnostic.
2808    const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2809  
2810    int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2811    int CppHashLocLineNo =
2812        Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2813    int LineNo =
2814        Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2815  
2816    SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2817                         Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2818                         Diag.getLineContents(), Diag.getRanges());
2819  
2820    if (Parser->SavedDiagHandler)
2821      Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2822    else
2823      NewDiag.print(nullptr, OS);
2824  }
2825  
2826  // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2827  // not accept '.'.
isMacroParameterChar(char C)2828  static bool isMacroParameterChar(char C) {
2829    return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2830  }
2831  
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2832  bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2833                               ArrayRef<MCAsmMacroParameter> Parameters,
2834                               ArrayRef<MCAsmMacroArgument> A,
2835                               const std::vector<std::string> &Locals, SMLoc L) {
2836    unsigned NParameters = Parameters.size();
2837    if (NParameters != A.size())
2838      return Error(L, "Wrong number of arguments");
2839    StringMap<std::string> LocalSymbols;
2840    std::string Name;
2841    Name.reserve(6);
2842    for (StringRef Local : Locals) {
2843      raw_string_ostream LocalName(Name);
2844      LocalName << "??"
2845                << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2846      LocalSymbols.insert({Local, Name});
2847      Name.clear();
2848    }
2849  
2850    std::optional<char> CurrentQuote;
2851    while (!Body.empty()) {
2852      // Scan for the next substitution.
2853      std::size_t End = Body.size(), Pos = 0;
2854      std::size_t IdentifierPos = End;
2855      for (; Pos != End; ++Pos) {
2856        // Find the next possible macro parameter, including preceding a '&'
2857        // inside quotes.
2858        if (Body[Pos] == '&')
2859          break;
2860        if (isMacroParameterChar(Body[Pos])) {
2861          if (!CurrentQuote)
2862            break;
2863          if (IdentifierPos == End)
2864            IdentifierPos = Pos;
2865        } else {
2866          IdentifierPos = End;
2867        }
2868  
2869        // Track quotation status
2870        if (!CurrentQuote) {
2871          if (Body[Pos] == '\'' || Body[Pos] == '"')
2872            CurrentQuote = Body[Pos];
2873        } else if (Body[Pos] == CurrentQuote) {
2874          if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2875            // Escaped quote, and quotes aren't identifier chars; skip
2876            ++Pos;
2877            continue;
2878          } else {
2879            CurrentQuote.reset();
2880          }
2881        }
2882      }
2883      if (IdentifierPos != End) {
2884        // We've recognized an identifier before an apostrophe inside quotes;
2885        // check once to see if we can expand it.
2886        Pos = IdentifierPos;
2887        IdentifierPos = End;
2888      }
2889  
2890      // Add the prefix.
2891      OS << Body.slice(0, Pos);
2892  
2893      // Check if we reached the end.
2894      if (Pos == End)
2895        break;
2896  
2897      unsigned I = Pos;
2898      bool InitialAmpersand = (Body[I] == '&');
2899      if (InitialAmpersand) {
2900        ++I;
2901        ++Pos;
2902      }
2903      while (I < End && isMacroParameterChar(Body[I]))
2904        ++I;
2905  
2906      const char *Begin = Body.data() + Pos;
2907      StringRef Argument(Begin, I - Pos);
2908      const std::string ArgumentLower = Argument.lower();
2909      unsigned Index = 0;
2910  
2911      for (; Index < NParameters; ++Index)
2912        if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2913          break;
2914  
2915      if (Index == NParameters) {
2916        if (InitialAmpersand)
2917          OS << '&';
2918        auto it = LocalSymbols.find(ArgumentLower);
2919        if (it != LocalSymbols.end())
2920          OS << it->second;
2921        else
2922          OS << Argument;
2923        Pos = I;
2924      } else {
2925        for (const AsmToken &Token : A[Index]) {
2926          // In MASM, you can write '%expr'.
2927          // The prefix '%' evaluates the expression 'expr'
2928          // and uses the result as a string (e.g. replace %(1+2) with the
2929          // string "3").
2930          // Here, we identify the integer token which is the result of the
2931          // absolute expression evaluation and replace it with its string
2932          // representation.
2933          if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2934            // Emit an integer value to the buffer.
2935            OS << Token.getIntVal();
2936          else
2937            OS << Token.getString();
2938        }
2939  
2940        Pos += Argument.size();
2941        if (Pos < End && Body[Pos] == '&') {
2942          ++Pos;
2943        }
2944      }
2945      // Update the scan point.
2946      Body = Body.substr(Pos);
2947    }
2948  
2949    return false;
2950  }
2951  
isOperator(AsmToken::TokenKind kind)2952  static bool isOperator(AsmToken::TokenKind kind) {
2953    switch (kind) {
2954    default:
2955      return false;
2956    case AsmToken::Plus:
2957    case AsmToken::Minus:
2958    case AsmToken::Tilde:
2959    case AsmToken::Slash:
2960    case AsmToken::Star:
2961    case AsmToken::Dot:
2962    case AsmToken::Equal:
2963    case AsmToken::EqualEqual:
2964    case AsmToken::Pipe:
2965    case AsmToken::PipePipe:
2966    case AsmToken::Caret:
2967    case AsmToken::Amp:
2968    case AsmToken::AmpAmp:
2969    case AsmToken::Exclaim:
2970    case AsmToken::ExclaimEqual:
2971    case AsmToken::Less:
2972    case AsmToken::LessEqual:
2973    case AsmToken::LessLess:
2974    case AsmToken::LessGreater:
2975    case AsmToken::Greater:
2976    case AsmToken::GreaterEqual:
2977    case AsmToken::GreaterGreater:
2978      return true;
2979    }
2980  }
2981  
2982  namespace {
2983  
2984  class AsmLexerSkipSpaceRAII {
2985  public:
AsmLexerSkipSpaceRAII(AsmLexer & Lexer,bool SkipSpace)2986    AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2987      Lexer.setSkipSpace(SkipSpace);
2988    }
2989  
~AsmLexerSkipSpaceRAII()2990    ~AsmLexerSkipSpaceRAII() {
2991      Lexer.setSkipSpace(true);
2992    }
2993  
2994  private:
2995    AsmLexer &Lexer;
2996  };
2997  
2998  } // end anonymous namespace
2999  
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)3000  bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3001                                      MCAsmMacroArgument &MA,
3002                                      AsmToken::TokenKind EndTok) {
3003    if (MP && MP->Vararg) {
3004      if (Lexer.isNot(EndTok)) {
3005        SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3006        for (StringRef S : Str) {
3007          MA.emplace_back(AsmToken::String, S);
3008        }
3009      }
3010      return false;
3011    }
3012  
3013    SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3014    if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3015      const char *StrChar = StrLoc.getPointer() + 1;
3016      const char *EndChar = EndLoc.getPointer() - 1;
3017      jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3018      /// Eat from '<' to '>'.
3019      Lex();
3020      MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3021      return false;
3022    }
3023  
3024    unsigned ParenLevel = 0;
3025  
3026    // Darwin doesn't use spaces to delmit arguments.
3027    AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3028  
3029    bool SpaceEaten;
3030  
3031    while (true) {
3032      SpaceEaten = false;
3033      if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3034        return TokError("unexpected token");
3035  
3036      if (ParenLevel == 0) {
3037        if (Lexer.is(AsmToken::Comma))
3038          break;
3039  
3040        if (Lexer.is(AsmToken::Space)) {
3041          SpaceEaten = true;
3042          Lex(); // Eat spaces.
3043        }
3044  
3045        // Spaces can delimit parameters, but could also be part an expression.
3046        // If the token after a space is an operator, add the token and the next
3047        // one into this argument
3048        if (!IsDarwin) {
3049          if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3050            MA.push_back(getTok());
3051            Lex();
3052  
3053            // Whitespace after an operator can be ignored.
3054            if (Lexer.is(AsmToken::Space))
3055              Lex();
3056  
3057            continue;
3058          }
3059        }
3060        if (SpaceEaten)
3061          break;
3062      }
3063  
3064      // handleMacroEntry relies on not advancing the lexer here
3065      // to be able to fill in the remaining default parameter values
3066      if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3067        break;
3068  
3069      // Adjust the current parentheses level.
3070      if (Lexer.is(AsmToken::LParen))
3071        ++ParenLevel;
3072      else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3073        --ParenLevel;
3074  
3075      // Append the token to the current argument list.
3076      MA.push_back(getTok());
3077      Lex();
3078    }
3079  
3080    if (ParenLevel != 0)
3081      return TokError("unbalanced parentheses in argument");
3082  
3083    if (MA.empty() && MP) {
3084      if (MP->Required) {
3085        return TokError("missing value for required parameter '" + MP->Name +
3086                        "'");
3087      } else {
3088        MA = MP->Value;
3089      }
3090    }
3091    return false;
3092  }
3093  
3094  // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)3095  bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3096                                       MCAsmMacroArguments &A,
3097                                       AsmToken::TokenKind EndTok) {
3098    const unsigned NParameters = M ? M->Parameters.size() : 0;
3099    bool NamedParametersFound = false;
3100    SmallVector<SMLoc, 4> FALocs;
3101  
3102    A.resize(NParameters);
3103    FALocs.resize(NParameters);
3104  
3105    // Parse two kinds of macro invocations:
3106    // - macros defined without any parameters accept an arbitrary number of them
3107    // - macros defined with parameters accept at most that many of them
3108    for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3109         ++Parameter) {
3110      SMLoc IDLoc = Lexer.getLoc();
3111      MCAsmMacroParameter FA;
3112  
3113      if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3114        if (parseIdentifier(FA.Name))
3115          return Error(IDLoc, "invalid argument identifier for formal argument");
3116  
3117        if (Lexer.isNot(AsmToken::Equal))
3118          return TokError("expected '=' after formal parameter identifier");
3119  
3120        Lex();
3121  
3122        NamedParametersFound = true;
3123      }
3124  
3125      if (NamedParametersFound && FA.Name.empty())
3126        return Error(IDLoc, "cannot mix positional and keyword arguments");
3127  
3128      unsigned PI = Parameter;
3129      if (!FA.Name.empty()) {
3130        assert(M && "expected macro to be defined");
3131        unsigned FAI = 0;
3132        for (FAI = 0; FAI < NParameters; ++FAI)
3133          if (M->Parameters[FAI].Name == FA.Name)
3134            break;
3135  
3136        if (FAI >= NParameters) {
3137          return Error(IDLoc, "parameter named '" + FA.Name +
3138                                  "' does not exist for macro '" + M->Name + "'");
3139        }
3140        PI = FAI;
3141      }
3142      const MCAsmMacroParameter *MP = nullptr;
3143      if (M && PI < NParameters)
3144        MP = &M->Parameters[PI];
3145  
3146      SMLoc StrLoc = Lexer.getLoc();
3147      SMLoc EndLoc;
3148      if (Lexer.is(AsmToken::Percent)) {
3149        const MCExpr *AbsoluteExp;
3150        int64_t Value;
3151        /// Eat '%'.
3152        Lex();
3153        if (parseExpression(AbsoluteExp, EndLoc))
3154          return false;
3155        if (!AbsoluteExp->evaluateAsAbsolute(Value,
3156                                             getStreamer().getAssemblerPtr()))
3157          return Error(StrLoc, "expected absolute expression");
3158        const char *StrChar = StrLoc.getPointer();
3159        const char *EndChar = EndLoc.getPointer();
3160        AsmToken newToken(AsmToken::Integer,
3161                          StringRef(StrChar, EndChar - StrChar), Value);
3162        FA.Value.push_back(newToken);
3163      } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3164        if (M)
3165          return addErrorSuffix(" in '" + M->Name + "' macro");
3166        else
3167          return true;
3168      }
3169  
3170      if (!FA.Value.empty()) {
3171        if (A.size() <= PI)
3172          A.resize(PI + 1);
3173        A[PI] = FA.Value;
3174  
3175        if (FALocs.size() <= PI)
3176          FALocs.resize(PI + 1);
3177  
3178        FALocs[PI] = Lexer.getLoc();
3179      }
3180  
3181      // At the end of the statement, fill in remaining arguments that have
3182      // default values. If there aren't any, then the next argument is
3183      // required but missing
3184      if (Lexer.is(EndTok)) {
3185        bool Failure = false;
3186        for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3187          if (A[FAI].empty()) {
3188            if (M->Parameters[FAI].Required) {
3189              Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3190                    "missing value for required parameter "
3191                    "'" +
3192                        M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3193              Failure = true;
3194            }
3195  
3196            if (!M->Parameters[FAI].Value.empty())
3197              A[FAI] = M->Parameters[FAI].Value;
3198          }
3199        }
3200        return Failure;
3201      }
3202  
3203      if (Lexer.is(AsmToken::Comma))
3204        Lex();
3205    }
3206  
3207    return TokError("too many positional arguments");
3208  }
3209  
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)3210  bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3211                                    AsmToken::TokenKind ArgumentEndTok) {
3212    // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3213    // eliminate this, although we should protect against infinite loops.
3214    unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3215    if (ActiveMacros.size() == MaxNestingDepth) {
3216      std::ostringstream MaxNestingDepthError;
3217      MaxNestingDepthError << "macros cannot be nested more than "
3218                           << MaxNestingDepth << " levels deep."
3219                           << " Use -asm-macro-max-nesting-depth to increase "
3220                              "this limit.";
3221      return TokError(MaxNestingDepthError.str());
3222    }
3223  
3224    MCAsmMacroArguments A;
3225    if (parseMacroArguments(M, A, ArgumentEndTok))
3226      return true;
3227  
3228    // Macro instantiation is lexical, unfortunately. We construct a new buffer
3229    // to hold the macro body with substitutions.
3230    SmallString<256> Buf;
3231    StringRef Body = M->Body;
3232    raw_svector_ostream OS(Buf);
3233  
3234    if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3235      return true;
3236  
3237    // We include the endm in the buffer as our cue to exit the macro
3238    // instantiation.
3239    OS << "endm\n";
3240  
3241    std::unique_ptr<MemoryBuffer> Instantiation =
3242        MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3243  
3244    // Create the macro instantiation object and add to the current macro
3245    // instantiation stack.
3246    MacroInstantiation *MI = new MacroInstantiation{
3247        NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3248    ActiveMacros.push_back(MI);
3249  
3250    ++NumOfMacroInstantiations;
3251  
3252    // Jump to the macro instantiation and prime the lexer.
3253    CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3254    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3255    EndStatementAtEOFStack.push_back(true);
3256    Lex();
3257  
3258    return false;
3259  }
3260  
handleMacroExit()3261  void MasmParser::handleMacroExit() {
3262    // Jump to the token we should return to, and consume it.
3263    EndStatementAtEOFStack.pop_back();
3264    jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3265              EndStatementAtEOFStack.back());
3266    Lex();
3267  
3268    // Pop the instantiation entry.
3269    delete ActiveMacros.back();
3270    ActiveMacros.pop_back();
3271  }
3272  
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)3273  bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3274    if (!M->IsFunction)
3275      return Error(NameLoc, "cannot invoke macro procedure as function");
3276  
3277    if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3278                                         "' requires arguments in parentheses") ||
3279        handleMacroEntry(M, NameLoc, AsmToken::RParen))
3280      return true;
3281  
3282    // Parse all statements in the macro, retrieving the exit value when it ends.
3283    std::string ExitValue;
3284    SmallVector<AsmRewrite, 4> AsmStrRewrites;
3285    while (Lexer.isNot(AsmToken::Eof)) {
3286      ParseStatementInfo Info(&AsmStrRewrites);
3287      bool Parsed = parseStatement(Info, nullptr);
3288  
3289      if (!Parsed && Info.ExitValue) {
3290        ExitValue = std::move(*Info.ExitValue);
3291        break;
3292      }
3293  
3294      // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3295      // for printing ErrMsg via Lex() only if no (presumably better) parser error
3296      // exists.
3297      if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3298        Lex();
3299      }
3300  
3301      // parseStatement returned true so may need to emit an error.
3302      printPendingErrors();
3303  
3304      // Skipping to the next line if needed.
3305      if (Parsed && !getLexer().isAtStartOfStatement())
3306        eatToEndOfStatement();
3307    }
3308  
3309    // Consume the right-parenthesis on the other side of the arguments.
3310    if (parseRParen())
3311      return true;
3312  
3313    // Exit values may require lexing, unfortunately. We construct a new buffer to
3314    // hold the exit value.
3315    std::unique_ptr<MemoryBuffer> MacroValue =
3316        MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3317  
3318    // Jump from this location to the instantiated exit value, and prime the
3319    // lexer.
3320    CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3321    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3322                    /*EndStatementAtEOF=*/false);
3323    EndStatementAtEOFStack.push_back(false);
3324    Lex();
3325  
3326    return false;
3327  }
3328  
3329  /// parseIdentifier:
3330  ///   ::= identifier
3331  ///   ::= string
parseIdentifier(StringRef & Res,IdentifierPositionKind Position)3332  bool MasmParser::parseIdentifier(StringRef &Res,
3333                                   IdentifierPositionKind Position) {
3334    // The assembler has relaxed rules for accepting identifiers, in particular we
3335    // allow things like '.globl $foo' and '.def @feat.00', which would normally
3336    // be separate tokens. At this level, we have already lexed so we cannot
3337    // (currently) handle this as a context dependent token, instead we detect
3338    // adjacent tokens and return the combined identifier.
3339    if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3340      SMLoc PrefixLoc = getLexer().getLoc();
3341  
3342      // Consume the prefix character, and check for a following identifier.
3343  
3344      AsmToken nextTok = peekTok(false);
3345  
3346      if (nextTok.isNot(AsmToken::Identifier))
3347        return true;
3348  
3349      // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3350      if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3351        return true;
3352  
3353      // eat $ or @
3354      Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3355      // Construct the joined identifier and consume the token.
3356      Res =
3357          StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3358      Lex(); // Parser Lex to maintain invariants.
3359      return false;
3360    }
3361  
3362    if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3363      return true;
3364  
3365    Res = getTok().getIdentifier();
3366  
3367    // Consume the identifier token - but if parsing certain directives, avoid
3368    // lexical expansion of the next token.
3369    ExpandKind ExpandNextToken = ExpandMacros;
3370    if (Position == StartOfStatement &&
3371        StringSwitch<bool>(Res)
3372            .CaseLower("echo", true)
3373            .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3374            .Default(false)) {
3375      ExpandNextToken = DoNotExpandMacros;
3376    }
3377    Lex(ExpandNextToken);
3378  
3379    return false;
3380  }
3381  
3382  /// parseDirectiveEquate:
3383  ///  ::= name "=" expression
3384  ///    | name "equ" expression    (not redefinable)
3385  ///    | name "equ" text-list
3386  ///    | name "textequ" text-list (redefinability unspecified)
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind,SMLoc NameLoc)3387  bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3388                                        DirectiveKind DirKind, SMLoc NameLoc) {
3389    auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3390    if (BuiltinIt != BuiltinSymbolMap.end())
3391      return Error(NameLoc, "cannot redefine a built-in symbol");
3392  
3393    Variable &Var = Variables[Name.lower()];
3394    if (Var.Name.empty()) {
3395      Var.Name = Name;
3396    }
3397  
3398    SMLoc StartLoc = Lexer.getLoc();
3399    if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3400      // "equ" and "textequ" both allow text expressions.
3401      std::string Value;
3402      std::string TextItem;
3403      if (!parseTextItem(TextItem)) {
3404        Value += TextItem;
3405  
3406        // Accept a text-list, not just one text-item.
3407        auto parseItem = [&]() -> bool {
3408          if (parseTextItem(TextItem))
3409            return TokError("expected text item");
3410          Value += TextItem;
3411          return false;
3412        };
3413        if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3414          return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3415  
3416        if (!Var.IsText || Var.TextValue != Value) {
3417          switch (Var.Redefinable) {
3418          case Variable::NOT_REDEFINABLE:
3419            return Error(getTok().getLoc(), "invalid variable redefinition");
3420          case Variable::WARN_ON_REDEFINITION:
3421            if (Warning(NameLoc, "redefining '" + Name +
3422                                     "', already defined on the command line")) {
3423              return true;
3424            }
3425            break;
3426          default:
3427            break;
3428          }
3429        }
3430        Var.IsText = true;
3431        Var.TextValue = Value;
3432        Var.Redefinable = Variable::REDEFINABLE;
3433  
3434        return false;
3435      }
3436    }
3437    if (DirKind == DK_TEXTEQU)
3438      return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3439  
3440    // Parse as expression assignment.
3441    const MCExpr *Expr;
3442    SMLoc EndLoc;
3443    if (parseExpression(Expr, EndLoc))
3444      return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3445    StringRef ExprAsString = StringRef(
3446        StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3447  
3448    int64_t Value;
3449    if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3450      if (DirKind == DK_ASSIGN)
3451        return Error(
3452            StartLoc,
3453            "expected absolute expression; not all symbols have known values",
3454            {StartLoc, EndLoc});
3455  
3456      // Not an absolute expression; define as a text replacement.
3457      if (!Var.IsText || Var.TextValue != ExprAsString) {
3458        switch (Var.Redefinable) {
3459        case Variable::NOT_REDEFINABLE:
3460          return Error(getTok().getLoc(), "invalid variable redefinition");
3461        case Variable::WARN_ON_REDEFINITION:
3462          if (Warning(NameLoc, "redefining '" + Name +
3463                                   "', already defined on the command line")) {
3464            return true;
3465          }
3466          break;
3467        default:
3468          break;
3469        }
3470      }
3471  
3472      Var.IsText = true;
3473      Var.TextValue = ExprAsString.str();
3474      Var.Redefinable = Variable::REDEFINABLE;
3475  
3476      return false;
3477    }
3478  
3479    MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3480  
3481    const MCConstantExpr *PrevValue =
3482        Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3483                                Sym->getVariableValue(/*SetUsed=*/false))
3484                          : nullptr;
3485    if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3486      switch (Var.Redefinable) {
3487      case Variable::NOT_REDEFINABLE:
3488        return Error(getTok().getLoc(), "invalid variable redefinition");
3489      case Variable::WARN_ON_REDEFINITION:
3490        if (Warning(NameLoc, "redefining '" + Name +
3491                                 "', already defined on the command line")) {
3492          return true;
3493        }
3494        break;
3495      default:
3496        break;
3497      }
3498    }
3499  
3500    Var.IsText = false;
3501    Var.TextValue.clear();
3502    Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3503                                             : Variable::NOT_REDEFINABLE;
3504  
3505    Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3506    Sym->setVariableValue(Expr);
3507    Sym->setExternal(false);
3508  
3509    return false;
3510  }
3511  
parseEscapedString(std::string & Data)3512  bool MasmParser::parseEscapedString(std::string &Data) {
3513    if (check(getTok().isNot(AsmToken::String), "expected string"))
3514      return true;
3515  
3516    Data = "";
3517    char Quote = getTok().getString().front();
3518    StringRef Str = getTok().getStringContents();
3519    Data.reserve(Str.size());
3520    for (size_t i = 0, e = Str.size(); i != e; ++i) {
3521      Data.push_back(Str[i]);
3522      if (Str[i] == Quote) {
3523        // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3524        // If we're escaping the string's trailing delimiter, we're definitely
3525        // missing a quotation mark.
3526        if (i + 1 == Str.size())
3527          return Error(getTok().getLoc(), "missing quotation mark in string");
3528        if (Str[i + 1] == Quote)
3529          ++i;
3530      }
3531    }
3532  
3533    Lex();
3534    return false;
3535  }
3536  
parseAngleBracketString(std::string & Data)3537  bool MasmParser::parseAngleBracketString(std::string &Data) {
3538    SMLoc EndLoc, StartLoc = getTok().getLoc();
3539    if (isAngleBracketString(StartLoc, EndLoc)) {
3540      const char *StartChar = StartLoc.getPointer() + 1;
3541      const char *EndChar = EndLoc.getPointer() - 1;
3542      jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3543      // Eat from '<' to '>'.
3544      Lex();
3545  
3546      Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3547      return false;
3548    }
3549    return true;
3550  }
3551  
3552  /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3553  bool MasmParser::parseTextItem(std::string &Data) {
3554    switch (getTok().getKind()) {
3555    default:
3556      return true;
3557    case AsmToken::Percent: {
3558      int64_t Res;
3559      if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3560        return true;
3561      Data = std::to_string(Res);
3562      return false;
3563    }
3564    case AsmToken::Less:
3565    case AsmToken::LessEqual:
3566    case AsmToken::LessLess:
3567    case AsmToken::LessGreater:
3568      return parseAngleBracketString(Data);
3569    case AsmToken::Identifier: {
3570      // This must be a text macro; we need to expand it accordingly.
3571      StringRef ID;
3572      SMLoc StartLoc = getTok().getLoc();
3573      if (parseIdentifier(ID))
3574        return true;
3575      Data = ID.str();
3576  
3577      bool Expanded = false;
3578      while (true) {
3579        // Try to resolve as a built-in text macro
3580        auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3581        if (BuiltinIt != BuiltinSymbolMap.end()) {
3582          std::optional<std::string> BuiltinText =
3583              evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3584          if (!BuiltinText) {
3585            // Not a text macro; break without substituting
3586            break;
3587          }
3588          Data = std::move(*BuiltinText);
3589          ID = StringRef(Data);
3590          Expanded = true;
3591          continue;
3592        }
3593  
3594        // Try to resolve as a variable text macro
3595        auto VarIt = Variables.find(ID.lower());
3596        if (VarIt != Variables.end()) {
3597          const Variable &Var = VarIt->getValue();
3598          if (!Var.IsText) {
3599            // Not a text macro; break without substituting
3600            break;
3601          }
3602          Data = Var.TextValue;
3603          ID = StringRef(Data);
3604          Expanded = true;
3605          continue;
3606        }
3607  
3608        break;
3609      }
3610  
3611      if (!Expanded) {
3612        // Not a text macro; not usable in TextItem context. Since we haven't used
3613        // the token, put it back for better error recovery.
3614        getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3615        return true;
3616      }
3617      return false;
3618    }
3619    }
3620    llvm_unreachable("unhandled token kind");
3621  }
3622  
3623  /// parseDirectiveAscii:
3624  ///   ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3625  bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3626    auto parseOp = [&]() -> bool {
3627      std::string Data;
3628      if (checkForValidSection() || parseEscapedString(Data))
3629        return true;
3630      getStreamer().emitBytes(Data);
3631      if (ZeroTerminated)
3632        getStreamer().emitBytes(StringRef("\0", 1));
3633      return false;
3634    };
3635  
3636    if (parseMany(parseOp))
3637      return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3638    return false;
3639  }
3640  
emitIntValue(const MCExpr * Value,unsigned Size)3641  bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3642    // Special case constant expressions to match code generator.
3643    if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3644      assert(Size <= 8 && "Invalid size");
3645      int64_t IntValue = MCE->getValue();
3646      if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3647        return Error(MCE->getLoc(), "out of range literal value");
3648      getStreamer().emitIntValue(IntValue, Size);
3649    } else {
3650      const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3651      if (MSE && MSE->getSymbol().getName() == "?") {
3652        // ? initializer; treat as 0.
3653        getStreamer().emitIntValue(0, Size);
3654      } else {
3655        getStreamer().emitValue(Value, Size, Value->getLoc());
3656      }
3657    }
3658    return false;
3659  }
3660  
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3661  bool MasmParser::parseScalarInitializer(unsigned Size,
3662                                          SmallVectorImpl<const MCExpr *> &Values,
3663                                          unsigned StringPadLength) {
3664    if (Size == 1 && getTok().is(AsmToken::String)) {
3665      std::string Value;
3666      if (parseEscapedString(Value))
3667        return true;
3668      // Treat each character as an initializer.
3669      for (const unsigned char CharVal : Value)
3670        Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3671  
3672      // Pad the string with spaces to the specified length.
3673      for (size_t i = Value.size(); i < StringPadLength; ++i)
3674        Values.push_back(MCConstantExpr::create(' ', getContext()));
3675    } else {
3676      const MCExpr *Value;
3677      if (parseExpression(Value))
3678        return true;
3679      if (getTok().is(AsmToken::Identifier) &&
3680          getTok().getString().equals_insensitive("dup")) {
3681        Lex(); // Eat 'dup'.
3682        const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3683        if (!MCE)
3684          return Error(Value->getLoc(),
3685                       "cannot repeat value a non-constant number of times");
3686        const int64_t Repetitions = MCE->getValue();
3687        if (Repetitions < 0)
3688          return Error(Value->getLoc(),
3689                       "cannot repeat value a negative number of times");
3690  
3691        SmallVector<const MCExpr *, 1> DuplicatedValues;
3692        if (parseToken(AsmToken::LParen,
3693                       "parentheses required for 'dup' contents") ||
3694            parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3695          return true;
3696  
3697        for (int i = 0; i < Repetitions; ++i)
3698          Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3699      } else {
3700        Values.push_back(Value);
3701      }
3702    }
3703    return false;
3704  }
3705  
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3706  bool MasmParser::parseScalarInstList(unsigned Size,
3707                                       SmallVectorImpl<const MCExpr *> &Values,
3708                                       const AsmToken::TokenKind EndToken) {
3709    while (getTok().isNot(EndToken) &&
3710           (EndToken != AsmToken::Greater ||
3711            getTok().isNot(AsmToken::GreaterGreater))) {
3712      parseScalarInitializer(Size, Values);
3713  
3714      // If we see a comma, continue, and allow line continuation.
3715      if (!parseOptionalToken(AsmToken::Comma))
3716        break;
3717      parseOptionalToken(AsmToken::EndOfStatement);
3718    }
3719    return false;
3720  }
3721  
emitIntegralValues(unsigned Size,unsigned * Count)3722  bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3723    SmallVector<const MCExpr *, 1> Values;
3724    if (checkForValidSection() || parseScalarInstList(Size, Values))
3725      return true;
3726  
3727    for (const auto *Value : Values) {
3728      emitIntValue(Value, Size);
3729    }
3730    if (Count)
3731      *Count = Values.size();
3732    return false;
3733  }
3734  
3735  // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3736  bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3737    StructInfo &Struct = StructInProgress.back();
3738    FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3739    IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3740  
3741    Field.Type = Size;
3742  
3743    if (parseScalarInstList(Size, IntInfo.Values))
3744      return true;
3745  
3746    Field.SizeOf = Field.Type * IntInfo.Values.size();
3747    Field.LengthOf = IntInfo.Values.size();
3748    const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3749    if (!Struct.IsUnion) {
3750      Struct.NextOffset = FieldEnd;
3751    }
3752    Struct.Size = std::max(Struct.Size, FieldEnd);
3753    return false;
3754  }
3755  
3756  /// parseDirectiveValue
3757  ///  ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3758  bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3759    if (StructInProgress.empty()) {
3760      // Initialize data value.
3761      if (emitIntegralValues(Size))
3762        return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3763    } else if (addIntegralField("", Size)) {
3764      return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3765    }
3766  
3767    return false;
3768  }
3769  
3770  /// parseDirectiveNamedValue
3771  ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3772  bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3773                                            StringRef Name, SMLoc NameLoc) {
3774    if (StructInProgress.empty()) {
3775      // Initialize named data value.
3776      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3777      getStreamer().emitLabel(Sym);
3778      unsigned Count;
3779      if (emitIntegralValues(Size, &Count))
3780        return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3781  
3782      AsmTypeInfo Type;
3783      Type.Name = TypeName;
3784      Type.Size = Size * Count;
3785      Type.ElementSize = Size;
3786      Type.Length = Count;
3787      KnownType[Name.lower()] = Type;
3788    } else if (addIntegralField(Name, Size)) {
3789      return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3790    }
3791  
3792    return false;
3793  }
3794  
parseHexOcta(MasmParser & Asm,uint64_t & hi,uint64_t & lo)3795  static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3796    if (Asm.getTok().isNot(AsmToken::Integer) &&
3797        Asm.getTok().isNot(AsmToken::BigNum))
3798      return Asm.TokError("unknown token in expression");
3799    SMLoc ExprLoc = Asm.getTok().getLoc();
3800    APInt IntValue = Asm.getTok().getAPIntVal();
3801    Asm.Lex();
3802    if (!IntValue.isIntN(128))
3803      return Asm.Error(ExprLoc, "out of range literal value");
3804    if (!IntValue.isIntN(64)) {
3805      hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3806      lo = IntValue.getLoBits(64).getZExtValue();
3807    } else {
3808      hi = 0;
3809      lo = IntValue.getZExtValue();
3810    }
3811    return false;
3812  }
3813  
parseRealValue(const fltSemantics & Semantics,APInt & Res)3814  bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3815    // We don't truly support arithmetic on floating point expressions, so we
3816    // have to manually parse unary prefixes.
3817    bool IsNeg = false;
3818    SMLoc SignLoc;
3819    if (getLexer().is(AsmToken::Minus)) {
3820      SignLoc = getLexer().getLoc();
3821      Lexer.Lex();
3822      IsNeg = true;
3823    } else if (getLexer().is(AsmToken::Plus)) {
3824      SignLoc = getLexer().getLoc();
3825      Lexer.Lex();
3826    }
3827  
3828    if (Lexer.is(AsmToken::Error))
3829      return TokError(Lexer.getErr());
3830    if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3831        Lexer.isNot(AsmToken::Identifier))
3832      return TokError("unexpected token in directive");
3833  
3834    // Convert to an APFloat.
3835    APFloat Value(Semantics);
3836    StringRef IDVal = getTok().getString();
3837    if (getLexer().is(AsmToken::Identifier)) {
3838      if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3839        Value = APFloat::getInf(Semantics);
3840      else if (IDVal.equals_insensitive("nan"))
3841        Value = APFloat::getNaN(Semantics, false, ~0);
3842      else if (IDVal.equals_insensitive("?"))
3843        Value = APFloat::getZero(Semantics);
3844      else
3845        return TokError("invalid floating point literal");
3846    } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3847      // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3848      // To match ML64.exe, ignore the initial sign.
3849      unsigned SizeInBits = Value.getSizeInBits(Semantics);
3850      if (SizeInBits != (IDVal.size() << 2))
3851        return TokError("invalid floating point literal");
3852  
3853      // Consume the numeric token.
3854      Lex();
3855  
3856      Res = APInt(SizeInBits, IDVal, 16);
3857      if (SignLoc.isValid())
3858        return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3859      return false;
3860    } else if (errorToBool(
3861                   Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3862                       .takeError())) {
3863      return TokError("invalid floating point literal");
3864    }
3865    if (IsNeg)
3866      Value.changeSign();
3867  
3868    // Consume the numeric token.
3869    Lex();
3870  
3871    Res = Value.bitcastToAPInt();
3872  
3873    return false;
3874  }
3875  
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3876  bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3877                                     SmallVectorImpl<APInt> &ValuesAsInt,
3878                                     const AsmToken::TokenKind EndToken) {
3879    while (getTok().isNot(EndToken) ||
3880           (EndToken == AsmToken::Greater &&
3881            getTok().isNot(AsmToken::GreaterGreater))) {
3882      const AsmToken NextTok = peekTok();
3883      if (NextTok.is(AsmToken::Identifier) &&
3884          NextTok.getString().equals_insensitive("dup")) {
3885        const MCExpr *Value;
3886        if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3887          return true;
3888        const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3889        if (!MCE)
3890          return Error(Value->getLoc(),
3891                       "cannot repeat value a non-constant number of times");
3892        const int64_t Repetitions = MCE->getValue();
3893        if (Repetitions < 0)
3894          return Error(Value->getLoc(),
3895                       "cannot repeat value a negative number of times");
3896  
3897        SmallVector<APInt, 1> DuplicatedValues;
3898        if (parseToken(AsmToken::LParen,
3899                       "parentheses required for 'dup' contents") ||
3900            parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3901          return true;
3902  
3903        for (int i = 0; i < Repetitions; ++i)
3904          ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3905      } else {
3906        APInt AsInt;
3907        if (parseRealValue(Semantics, AsInt))
3908          return true;
3909        ValuesAsInt.push_back(AsInt);
3910      }
3911  
3912      // Continue if we see a comma. (Also, allow line continuation.)
3913      if (!parseOptionalToken(AsmToken::Comma))
3914        break;
3915      parseOptionalToken(AsmToken::EndOfStatement);
3916    }
3917  
3918    return false;
3919  }
3920  
3921  // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3922  bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3923                                  unsigned *Count) {
3924    if (checkForValidSection())
3925      return true;
3926  
3927    SmallVector<APInt, 1> ValuesAsInt;
3928    if (parseRealInstList(Semantics, ValuesAsInt))
3929      return true;
3930  
3931    for (const APInt &AsInt : ValuesAsInt) {
3932      getStreamer().emitIntValue(AsInt);
3933    }
3934    if (Count)
3935      *Count = ValuesAsInt.size();
3936    return false;
3937  }
3938  
3939  // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3940  bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3941                                size_t Size) {
3942    StructInfo &Struct = StructInProgress.back();
3943    FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3944    RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3945  
3946    Field.SizeOf = 0;
3947  
3948    if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3949      return true;
3950  
3951    Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3952    Field.LengthOf = RealInfo.AsIntValues.size();
3953    Field.SizeOf = Field.Type * Field.LengthOf;
3954  
3955    const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3956    if (!Struct.IsUnion) {
3957      Struct.NextOffset = FieldEnd;
3958    }
3959    Struct.Size = std::max(Struct.Size, FieldEnd);
3960    return false;
3961  }
3962  
3963  /// parseDirectiveRealValue
3964  ///  ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3965  bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3966                                           const fltSemantics &Semantics,
3967                                           size_t Size) {
3968    if (StructInProgress.empty()) {
3969      // Initialize data value.
3970      if (emitRealValues(Semantics))
3971        return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3972    } else if (addRealField("", Semantics, Size)) {
3973      return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3974    }
3975    return false;
3976  }
3977  
3978  /// parseDirectiveNamedRealValue
3979  ///  ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3980  bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3981                                                const fltSemantics &Semantics,
3982                                                unsigned Size, StringRef Name,
3983                                                SMLoc NameLoc) {
3984    if (StructInProgress.empty()) {
3985      // Initialize named data value.
3986      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3987      getStreamer().emitLabel(Sym);
3988      unsigned Count;
3989      if (emitRealValues(Semantics, &Count))
3990        return addErrorSuffix(" in '" + TypeName + "' directive");
3991  
3992      AsmTypeInfo Type;
3993      Type.Name = TypeName;
3994      Type.Size = Size * Count;
3995      Type.ElementSize = Size;
3996      Type.Length = Count;
3997      KnownType[Name.lower()] = Type;
3998    } else if (addRealField(Name, Semantics, Size)) {
3999      return addErrorSuffix(" in '" + TypeName + "' directive");
4000    }
4001    return false;
4002  }
4003  
parseOptionalAngleBracketOpen()4004  bool MasmParser::parseOptionalAngleBracketOpen() {
4005    const AsmToken Tok = getTok();
4006    if (parseOptionalToken(AsmToken::LessLess)) {
4007      AngleBracketDepth++;
4008      Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4009      return true;
4010    } else if (parseOptionalToken(AsmToken::LessGreater)) {
4011      AngleBracketDepth++;
4012      Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4013      return true;
4014    } else if (parseOptionalToken(AsmToken::Less)) {
4015      AngleBracketDepth++;
4016      return true;
4017    }
4018  
4019    return false;
4020  }
4021  
parseAngleBracketClose(const Twine & Msg)4022  bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4023    const AsmToken Tok = getTok();
4024    if (parseOptionalToken(AsmToken::GreaterGreater)) {
4025      Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4026    } else if (parseToken(AsmToken::Greater, Msg)) {
4027      return true;
4028    }
4029    AngleBracketDepth--;
4030    return false;
4031  }
4032  
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)4033  bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4034                                         const IntFieldInfo &Contents,
4035                                         FieldInitializer &Initializer) {
4036    SMLoc Loc = getTok().getLoc();
4037  
4038    SmallVector<const MCExpr *, 1> Values;
4039    if (parseOptionalToken(AsmToken::LCurly)) {
4040      if (Field.LengthOf == 1 && Field.Type > 1)
4041        return Error(Loc, "Cannot initialize scalar field with array value");
4042      if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4043          parseToken(AsmToken::RCurly))
4044        return true;
4045    } else if (parseOptionalAngleBracketOpen()) {
4046      if (Field.LengthOf == 1 && Field.Type > 1)
4047        return Error(Loc, "Cannot initialize scalar field with array value");
4048      if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4049          parseAngleBracketClose())
4050        return true;
4051    } else if (Field.LengthOf > 1 && Field.Type > 1) {
4052      return Error(Loc, "Cannot initialize array field with scalar value");
4053    } else if (parseScalarInitializer(Field.Type, Values,
4054                                      /*StringPadLength=*/Field.LengthOf)) {
4055      return true;
4056    }
4057  
4058    if (Values.size() > Field.LengthOf) {
4059      return Error(Loc, "Initializer too long for field; expected at most " +
4060                            std::to_string(Field.LengthOf) + " elements, got " +
4061                            std::to_string(Values.size()));
4062    }
4063    // Default-initialize all remaining values.
4064    Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4065  
4066    Initializer = FieldInitializer(std::move(Values));
4067    return false;
4068  }
4069  
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)4070  bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4071                                         const RealFieldInfo &Contents,
4072                                         FieldInitializer &Initializer) {
4073    const fltSemantics *Semantics;
4074    switch (Field.Type) {
4075    case 4:
4076      Semantics = &APFloat::IEEEsingle();
4077      break;
4078    case 8:
4079      Semantics = &APFloat::IEEEdouble();
4080      break;
4081    case 10:
4082      Semantics = &APFloat::x87DoubleExtended();
4083      break;
4084    default:
4085      llvm_unreachable("unknown real field type");
4086    }
4087  
4088    SMLoc Loc = getTok().getLoc();
4089  
4090    SmallVector<APInt, 1> AsIntValues;
4091    if (parseOptionalToken(AsmToken::LCurly)) {
4092      if (Field.LengthOf == 1)
4093        return Error(Loc, "Cannot initialize scalar field with array value");
4094      if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4095          parseToken(AsmToken::RCurly))
4096        return true;
4097    } else if (parseOptionalAngleBracketOpen()) {
4098      if (Field.LengthOf == 1)
4099        return Error(Loc, "Cannot initialize scalar field with array value");
4100      if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4101          parseAngleBracketClose())
4102        return true;
4103    } else if (Field.LengthOf > 1) {
4104      return Error(Loc, "Cannot initialize array field with scalar value");
4105    } else {
4106      AsIntValues.emplace_back();
4107      if (parseRealValue(*Semantics, AsIntValues.back()))
4108        return true;
4109    }
4110  
4111    if (AsIntValues.size() > Field.LengthOf) {
4112      return Error(Loc, "Initializer too long for field; expected at most " +
4113                            std::to_string(Field.LengthOf) + " elements, got " +
4114                            std::to_string(AsIntValues.size()));
4115    }
4116    // Default-initialize all remaining values.
4117    AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4118                       Contents.AsIntValues.end());
4119  
4120    Initializer = FieldInitializer(std::move(AsIntValues));
4121    return false;
4122  }
4123  
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)4124  bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4125                                         const StructFieldInfo &Contents,
4126                                         FieldInitializer &Initializer) {
4127    SMLoc Loc = getTok().getLoc();
4128  
4129    std::vector<StructInitializer> Initializers;
4130    if (Field.LengthOf > 1) {
4131      if (parseOptionalToken(AsmToken::LCurly)) {
4132        if (parseStructInstList(Contents.Structure, Initializers,
4133                                AsmToken::RCurly) ||
4134            parseToken(AsmToken::RCurly))
4135          return true;
4136      } else if (parseOptionalAngleBracketOpen()) {
4137        if (parseStructInstList(Contents.Structure, Initializers,
4138                                AsmToken::Greater) ||
4139            parseAngleBracketClose())
4140          return true;
4141      } else {
4142        return Error(Loc, "Cannot initialize array field with scalar value");
4143      }
4144    } else {
4145      Initializers.emplace_back();
4146      if (parseStructInitializer(Contents.Structure, Initializers.back()))
4147        return true;
4148    }
4149  
4150    if (Initializers.size() > Field.LengthOf) {
4151      return Error(Loc, "Initializer too long for field; expected at most " +
4152                            std::to_string(Field.LengthOf) + " elements, got " +
4153                            std::to_string(Initializers.size()));
4154    }
4155    // Default-initialize all remaining values.
4156    Initializers.insert(Initializers.end(),
4157                        Contents.Initializers.begin() + Initializers.size(),
4158                        Contents.Initializers.end());
4159  
4160    Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4161    return false;
4162  }
4163  
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)4164  bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4165                                         FieldInitializer &Initializer) {
4166    switch (Field.Contents.FT) {
4167    case FT_INTEGRAL:
4168      return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4169    case FT_REAL:
4170      return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4171    case FT_STRUCT:
4172      return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4173    }
4174    llvm_unreachable("Unhandled FieldType enum");
4175  }
4176  
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)4177  bool MasmParser::parseStructInitializer(const StructInfo &Structure,
4178                                          StructInitializer &Initializer) {
4179    const AsmToken FirstToken = getTok();
4180  
4181    std::optional<AsmToken::TokenKind> EndToken;
4182    if (parseOptionalToken(AsmToken::LCurly)) {
4183      EndToken = AsmToken::RCurly;
4184    } else if (parseOptionalAngleBracketOpen()) {
4185      EndToken = AsmToken::Greater;
4186      AngleBracketDepth++;
4187    } else if (FirstToken.is(AsmToken::Identifier) &&
4188               FirstToken.getString() == "?") {
4189      // ? initializer; leave EndToken uninitialized to treat as empty.
4190      if (parseToken(AsmToken::Identifier))
4191        return true;
4192    } else {
4193      return Error(FirstToken.getLoc(), "Expected struct initializer");
4194    }
4195  
4196    auto &FieldInitializers = Initializer.FieldInitializers;
4197    size_t FieldIndex = 0;
4198    if (EndToken) {
4199      // Initialize all fields with given initializers.
4200      while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
4201        const FieldInfo &Field = Structure.Fields[FieldIndex++];
4202        if (parseOptionalToken(AsmToken::Comma)) {
4203          // Empty initializer; use the default and continue. (Also, allow line
4204          // continuation.)
4205          FieldInitializers.push_back(Field.Contents);
4206          parseOptionalToken(AsmToken::EndOfStatement);
4207          continue;
4208        }
4209        FieldInitializers.emplace_back(Field.Contents.FT);
4210        if (parseFieldInitializer(Field, FieldInitializers.back()))
4211          return true;
4212  
4213        // Continue if we see a comma. (Also, allow line continuation.)
4214        SMLoc CommaLoc = getTok().getLoc();
4215        if (!parseOptionalToken(AsmToken::Comma))
4216          break;
4217        if (FieldIndex == Structure.Fields.size())
4218          return Error(CommaLoc, "'" + Structure.Name +
4219                                     "' initializer initializes too many fields");
4220        parseOptionalToken(AsmToken::EndOfStatement);
4221      }
4222    }
4223    // Default-initialize all remaining fields.
4224    for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
4225      FieldInitializers.push_back(Field.Contents);
4226  
4227    if (EndToken) {
4228      if (*EndToken == AsmToken::Greater)
4229        return parseAngleBracketClose();
4230  
4231      return parseToken(*EndToken);
4232    }
4233  
4234    return false;
4235  }
4236  
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)4237  bool MasmParser::parseStructInstList(
4238      const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4239      const AsmToken::TokenKind EndToken) {
4240    while (getTok().isNot(EndToken) ||
4241           (EndToken == AsmToken::Greater &&
4242            getTok().isNot(AsmToken::GreaterGreater))) {
4243      const AsmToken NextTok = peekTok();
4244      if (NextTok.is(AsmToken::Identifier) &&
4245          NextTok.getString().equals_insensitive("dup")) {
4246        const MCExpr *Value;
4247        if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4248          return true;
4249        const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4250        if (!MCE)
4251          return Error(Value->getLoc(),
4252                       "cannot repeat value a non-constant number of times");
4253        const int64_t Repetitions = MCE->getValue();
4254        if (Repetitions < 0)
4255          return Error(Value->getLoc(),
4256                       "cannot repeat value a negative number of times");
4257  
4258        std::vector<StructInitializer> DuplicatedValues;
4259        if (parseToken(AsmToken::LParen,
4260                       "parentheses required for 'dup' contents") ||
4261            parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4262          return true;
4263  
4264        for (int i = 0; i < Repetitions; ++i)
4265          llvm::append_range(Initializers, DuplicatedValues);
4266      } else {
4267        Initializers.emplace_back();
4268        if (parseStructInitializer(Structure, Initializers.back()))
4269          return true;
4270      }
4271  
4272      // Continue if we see a comma. (Also, allow line continuation.)
4273      if (!parseOptionalToken(AsmToken::Comma))
4274        break;
4275      parseOptionalToken(AsmToken::EndOfStatement);
4276    }
4277  
4278    return false;
4279  }
4280  
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)4281  bool MasmParser::emitFieldValue(const FieldInfo &Field,
4282                                  const IntFieldInfo &Contents) {
4283    // Default-initialize all values.
4284    for (const MCExpr *Value : Contents.Values) {
4285      if (emitIntValue(Value, Field.Type))
4286        return true;
4287    }
4288    return false;
4289  }
4290  
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)4291  bool MasmParser::emitFieldValue(const FieldInfo &Field,
4292                                  const RealFieldInfo &Contents) {
4293    for (const APInt &AsInt : Contents.AsIntValues) {
4294      getStreamer().emitIntValue(AsInt.getLimitedValue(),
4295                                 AsInt.getBitWidth() / 8);
4296    }
4297    return false;
4298  }
4299  
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)4300  bool MasmParser::emitFieldValue(const FieldInfo &Field,
4301                                  const StructFieldInfo &Contents) {
4302    for (const auto &Initializer : Contents.Initializers) {
4303      size_t Index = 0, Offset = 0;
4304      for (const auto &SubField : Contents.Structure.Fields) {
4305        getStreamer().emitZeros(SubField.Offset - Offset);
4306        Offset = SubField.Offset + SubField.SizeOf;
4307        emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4308      }
4309    }
4310    return false;
4311  }
4312  
emitFieldValue(const FieldInfo & Field)4313  bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4314    switch (Field.Contents.FT) {
4315    case FT_INTEGRAL:
4316      return emitFieldValue(Field, Field.Contents.IntInfo);
4317    case FT_REAL:
4318      return emitFieldValue(Field, Field.Contents.RealInfo);
4319    case FT_STRUCT:
4320      return emitFieldValue(Field, Field.Contents.StructInfo);
4321    }
4322    llvm_unreachable("Unhandled FieldType enum");
4323  }
4324  
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)4325  bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4326                                        const IntFieldInfo &Contents,
4327                                        const IntFieldInfo &Initializer) {
4328    for (const auto &Value : Initializer.Values) {
4329      if (emitIntValue(Value, Field.Type))
4330        return true;
4331    }
4332    // Default-initialize all remaining values.
4333    for (const auto &Value :
4334             llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4335      if (emitIntValue(Value, Field.Type))
4336        return true;
4337    }
4338    return false;
4339  }
4340  
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)4341  bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4342                                        const RealFieldInfo &Contents,
4343                                        const RealFieldInfo &Initializer) {
4344    for (const auto &AsInt : Initializer.AsIntValues) {
4345      getStreamer().emitIntValue(AsInt.getLimitedValue(),
4346                                 AsInt.getBitWidth() / 8);
4347    }
4348    // Default-initialize all remaining values.
4349    for (const auto &AsInt :
4350         llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4351      getStreamer().emitIntValue(AsInt.getLimitedValue(),
4352                                 AsInt.getBitWidth() / 8);
4353    }
4354    return false;
4355  }
4356  
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)4357  bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4358                                        const StructFieldInfo &Contents,
4359                                        const StructFieldInfo &Initializer) {
4360    for (const auto &Init : Initializer.Initializers) {
4361      if (emitStructInitializer(Contents.Structure, Init))
4362        return true;
4363    }
4364    // Default-initialize all remaining values.
4365    for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4366                                             Initializer.Initializers.size())) {
4367      if (emitStructInitializer(Contents.Structure, Init))
4368        return true;
4369    }
4370    return false;
4371  }
4372  
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)4373  bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4374                                        const FieldInitializer &Initializer) {
4375    switch (Field.Contents.FT) {
4376    case FT_INTEGRAL:
4377      return emitFieldInitializer(Field, Field.Contents.IntInfo,
4378                                  Initializer.IntInfo);
4379    case FT_REAL:
4380      return emitFieldInitializer(Field, Field.Contents.RealInfo,
4381                                  Initializer.RealInfo);
4382    case FT_STRUCT:
4383      return emitFieldInitializer(Field, Field.Contents.StructInfo,
4384                                  Initializer.StructInfo);
4385    }
4386    llvm_unreachable("Unhandled FieldType enum");
4387  }
4388  
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)4389  bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4390                                         const StructInitializer &Initializer) {
4391    if (!Structure.Initializable)
4392      return Error(getLexer().getLoc(),
4393                   "cannot initialize a value of type '" + Structure.Name +
4394                       "'; 'org' was used in the type's declaration");
4395    size_t Index = 0, Offset = 0;
4396    for (const auto &Init : Initializer.FieldInitializers) {
4397      const auto &Field = Structure.Fields[Index++];
4398      getStreamer().emitZeros(Field.Offset - Offset);
4399      Offset = Field.Offset + Field.SizeOf;
4400      if (emitFieldInitializer(Field, Init))
4401        return true;
4402    }
4403    // Default-initialize all remaining fields.
4404    for (const auto &Field : llvm::drop_begin(
4405             Structure.Fields, Initializer.FieldInitializers.size())) {
4406      getStreamer().emitZeros(Field.Offset - Offset);
4407      Offset = Field.Offset + Field.SizeOf;
4408      if (emitFieldValue(Field))
4409        return true;
4410    }
4411    // Add final padding.
4412    if (Offset != Structure.Size)
4413      getStreamer().emitZeros(Structure.Size - Offset);
4414    return false;
4415  }
4416  
4417  // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)4418  bool MasmParser::emitStructValues(const StructInfo &Structure,
4419                                    unsigned *Count) {
4420    std::vector<StructInitializer> Initializers;
4421    if (parseStructInstList(Structure, Initializers))
4422      return true;
4423  
4424    for (const auto &Initializer : Initializers) {
4425      if (emitStructInitializer(Structure, Initializer))
4426        return true;
4427    }
4428  
4429    if (Count)
4430      *Count = Initializers.size();
4431    return false;
4432  }
4433  
4434  // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)4435  bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4436    StructInfo &OwningStruct = StructInProgress.back();
4437    FieldInfo &Field =
4438        OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4439    StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4440  
4441    StructInfo.Structure = Structure;
4442    Field.Type = Structure.Size;
4443  
4444    if (parseStructInstList(Structure, StructInfo.Initializers))
4445      return true;
4446  
4447    Field.LengthOf = StructInfo.Initializers.size();
4448    Field.SizeOf = Field.Type * Field.LengthOf;
4449  
4450    const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4451    if (!OwningStruct.IsUnion) {
4452      OwningStruct.NextOffset = FieldEnd;
4453    }
4454    OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4455  
4456    return false;
4457  }
4458  
4459  /// parseDirectiveStructValue
4460  ///  ::= struct-id (<struct-initializer> | {struct-initializer})
4461  ///                [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)4462  bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4463                                             StringRef Directive, SMLoc DirLoc) {
4464    if (StructInProgress.empty()) {
4465      if (emitStructValues(Structure))
4466        return true;
4467    } else if (addStructField("", Structure)) {
4468      return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4469    }
4470  
4471    return false;
4472  }
4473  
4474  /// parseDirectiveNamedValue
4475  ///  ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4476  bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4477                                                  StringRef Directive,
4478                                                  SMLoc DirLoc, StringRef Name) {
4479    if (StructInProgress.empty()) {
4480      // Initialize named data value.
4481      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4482      getStreamer().emitLabel(Sym);
4483      unsigned Count;
4484      if (emitStructValues(Structure, &Count))
4485        return true;
4486      AsmTypeInfo Type;
4487      Type.Name = Structure.Name;
4488      Type.Size = Structure.Size * Count;
4489      Type.ElementSize = Structure.Size;
4490      Type.Length = Count;
4491      KnownType[Name.lower()] = Type;
4492    } else if (addStructField(Name, Structure)) {
4493      return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4494    }
4495  
4496    return false;
4497  }
4498  
4499  /// parseDirectiveStruct
4500  ///  ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4501  ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4502  ///      <name> ENDS
4503  ////// dataDir = data declaration
4504  ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4505  bool MasmParser::parseDirectiveStruct(StringRef Directive,
4506                                        DirectiveKind DirKind, StringRef Name,
4507                                        SMLoc NameLoc) {
4508    // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4509    // anyway, so all field accesses must be qualified.
4510    AsmToken NextTok = getTok();
4511    int64_t AlignmentValue = 1;
4512    if (NextTok.isNot(AsmToken::Comma) &&
4513        NextTok.isNot(AsmToken::EndOfStatement) &&
4514        parseAbsoluteExpression(AlignmentValue)) {
4515      return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4516                            "' directive");
4517    }
4518    if (!isPowerOf2_64(AlignmentValue)) {
4519      return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4520                                         std::to_string(AlignmentValue));
4521    }
4522  
4523    StringRef Qualifier;
4524    SMLoc QualifierLoc;
4525    if (parseOptionalToken(AsmToken::Comma)) {
4526      QualifierLoc = getTok().getLoc();
4527      if (parseIdentifier(Qualifier))
4528        return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4529      if (!Qualifier.equals_insensitive("nonunique"))
4530        return Error(QualifierLoc, "Unrecognized qualifier for '" +
4531                                       Twine(Directive) +
4532                                       "' directive; expected none or NONUNIQUE");
4533    }
4534  
4535    if (parseEOL())
4536      return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4537  
4538    StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4539    return false;
4540  }
4541  
4542  /// parseDirectiveNestedStruct
4543  ///  ::= (STRUC | STRUCT | UNION) [name]
4544  ///      (dataDir | generalDir | offsetDir | nestedStruct)+
4545  ///      ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4546  bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4547                                              DirectiveKind DirKind) {
4548    if (StructInProgress.empty())
4549      return TokError("missing name in top-level '" + Twine(Directive) +
4550                      "' directive");
4551  
4552    StringRef Name;
4553    if (getTok().is(AsmToken::Identifier)) {
4554      Name = getTok().getIdentifier();
4555      parseToken(AsmToken::Identifier);
4556    }
4557    if (parseEOL())
4558      return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4559  
4560    // Reserve space to ensure Alignment doesn't get invalidated when
4561    // StructInProgress grows.
4562    StructInProgress.reserve(StructInProgress.size() + 1);
4563    StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4564                                  StructInProgress.back().Alignment);
4565    return false;
4566  }
4567  
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4568  bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4569    if (StructInProgress.empty())
4570      return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4571    if (StructInProgress.size() > 1)
4572      return Error(NameLoc, "unexpected name in nested ENDS directive");
4573    if (StructInProgress.back().Name.compare_insensitive(Name))
4574      return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4575                                StructInProgress.back().Name + "'");
4576    StructInfo Structure = StructInProgress.pop_back_val();
4577    // Pad to make the structure's size divisible by the smaller of its alignment
4578    // and the size of its largest field.
4579    Structure.Size = llvm::alignTo(
4580        Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4581    Structs[Name.lower()] = Structure;
4582  
4583    if (parseEOL())
4584      return addErrorSuffix(" in ENDS directive");
4585  
4586    return false;
4587  }
4588  
parseDirectiveNestedEnds()4589  bool MasmParser::parseDirectiveNestedEnds() {
4590    if (StructInProgress.empty())
4591      return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4592    if (StructInProgress.size() == 1)
4593      return TokError("missing name in top-level ENDS directive");
4594  
4595    if (parseEOL())
4596      return addErrorSuffix(" in nested ENDS directive");
4597  
4598    StructInfo Structure = StructInProgress.pop_back_val();
4599    // Pad to make the structure's size divisible by its alignment.
4600    Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4601  
4602    StructInfo &ParentStruct = StructInProgress.back();
4603    if (Structure.Name.empty()) {
4604      // Anonymous substructures' fields are addressed as if they belong to the
4605      // parent structure - so we transfer them to the parent here.
4606      const size_t OldFields = ParentStruct.Fields.size();
4607      ParentStruct.Fields.insert(
4608          ParentStruct.Fields.end(),
4609          std::make_move_iterator(Structure.Fields.begin()),
4610          std::make_move_iterator(Structure.Fields.end()));
4611      for (const auto &FieldByName : Structure.FieldsByName) {
4612        ParentStruct.FieldsByName[FieldByName.getKey()] =
4613            FieldByName.getValue() + OldFields;
4614      }
4615  
4616      unsigned FirstFieldOffset = 0;
4617      if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4618        FirstFieldOffset = llvm::alignTo(
4619            ParentStruct.NextOffset,
4620            std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4621      }
4622  
4623      if (ParentStruct.IsUnion) {
4624        ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4625      } else {
4626        for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4627          Field.Offset += FirstFieldOffset;
4628  
4629        const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4630        if (!ParentStruct.IsUnion) {
4631          ParentStruct.NextOffset = StructureEnd;
4632        }
4633        ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4634      }
4635    } else {
4636      FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4637                                               Structure.AlignmentSize);
4638      StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4639      Field.Type = Structure.Size;
4640      Field.LengthOf = 1;
4641      Field.SizeOf = Structure.Size;
4642  
4643      const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4644      if (!ParentStruct.IsUnion) {
4645        ParentStruct.NextOffset = StructureEnd;
4646      }
4647      ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4648  
4649      StructInfo.Structure = Structure;
4650      StructInfo.Initializers.emplace_back();
4651      auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4652      for (const auto &SubField : Structure.Fields) {
4653        FieldInitializers.push_back(SubField.Contents);
4654      }
4655    }
4656  
4657    return false;
4658  }
4659  
4660  /// parseDirectiveOrg
4661  ///  ::= org expression
parseDirectiveOrg()4662  bool MasmParser::parseDirectiveOrg() {
4663    const MCExpr *Offset;
4664    SMLoc OffsetLoc = Lexer.getLoc();
4665    if (checkForValidSection() || parseExpression(Offset))
4666      return true;
4667    if (parseEOL())
4668      return addErrorSuffix(" in 'org' directive");
4669  
4670    if (StructInProgress.empty()) {
4671      // Not in a struct; change the offset for the next instruction or data
4672      if (checkForValidSection())
4673        return addErrorSuffix(" in 'org' directive");
4674  
4675      getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4676    } else {
4677      // Offset the next field of this struct
4678      StructInfo &Structure = StructInProgress.back();
4679      int64_t OffsetRes;
4680      if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4681        return Error(OffsetLoc,
4682                     "expected absolute expression in 'org' directive");
4683      if (OffsetRes < 0)
4684        return Error(
4685            OffsetLoc,
4686            "expected non-negative value in struct's 'org' directive; was " +
4687                std::to_string(OffsetRes));
4688      Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4689  
4690      // ORG-affected structures cannot be initialized
4691      Structure.Initializable = false;
4692    }
4693  
4694    return false;
4695  }
4696  
emitAlignTo(int64_t Alignment)4697  bool MasmParser::emitAlignTo(int64_t Alignment) {
4698    if (StructInProgress.empty()) {
4699      // Not in a struct; align the next instruction or data
4700      if (checkForValidSection())
4701        return true;
4702  
4703      // Check whether we should use optimal code alignment for this align
4704      // directive.
4705      const MCSection *Section = getStreamer().getCurrentSectionOnly();
4706      assert(Section && "must have section to emit alignment");
4707      if (Section->useCodeAlign()) {
4708        getStreamer().emitCodeAlignment(Align(Alignment),
4709                                        &getTargetParser().getSTI(),
4710                                        /*MaxBytesToEmit=*/0);
4711      } else {
4712        // FIXME: Target specific behavior about how the "extra" bytes are filled.
4713        getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4714                                           /*ValueSize=*/1,
4715                                           /*MaxBytesToEmit=*/0);
4716      }
4717    } else {
4718      // Align the next field of this struct
4719      StructInfo &Structure = StructInProgress.back();
4720      Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4721    }
4722  
4723    return false;
4724  }
4725  
4726  /// parseDirectiveAlign
4727  ///  ::= align expression
parseDirectiveAlign()4728  bool MasmParser::parseDirectiveAlign() {
4729    SMLoc AlignmentLoc = getLexer().getLoc();
4730    int64_t Alignment;
4731  
4732    // Ignore empty 'align' directives.
4733    if (getTok().is(AsmToken::EndOfStatement)) {
4734      return Warning(AlignmentLoc,
4735                     "align directive with no operand is ignored") &&
4736             parseEOL();
4737    }
4738    if (parseAbsoluteExpression(Alignment) || parseEOL())
4739      return addErrorSuffix(" in align directive");
4740  
4741    // Always emit an alignment here even if we throw an error.
4742    bool ReturnVal = false;
4743  
4744    // Reject alignments that aren't either a power of two or zero, for ML.exe
4745    // compatibility. Alignment of zero is silently rounded up to one.
4746    if (Alignment == 0)
4747      Alignment = 1;
4748    if (!isPowerOf2_64(Alignment))
4749      ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4750                                           std::to_string(Alignment));
4751  
4752    if (emitAlignTo(Alignment))
4753      ReturnVal |= addErrorSuffix(" in align directive");
4754  
4755    return ReturnVal;
4756  }
4757  
4758  /// parseDirectiveEven
4759  ///  ::= even
parseDirectiveEven()4760  bool MasmParser::parseDirectiveEven() {
4761    if (parseEOL() || emitAlignTo(2))
4762      return addErrorSuffix(" in even directive");
4763  
4764    return false;
4765  }
4766  
4767  /// parseDirectiveFile
4768  /// ::= .file filename
4769  /// ::= .file number [directory] filename [md5 checksum] [source source-text]
parseDirectiveFile(SMLoc DirectiveLoc)4770  bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4771    // FIXME: I'm not sure what this is.
4772    int64_t FileNumber = -1;
4773    if (getLexer().is(AsmToken::Integer)) {
4774      FileNumber = getTok().getIntVal();
4775      Lex();
4776  
4777      if (FileNumber < 0)
4778        return TokError("negative file number");
4779    }
4780  
4781    std::string Path;
4782  
4783    // Usually the directory and filename together, otherwise just the directory.
4784    // Allow the strings to have escaped octal character sequence.
4785    if (check(getTok().isNot(AsmToken::String),
4786              "unexpected token in '.file' directive") ||
4787        parseEscapedString(Path))
4788      return true;
4789  
4790    StringRef Directory;
4791    StringRef Filename;
4792    std::string FilenameData;
4793    if (getLexer().is(AsmToken::String)) {
4794      if (check(FileNumber == -1,
4795                "explicit path specified, but no file number") ||
4796          parseEscapedString(FilenameData))
4797        return true;
4798      Filename = FilenameData;
4799      Directory = Path;
4800    } else {
4801      Filename = Path;
4802    }
4803  
4804    uint64_t MD5Hi, MD5Lo;
4805    bool HasMD5 = false;
4806  
4807    std::optional<StringRef> Source;
4808    bool HasSource = false;
4809    std::string SourceString;
4810  
4811    while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4812      StringRef Keyword;
4813      if (check(getTok().isNot(AsmToken::Identifier),
4814                "unexpected token in '.file' directive") ||
4815          parseIdentifier(Keyword))
4816        return true;
4817      if (Keyword == "md5") {
4818        HasMD5 = true;
4819        if (check(FileNumber == -1,
4820                  "MD5 checksum specified, but no file number") ||
4821            parseHexOcta(*this, MD5Hi, MD5Lo))
4822          return true;
4823      } else if (Keyword == "source") {
4824        HasSource = true;
4825        if (check(FileNumber == -1,
4826                  "source specified, but no file number") ||
4827            check(getTok().isNot(AsmToken::String),
4828                  "unexpected token in '.file' directive") ||
4829            parseEscapedString(SourceString))
4830          return true;
4831      } else {
4832        return TokError("unexpected token in '.file' directive");
4833      }
4834    }
4835  
4836    if (FileNumber == -1) {
4837      // Ignore the directive if there is no number and the target doesn't support
4838      // numberless .file directives. This allows some portability of assembler
4839      // between different object file formats.
4840      if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4841        getStreamer().emitFileDirective(Filename);
4842    } else {
4843      // In case there is a -g option as well as debug info from directive .file,
4844      // we turn off the -g option, directly use the existing debug info instead.
4845      // Throw away any implicit file table for the assembler source.
4846      if (Ctx.getGenDwarfForAssembly()) {
4847        Ctx.getMCDwarfLineTable(0).resetFileTable();
4848        Ctx.setGenDwarfForAssembly(false);
4849      }
4850  
4851      std::optional<MD5::MD5Result> CKMem;
4852      if (HasMD5) {
4853        MD5::MD5Result Sum;
4854        for (unsigned i = 0; i != 8; ++i) {
4855          Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4856          Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4857        }
4858        CKMem = Sum;
4859      }
4860      if (HasSource) {
4861        char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4862        memcpy(SourceBuf, SourceString.data(), SourceString.size());
4863        Source = StringRef(SourceBuf, SourceString.size());
4864      }
4865      if (FileNumber == 0) {
4866        if (Ctx.getDwarfVersion() < 5)
4867          return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4868        getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4869      } else {
4870        Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4871            FileNumber, Directory, Filename, CKMem, Source);
4872        if (!FileNumOrErr)
4873          return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4874      }
4875      // Alert the user if there are some .file directives with MD5 and some not.
4876      // But only do that once.
4877      if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4878        ReportedInconsistentMD5 = true;
4879        return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4880      }
4881    }
4882  
4883    return false;
4884  }
4885  
4886  /// parseDirectiveLine
4887  /// ::= .line [number]
parseDirectiveLine()4888  bool MasmParser::parseDirectiveLine() {
4889    int64_t LineNumber;
4890    if (getLexer().is(AsmToken::Integer)) {
4891      if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4892        return true;
4893      (void)LineNumber;
4894      // FIXME: Do something with the .line.
4895    }
4896    if (parseEOL())
4897      return true;
4898  
4899    return false;
4900  }
4901  
4902  /// parseDirectiveLoc
4903  /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4904  ///                                [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4905  /// The first number is a file number, must have been previously assigned with
4906  /// a .file directive, the second number is the line number and optionally the
4907  /// third number is a column position (zero if not specified).  The remaining
4908  /// optional items are .loc sub-directives.
parseDirectiveLoc()4909  bool MasmParser::parseDirectiveLoc() {
4910    int64_t FileNumber = 0, LineNumber = 0;
4911    SMLoc Loc = getTok().getLoc();
4912    if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4913        check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4914              "file number less than one in '.loc' directive") ||
4915        check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4916              "unassigned file number in '.loc' directive"))
4917      return true;
4918  
4919    // optional
4920    if (getLexer().is(AsmToken::Integer)) {
4921      LineNumber = getTok().getIntVal();
4922      if (LineNumber < 0)
4923        return TokError("line number less than zero in '.loc' directive");
4924      Lex();
4925    }
4926  
4927    int64_t ColumnPos = 0;
4928    if (getLexer().is(AsmToken::Integer)) {
4929      ColumnPos = getTok().getIntVal();
4930      if (ColumnPos < 0)
4931        return TokError("column position less than zero in '.loc' directive");
4932      Lex();
4933    }
4934  
4935    auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4936    unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4937    unsigned Isa = 0;
4938    int64_t Discriminator = 0;
4939  
4940    auto parseLocOp = [&]() -> bool {
4941      StringRef Name;
4942      SMLoc Loc = getTok().getLoc();
4943      if (parseIdentifier(Name))
4944        return TokError("unexpected token in '.loc' directive");
4945  
4946      if (Name == "basic_block")
4947        Flags |= DWARF2_FLAG_BASIC_BLOCK;
4948      else if (Name == "prologue_end")
4949        Flags |= DWARF2_FLAG_PROLOGUE_END;
4950      else if (Name == "epilogue_begin")
4951        Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4952      else if (Name == "is_stmt") {
4953        Loc = getTok().getLoc();
4954        const MCExpr *Value;
4955        if (parseExpression(Value))
4956          return true;
4957        // The expression must be the constant 0 or 1.
4958        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4959          int Value = MCE->getValue();
4960          if (Value == 0)
4961            Flags &= ~DWARF2_FLAG_IS_STMT;
4962          else if (Value == 1)
4963            Flags |= DWARF2_FLAG_IS_STMT;
4964          else
4965            return Error(Loc, "is_stmt value not 0 or 1");
4966        } else {
4967          return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4968        }
4969      } else if (Name == "isa") {
4970        Loc = getTok().getLoc();
4971        const MCExpr *Value;
4972        if (parseExpression(Value))
4973          return true;
4974        // The expression must be a constant greater or equal to 0.
4975        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4976          int Value = MCE->getValue();
4977          if (Value < 0)
4978            return Error(Loc, "isa number less than zero");
4979          Isa = Value;
4980        } else {
4981          return Error(Loc, "isa number not a constant value");
4982        }
4983      } else if (Name == "discriminator") {
4984        if (parseAbsoluteExpression(Discriminator))
4985          return true;
4986      } else {
4987        return Error(Loc, "unknown sub-directive in '.loc' directive");
4988      }
4989      return false;
4990    };
4991  
4992    if (parseMany(parseLocOp, false /*hasComma*/))
4993      return true;
4994  
4995    getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4996                                        Isa, Discriminator, StringRef());
4997  
4998    return false;
4999  }
5000  
5001  /// parseDirectiveStabs
5002  /// ::= .stabs string, number, number, number
parseDirectiveStabs()5003  bool MasmParser::parseDirectiveStabs() {
5004    return TokError("unsupported directive '.stabs'");
5005  }
5006  
5007  /// parseDirectiveCVFile
5008  /// ::= .cv_file number filename [checksum] [checksumkind]
parseDirectiveCVFile()5009  bool MasmParser::parseDirectiveCVFile() {
5010    SMLoc FileNumberLoc = getTok().getLoc();
5011    int64_t FileNumber;
5012    std::string Filename;
5013    std::string Checksum;
5014    int64_t ChecksumKind = 0;
5015  
5016    if (parseIntToken(FileNumber,
5017                      "expected file number in '.cv_file' directive") ||
5018        check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5019        check(getTok().isNot(AsmToken::String),
5020              "unexpected token in '.cv_file' directive") ||
5021        parseEscapedString(Filename))
5022      return true;
5023    if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5024      if (check(getTok().isNot(AsmToken::String),
5025                "unexpected token in '.cv_file' directive") ||
5026          parseEscapedString(Checksum) ||
5027          parseIntToken(ChecksumKind,
5028                        "expected checksum kind in '.cv_file' directive") ||
5029          parseEOL())
5030        return true;
5031    }
5032  
5033    Checksum = fromHex(Checksum);
5034    void *CKMem = Ctx.allocate(Checksum.size(), 1);
5035    memcpy(CKMem, Checksum.data(), Checksum.size());
5036    ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5037                                      Checksum.size());
5038  
5039    if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5040                                           static_cast<uint8_t>(ChecksumKind)))
5041      return Error(FileNumberLoc, "file number already allocated");
5042  
5043    return false;
5044  }
5045  
parseCVFunctionId(int64_t & FunctionId,StringRef DirectiveName)5046  bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5047                                     StringRef DirectiveName) {
5048    SMLoc Loc;
5049    return parseTokenLoc(Loc) ||
5050           parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5051                                         "' directive") ||
5052           check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5053                 "expected function id within range [0, UINT_MAX)");
5054  }
5055  
parseCVFileId(int64_t & FileNumber,StringRef DirectiveName)5056  bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5057    SMLoc Loc;
5058    return parseTokenLoc(Loc) ||
5059           parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5060                                         "' directive") ||
5061           check(FileNumber < 1, Loc, "file number less than one in '" +
5062                                          DirectiveName + "' directive") ||
5063           check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5064                 "unassigned file number in '" + DirectiveName + "' directive");
5065  }
5066  
5067  /// parseDirectiveCVFuncId
5068  /// ::= .cv_func_id FunctionId
5069  ///
5070  /// Introduces a function ID that can be used with .cv_loc.
parseDirectiveCVFuncId()5071  bool MasmParser::parseDirectiveCVFuncId() {
5072    SMLoc FunctionIdLoc = getTok().getLoc();
5073    int64_t FunctionId;
5074  
5075    if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5076      return true;
5077  
5078    if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5079      return Error(FunctionIdLoc, "function id already allocated");
5080  
5081    return false;
5082  }
5083  
5084  /// parseDirectiveCVInlineSiteId
5085  /// ::= .cv_inline_site_id FunctionId
5086  ///         "within" IAFunc
5087  ///         "inlined_at" IAFile IALine [IACol]
5088  ///
5089  /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5090  /// at" source location information for use in the line table of the caller,
5091  /// whether the caller is a real function or another inlined call site.
parseDirectiveCVInlineSiteId()5092  bool MasmParser::parseDirectiveCVInlineSiteId() {
5093    SMLoc FunctionIdLoc = getTok().getLoc();
5094    int64_t FunctionId;
5095    int64_t IAFunc;
5096    int64_t IAFile;
5097    int64_t IALine;
5098    int64_t IACol = 0;
5099  
5100    // FunctionId
5101    if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5102      return true;
5103  
5104    // "within"
5105    if (check((getLexer().isNot(AsmToken::Identifier) ||
5106               getTok().getIdentifier() != "within"),
5107              "expected 'within' identifier in '.cv_inline_site_id' directive"))
5108      return true;
5109    Lex();
5110  
5111    // IAFunc
5112    if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5113      return true;
5114  
5115    // "inlined_at"
5116    if (check((getLexer().isNot(AsmToken::Identifier) ||
5117               getTok().getIdentifier() != "inlined_at"),
5118              "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5119              "directive") )
5120      return true;
5121    Lex();
5122  
5123    // IAFile IALine
5124    if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5125        parseIntToken(IALine, "expected line number after 'inlined_at'"))
5126      return true;
5127  
5128    // [IACol]
5129    if (getLexer().is(AsmToken::Integer)) {
5130      IACol = getTok().getIntVal();
5131      Lex();
5132    }
5133  
5134    if (parseEOL())
5135      return true;
5136  
5137    if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5138                                                   IALine, IACol, FunctionIdLoc))
5139      return Error(FunctionIdLoc, "function id already allocated");
5140  
5141    return false;
5142  }
5143  
5144  /// parseDirectiveCVLoc
5145  /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5146  ///                                [is_stmt VALUE]
5147  /// The first number is a file number, must have been previously assigned with
5148  /// a .file directive, the second number is the line number and optionally the
5149  /// third number is a column position (zero if not specified).  The remaining
5150  /// optional items are .loc sub-directives.
parseDirectiveCVLoc()5151  bool MasmParser::parseDirectiveCVLoc() {
5152    SMLoc DirectiveLoc = getTok().getLoc();
5153    int64_t FunctionId, FileNumber;
5154    if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5155        parseCVFileId(FileNumber, ".cv_loc"))
5156      return true;
5157  
5158    int64_t LineNumber = 0;
5159    if (getLexer().is(AsmToken::Integer)) {
5160      LineNumber = getTok().getIntVal();
5161      if (LineNumber < 0)
5162        return TokError("line number less than zero in '.cv_loc' directive");
5163      Lex();
5164    }
5165  
5166    int64_t ColumnPos = 0;
5167    if (getLexer().is(AsmToken::Integer)) {
5168      ColumnPos = getTok().getIntVal();
5169      if (ColumnPos < 0)
5170        return TokError("column position less than zero in '.cv_loc' directive");
5171      Lex();
5172    }
5173  
5174    bool PrologueEnd = false;
5175    uint64_t IsStmt = 0;
5176  
5177    auto parseOp = [&]() -> bool {
5178      StringRef Name;
5179      SMLoc Loc = getTok().getLoc();
5180      if (parseIdentifier(Name))
5181        return TokError("unexpected token in '.cv_loc' directive");
5182      if (Name == "prologue_end")
5183        PrologueEnd = true;
5184      else if (Name == "is_stmt") {
5185        Loc = getTok().getLoc();
5186        const MCExpr *Value;
5187        if (parseExpression(Value))
5188          return true;
5189        // The expression must be the constant 0 or 1.
5190        IsStmt = ~0ULL;
5191        if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5192          IsStmt = MCE->getValue();
5193  
5194        if (IsStmt > 1)
5195          return Error(Loc, "is_stmt value not 0 or 1");
5196      } else {
5197        return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5198      }
5199      return false;
5200    };
5201  
5202    if (parseMany(parseOp, false /*hasComma*/))
5203      return true;
5204  
5205    getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5206                                     ColumnPos, PrologueEnd, IsStmt, StringRef(),
5207                                     DirectiveLoc);
5208    return false;
5209  }
5210  
5211  /// parseDirectiveCVLinetable
5212  /// ::= .cv_linetable FunctionId, FnStart, FnEnd
parseDirectiveCVLinetable()5213  bool MasmParser::parseDirectiveCVLinetable() {
5214    int64_t FunctionId;
5215    StringRef FnStartName, FnEndName;
5216    SMLoc Loc = getTok().getLoc();
5217    if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5218        parseToken(AsmToken::Comma,
5219                   "unexpected token in '.cv_linetable' directive") ||
5220        parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5221                                    "expected identifier in directive") ||
5222        parseToken(AsmToken::Comma,
5223                   "unexpected token in '.cv_linetable' directive") ||
5224        parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5225                                    "expected identifier in directive"))
5226      return true;
5227  
5228    MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5229    MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5230  
5231    getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5232    return false;
5233  }
5234  
5235  /// parseDirectiveCVInlineLinetable
5236  /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
parseDirectiveCVInlineLinetable()5237  bool MasmParser::parseDirectiveCVInlineLinetable() {
5238    int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5239    StringRef FnStartName, FnEndName;
5240    SMLoc Loc = getTok().getLoc();
5241    if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5242        parseTokenLoc(Loc) ||
5243        parseIntToken(
5244            SourceFileId,
5245            "expected SourceField in '.cv_inline_linetable' directive") ||
5246        check(SourceFileId <= 0, Loc,
5247              "File id less than zero in '.cv_inline_linetable' directive") ||
5248        parseTokenLoc(Loc) ||
5249        parseIntToken(
5250            SourceLineNum,
5251            "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5252        check(SourceLineNum < 0, Loc,
5253              "Line number less than zero in '.cv_inline_linetable' directive") ||
5254        parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5255                                    "expected identifier in directive") ||
5256        parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5257                                    "expected identifier in directive"))
5258      return true;
5259  
5260    if (parseEOL())
5261      return true;
5262  
5263    MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5264    MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5265    getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5266                                                 SourceLineNum, FnStartSym,
5267                                                 FnEndSym);
5268    return false;
5269  }
5270  
initializeCVDefRangeTypeMap()5271  void MasmParser::initializeCVDefRangeTypeMap() {
5272    CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5273    CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5274    CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5275    CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5276  }
5277  
5278  /// parseDirectiveCVDefRange
5279  /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
parseDirectiveCVDefRange()5280  bool MasmParser::parseDirectiveCVDefRange() {
5281    SMLoc Loc;
5282    std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5283    while (getLexer().is(AsmToken::Identifier)) {
5284      Loc = getLexer().getLoc();
5285      StringRef GapStartName;
5286      if (parseIdentifier(GapStartName))
5287        return Error(Loc, "expected identifier in directive");
5288      MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5289  
5290      Loc = getLexer().getLoc();
5291      StringRef GapEndName;
5292      if (parseIdentifier(GapEndName))
5293        return Error(Loc, "expected identifier in directive");
5294      MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5295  
5296      Ranges.push_back({GapStartSym, GapEndSym});
5297    }
5298  
5299    StringRef CVDefRangeTypeStr;
5300    if (parseToken(
5301            AsmToken::Comma,
5302            "expected comma before def_range type in .cv_def_range directive") ||
5303        parseIdentifier(CVDefRangeTypeStr))
5304      return Error(Loc, "expected def_range type in directive");
5305  
5306    StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5307        CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5308    CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5309                                  ? CVDR_DEFRANGE
5310                                  : CVTypeIt->getValue();
5311    switch (CVDRType) {
5312    case CVDR_DEFRANGE_REGISTER: {
5313      int64_t DRRegister;
5314      if (parseToken(AsmToken::Comma, "expected comma before register number in "
5315                                      ".cv_def_range directive") ||
5316          parseAbsoluteExpression(DRRegister))
5317        return Error(Loc, "expected register number");
5318  
5319      codeview::DefRangeRegisterHeader DRHdr;
5320      DRHdr.Register = DRRegister;
5321      DRHdr.MayHaveNoName = 0;
5322      getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5323      break;
5324    }
5325    case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5326      int64_t DROffset;
5327      if (parseToken(AsmToken::Comma,
5328                     "expected comma before offset in .cv_def_range directive") ||
5329          parseAbsoluteExpression(DROffset))
5330        return Error(Loc, "expected offset value");
5331  
5332      codeview::DefRangeFramePointerRelHeader DRHdr;
5333      DRHdr.Offset = DROffset;
5334      getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5335      break;
5336    }
5337    case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5338      int64_t DRRegister;
5339      int64_t DROffsetInParent;
5340      if (parseToken(AsmToken::Comma, "expected comma before register number in "
5341                                      ".cv_def_range directive") ||
5342          parseAbsoluteExpression(DRRegister))
5343        return Error(Loc, "expected register number");
5344      if (parseToken(AsmToken::Comma,
5345                     "expected comma before offset in .cv_def_range directive") ||
5346          parseAbsoluteExpression(DROffsetInParent))
5347        return Error(Loc, "expected offset value");
5348  
5349      codeview::DefRangeSubfieldRegisterHeader DRHdr;
5350      DRHdr.Register = DRRegister;
5351      DRHdr.MayHaveNoName = 0;
5352      DRHdr.OffsetInParent = DROffsetInParent;
5353      getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5354      break;
5355    }
5356    case CVDR_DEFRANGE_REGISTER_REL: {
5357      int64_t DRRegister;
5358      int64_t DRFlags;
5359      int64_t DRBasePointerOffset;
5360      if (parseToken(AsmToken::Comma, "expected comma before register number in "
5361                                      ".cv_def_range directive") ||
5362          parseAbsoluteExpression(DRRegister))
5363        return Error(Loc, "expected register value");
5364      if (parseToken(
5365              AsmToken::Comma,
5366              "expected comma before flag value in .cv_def_range directive") ||
5367          parseAbsoluteExpression(DRFlags))
5368        return Error(Loc, "expected flag value");
5369      if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5370                                      "in .cv_def_range directive") ||
5371          parseAbsoluteExpression(DRBasePointerOffset))
5372        return Error(Loc, "expected base pointer offset value");
5373  
5374      codeview::DefRangeRegisterRelHeader DRHdr;
5375      DRHdr.Register = DRRegister;
5376      DRHdr.Flags = DRFlags;
5377      DRHdr.BasePointerOffset = DRBasePointerOffset;
5378      getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5379      break;
5380    }
5381    default:
5382      return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5383    }
5384    return true;
5385  }
5386  
5387  /// parseDirectiveCVString
5388  /// ::= .cv_stringtable "string"
parseDirectiveCVString()5389  bool MasmParser::parseDirectiveCVString() {
5390    std::string Data;
5391    if (checkForValidSection() || parseEscapedString(Data))
5392      return addErrorSuffix(" in '.cv_string' directive");
5393  
5394    // Put the string in the table and emit the offset.
5395    std::pair<StringRef, unsigned> Insertion =
5396        getCVContext().addToStringTable(Data);
5397    getStreamer().emitIntValue(Insertion.second, 4);
5398    return false;
5399  }
5400  
5401  /// parseDirectiveCVStringTable
5402  /// ::= .cv_stringtable
parseDirectiveCVStringTable()5403  bool MasmParser::parseDirectiveCVStringTable() {
5404    getStreamer().emitCVStringTableDirective();
5405    return false;
5406  }
5407  
5408  /// parseDirectiveCVFileChecksums
5409  /// ::= .cv_filechecksums
parseDirectiveCVFileChecksums()5410  bool MasmParser::parseDirectiveCVFileChecksums() {
5411    getStreamer().emitCVFileChecksumsDirective();
5412    return false;
5413  }
5414  
5415  /// parseDirectiveCVFileChecksumOffset
5416  /// ::= .cv_filechecksumoffset fileno
parseDirectiveCVFileChecksumOffset()5417  bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5418    int64_t FileNo;
5419    if (parseIntToken(FileNo, "expected identifier in directive"))
5420      return true;
5421    if (parseEOL())
5422      return true;
5423    getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5424    return false;
5425  }
5426  
5427  /// parseDirectiveCVFPOData
5428  /// ::= .cv_fpo_data procsym
parseDirectiveCVFPOData()5429  bool MasmParser::parseDirectiveCVFPOData() {
5430    SMLoc DirLoc = getLexer().getLoc();
5431    StringRef ProcName;
5432    if (parseIdentifier(ProcName))
5433      return TokError("expected symbol name");
5434    if (parseEOL("unexpected tokens"))
5435      return addErrorSuffix(" in '.cv_fpo_data' directive");
5436    MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5437    getStreamer().emitCVFPOData(ProcSym, DirLoc);
5438    return false;
5439  }
5440  
5441  /// parseDirectiveCFISections
5442  /// ::= .cfi_sections section [, section]
parseDirectiveCFISections()5443  bool MasmParser::parseDirectiveCFISections() {
5444    StringRef Name;
5445    bool EH = false;
5446    bool Debug = false;
5447  
5448    if (parseIdentifier(Name))
5449      return TokError("Expected an identifier");
5450  
5451    if (Name == ".eh_frame")
5452      EH = true;
5453    else if (Name == ".debug_frame")
5454      Debug = true;
5455  
5456    if (getLexer().is(AsmToken::Comma)) {
5457      Lex();
5458  
5459      if (parseIdentifier(Name))
5460        return TokError("Expected an identifier");
5461  
5462      if (Name == ".eh_frame")
5463        EH = true;
5464      else if (Name == ".debug_frame")
5465        Debug = true;
5466    }
5467  
5468    getStreamer().emitCFISections(EH, Debug);
5469    return false;
5470  }
5471  
5472  /// parseDirectiveCFIStartProc
5473  /// ::= .cfi_startproc [simple]
parseDirectiveCFIStartProc()5474  bool MasmParser::parseDirectiveCFIStartProc() {
5475    StringRef Simple;
5476    if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5477      if (check(parseIdentifier(Simple) || Simple != "simple",
5478                "unexpected token") ||
5479          parseEOL())
5480        return addErrorSuffix(" in '.cfi_startproc' directive");
5481    }
5482  
5483    // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5484    // being produced if this directive is emitted as part of preprocessor macro
5485    // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5486    // Tools like llvm-mc on the other hand are not affected by it, and report
5487    // correct context information.
5488    getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5489    return false;
5490  }
5491  
5492  /// parseDirectiveCFIEndProc
5493  /// ::= .cfi_endproc
parseDirectiveCFIEndProc()5494  bool MasmParser::parseDirectiveCFIEndProc() {
5495    getStreamer().emitCFIEndProc();
5496    return false;
5497  }
5498  
5499  /// parse register name or number.
parseRegisterOrRegisterNumber(int64_t & Register,SMLoc DirectiveLoc)5500  bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5501                                                 SMLoc DirectiveLoc) {
5502    MCRegister RegNo;
5503  
5504    if (getLexer().isNot(AsmToken::Integer)) {
5505      if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5506        return true;
5507      Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5508    } else
5509      return parseAbsoluteExpression(Register);
5510  
5511    return false;
5512  }
5513  
5514  /// parseDirectiveCFIDefCfa
5515  /// ::= .cfi_def_cfa register,  offset
parseDirectiveCFIDefCfa(SMLoc DirectiveLoc)5516  bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5517    int64_t Register = 0, Offset = 0;
5518    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5519        parseToken(AsmToken::Comma, "unexpected token in directive") ||
5520        parseAbsoluteExpression(Offset))
5521      return true;
5522  
5523    getStreamer().emitCFIDefCfa(Register, Offset);
5524    return false;
5525  }
5526  
5527  /// parseDirectiveCFIDefCfaOffset
5528  /// ::= .cfi_def_cfa_offset offset
parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc)5529  bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
5530    int64_t Offset = 0;
5531    if (parseAbsoluteExpression(Offset))
5532      return true;
5533  
5534    getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
5535    return false;
5536  }
5537  
5538  /// parseDirectiveCFIRegister
5539  /// ::= .cfi_register register, register
parseDirectiveCFIRegister(SMLoc DirectiveLoc)5540  bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5541    int64_t Register1 = 0, Register2 = 0;
5542    if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5543        parseToken(AsmToken::Comma, "unexpected token in directive") ||
5544        parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5545      return true;
5546  
5547    getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
5548    return false;
5549  }
5550  
5551  /// parseDirectiveCFIWindowSave
5552  /// ::= .cfi_window_save
parseDirectiveCFIWindowSave(SMLoc DirectiveLoc)5553  bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
5554    getStreamer().emitCFIWindowSave(DirectiveLoc);
5555    return false;
5556  }
5557  
5558  /// parseDirectiveCFIAdjustCfaOffset
5559  /// ::= .cfi_adjust_cfa_offset adjustment
parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc)5560  bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
5561    int64_t Adjustment = 0;
5562    if (parseAbsoluteExpression(Adjustment))
5563      return true;
5564  
5565    getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
5566    return false;
5567  }
5568  
5569  /// parseDirectiveCFIDefCfaRegister
5570  /// ::= .cfi_def_cfa_register register
parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc)5571  bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5572    int64_t Register = 0;
5573    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5574      return true;
5575  
5576    getStreamer().emitCFIDefCfaRegister(Register);
5577    return false;
5578  }
5579  
5580  /// parseDirectiveCFIOffset
5581  /// ::= .cfi_offset register, offset
parseDirectiveCFIOffset(SMLoc DirectiveLoc)5582  bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5583    int64_t Register = 0;
5584    int64_t Offset = 0;
5585  
5586    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5587        parseToken(AsmToken::Comma, "unexpected token in directive") ||
5588        parseAbsoluteExpression(Offset))
5589      return true;
5590  
5591    getStreamer().emitCFIOffset(Register, Offset);
5592    return false;
5593  }
5594  
5595  /// parseDirectiveCFIRelOffset
5596  /// ::= .cfi_rel_offset register, offset
parseDirectiveCFIRelOffset(SMLoc DirectiveLoc)5597  bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5598    int64_t Register = 0, Offset = 0;
5599  
5600    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5601        parseToken(AsmToken::Comma, "unexpected token in directive") ||
5602        parseAbsoluteExpression(Offset))
5603      return true;
5604  
5605    getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
5606    return false;
5607  }
5608  
isValidEncoding(int64_t Encoding)5609  static bool isValidEncoding(int64_t Encoding) {
5610    if (Encoding & ~0xff)
5611      return false;
5612  
5613    if (Encoding == dwarf::DW_EH_PE_omit)
5614      return true;
5615  
5616    const unsigned Format = Encoding & 0xf;
5617    if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5618        Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5619        Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5620        Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5621      return false;
5622  
5623    const unsigned Application = Encoding & 0x70;
5624    if (Application != dwarf::DW_EH_PE_absptr &&
5625        Application != dwarf::DW_EH_PE_pcrel)
5626      return false;
5627  
5628    return true;
5629  }
5630  
5631  /// parseDirectiveCFIPersonalityOrLsda
5632  /// IsPersonality true for cfi_personality, false for cfi_lsda
5633  /// ::= .cfi_personality encoding, [symbol_name]
5634  /// ::= .cfi_lsda encoding, [symbol_name]
parseDirectiveCFIPersonalityOrLsda(bool IsPersonality)5635  bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5636    int64_t Encoding = 0;
5637    if (parseAbsoluteExpression(Encoding))
5638      return true;
5639    if (Encoding == dwarf::DW_EH_PE_omit)
5640      return false;
5641  
5642    StringRef Name;
5643    if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5644        parseToken(AsmToken::Comma, "unexpected token in directive") ||
5645        check(parseIdentifier(Name), "expected identifier in directive"))
5646      return true;
5647  
5648    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5649  
5650    if (IsPersonality)
5651      getStreamer().emitCFIPersonality(Sym, Encoding);
5652    else
5653      getStreamer().emitCFILsda(Sym, Encoding);
5654    return false;
5655  }
5656  
5657  /// parseDirectiveCFIRememberState
5658  /// ::= .cfi_remember_state
parseDirectiveCFIRememberState(SMLoc DirectiveLoc)5659  bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
5660    getStreamer().emitCFIRememberState(DirectiveLoc);
5661    return false;
5662  }
5663  
5664  /// parseDirectiveCFIRestoreState
5665  /// ::= .cfi_remember_state
parseDirectiveCFIRestoreState(SMLoc DirectiveLoc)5666  bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
5667    getStreamer().emitCFIRestoreState(DirectiveLoc);
5668    return false;
5669  }
5670  
5671  /// parseDirectiveCFISameValue
5672  /// ::= .cfi_same_value register
parseDirectiveCFISameValue(SMLoc DirectiveLoc)5673  bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5674    int64_t Register = 0;
5675  
5676    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5677      return true;
5678  
5679    getStreamer().emitCFISameValue(Register, DirectiveLoc);
5680    return false;
5681  }
5682  
5683  /// parseDirectiveCFIRestore
5684  /// ::= .cfi_restore register
parseDirectiveCFIRestore(SMLoc DirectiveLoc)5685  bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5686    int64_t Register = 0;
5687    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5688      return true;
5689  
5690    getStreamer().emitCFIRestore(Register);
5691    return false;
5692  }
5693  
5694  /// parseDirectiveCFIEscape
5695  /// ::= .cfi_escape expression[,...]
parseDirectiveCFIEscape(SMLoc DirectiveLoc)5696  bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
5697    std::string Values;
5698    int64_t CurrValue;
5699    if (parseAbsoluteExpression(CurrValue))
5700      return true;
5701  
5702    Values.push_back((uint8_t)CurrValue);
5703  
5704    while (getLexer().is(AsmToken::Comma)) {
5705      Lex();
5706  
5707      if (parseAbsoluteExpression(CurrValue))
5708        return true;
5709  
5710      Values.push_back((uint8_t)CurrValue);
5711    }
5712  
5713    getStreamer().emitCFIEscape(Values, DirectiveLoc);
5714    return false;
5715  }
5716  
5717  /// parseDirectiveCFIReturnColumn
5718  /// ::= .cfi_return_column register
parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc)5719  bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5720    int64_t Register = 0;
5721    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5722      return true;
5723    getStreamer().emitCFIReturnColumn(Register);
5724    return false;
5725  }
5726  
5727  /// parseDirectiveCFISignalFrame
5728  /// ::= .cfi_signal_frame
parseDirectiveCFISignalFrame()5729  bool MasmParser::parseDirectiveCFISignalFrame() {
5730    if (parseEOL())
5731      return true;
5732  
5733    getStreamer().emitCFISignalFrame();
5734    return false;
5735  }
5736  
5737  /// parseDirectiveCFIUndefined
5738  /// ::= .cfi_undefined register
parseDirectiveCFIUndefined(SMLoc DirectiveLoc)5739  bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5740    int64_t Register = 0;
5741  
5742    if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5743      return true;
5744  
5745    getStreamer().emitCFIUndefined(Register);
5746    return false;
5747  }
5748  
5749  /// parseDirectiveMacro
5750  /// ::= name macro [parameters]
5751  ///     ["LOCAL" identifiers]
5752  ///   parameters ::= parameter [, parameter]*
5753  ///   parameter ::= name ":" qualifier
5754  ///   qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)5755  bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5756    MCAsmMacroParameters Parameters;
5757    while (getLexer().isNot(AsmToken::EndOfStatement)) {
5758      if (!Parameters.empty() && Parameters.back().Vararg)
5759        return Error(Lexer.getLoc(),
5760                     "Vararg parameter '" + Parameters.back().Name +
5761                         "' should be last in the list of parameters");
5762  
5763      MCAsmMacroParameter Parameter;
5764      if (parseIdentifier(Parameter.Name))
5765        return TokError("expected identifier in 'macro' directive");
5766  
5767      // Emit an error if two (or more) named parameters share the same name.
5768      for (const MCAsmMacroParameter& CurrParam : Parameters)
5769        if (CurrParam.Name.equals_insensitive(Parameter.Name))
5770          return TokError("macro '" + Name + "' has multiple parameters"
5771                          " named '" + Parameter.Name + "'");
5772  
5773      if (Lexer.is(AsmToken::Colon)) {
5774        Lex();  // consume ':'
5775  
5776        if (parseOptionalToken(AsmToken::Equal)) {
5777          // Default value
5778          SMLoc ParamLoc;
5779  
5780          ParamLoc = Lexer.getLoc();
5781          if (parseMacroArgument(nullptr, Parameter.Value))
5782            return true;
5783        } else {
5784          SMLoc QualLoc;
5785          StringRef Qualifier;
5786  
5787          QualLoc = Lexer.getLoc();
5788          if (parseIdentifier(Qualifier))
5789            return Error(QualLoc, "missing parameter qualifier for "
5790                                  "'" +
5791                                      Parameter.Name + "' in macro '" + Name +
5792                                      "'");
5793  
5794          if (Qualifier.equals_insensitive("req"))
5795            Parameter.Required = true;
5796          else if (Qualifier.equals_insensitive("vararg"))
5797            Parameter.Vararg = true;
5798          else
5799            return Error(QualLoc,
5800                         Qualifier + " is not a valid parameter qualifier for '" +
5801                             Parameter.Name + "' in macro '" + Name + "'");
5802        }
5803      }
5804  
5805      Parameters.push_back(std::move(Parameter));
5806  
5807      if (getLexer().is(AsmToken::Comma))
5808        Lex();
5809    }
5810  
5811    // Eat just the end of statement.
5812    Lexer.Lex();
5813  
5814    std::vector<std::string> Locals;
5815    if (getTok().is(AsmToken::Identifier) &&
5816        getTok().getIdentifier().equals_insensitive("local")) {
5817      Lex(); // Eat the LOCAL directive.
5818  
5819      StringRef ID;
5820      while (true) {
5821        if (parseIdentifier(ID))
5822          return true;
5823        Locals.push_back(ID.lower());
5824  
5825        // If we see a comma, continue (and allow line continuation).
5826        if (!parseOptionalToken(AsmToken::Comma))
5827          break;
5828        parseOptionalToken(AsmToken::EndOfStatement);
5829      }
5830    }
5831  
5832    // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5833    AsmToken EndToken, StartToken = getTok();
5834    unsigned MacroDepth = 0;
5835    bool IsMacroFunction = false;
5836    // Lex the macro definition.
5837    while (true) {
5838      // Ignore Lexing errors in macros.
5839      while (Lexer.is(AsmToken::Error)) {
5840        Lexer.Lex();
5841      }
5842  
5843      // Check whether we have reached the end of the file.
5844      if (getLexer().is(AsmToken::Eof))
5845        return Error(NameLoc, "no matching 'endm' in definition");
5846  
5847      // Otherwise, check whether we have reached the 'endm'... and determine if
5848      // this is a macro function.
5849      if (getLexer().is(AsmToken::Identifier)) {
5850        if (getTok().getIdentifier().equals_insensitive("endm")) {
5851          if (MacroDepth == 0) { // Outermost macro.
5852            EndToken = getTok();
5853            Lexer.Lex();
5854            if (getLexer().isNot(AsmToken::EndOfStatement))
5855              return TokError("unexpected token in '" + EndToken.getIdentifier() +
5856                              "' directive");
5857            break;
5858          } else {
5859            // Otherwise we just found the end of an inner macro.
5860            --MacroDepth;
5861          }
5862        } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5863          if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5864            IsMacroFunction = true;
5865          }
5866        } else if (isMacroLikeDirective()) {
5867          // We allow nested macros. Those aren't instantiated until the
5868          // outermost macro is expanded so just ignore them for now.
5869          ++MacroDepth;
5870        }
5871      }
5872  
5873      // Otherwise, scan til the end of the statement.
5874      eatToEndOfStatement();
5875    }
5876  
5877    if (getContext().lookupMacro(Name.lower())) {
5878      return Error(NameLoc, "macro '" + Name + "' is already defined");
5879    }
5880  
5881    const char *BodyStart = StartToken.getLoc().getPointer();
5882    const char *BodyEnd = EndToken.getLoc().getPointer();
5883    StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5884    MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5885                     IsMacroFunction);
5886    DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5887                    Macro.dump());
5888    getContext().defineMacro(Name.lower(), std::move(Macro));
5889    return false;
5890  }
5891  
5892  /// parseDirectiveExitMacro
5893  /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)5894  bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5895                                           StringRef Directive,
5896                                           std::string &Value) {
5897    SMLoc EndLoc = getTok().getLoc();
5898    if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5899      return Error(EndLoc,
5900                   "unable to parse text item in '" + Directive + "' directive");
5901    eatToEndOfStatement();
5902  
5903    if (!isInsideMacroInstantiation())
5904      return TokError("unexpected '" + Directive + "' in file, "
5905                                                   "no current macro definition");
5906  
5907    // Exit all conditionals that are active in the current macro.
5908    while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5909      TheCondState = TheCondStack.back();
5910      TheCondStack.pop_back();
5911    }
5912  
5913    handleMacroExit();
5914    return false;
5915  }
5916  
5917  /// parseDirectiveEndMacro
5918  /// ::= endm
parseDirectiveEndMacro(StringRef Directive)5919  bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5920    if (getLexer().isNot(AsmToken::EndOfStatement))
5921      return TokError("unexpected token in '" + Directive + "' directive");
5922  
5923    // If we are inside a macro instantiation, terminate the current
5924    // instantiation.
5925    if (isInsideMacroInstantiation()) {
5926      handleMacroExit();
5927      return false;
5928    }
5929  
5930    // Otherwise, this .endmacro is a stray entry in the file; well formed
5931    // .endmacro directives are handled during the macro definition parsing.
5932    return TokError("unexpected '" + Directive + "' in file, "
5933                                                 "no current macro definition");
5934  }
5935  
5936  /// parseDirectivePurgeMacro
5937  /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)5938  bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5939    StringRef Name;
5940    while (true) {
5941      SMLoc NameLoc;
5942      if (parseTokenLoc(NameLoc) ||
5943          check(parseIdentifier(Name), NameLoc,
5944                "expected identifier in 'purge' directive"))
5945        return true;
5946  
5947      DEBUG_WITH_TYPE("asm-macros", dbgs()
5948                                        << "Un-defining macro: " << Name << "\n");
5949      if (!getContext().lookupMacro(Name.lower()))
5950        return Error(NameLoc, "macro '" + Name + "' is not defined");
5951      getContext().undefineMacro(Name.lower());
5952  
5953      if (!parseOptionalToken(AsmToken::Comma))
5954        break;
5955      parseOptionalToken(AsmToken::EndOfStatement);
5956    }
5957  
5958    return false;
5959  }
5960  
parseDirectiveExtern()5961  bool MasmParser::parseDirectiveExtern() {
5962    // .extern is the default - but we still need to take any provided type info.
5963    auto parseOp = [&]() -> bool {
5964      StringRef Name;
5965      SMLoc NameLoc = getTok().getLoc();
5966      if (parseIdentifier(Name))
5967        return Error(NameLoc, "expected name");
5968      if (parseToken(AsmToken::Colon))
5969        return true;
5970  
5971      StringRef TypeName;
5972      SMLoc TypeLoc = getTok().getLoc();
5973      if (parseIdentifier(TypeName))
5974        return Error(TypeLoc, "expected type");
5975      if (!TypeName.equals_insensitive("proc")) {
5976        AsmTypeInfo Type;
5977        if (lookUpType(TypeName, Type))
5978          return Error(TypeLoc, "unrecognized type");
5979        KnownType[Name.lower()] = Type;
5980      }
5981  
5982      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5983      Sym->setExternal(true);
5984      getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
5985  
5986      return false;
5987    };
5988  
5989    if (parseMany(parseOp))
5990      return addErrorSuffix(" in directive 'extern'");
5991    return false;
5992  }
5993  
5994  /// parseDirectiveSymbolAttribute
5995  ///  ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)5996  bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5997    auto parseOp = [&]() -> bool {
5998      StringRef Name;
5999      SMLoc Loc = getTok().getLoc();
6000      if (parseIdentifier(Name))
6001        return Error(Loc, "expected identifier");
6002      MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6003  
6004      // Assembler local symbols don't make any sense here. Complain loudly.
6005      if (Sym->isTemporary())
6006        return Error(Loc, "non-local symbol required");
6007  
6008      if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6009        return Error(Loc, "unable to emit symbol attribute");
6010      return false;
6011    };
6012  
6013    if (parseMany(parseOp))
6014      return addErrorSuffix(" in directive");
6015    return false;
6016  }
6017  
6018  /// parseDirectiveComm
6019  ///  ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)6020  bool MasmParser::parseDirectiveComm(bool IsLocal) {
6021    if (checkForValidSection())
6022      return true;
6023  
6024    SMLoc IDLoc = getLexer().getLoc();
6025    StringRef Name;
6026    if (parseIdentifier(Name))
6027      return TokError("expected identifier in directive");
6028  
6029    // Handle the identifier as the key symbol.
6030    MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6031  
6032    if (getLexer().isNot(AsmToken::Comma))
6033      return TokError("unexpected token in directive");
6034    Lex();
6035  
6036    int64_t Size;
6037    SMLoc SizeLoc = getLexer().getLoc();
6038    if (parseAbsoluteExpression(Size))
6039      return true;
6040  
6041    int64_t Pow2Alignment = 0;
6042    SMLoc Pow2AlignmentLoc;
6043    if (getLexer().is(AsmToken::Comma)) {
6044      Lex();
6045      Pow2AlignmentLoc = getLexer().getLoc();
6046      if (parseAbsoluteExpression(Pow2Alignment))
6047        return true;
6048  
6049      LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6050      if (IsLocal && LCOMM == LCOMM::NoAlignment)
6051        return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6052  
6053      // If this target takes alignments in bytes (not log) validate and convert.
6054      if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6055          (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6056        if (!isPowerOf2_64(Pow2Alignment))
6057          return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6058        Pow2Alignment = Log2_64(Pow2Alignment);
6059      }
6060    }
6061  
6062    if (parseEOL())
6063      return true;
6064  
6065    // NOTE: a size of zero for a .comm should create a undefined symbol
6066    // but a size of .lcomm creates a bss symbol of size zero.
6067    if (Size < 0)
6068      return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6069                            "be less than zero");
6070  
6071    // NOTE: The alignment in the directive is a power of 2 value, the assembler
6072    // may internally end up wanting an alignment in bytes.
6073    // FIXME: Diagnose overflow.
6074    if (Pow2Alignment < 0)
6075      return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6076                                     "alignment, can't be less than zero");
6077  
6078    Sym->redefineIfPossible();
6079    if (!Sym->isUndefined())
6080      return Error(IDLoc, "invalid symbol redefinition");
6081  
6082    // Create the Symbol as a common or local common with Size and Pow2Alignment.
6083    if (IsLocal) {
6084      getStreamer().emitLocalCommonSymbol(Sym, Size,
6085                                          Align(1ULL << Pow2Alignment));
6086      return false;
6087    }
6088  
6089    getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
6090    return false;
6091  }
6092  
6093  /// parseDirectiveComment
6094  ///  ::= comment delimiter [[text]]
6095  ///              [[text]]
6096  ///              [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)6097  bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6098    std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6099    size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6100    assert(DelimiterEnd != std::string::npos);
6101    StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6102    if (Delimiter.empty())
6103      return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6104    do {
6105      if (getTok().is(AsmToken::Eof))
6106        return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6107      Lex();  // eat end of statement
6108    } while (
6109        !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6110    return parseEOL();
6111  }
6112  
6113  /// parseDirectiveInclude
6114  ///  ::= include <filename>
6115  ///    | include filename
parseDirectiveInclude()6116  bool MasmParser::parseDirectiveInclude() {
6117    // Allow the strings to have escaped octal character sequence.
6118    std::string Filename;
6119    SMLoc IncludeLoc = getTok().getLoc();
6120  
6121    if (parseAngleBracketString(Filename))
6122      Filename = parseStringTo(AsmToken::EndOfStatement);
6123    if (check(Filename.empty(), "missing filename in 'include' directive") ||
6124        check(getTok().isNot(AsmToken::EndOfStatement),
6125              "unexpected token in 'include' directive") ||
6126        // Attempt to switch the lexer to the included file before consuming the
6127        // end of statement to avoid losing it when we switch.
6128        check(enterIncludeFile(Filename), IncludeLoc,
6129              "Could not find include file '" + Filename + "'"))
6130      return true;
6131  
6132    return false;
6133  }
6134  
6135  /// parseDirectiveIf
6136  /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6137  bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6138    TheCondStack.push_back(TheCondState);
6139    TheCondState.TheCond = AsmCond::IfCond;
6140    if (TheCondState.Ignore) {
6141      eatToEndOfStatement();
6142    } else {
6143      int64_t ExprValue;
6144      if (parseAbsoluteExpression(ExprValue) || parseEOL())
6145        return true;
6146  
6147      switch (DirKind) {
6148      default:
6149        llvm_unreachable("unsupported directive");
6150      case DK_IF:
6151        break;
6152      case DK_IFE:
6153        ExprValue = ExprValue == 0;
6154        break;
6155      }
6156  
6157      TheCondState.CondMet = ExprValue;
6158      TheCondState.Ignore = !TheCondState.CondMet;
6159    }
6160  
6161    return false;
6162  }
6163  
6164  /// parseDirectiveIfb
6165  /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)6166  bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6167    TheCondStack.push_back(TheCondState);
6168    TheCondState.TheCond = AsmCond::IfCond;
6169  
6170    if (TheCondState.Ignore) {
6171      eatToEndOfStatement();
6172    } else {
6173      std::string Str;
6174      if (parseTextItem(Str))
6175        return TokError("expected text item parameter for 'ifb' directive");
6176  
6177      if (parseEOL())
6178        return true;
6179  
6180      TheCondState.CondMet = ExpectBlank == Str.empty();
6181      TheCondState.Ignore = !TheCondState.CondMet;
6182    }
6183  
6184    return false;
6185  }
6186  
6187  /// parseDirectiveIfidn
6188  ///   ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6189  bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6190                                       bool CaseInsensitive) {
6191    std::string String1, String2;
6192  
6193    if (parseTextItem(String1)) {
6194      if (ExpectEqual)
6195        return TokError("expected text item parameter for 'ifidn' directive");
6196      return TokError("expected text item parameter for 'ifdif' directive");
6197    }
6198  
6199    if (Lexer.isNot(AsmToken::Comma)) {
6200      if (ExpectEqual)
6201        return TokError(
6202            "expected comma after first string for 'ifidn' directive");
6203      return TokError("expected comma after first string for 'ifdif' directive");
6204    }
6205    Lex();
6206  
6207    if (parseTextItem(String2)) {
6208      if (ExpectEqual)
6209        return TokError("expected text item parameter for 'ifidn' directive");
6210      return TokError("expected text item parameter for 'ifdif' directive");
6211    }
6212  
6213    TheCondStack.push_back(TheCondState);
6214    TheCondState.TheCond = AsmCond::IfCond;
6215    if (CaseInsensitive)
6216      TheCondState.CondMet =
6217          ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6218    else
6219      TheCondState.CondMet = ExpectEqual == (String1 == String2);
6220    TheCondState.Ignore = !TheCondState.CondMet;
6221  
6222    return false;
6223  }
6224  
6225  /// parseDirectiveIfdef
6226  /// ::= ifdef symbol
6227  ///   | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)6228  bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6229    TheCondStack.push_back(TheCondState);
6230    TheCondState.TheCond = AsmCond::IfCond;
6231  
6232    if (TheCondState.Ignore) {
6233      eatToEndOfStatement();
6234    } else {
6235      bool is_defined = false;
6236      MCRegister Reg;
6237      SMLoc StartLoc, EndLoc;
6238      is_defined =
6239          getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6240      if (!is_defined) {
6241        StringRef Name;
6242        if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6243            parseEOL())
6244          return true;
6245  
6246        if (BuiltinSymbolMap.contains(Name.lower())) {
6247          is_defined = true;
6248        } else if (Variables.contains(Name.lower())) {
6249          is_defined = true;
6250        } else {
6251          MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6252          is_defined = (Sym && !Sym->isUndefined(false));
6253        }
6254      }
6255  
6256      TheCondState.CondMet = (is_defined == expect_defined);
6257      TheCondState.Ignore = !TheCondState.CondMet;
6258    }
6259  
6260    return false;
6261  }
6262  
6263  /// parseDirectiveElseIf
6264  /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6265  bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6266                                        DirectiveKind DirKind) {
6267    if (TheCondState.TheCond != AsmCond::IfCond &&
6268        TheCondState.TheCond != AsmCond::ElseIfCond)
6269      return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6270                                 " .if or  an .elseif");
6271    TheCondState.TheCond = AsmCond::ElseIfCond;
6272  
6273    bool LastIgnoreState = false;
6274    if (!TheCondStack.empty())
6275      LastIgnoreState = TheCondStack.back().Ignore;
6276    if (LastIgnoreState || TheCondState.CondMet) {
6277      TheCondState.Ignore = true;
6278      eatToEndOfStatement();
6279    } else {
6280      int64_t ExprValue;
6281      if (parseAbsoluteExpression(ExprValue))
6282        return true;
6283  
6284      if (parseEOL())
6285        return true;
6286  
6287      switch (DirKind) {
6288      default:
6289        llvm_unreachable("unsupported directive");
6290      case DK_ELSEIF:
6291        break;
6292      case DK_ELSEIFE:
6293        ExprValue = ExprValue == 0;
6294        break;
6295      }
6296  
6297      TheCondState.CondMet = ExprValue;
6298      TheCondState.Ignore = !TheCondState.CondMet;
6299    }
6300  
6301    return false;
6302  }
6303  
6304  /// parseDirectiveElseIfb
6305  /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)6306  bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6307    if (TheCondState.TheCond != AsmCond::IfCond &&
6308        TheCondState.TheCond != AsmCond::ElseIfCond)
6309      return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6310                                 " if or an elseif");
6311    TheCondState.TheCond = AsmCond::ElseIfCond;
6312  
6313    bool LastIgnoreState = false;
6314    if (!TheCondStack.empty())
6315      LastIgnoreState = TheCondStack.back().Ignore;
6316    if (LastIgnoreState || TheCondState.CondMet) {
6317      TheCondState.Ignore = true;
6318      eatToEndOfStatement();
6319    } else {
6320      std::string Str;
6321      if (parseTextItem(Str)) {
6322        if (ExpectBlank)
6323          return TokError("expected text item parameter for 'elseifb' directive");
6324        return TokError("expected text item parameter for 'elseifnb' directive");
6325      }
6326  
6327      if (parseEOL())
6328        return true;
6329  
6330      TheCondState.CondMet = ExpectBlank == Str.empty();
6331      TheCondState.Ignore = !TheCondState.CondMet;
6332    }
6333  
6334    return false;
6335  }
6336  
6337  /// parseDirectiveElseIfdef
6338  /// ::= elseifdef symbol
6339  ///   | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)6340  bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6341                                           bool expect_defined) {
6342    if (TheCondState.TheCond != AsmCond::IfCond &&
6343        TheCondState.TheCond != AsmCond::ElseIfCond)
6344      return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6345                                 " if or an elseif");
6346    TheCondState.TheCond = AsmCond::ElseIfCond;
6347  
6348    bool LastIgnoreState = false;
6349    if (!TheCondStack.empty())
6350      LastIgnoreState = TheCondStack.back().Ignore;
6351    if (LastIgnoreState || TheCondState.CondMet) {
6352      TheCondState.Ignore = true;
6353      eatToEndOfStatement();
6354    } else {
6355      bool is_defined = false;
6356      MCRegister Reg;
6357      SMLoc StartLoc, EndLoc;
6358      is_defined =
6359          getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6360      if (!is_defined) {
6361        StringRef Name;
6362        if (check(parseIdentifier(Name),
6363                  "expected identifier after 'elseifdef'") ||
6364            parseEOL())
6365          return true;
6366  
6367        if (BuiltinSymbolMap.contains(Name.lower())) {
6368          is_defined = true;
6369        } else if (Variables.contains(Name.lower())) {
6370          is_defined = true;
6371        } else {
6372          MCSymbol *Sym = getContext().lookupSymbol(Name);
6373          is_defined = (Sym && !Sym->isUndefined(false));
6374        }
6375      }
6376  
6377      TheCondState.CondMet = (is_defined == expect_defined);
6378      TheCondState.Ignore = !TheCondState.CondMet;
6379    }
6380  
6381    return false;
6382  }
6383  
6384  /// parseDirectiveElseIfidn
6385  /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6386  bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6387                                           bool CaseInsensitive) {
6388    if (TheCondState.TheCond != AsmCond::IfCond &&
6389        TheCondState.TheCond != AsmCond::ElseIfCond)
6390      return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6391                                 " if or an elseif");
6392    TheCondState.TheCond = AsmCond::ElseIfCond;
6393  
6394    bool LastIgnoreState = false;
6395    if (!TheCondStack.empty())
6396      LastIgnoreState = TheCondStack.back().Ignore;
6397    if (LastIgnoreState || TheCondState.CondMet) {
6398      TheCondState.Ignore = true;
6399      eatToEndOfStatement();
6400    } else {
6401      std::string String1, String2;
6402  
6403      if (parseTextItem(String1)) {
6404        if (ExpectEqual)
6405          return TokError(
6406              "expected text item parameter for 'elseifidn' directive");
6407        return TokError("expected text item parameter for 'elseifdif' directive");
6408      }
6409  
6410      if (Lexer.isNot(AsmToken::Comma)) {
6411        if (ExpectEqual)
6412          return TokError(
6413              "expected comma after first string for 'elseifidn' directive");
6414        return TokError(
6415            "expected comma after first string for 'elseifdif' directive");
6416      }
6417      Lex();
6418  
6419      if (parseTextItem(String2)) {
6420        if (ExpectEqual)
6421          return TokError(
6422              "expected text item parameter for 'elseifidn' directive");
6423        return TokError("expected text item parameter for 'elseifdif' directive");
6424      }
6425  
6426      if (CaseInsensitive)
6427        TheCondState.CondMet =
6428            ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6429      else
6430        TheCondState.CondMet = ExpectEqual == (String1 == String2);
6431      TheCondState.Ignore = !TheCondState.CondMet;
6432    }
6433  
6434    return false;
6435  }
6436  
6437  /// parseDirectiveElse
6438  /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)6439  bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6440    if (parseEOL())
6441      return true;
6442  
6443    if (TheCondState.TheCond != AsmCond::IfCond &&
6444        TheCondState.TheCond != AsmCond::ElseIfCond)
6445      return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6446                                 " or an elseif");
6447    TheCondState.TheCond = AsmCond::ElseCond;
6448    bool LastIgnoreState = false;
6449    if (!TheCondStack.empty())
6450      LastIgnoreState = TheCondStack.back().Ignore;
6451    if (LastIgnoreState || TheCondState.CondMet)
6452      TheCondState.Ignore = true;
6453    else
6454      TheCondState.Ignore = false;
6455  
6456    return false;
6457  }
6458  
6459  /// parseDirectiveEnd
6460  /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)6461  bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6462    if (parseEOL())
6463      return true;
6464  
6465    while (Lexer.isNot(AsmToken::Eof))
6466      Lexer.Lex();
6467  
6468    return false;
6469  }
6470  
6471  /// parseDirectiveError
6472  ///   ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)6473  bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6474    if (!TheCondStack.empty()) {
6475      if (TheCondStack.back().Ignore) {
6476        eatToEndOfStatement();
6477        return false;
6478      }
6479    }
6480  
6481    std::string Message = ".err directive invoked in source file";
6482    if (Lexer.isNot(AsmToken::EndOfStatement))
6483      Message = parseStringTo(AsmToken::EndOfStatement);
6484    Lex();
6485  
6486    return Error(DirectiveLoc, Message);
6487  }
6488  
6489  /// parseDirectiveErrorIfb
6490  ///   ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)6491  bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6492    if (!TheCondStack.empty()) {
6493      if (TheCondStack.back().Ignore) {
6494        eatToEndOfStatement();
6495        return false;
6496      }
6497    }
6498  
6499    std::string Text;
6500    if (parseTextItem(Text))
6501      return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6502  
6503    std::string Message = ".errb directive invoked in source file";
6504    if (Lexer.isNot(AsmToken::EndOfStatement)) {
6505      if (parseToken(AsmToken::Comma))
6506        return addErrorSuffix(" in '.errb' directive");
6507      Message = parseStringTo(AsmToken::EndOfStatement);
6508    }
6509    Lex();
6510  
6511    if (Text.empty() == ExpectBlank)
6512      return Error(DirectiveLoc, Message);
6513    return false;
6514  }
6515  
6516  /// parseDirectiveErrorIfdef
6517  ///   ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)6518  bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6519                                            bool ExpectDefined) {
6520    if (!TheCondStack.empty()) {
6521      if (TheCondStack.back().Ignore) {
6522        eatToEndOfStatement();
6523        return false;
6524      }
6525    }
6526  
6527    bool IsDefined = false;
6528    MCRegister Reg;
6529    SMLoc StartLoc, EndLoc;
6530    IsDefined =
6531        getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6532    if (!IsDefined) {
6533      StringRef Name;
6534      if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6535        return true;
6536  
6537      if (BuiltinSymbolMap.contains(Name.lower())) {
6538        IsDefined = true;
6539      } else if (Variables.contains(Name.lower())) {
6540        IsDefined = true;
6541      } else {
6542        MCSymbol *Sym = getContext().lookupSymbol(Name);
6543        IsDefined = (Sym && !Sym->isUndefined(false));
6544      }
6545    }
6546  
6547    std::string Message = ".errdef directive invoked in source file";
6548    if (Lexer.isNot(AsmToken::EndOfStatement)) {
6549      if (parseToken(AsmToken::Comma))
6550        return addErrorSuffix(" in '.errdef' directive");
6551      Message = parseStringTo(AsmToken::EndOfStatement);
6552    }
6553    Lex();
6554  
6555    if (IsDefined == ExpectDefined)
6556      return Error(DirectiveLoc, Message);
6557    return false;
6558  }
6559  
6560  /// parseDirectiveErrorIfidn
6561  ///   ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6562  bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6563                                            bool CaseInsensitive) {
6564    if (!TheCondStack.empty()) {
6565      if (TheCondStack.back().Ignore) {
6566        eatToEndOfStatement();
6567        return false;
6568      }
6569    }
6570  
6571    std::string String1, String2;
6572  
6573    if (parseTextItem(String1)) {
6574      if (ExpectEqual)
6575        return TokError("expected string parameter for '.erridn' directive");
6576      return TokError("expected string parameter for '.errdif' directive");
6577    }
6578  
6579    if (Lexer.isNot(AsmToken::Comma)) {
6580      if (ExpectEqual)
6581        return TokError(
6582            "expected comma after first string for '.erridn' directive");
6583      return TokError(
6584          "expected comma after first string for '.errdif' directive");
6585    }
6586    Lex();
6587  
6588    if (parseTextItem(String2)) {
6589      if (ExpectEqual)
6590        return TokError("expected string parameter for '.erridn' directive");
6591      return TokError("expected string parameter for '.errdif' directive");
6592    }
6593  
6594    std::string Message;
6595    if (ExpectEqual)
6596      Message = ".erridn directive invoked in source file";
6597    else
6598      Message = ".errdif directive invoked in source file";
6599    if (Lexer.isNot(AsmToken::EndOfStatement)) {
6600      if (parseToken(AsmToken::Comma))
6601        return addErrorSuffix(" in '.erridn' directive");
6602      Message = parseStringTo(AsmToken::EndOfStatement);
6603    }
6604    Lex();
6605  
6606    if (CaseInsensitive)
6607      TheCondState.CondMet =
6608          ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6609    else
6610      TheCondState.CondMet = ExpectEqual == (String1 == String2);
6611    TheCondState.Ignore = !TheCondState.CondMet;
6612  
6613    if ((CaseInsensitive &&
6614         ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6615        (ExpectEqual == (String1 == String2)))
6616      return Error(DirectiveLoc, Message);
6617    return false;
6618  }
6619  
6620  /// parseDirectiveErrorIfe
6621  ///   ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)6622  bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6623    if (!TheCondStack.empty()) {
6624      if (TheCondStack.back().Ignore) {
6625        eatToEndOfStatement();
6626        return false;
6627      }
6628    }
6629  
6630    int64_t ExprValue;
6631    if (parseAbsoluteExpression(ExprValue))
6632      return addErrorSuffix(" in '.erre' directive");
6633  
6634    std::string Message = ".erre directive invoked in source file";
6635    if (Lexer.isNot(AsmToken::EndOfStatement)) {
6636      if (parseToken(AsmToken::Comma))
6637        return addErrorSuffix(" in '.erre' directive");
6638      Message = parseStringTo(AsmToken::EndOfStatement);
6639    }
6640    Lex();
6641  
6642    if ((ExprValue == 0) == ExpectZero)
6643      return Error(DirectiveLoc, Message);
6644    return false;
6645  }
6646  
6647  /// parseDirectiveEndIf
6648  /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)6649  bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6650    if (parseEOL())
6651      return true;
6652  
6653    if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6654      return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6655                                 "an .if or .else");
6656    if (!TheCondStack.empty()) {
6657      TheCondState = TheCondStack.back();
6658      TheCondStack.pop_back();
6659    }
6660  
6661    return false;
6662  }
6663  
initializeDirectiveKindMap()6664  void MasmParser::initializeDirectiveKindMap() {
6665    DirectiveKindMap["="] = DK_ASSIGN;
6666    DirectiveKindMap["equ"] = DK_EQU;
6667    DirectiveKindMap["textequ"] = DK_TEXTEQU;
6668    // DirectiveKindMap[".ascii"] = DK_ASCII;
6669    // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6670    // DirectiveKindMap[".string"] = DK_STRING;
6671    DirectiveKindMap["byte"] = DK_BYTE;
6672    DirectiveKindMap["sbyte"] = DK_SBYTE;
6673    DirectiveKindMap["word"] = DK_WORD;
6674    DirectiveKindMap["sword"] = DK_SWORD;
6675    DirectiveKindMap["dword"] = DK_DWORD;
6676    DirectiveKindMap["sdword"] = DK_SDWORD;
6677    DirectiveKindMap["fword"] = DK_FWORD;
6678    DirectiveKindMap["qword"] = DK_QWORD;
6679    DirectiveKindMap["sqword"] = DK_SQWORD;
6680    DirectiveKindMap["real4"] = DK_REAL4;
6681    DirectiveKindMap["real8"] = DK_REAL8;
6682    DirectiveKindMap["real10"] = DK_REAL10;
6683    DirectiveKindMap["align"] = DK_ALIGN;
6684    DirectiveKindMap["even"] = DK_EVEN;
6685    DirectiveKindMap["org"] = DK_ORG;
6686    DirectiveKindMap["extern"] = DK_EXTERN;
6687    DirectiveKindMap["extrn"] = DK_EXTERN;
6688    DirectiveKindMap["public"] = DK_PUBLIC;
6689    // DirectiveKindMap[".comm"] = DK_COMM;
6690    DirectiveKindMap["comment"] = DK_COMMENT;
6691    DirectiveKindMap["include"] = DK_INCLUDE;
6692    DirectiveKindMap["repeat"] = DK_REPEAT;
6693    DirectiveKindMap["rept"] = DK_REPEAT;
6694    DirectiveKindMap["while"] = DK_WHILE;
6695    DirectiveKindMap["for"] = DK_FOR;
6696    DirectiveKindMap["irp"] = DK_FOR;
6697    DirectiveKindMap["forc"] = DK_FORC;
6698    DirectiveKindMap["irpc"] = DK_FORC;
6699    DirectiveKindMap["if"] = DK_IF;
6700    DirectiveKindMap["ife"] = DK_IFE;
6701    DirectiveKindMap["ifb"] = DK_IFB;
6702    DirectiveKindMap["ifnb"] = DK_IFNB;
6703    DirectiveKindMap["ifdef"] = DK_IFDEF;
6704    DirectiveKindMap["ifndef"] = DK_IFNDEF;
6705    DirectiveKindMap["ifdif"] = DK_IFDIF;
6706    DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6707    DirectiveKindMap["ifidn"] = DK_IFIDN;
6708    DirectiveKindMap["ifidni"] = DK_IFIDNI;
6709    DirectiveKindMap["elseif"] = DK_ELSEIF;
6710    DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6711    DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6712    DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6713    DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6714    DirectiveKindMap["else"] = DK_ELSE;
6715    DirectiveKindMap["end"] = DK_END;
6716    DirectiveKindMap["endif"] = DK_ENDIF;
6717    // DirectiveKindMap[".file"] = DK_FILE;
6718    // DirectiveKindMap[".line"] = DK_LINE;
6719    // DirectiveKindMap[".loc"] = DK_LOC;
6720    // DirectiveKindMap[".stabs"] = DK_STABS;
6721    // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6722    // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6723    // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6724    // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6725    // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6726    // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6727    // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6728    // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6729    // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6730    // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6731    // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6732    // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6733    // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6734    // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6735    // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6736    // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6737    // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6738    // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6739    // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6740    // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6741    // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6742    // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6743    // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6744    // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6745    // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6746    // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6747    // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6748    // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6749    // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6750    // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6751    // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6752    // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6753    // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6754    // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6755    DirectiveKindMap["macro"] = DK_MACRO;
6756    DirectiveKindMap["exitm"] = DK_EXITM;
6757    DirectiveKindMap["endm"] = DK_ENDM;
6758    DirectiveKindMap["purge"] = DK_PURGE;
6759    DirectiveKindMap[".err"] = DK_ERR;
6760    DirectiveKindMap[".errb"] = DK_ERRB;
6761    DirectiveKindMap[".errnb"] = DK_ERRNB;
6762    DirectiveKindMap[".errdef"] = DK_ERRDEF;
6763    DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6764    DirectiveKindMap[".errdif"] = DK_ERRDIF;
6765    DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6766    DirectiveKindMap[".erridn"] = DK_ERRIDN;
6767    DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6768    DirectiveKindMap[".erre"] = DK_ERRE;
6769    DirectiveKindMap[".errnz"] = DK_ERRNZ;
6770    DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6771    DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6772    DirectiveKindMap[".savereg"] = DK_SAVEREG;
6773    DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6774    DirectiveKindMap[".setframe"] = DK_SETFRAME;
6775    DirectiveKindMap[".radix"] = DK_RADIX;
6776    DirectiveKindMap["db"] = DK_DB;
6777    DirectiveKindMap["dd"] = DK_DD;
6778    DirectiveKindMap["df"] = DK_DF;
6779    DirectiveKindMap["dq"] = DK_DQ;
6780    DirectiveKindMap["dw"] = DK_DW;
6781    DirectiveKindMap["echo"] = DK_ECHO;
6782    DirectiveKindMap["struc"] = DK_STRUCT;
6783    DirectiveKindMap["struct"] = DK_STRUCT;
6784    DirectiveKindMap["union"] = DK_UNION;
6785    DirectiveKindMap["ends"] = DK_ENDS;
6786  }
6787  
isMacroLikeDirective()6788  bool MasmParser::isMacroLikeDirective() {
6789    if (getLexer().is(AsmToken::Identifier)) {
6790      bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6791                             .CasesLower("repeat", "rept", true)
6792                             .CaseLower("while", true)
6793                             .CasesLower("for", "irp", true)
6794                             .CasesLower("forc", "irpc", true)
6795                             .Default(false);
6796      if (IsMacroLike)
6797        return true;
6798    }
6799    if (peekTok().is(AsmToken::Identifier) &&
6800        peekTok().getIdentifier().equals_insensitive("macro"))
6801      return true;
6802  
6803    return false;
6804  }
6805  
parseMacroLikeBody(SMLoc DirectiveLoc)6806  MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6807    AsmToken EndToken, StartToken = getTok();
6808  
6809    unsigned NestLevel = 0;
6810    while (true) {
6811      // Check whether we have reached the end of the file.
6812      if (getLexer().is(AsmToken::Eof)) {
6813        printError(DirectiveLoc, "no matching 'endm' in definition");
6814        return nullptr;
6815      }
6816  
6817      if (isMacroLikeDirective())
6818        ++NestLevel;
6819  
6820      // Otherwise, check whether we have reached the endm.
6821      if (Lexer.is(AsmToken::Identifier) &&
6822          getTok().getIdentifier().equals_insensitive("endm")) {
6823        if (NestLevel == 0) {
6824          EndToken = getTok();
6825          Lex();
6826          if (Lexer.isNot(AsmToken::EndOfStatement)) {
6827            printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6828            return nullptr;
6829          }
6830          break;
6831        }
6832        --NestLevel;
6833      }
6834  
6835      // Otherwise, scan till the end of the statement.
6836      eatToEndOfStatement();
6837    }
6838  
6839    const char *BodyStart = StartToken.getLoc().getPointer();
6840    const char *BodyEnd = EndToken.getLoc().getPointer();
6841    StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6842  
6843    // We Are Anonymous.
6844    MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6845    return &MacroLikeBodies.back();
6846  }
6847  
expandStatement(SMLoc Loc)6848  bool MasmParser::expandStatement(SMLoc Loc) {
6849    std::string Body = parseStringTo(AsmToken::EndOfStatement);
6850    SMLoc EndLoc = getTok().getLoc();
6851  
6852    MCAsmMacroParameters Parameters;
6853    MCAsmMacroArguments Arguments;
6854  
6855    StringMap<std::string> BuiltinValues;
6856    for (const auto &S : BuiltinSymbolMap) {
6857      const BuiltinSymbol &Sym = S.getValue();
6858      if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6859        BuiltinValues[S.getKey().lower()] = std::move(*Text);
6860      }
6861    }
6862    for (const auto &B : BuiltinValues) {
6863      MCAsmMacroParameter P;
6864      MCAsmMacroArgument A;
6865      P.Name = B.getKey();
6866      P.Required = true;
6867      A.push_back(AsmToken(AsmToken::String, B.getValue()));
6868  
6869      Parameters.push_back(std::move(P));
6870      Arguments.push_back(std::move(A));
6871    }
6872  
6873    for (const auto &V : Variables) {
6874      const Variable &Var = V.getValue();
6875      if (Var.IsText) {
6876        MCAsmMacroParameter P;
6877        MCAsmMacroArgument A;
6878        P.Name = Var.Name;
6879        P.Required = true;
6880        A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6881  
6882        Parameters.push_back(std::move(P));
6883        Arguments.push_back(std::move(A));
6884      }
6885    }
6886    MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6887    MCAsmMacro M = MacroLikeBodies.back();
6888  
6889    // Expand the statement in a new buffer.
6890    SmallString<80> Buf;
6891    raw_svector_ostream OS(Buf);
6892    if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6893      return true;
6894    std::unique_ptr<MemoryBuffer> Expansion =
6895        MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6896  
6897    // Jump to the expanded statement and prime the lexer.
6898    CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6899    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6900    EndStatementAtEOFStack.push_back(false);
6901    Lex();
6902    return false;
6903  }
6904  
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)6905  void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6906                                            raw_svector_ostream &OS) {
6907    instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6908  }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)6909  void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6910                                            SMLoc ExitLoc,
6911                                            raw_svector_ostream &OS) {
6912    OS << "endm\n";
6913  
6914    std::unique_ptr<MemoryBuffer> Instantiation =
6915        MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6916  
6917    // Create the macro instantiation object and add to the current macro
6918    // instantiation stack.
6919    MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6920                                                    ExitLoc, TheCondStack.size()};
6921    ActiveMacros.push_back(MI);
6922  
6923    // Jump to the macro instantiation and prime the lexer.
6924    CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6925    Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6926    EndStatementAtEOFStack.push_back(true);
6927    Lex();
6928  }
6929  
6930  /// parseDirectiveRepeat
6931  ///   ::= ("repeat" | "rept") count
6932  ///       body
6933  ///     endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)6934  bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6935    const MCExpr *CountExpr;
6936    SMLoc CountLoc = getTok().getLoc();
6937    if (parseExpression(CountExpr))
6938      return true;
6939  
6940    int64_t Count;
6941    if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6942      return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6943    }
6944  
6945    if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
6946      return true;
6947  
6948    // Lex the repeat definition.
6949    MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6950    if (!M)
6951      return true;
6952  
6953    // Macro instantiation is lexical, unfortunately. We construct a new buffer
6954    // to hold the macro body with substitutions.
6955    SmallString<256> Buf;
6956    raw_svector_ostream OS(Buf);
6957    while (Count--) {
6958      if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6959                      getTok().getLoc()))
6960        return true;
6961    }
6962    instantiateMacroLikeBody(M, DirectiveLoc, OS);
6963  
6964    return false;
6965  }
6966  
6967  /// parseDirectiveWhile
6968  /// ::= "while" expression
6969  ///       body
6970  ///     endm
parseDirectiveWhile(SMLoc DirectiveLoc)6971  bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6972    const MCExpr *CondExpr;
6973    SMLoc CondLoc = getTok().getLoc();
6974    if (parseExpression(CondExpr))
6975      return true;
6976  
6977    // Lex the repeat definition.
6978    MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6979    if (!M)
6980      return true;
6981  
6982    // Macro instantiation is lexical, unfortunately. We construct a new buffer
6983    // to hold the macro body with substitutions.
6984    SmallString<256> Buf;
6985    raw_svector_ostream OS(Buf);
6986    int64_t Condition;
6987    if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6988      return Error(CondLoc, "expected absolute expression in 'while' directive");
6989    if (Condition) {
6990      // Instantiate the macro, then resume at this directive to recheck the
6991      // condition.
6992      if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6993                      getTok().getLoc()))
6994        return true;
6995      instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6996    }
6997  
6998    return false;
6999  }
7000  
7001  /// parseDirectiveFor
7002  /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7003  ///       body
7004  ///     endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)7005  bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7006    MCAsmMacroParameter Parameter;
7007    MCAsmMacroArguments A;
7008    if (check(parseIdentifier(Parameter.Name),
7009              "expected identifier in '" + Dir + "' directive"))
7010      return true;
7011  
7012    // Parse optional qualifier (default value, or "req")
7013    if (parseOptionalToken(AsmToken::Colon)) {
7014      if (parseOptionalToken(AsmToken::Equal)) {
7015        // Default value
7016        SMLoc ParamLoc;
7017  
7018        ParamLoc = Lexer.getLoc();
7019        if (parseMacroArgument(nullptr, Parameter.Value))
7020          return true;
7021      } else {
7022        SMLoc QualLoc;
7023        StringRef Qualifier;
7024  
7025        QualLoc = Lexer.getLoc();
7026        if (parseIdentifier(Qualifier))
7027          return Error(QualLoc, "missing parameter qualifier for "
7028                                "'" +
7029                                    Parameter.Name + "' in '" + Dir +
7030                                    "' directive");
7031  
7032        if (Qualifier.equals_insensitive("req"))
7033          Parameter.Required = true;
7034        else
7035          return Error(QualLoc,
7036                       Qualifier + " is not a valid parameter qualifier for '" +
7037                           Parameter.Name + "' in '" + Dir + "' directive");
7038      }
7039    }
7040  
7041    if (parseToken(AsmToken::Comma,
7042                   "expected comma in '" + Dir + "' directive") ||
7043        parseToken(AsmToken::Less,
7044                   "values in '" + Dir +
7045                       "' directive must be enclosed in angle brackets"))
7046      return true;
7047  
7048    while (true) {
7049      A.emplace_back();
7050      if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7051        return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7052  
7053      // If we see a comma, continue, and allow line continuation.
7054      if (!parseOptionalToken(AsmToken::Comma))
7055        break;
7056      parseOptionalToken(AsmToken::EndOfStatement);
7057    }
7058  
7059    if (parseToken(AsmToken::Greater,
7060                   "values in '" + Dir +
7061                       "' directive must be enclosed in angle brackets") ||
7062        parseEOL())
7063      return true;
7064  
7065    // Lex the for definition.
7066    MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7067    if (!M)
7068      return true;
7069  
7070    // Macro instantiation is lexical, unfortunately. We construct a new buffer
7071    // to hold the macro body with substitutions.
7072    SmallString<256> Buf;
7073    raw_svector_ostream OS(Buf);
7074  
7075    for (const MCAsmMacroArgument &Arg : A) {
7076      if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7077        return true;
7078    }
7079  
7080    instantiateMacroLikeBody(M, DirectiveLoc, OS);
7081  
7082    return false;
7083  }
7084  
7085  /// parseDirectiveForc
7086  /// ::= ("forc" | "irpc") symbol, <string>
7087  ///       body
7088  ///     endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)7089  bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7090    MCAsmMacroParameter Parameter;
7091  
7092    std::string Argument;
7093    if (check(parseIdentifier(Parameter.Name),
7094              "expected identifier in '" + Directive + "' directive") ||
7095        parseToken(AsmToken::Comma,
7096                   "expected comma in '" + Directive + "' directive"))
7097      return true;
7098    if (parseAngleBracketString(Argument)) {
7099      // Match ml64.exe; treat all characters to end of statement as a string,
7100      // ignoring comment markers, then discard anything following a space (using
7101      // the C locale).
7102      Argument = parseStringTo(AsmToken::EndOfStatement);
7103      if (getTok().is(AsmToken::EndOfStatement))
7104        Argument += getTok().getString();
7105      size_t End = 0;
7106      for (; End < Argument.size(); ++End) {
7107        if (isSpace(Argument[End]))
7108          break;
7109      }
7110      Argument.resize(End);
7111    }
7112    if (parseEOL())
7113      return true;
7114  
7115    // Lex the irpc definition.
7116    MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7117    if (!M)
7118      return true;
7119  
7120    // Macro instantiation is lexical, unfortunately. We construct a new buffer
7121    // to hold the macro body with substitutions.
7122    SmallString<256> Buf;
7123    raw_svector_ostream OS(Buf);
7124  
7125    StringRef Values(Argument);
7126    for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7127      MCAsmMacroArgument Arg;
7128      Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
7129  
7130      if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7131        return true;
7132    }
7133  
7134    instantiateMacroLikeBody(M, DirectiveLoc, OS);
7135  
7136    return false;
7137  }
7138  
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)7139  bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7140                                        size_t Len) {
7141    const MCExpr *Value;
7142    SMLoc ExprLoc = getLexer().getLoc();
7143    if (parseExpression(Value))
7144      return true;
7145    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7146    if (!MCE)
7147      return Error(ExprLoc, "unexpected expression in _emit");
7148    uint64_t IntValue = MCE->getValue();
7149    if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7150      return Error(ExprLoc, "literal value out of range for directive");
7151  
7152    Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7153    return false;
7154  }
7155  
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)7156  bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7157    const MCExpr *Value;
7158    SMLoc ExprLoc = getLexer().getLoc();
7159    if (parseExpression(Value))
7160      return true;
7161    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7162    if (!MCE)
7163      return Error(ExprLoc, "unexpected expression in align");
7164    uint64_t IntValue = MCE->getValue();
7165    if (!isPowerOf2_64(IntValue))
7166      return Error(ExprLoc, "literal value not a power of two greater then zero");
7167  
7168    Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7169    return false;
7170  }
7171  
parseDirectiveRadix(SMLoc DirectiveLoc)7172  bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7173    const SMLoc Loc = getLexer().getLoc();
7174    std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7175    StringRef RadixString = StringRef(RadixStringRaw).trim();
7176    unsigned Radix;
7177    if (RadixString.getAsInteger(10, Radix)) {
7178      return Error(Loc,
7179                   "radix must be a decimal number in the range 2 to 16; was " +
7180                       RadixString);
7181    }
7182    if (Radix < 2 || Radix > 16)
7183      return Error(Loc, "radix must be in the range 2 to 16; was " +
7184                            std::to_string(Radix));
7185    getLexer().setMasmDefaultRadix(Radix);
7186    return false;
7187  }
7188  
7189  /// parseDirectiveEcho
7190  ///   ::= "echo" message
parseDirectiveEcho(SMLoc DirectiveLoc)7191  bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7192    std::string Message = parseStringTo(AsmToken::EndOfStatement);
7193    llvm::outs() << Message;
7194    if (!StringRef(Message).ends_with("\n"))
7195      llvm::outs() << '\n';
7196    return false;
7197  }
7198  
7199  // We are comparing pointers, but the pointers are relative to a single string.
7200  // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)7201  static int rewritesSort(const AsmRewrite *AsmRewriteA,
7202                          const AsmRewrite *AsmRewriteB) {
7203    if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7204      return -1;
7205    if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7206      return 1;
7207  
7208    // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7209    // rewrite to the same location.  Make sure the SizeDirective rewrite is
7210    // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
7211    // ensures the sort algorithm is stable.
7212    if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7213        AsmRewritePrecedence[AsmRewriteB->Kind])
7214      return -1;
7215  
7216    if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7217        AsmRewritePrecedence[AsmRewriteB->Kind])
7218      return 1;
7219    llvm_unreachable("Unstable rewrite sort.");
7220  }
7221  
defineMacro(StringRef Name,StringRef Value)7222  bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7223    Variable &Var = Variables[Name.lower()];
7224    if (Var.Name.empty()) {
7225      Var.Name = Name;
7226    } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7227      return Error(SMLoc(), "invalid variable redefinition");
7228    } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7229               Warning(SMLoc(), "redefining '" + Name +
7230                                    "', already defined on the command line")) {
7231      return true;
7232    }
7233    Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7234    Var.IsText = true;
7235    Var.TextValue = Value.str();
7236    return false;
7237  }
7238  
lookUpField(StringRef Name,AsmFieldInfo & Info) const7239  bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7240    const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7241    const StringRef Base = BaseMember.first, Member = BaseMember.second;
7242    return lookUpField(Base, Member, Info);
7243  }
7244  
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const7245  bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7246                               AsmFieldInfo &Info) const {
7247    if (Base.empty())
7248      return true;
7249  
7250    AsmFieldInfo BaseInfo;
7251    if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7252      Base = BaseInfo.Type.Name;
7253  
7254    auto StructIt = Structs.find(Base.lower());
7255    auto TypeIt = KnownType.find(Base.lower());
7256    if (TypeIt != KnownType.end()) {
7257      StructIt = Structs.find(TypeIt->second.Name.lower());
7258    }
7259    if (StructIt != Structs.end())
7260      return lookUpField(StructIt->second, Member, Info);
7261  
7262    return true;
7263  }
7264  
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const7265  bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7266                               AsmFieldInfo &Info) const {
7267    if (Member.empty()) {
7268      Info.Type.Name = Structure.Name;
7269      Info.Type.Size = Structure.Size;
7270      Info.Type.ElementSize = Structure.Size;
7271      Info.Type.Length = 1;
7272      return false;
7273    }
7274  
7275    std::pair<StringRef, StringRef> Split = Member.split('.');
7276    const StringRef FieldName = Split.first, FieldMember = Split.second;
7277  
7278    auto StructIt = Structs.find(FieldName.lower());
7279    if (StructIt != Structs.end())
7280      return lookUpField(StructIt->second, FieldMember, Info);
7281  
7282    auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7283    if (FieldIt == Structure.FieldsByName.end())
7284      return true;
7285  
7286    const FieldInfo &Field = Structure.Fields[FieldIt->second];
7287    if (FieldMember.empty()) {
7288      Info.Offset += Field.Offset;
7289      Info.Type.Size = Field.SizeOf;
7290      Info.Type.ElementSize = Field.Type;
7291      Info.Type.Length = Field.LengthOf;
7292      if (Field.Contents.FT == FT_STRUCT)
7293        Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7294      else
7295        Info.Type.Name = "";
7296      return false;
7297    }
7298  
7299    if (Field.Contents.FT != FT_STRUCT)
7300      return true;
7301    const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7302  
7303    if (lookUpField(StructInfo.Structure, FieldMember, Info))
7304      return true;
7305  
7306    Info.Offset += Field.Offset;
7307    return false;
7308  }
7309  
lookUpType(StringRef Name,AsmTypeInfo & Info) const7310  bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7311    unsigned Size = StringSwitch<unsigned>(Name)
7312                        .CasesLower("byte", "db", "sbyte", 1)
7313                        .CasesLower("word", "dw", "sword", 2)
7314                        .CasesLower("dword", "dd", "sdword", 4)
7315                        .CasesLower("fword", "df", 6)
7316                        .CasesLower("qword", "dq", "sqword", 8)
7317                        .CaseLower("real4", 4)
7318                        .CaseLower("real8", 8)
7319                        .CaseLower("real10", 10)
7320                        .Default(0);
7321    if (Size) {
7322      Info.Name = Name;
7323      Info.ElementSize = Size;
7324      Info.Length = 1;
7325      Info.Size = Size;
7326      return false;
7327    }
7328  
7329    auto StructIt = Structs.find(Name.lower());
7330    if (StructIt != Structs.end()) {
7331      const StructInfo &Structure = StructIt->second;
7332      Info.Name = Name;
7333      Info.ElementSize = Structure.Size;
7334      Info.Length = 1;
7335      Info.Size = Structure.Size;
7336      return false;
7337    }
7338  
7339    return true;
7340  }
7341  
parseMSInlineAsm(std::string & AsmString,unsigned & NumOutputs,unsigned & NumInputs,SmallVectorImpl<std::pair<void *,bool>> & OpDecls,SmallVectorImpl<std::string> & Constraints,SmallVectorImpl<std::string> & Clobbers,const MCInstrInfo * MII,const MCInstPrinter * IP,MCAsmParserSemaCallback & SI)7342  bool MasmParser::parseMSInlineAsm(
7343      std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
7344      SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7345      SmallVectorImpl<std::string> &Constraints,
7346      SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7347      const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7348    SmallVector<void *, 4> InputDecls;
7349    SmallVector<void *, 4> OutputDecls;
7350    SmallVector<bool, 4> InputDeclsAddressOf;
7351    SmallVector<bool, 4> OutputDeclsAddressOf;
7352    SmallVector<std::string, 4> InputConstraints;
7353    SmallVector<std::string, 4> OutputConstraints;
7354    SmallVector<unsigned, 4> ClobberRegs;
7355  
7356    SmallVector<AsmRewrite, 4> AsmStrRewrites;
7357  
7358    // Prime the lexer.
7359    Lex();
7360  
7361    // While we have input, parse each statement.
7362    unsigned InputIdx = 0;
7363    unsigned OutputIdx = 0;
7364    while (getLexer().isNot(AsmToken::Eof)) {
7365      // Parse curly braces marking block start/end.
7366      if (parseCurlyBlockScope(AsmStrRewrites))
7367        continue;
7368  
7369      ParseStatementInfo Info(&AsmStrRewrites);
7370      bool StatementErr = parseStatement(Info, &SI);
7371  
7372      if (StatementErr || Info.ParseError) {
7373        // Emit pending errors if any exist.
7374        printPendingErrors();
7375        return true;
7376      }
7377  
7378      // No pending error should exist here.
7379      assert(!hasPendingError() && "unexpected error from parseStatement");
7380  
7381      if (Info.Opcode == ~0U)
7382        continue;
7383  
7384      const MCInstrDesc &Desc = MII->get(Info.Opcode);
7385  
7386      // Build the list of clobbers, outputs and inputs.
7387      for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7388        MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7389  
7390        // Register operand.
7391        if (Operand.isReg() && !Operand.needAddressOf() &&
7392            !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7393          unsigned NumDefs = Desc.getNumDefs();
7394          // Clobber.
7395          if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7396            ClobberRegs.push_back(Operand.getReg());
7397          continue;
7398        }
7399  
7400        // Expr/Input or Output.
7401        StringRef SymName = Operand.getSymName();
7402        if (SymName.empty())
7403          continue;
7404  
7405        void *OpDecl = Operand.getOpDecl();
7406        if (!OpDecl)
7407          continue;
7408  
7409        StringRef Constraint = Operand.getConstraint();
7410        if (Operand.isImm()) {
7411          // Offset as immediate.
7412          if (Operand.isOffsetOfLocal())
7413            Constraint = "r";
7414          else
7415            Constraint = "i";
7416        }
7417  
7418        bool isOutput = (i == 1) && Desc.mayStore();
7419        SMLoc Start = SMLoc::getFromPointer(SymName.data());
7420        if (isOutput) {
7421          ++InputIdx;
7422          OutputDecls.push_back(OpDecl);
7423          OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7424          OutputConstraints.push_back(("=" + Constraint).str());
7425          AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7426        } else {
7427          InputDecls.push_back(OpDecl);
7428          InputDeclsAddressOf.push_back(Operand.needAddressOf());
7429          InputConstraints.push_back(Constraint.str());
7430          if (Desc.operands()[i - 1].isBranchTarget())
7431            AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7432          else
7433            AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7434        }
7435      }
7436  
7437      // Consider implicit defs to be clobbers.  Think of cpuid and push.
7438      llvm::append_range(ClobberRegs, Desc.implicit_defs());
7439    }
7440  
7441    // Set the number of Outputs and Inputs.
7442    NumOutputs = OutputDecls.size();
7443    NumInputs = InputDecls.size();
7444  
7445    // Set the unique clobbers.
7446    array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7447    ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
7448    Clobbers.assign(ClobberRegs.size(), std::string());
7449    for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7450      raw_string_ostream OS(Clobbers[I]);
7451      IP->printRegName(OS, ClobberRegs[I]);
7452    }
7453  
7454    // Merge the various outputs and inputs.  Output are expected first.
7455    if (NumOutputs || NumInputs) {
7456      unsigned NumExprs = NumOutputs + NumInputs;
7457      OpDecls.resize(NumExprs);
7458      Constraints.resize(NumExprs);
7459      for (unsigned i = 0; i < NumOutputs; ++i) {
7460        OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7461        Constraints[i] = OutputConstraints[i];
7462      }
7463      for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7464        OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7465        Constraints[j] = InputConstraints[i];
7466      }
7467    }
7468  
7469    // Build the IR assembly string.
7470    std::string AsmStringIR;
7471    raw_string_ostream OS(AsmStringIR);
7472    StringRef ASMString =
7473        SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7474    const char *AsmStart = ASMString.begin();
7475    const char *AsmEnd = ASMString.end();
7476    array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7477    for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
7478      const AsmRewrite &AR = *I;
7479      // Check if this has already been covered by another rewrite...
7480      if (AR.Done)
7481        continue;
7482      AsmRewriteKind Kind = AR.Kind;
7483  
7484      const char *Loc = AR.Loc.getPointer();
7485      assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7486  
7487      // Emit everything up to the immediate/expression.
7488      if (unsigned Len = Loc - AsmStart)
7489        OS << StringRef(AsmStart, Len);
7490  
7491      // Skip the original expression.
7492      if (Kind == AOK_Skip) {
7493        AsmStart = Loc + AR.Len;
7494        continue;
7495      }
7496  
7497      unsigned AdditionalSkip = 0;
7498      // Rewrite expressions in $N notation.
7499      switch (Kind) {
7500      default:
7501        break;
7502      case AOK_IntelExpr:
7503        assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7504        if (AR.IntelExp.NeedBracs)
7505          OS << "[";
7506        if (AR.IntelExp.hasBaseReg())
7507          OS << AR.IntelExp.BaseReg;
7508        if (AR.IntelExp.hasIndexReg())
7509          OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7510             << AR.IntelExp.IndexReg;
7511        if (AR.IntelExp.Scale > 1)
7512          OS << " * $$" << AR.IntelExp.Scale;
7513        if (AR.IntelExp.hasOffset()) {
7514          if (AR.IntelExp.hasRegs())
7515            OS << " + ";
7516          // Fuse this rewrite with a rewrite of the offset name, if present.
7517          StringRef OffsetName = AR.IntelExp.OffsetName;
7518          SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7519          size_t OffsetLen = OffsetName.size();
7520          auto rewrite_it = std::find_if(
7521              I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7522                return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7523                       (FusingAR.Kind == AOK_Input ||
7524                        FusingAR.Kind == AOK_CallInput);
7525              });
7526          if (rewrite_it == AsmStrRewrites.end()) {
7527            OS << "offset " << OffsetName;
7528          } else if (rewrite_it->Kind == AOK_CallInput) {
7529            OS << "${" << InputIdx++ << ":P}";
7530            rewrite_it->Done = true;
7531          } else {
7532            OS << '$' << InputIdx++;
7533            rewrite_it->Done = true;
7534          }
7535        }
7536        if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7537          OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7538        if (AR.IntelExp.NeedBracs)
7539          OS << "]";
7540        break;
7541      case AOK_Label:
7542        OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7543        break;
7544      case AOK_Input:
7545        OS << '$' << InputIdx++;
7546        break;
7547      case AOK_CallInput:
7548        OS << "${" << InputIdx++ << ":P}";
7549        break;
7550      case AOK_Output:
7551        OS << '$' << OutputIdx++;
7552        break;
7553      case AOK_SizeDirective:
7554        switch (AR.Val) {
7555        default: break;
7556        case 8:  OS << "byte ptr "; break;
7557        case 16: OS << "word ptr "; break;
7558        case 32: OS << "dword ptr "; break;
7559        case 64: OS << "qword ptr "; break;
7560        case 80: OS << "xword ptr "; break;
7561        case 128: OS << "xmmword ptr "; break;
7562        case 256: OS << "ymmword ptr "; break;
7563        }
7564        break;
7565      case AOK_Emit:
7566        OS << ".byte";
7567        break;
7568      case AOK_Align: {
7569        // MS alignment directives are measured in bytes. If the native assembler
7570        // measures alignment in bytes, we can pass it straight through.
7571        OS << ".align";
7572        if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7573          break;
7574  
7575        // Alignment is in log2 form, so print that instead and skip the original
7576        // immediate.
7577        unsigned Val = AR.Val;
7578        OS << ' ' << Val;
7579        assert(Val < 10 && "Expected alignment less then 2^10.");
7580        AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7581        break;
7582      }
7583      case AOK_EVEN:
7584        OS << ".even";
7585        break;
7586      case AOK_EndOfStatement:
7587        OS << "\n\t";
7588        break;
7589      }
7590  
7591      // Skip the original expression.
7592      AsmStart = Loc + AR.Len + AdditionalSkip;
7593    }
7594  
7595    // Emit the remainder of the asm string.
7596    if (AsmStart != AsmEnd)
7597      OS << StringRef(AsmStart, AsmEnd - AsmStart);
7598  
7599    AsmString = OS.str();
7600    return false;
7601  }
7602  
initializeBuiltinSymbolMap()7603  void MasmParser::initializeBuiltinSymbolMap() {
7604    // Numeric built-ins (supported in all versions)
7605    BuiltinSymbolMap["@version"] = BI_VERSION;
7606    BuiltinSymbolMap["@line"] = BI_LINE;
7607  
7608    // Text built-ins (supported in all versions)
7609    BuiltinSymbolMap["@date"] = BI_DATE;
7610    BuiltinSymbolMap["@time"] = BI_TIME;
7611    BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7612    BuiltinSymbolMap["@filename"] = BI_FILENAME;
7613    BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7614  
7615    // Some built-ins exist only for MASM32 (32-bit x86)
7616    if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7617        Triple::x86) {
7618      // Numeric built-ins
7619      // BuiltinSymbolMap["@cpu"] = BI_CPU;
7620      // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7621      // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7622      // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7623      // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7624      // BuiltinSymbolMap["@model"] = BI_MODEL;
7625  
7626      // Text built-ins
7627      // BuiltinSymbolMap["@code"] = BI_CODE;
7628      // BuiltinSymbolMap["@data"] = BI_DATA;
7629      // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7630      // BuiltinSymbolMap["@stack"] = BI_STACK;
7631    }
7632  }
7633  
evaluateBuiltinValue(BuiltinSymbol Symbol,SMLoc StartLoc)7634  const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7635                                                 SMLoc StartLoc) {
7636    switch (Symbol) {
7637    default:
7638      return nullptr;
7639    case BI_VERSION:
7640      // Match a recent version of ML.EXE.
7641      return MCConstantExpr::create(1427, getContext());
7642    case BI_LINE: {
7643      int64_t Line;
7644      if (ActiveMacros.empty())
7645        Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7646      else
7647        Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7648                                     ActiveMacros.front()->ExitBuffer);
7649      return MCConstantExpr::create(Line, getContext());
7650    }
7651    }
7652    llvm_unreachable("unhandled built-in symbol");
7653  }
7654  
7655  std::optional<std::string>
evaluateBuiltinTextMacro(BuiltinSymbol Symbol,SMLoc StartLoc)7656  MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7657    switch (Symbol) {
7658    default:
7659      return {};
7660    case BI_DATE: {
7661      // Current local date, formatted MM/DD/YY
7662      char TmpBuffer[sizeof("mm/dd/yy")];
7663      const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7664      return std::string(TmpBuffer, Len);
7665    }
7666    case BI_TIME: {
7667      // Current local time, formatted HH:MM:SS (24-hour clock)
7668      char TmpBuffer[sizeof("hh:mm:ss")];
7669      const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7670      return std::string(TmpBuffer, Len);
7671    }
7672    case BI_FILECUR:
7673      return SrcMgr
7674          .getMemoryBuffer(
7675              ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7676          ->getBufferIdentifier()
7677          .str();
7678    case BI_FILENAME:
7679      return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7680                                 ->getBufferIdentifier())
7681          .upper();
7682    case BI_CURSEG:
7683      return getStreamer().getCurrentSectionOnly()->getName().str();
7684    }
7685    llvm_unreachable("unhandled built-in symbol");
7686  }
7687  
7688  /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)7689  MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7690                                        MCStreamer &Out, const MCAsmInfo &MAI,
7691                                        struct tm TM, unsigned CB) {
7692    return new MasmParser(SM, C, Out, MAI, TM, CB);
7693  }
7694