1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCCodeView.h"
27 #include "llvm/MC/MCContext.h"
28 #include "llvm/MC/MCDirectives.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/MC/MCInstPrinter.h"
31 #include "llvm/MC/MCInstrDesc.h"
32 #include "llvm/MC/MCInstrInfo.h"
33 #include "llvm/MC/MCParser/AsmCond.h"
34 #include "llvm/MC/MCParser/AsmLexer.h"
35 #include "llvm/MC/MCParser/MCAsmParser.h"
36 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
38 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
39 #include "llvm/MC/MCSection.h"
40 #include "llvm/MC/MCStreamer.h"
41 #include "llvm/MC/MCSubtargetInfo.h"
42 #include "llvm/MC/MCSymbol.h"
43 #include "llvm/MC/MCTargetOptions.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/Format.h"
48 #include "llvm/Support/MD5.h"
49 #include "llvm/Support/MathExtras.h"
50 #include "llvm/Support/MemoryBuffer.h"
51 #include "llvm/Support/Path.h"
52 #include "llvm/Support/SMLoc.h"
53 #include "llvm/Support/SourceMgr.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <climits>
58 #include <cstddef>
59 #include <cstdint>
60 #include <ctime>
61 #include <deque>
62 #include <memory>
63 #include <optional>
64 #include <sstream>
65 #include <string>
66 #include <tuple>
67 #include <utility>
68 #include <vector>
69
70 using namespace llvm;
71
72 namespace {
73
74 /// Helper types for tracking macro definitions.
75 typedef std::vector<AsmToken> MCAsmMacroArgument;
76 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
77
/// Helper class for storing information about an active macro instantiation.
///
/// ExitBuffer and CondStackDepth have no default member initializers, so they
/// hold indeterminate values unless whoever creates the object sets them.
struct MacroInstantiation {
  /// The location of the instantiation.
  SMLoc InstantiationLoc;

  /// The buffer where parsing should resume upon instantiation completion.
  unsigned ExitBuffer;

  /// The location where parsing should resume upon instantiation completion.
  SMLoc ExitLoc;

  /// The depth of TheCondStack at the start of the instantiation.
  size_t CondStackDepth;
};
92
/// State describing the most recently parsed statement, together with the
/// rewrite list that was handed to the constructor.
struct ParseStatementInfo {
  /// The parsed operands from the last parsed statement.
  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;

  /// The opcode from the last parsed instruction.
  unsigned Opcode = ~0U;

  /// Was there an error parsing the inline assembly?
  bool ParseError = false;

  /// The value associated with a macro exit.
  std::optional<std::string> ExitValue;

  /// Raw pointer to the rewrite list passed to the constructor. Nothing in
  /// this struct deletes it.
  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;

  // Default construction is disabled; a rewrite-list pointer must always be
  // passed in.
  ParseStatementInfo() = delete;
  ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
      : AsmRewrites(rewrites) {}
};
112
/// Tag that selects which kind of initializer a struct field carries.
enum FieldType {
  FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
  FT_REAL,     // Initializer: real number, stored as an APInt.
  FT_STRUCT    // Initializer: struct initializer, stored recursively.
};
118
119 struct FieldInfo;
/// Description of a struct or union type: layout bookkeeping plus its fields.
struct StructInfo {
  StringRef Name;
  bool IsUnion = false;
  bool Initializable = true;
  // Cap on per-field alignment: addField aligns each field to the smaller of
  // this value and the field's own alignment size.
  unsigned Alignment = 0;
  // Largest FieldAlignmentSize passed to addField so far.
  unsigned AlignmentSize = 0;
  // Offset from which addField starts aligning the next field.
  unsigned NextOffset = 0;
  unsigned Size = 0;
  std::vector<FieldInfo> Fields;
  // Maps a lower-cased field name to that field's index in Fields.
  StringMap<size_t> FieldsByName;

  // Appends a new field of kind FT and returns a reference to it. Unnamed
  // fields (empty FieldName) are not entered into FieldsByName.
  FieldInfo &addField(StringRef FieldName, FieldType FT,
                      unsigned FieldAlignmentSize);

  StructInfo() = default;
  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
};
137
138 // FIXME: This should probably use a class hierarchy, raw pointers between the
139 // objects, and dynamic type resolution instead of a union. On the other hand,
140 // ownership then becomes much more complicated; the obvious thing would be to
141 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
142
143 struct StructInitializer;
144 struct IntFieldInfo {
145 SmallVector<const MCExpr *, 1> Values;
146
147 IntFieldInfo() = default;
IntFieldInfo__anon60b61cd60111::IntFieldInfo148 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon60b61cd60111::IntFieldInfo149 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
150 };
151 struct RealFieldInfo {
152 SmallVector<APInt, 1> AsIntValues;
153
154 RealFieldInfo() = default;
RealFieldInfo__anon60b61cd60111::RealFieldInfo155 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon60b61cd60111::RealFieldInfo156 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
157 };
/// Contents of a struct-typed field: a list of struct initializers together
/// with a copy of the struct's definition.
struct StructFieldInfo {
  std::vector<StructInitializer> Initializers;
  StructInfo Structure;

  StructFieldInfo() = default;
  // Both arguments are taken by value.
  StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
};
165
/// Tagged union holding the initializer for one field. FT records which of
/// the three union members is in use. The special member functions are all
/// user-declared because the union members are class types that have to be
/// constructed and destroyed explicitly.
class FieldInitializer {
public:
  FieldType FT;
  union {
    IntFieldInfo IntInfo;
    RealFieldInfo RealInfo;
    StructFieldInfo StructInfo;
  };

  ~FieldInitializer();
  // Creates an initializer of the given kind with default-constructed
  // contents.
  FieldInitializer(FieldType FT);

  // Each of these fixes FT from the argument type.
  FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
  FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
  // "struct StructInfo" is written out because the union member named
  // StructInfo hides the type name inside this class.
  FieldInitializer(std::vector<StructInitializer> &&Initializers,
                   struct StructInfo Structure);

  FieldInitializer(const FieldInitializer &Initializer);
  FieldInitializer(FieldInitializer &&Initializer);

  FieldInitializer &operator=(const FieldInitializer &Initializer);
  FieldInitializer &operator=(FieldInitializer &&Initializer);
};
189
/// Initializer for one struct instance: a list of per-field initializers.
struct StructInitializer {
  std::vector<FieldInitializer> FieldInitializers;
};
193
/// Layout and contents of a single field inside a STRUCT.
struct FieldInfo {
  // Offset of the field within the containing STRUCT.
  unsigned Offset = 0;

  // Total size of the field (= LengthOf * Type).
  unsigned SizeOf = 0;

  // Number of elements in the field (1 if scalar, >1 if an array).
  unsigned LengthOf = 0;

  // Size of a single entry in this field, in bytes ("type" in MASM standards).
  unsigned Type = 0;

  // Initializer for the field; its kind is fixed by the FieldType passed to
  // the constructor.
  FieldInitializer Contents;

  FieldInfo(FieldType FT) : Contents(FT) {}
};
211
StructFieldInfo(std::vector<StructInitializer> V,StructInfo S)212 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
213 StructInfo S) {
214 Initializers = std::move(V);
215 Structure = S;
216 }
217
/// Creates a struct description with the given name, union flag and
/// alignment. All other members keep their in-class default values.
StructInfo::StructInfo(StringRef StructName, bool Union,
                       unsigned AlignmentValue)
    : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
221
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)222 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
223 unsigned FieldAlignmentSize) {
224 if (!FieldName.empty())
225 FieldsByName[FieldName.lower()] = Fields.size();
226 Fields.emplace_back(FT);
227 FieldInfo &Field = Fields.back();
228 Field.Offset =
229 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
230 if (!IsUnion) {
231 NextOffset = std::max(NextOffset, Field.Offset);
232 }
233 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
234 return Field;
235 }
236
~FieldInitializer()237 FieldInitializer::~FieldInitializer() {
238 switch (FT) {
239 case FT_INTEGRAL:
240 IntInfo.~IntFieldInfo();
241 break;
242 case FT_REAL:
243 RealInfo.~RealFieldInfo();
244 break;
245 case FT_STRUCT:
246 StructInfo.~StructFieldInfo();
247 break;
248 }
249 }
250
FieldInitializer(FieldType FT)251 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
252 switch (FT) {
253 case FT_INTEGRAL:
254 new (&IntInfo) IntFieldInfo();
255 break;
256 case FT_REAL:
257 new (&RealInfo) RealFieldInfo();
258 break;
259 case FT_STRUCT:
260 new (&StructInfo) StructFieldInfo();
261 break;
262 }
263 }
264
/// Creates an integral initializer (FT_INTEGRAL) that takes over \p Values.
FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
    : FT(FT_INTEGRAL) {
  // Construct the IntInfo union member in place.
  new (&IntInfo) IntFieldInfo(std::move(Values));
}
269
/// Creates a real-number initializer (FT_REAL) that takes over \p AsIntValues.
FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
    : FT(FT_REAL) {
  // Construct the RealInfo union member in place.
  new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
}
274
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)275 FieldInitializer::FieldInitializer(
276 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
277 : FT(FT_STRUCT) {
278 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
279 }
280
/// Copy constructor: takes over the source's kind and copy-constructs the
/// matching union member in place.
FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
    : FT(Initializer.FT) {
  if (FT == FT_INTEGRAL)
    new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
  else if (FT == FT_REAL)
    new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
  else if (FT == FT_STRUCT)
    new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
}
295
FieldInitializer(FieldInitializer && Initializer)296 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
297 : FT(Initializer.FT) {
298 switch (FT) {
299 case FT_INTEGRAL:
300 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
301 break;
302 case FT_REAL:
303 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
304 break;
305 case FT_STRUCT:
306 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
307 break;
308 }
309 }
310
311 FieldInitializer &
operator =(const FieldInitializer & Initializer)312 FieldInitializer::operator=(const FieldInitializer &Initializer) {
313 if (FT != Initializer.FT) {
314 switch (FT) {
315 case FT_INTEGRAL:
316 IntInfo.~IntFieldInfo();
317 break;
318 case FT_REAL:
319 RealInfo.~RealFieldInfo();
320 break;
321 case FT_STRUCT:
322 StructInfo.~StructFieldInfo();
323 break;
324 }
325 }
326 FT = Initializer.FT;
327 switch (FT) {
328 case FT_INTEGRAL:
329 IntInfo = Initializer.IntInfo;
330 break;
331 case FT_REAL:
332 RealInfo = Initializer.RealInfo;
333 break;
334 case FT_STRUCT:
335 StructInfo = Initializer.StructInfo;
336 break;
337 }
338 return *this;
339 }
340
operator =(FieldInitializer && Initializer)341 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
342 if (FT != Initializer.FT) {
343 switch (FT) {
344 case FT_INTEGRAL:
345 IntInfo.~IntFieldInfo();
346 break;
347 case FT_REAL:
348 RealInfo.~RealFieldInfo();
349 break;
350 case FT_STRUCT:
351 StructInfo.~StructFieldInfo();
352 break;
353 }
354 }
355 FT = Initializer.FT;
356 switch (FT) {
357 case FT_INTEGRAL:
358 IntInfo = Initializer.IntInfo;
359 break;
360 case FT_REAL:
361 RealInfo = Initializer.RealInfo;
362 break;
363 case FT_STRUCT:
364 StructInfo = Initializer.StructInfo;
365 break;
366 }
367 return *this;
368 }
369
370 /// The concrete assembly parser instance.
371 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
372 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
373 class MasmParser : public MCAsmParser {
374 private:
375 SourceMgr::DiagHandlerTy SavedDiagHandler;
376 void *SavedDiagContext;
377 std::unique_ptr<MCAsmParserExtension> PlatformParser;
378
379 /// This is the current buffer index we're lexing from as managed by the
380 /// SourceMgr object.
381 unsigned CurBuffer;
382
383 /// time of assembly
384 struct tm TM;
385
386 BitVector EndStatementAtEOFStack;
387
388 AsmCond TheCondState;
389 std::vector<AsmCond> TheCondStack;
390
391 /// maps directive names to handler methods in parser
392 /// extensions. Extensions register themselves in this map by calling
393 /// addDirectiveHandler.
394 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
395
396 /// maps assembly-time variable names to variables.
397 struct Variable {
398 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };
399
400 StringRef Name;
401 RedefinableKind Redefinable = REDEFINABLE;
402 bool IsText = false;
403 std::string TextValue;
404 };
405 StringMap<Variable> Variables;
406
407 /// Stack of active struct definitions.
408 SmallVector<StructInfo, 1> StructInProgress;
409
410 /// Maps struct tags to struct definitions.
411 StringMap<StructInfo> Structs;
412
413 /// Maps data location names to types.
414 StringMap<AsmTypeInfo> KnownType;
415
416 /// Stack of active macro instantiations.
417 std::vector<MacroInstantiation*> ActiveMacros;
418
419 /// List of bodies of anonymous macros.
420 std::deque<MCAsmMacro> MacroLikeBodies;
421
422 /// Keeps track of how many .macro's have been instantiated.
423 unsigned NumOfMacroInstantiations;
424
425 /// The values from the last parsed cpp hash file line comment if any.
426 struct CppHashInfoTy {
427 StringRef Filename;
428 int64_t LineNumber;
429 SMLoc Loc;
430 unsigned Buf;
CppHashInfoTy__anon60b61cd60111::MasmParser::CppHashInfoTy431 CppHashInfoTy() : LineNumber(0), Buf(0) {}
432 };
433 CppHashInfoTy CppHashInfo;
434
435 /// The filename from the first cpp hash file line comment, if any.
436 StringRef FirstCppHashFilename;
437
438 /// List of forward directional labels for diagnosis at the end.
439 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
440
441 /// AssemblerDialect. ~OU means unset value and use value provided by MAI.
442 /// Defaults to 1U, meaning Intel.
443 unsigned AssemblerDialect = 1U;
444
445 /// Are we parsing ms-style inline assembly?
446 bool ParsingMSInlineAsm = false;
447
448 // Current <...> expression depth.
449 unsigned AngleBracketDepth = 0U;
450
451 // Number of locals defined.
452 uint16_t LocalCounter = 0;
453
454 public:
455 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
456 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
457 MasmParser(const MasmParser &) = delete;
458 MasmParser &operator=(const MasmParser &) = delete;
459 ~MasmParser() override;
460
461 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
462
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)463 void addDirectiveHandler(StringRef Directive,
464 ExtensionDirectiveHandler Handler) override {
465 ExtensionDirectiveMap[Directive] = Handler;
466 DirectiveKindMap.try_emplace(Directive, DK_HANDLER_DIRECTIVE);
467 }
468
addAliasForDirective(StringRef Directive,StringRef Alias)469 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
470 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
471 }
472
473 /// @name MCAsmParser Interface
474 /// {
475
getAssemblerDialect()476 unsigned getAssemblerDialect() override {
477 if (AssemblerDialect == ~0U)
478 return MAI.getAssemblerDialect();
479 else
480 return AssemblerDialect;
481 }
setAssemblerDialect(unsigned i)482 void setAssemblerDialect(unsigned i) override {
483 AssemblerDialect = i;
484 }
485
486 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
487 bool Warning(SMLoc L, const Twine &Msg,
488 SMRange Range = std::nullopt) override;
489 bool printError(SMLoc L, const Twine &Msg,
490 SMRange Range = std::nullopt) override;
491
492 enum ExpandKind { ExpandMacros, DoNotExpandMacros };
493 const AsmToken &Lex(ExpandKind ExpandNextToken);
Lex()494 const AsmToken &Lex() override { return Lex(ExpandMacros); }
495
setParsingMSInlineAsm(bool V)496 void setParsingMSInlineAsm(bool V) override {
497 ParsingMSInlineAsm = V;
498 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
499 // hex integer literals.
500 Lexer.setLexMasmIntegers(V);
501 }
isParsingMSInlineAsm()502 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
503
isParsingMasm() const504 bool isParsingMasm() const override { return true; }
505
506 bool defineMacro(StringRef Name, StringRef Value) override;
507
508 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
509 bool lookUpField(StringRef Base, StringRef Member,
510 AsmFieldInfo &Info) const override;
511
512 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
513
514 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
515 unsigned &NumInputs,
516 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
517 SmallVectorImpl<std::string> &Constraints,
518 SmallVectorImpl<std::string> &Clobbers,
519 const MCInstrInfo *MII, MCInstPrinter *IP,
520 MCAsmParserSemaCallback &SI) override;
521
522 bool parseExpression(const MCExpr *&Res);
523 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
524 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
525 AsmTypeInfo *TypeInfo) override;
526 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
527 bool parseAbsoluteExpression(int64_t &Res) override;
528
529 /// Parse a floating point expression using the float \p Semantics
530 /// and set \p Res to the value.
531 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
532
533 /// Parse an identifier or string (as a quoted identifier)
534 /// and set \p Res to the identifier contents.
535 enum IdentifierPositionKind { StandardPosition, StartOfStatement };
536 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
parseIdentifier(StringRef & Res)537 bool parseIdentifier(StringRef &Res) override {
538 return parseIdentifier(Res, StandardPosition);
539 }
540 void eatToEndOfStatement() override;
541
542 bool checkForValidSection() override;
543
544 /// }
545
546 private:
547 bool expandMacros();
548 const AsmToken peekTok(bool ShouldSkipSpace = true);
549
550 bool parseStatement(ParseStatementInfo &Info,
551 MCAsmParserSemaCallback *SI);
552 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
553 bool parseCppHashLineFilenameComment(SMLoc L);
554
555 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
556 ArrayRef<MCAsmMacroParameter> Parameters,
557 ArrayRef<MCAsmMacroArgument> A,
558 const std::vector<std::string> &Locals, SMLoc L);
559
560 /// Are we inside a macro instantiation?
isInsideMacroInstantiation()561 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
562
563 /// Handle entry to macro instantiation.
564 ///
565 /// \param M The macro.
566 /// \param NameLoc Instantiation location.
567 bool handleMacroEntry(
568 const MCAsmMacro *M, SMLoc NameLoc,
569 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
570
571 /// Handle invocation of macro function.
572 ///
573 /// \param M The macro.
574 /// \param NameLoc Invocation location.
575 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
576
577 /// Handle exit from macro instantiation.
578 void handleMacroExit();
579
580 /// Extract AsmTokens for a macro argument.
581 bool
582 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
583 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
584
585 /// Parse all macro arguments for a given macro.
586 bool
587 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
588 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
589
590 void printMacroInstantiations();
591
592 bool expandStatement(SMLoc Loc);
593
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=std::nullopt) const594 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
595 SMRange Range = std::nullopt) const {
596 ArrayRef<SMRange> Ranges(Range);
597 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
598 }
599 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
600
601 bool lookUpField(const StructInfo &Structure, StringRef Member,
602 AsmFieldInfo &Info) const;
603
604 /// Enter the specified file. This returns true on failure.
605 bool enterIncludeFile(const std::string &Filename);
606
607 /// Reset the current lexer position to that given by \p Loc. The
608 /// current token is not set; clients should ensure Lex() is called
609 /// subsequently.
610 ///
611 /// \param InBuffer If not 0, should be the known buffer id that contains the
612 /// location.
613 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
614 bool EndStatementAtEOF = true);
615
616 /// Parse up to a token of kind \p EndTok and return the contents from the
617 /// current token up to (but not including) this token; the current token on
618 /// exit will be either this kind or EOF. Reads through instantiated macro
619 /// functions and text macros.
620 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
621 std::string parseStringTo(AsmToken::TokenKind EndTok);
622
623 /// Parse up to the end of statement and return the contents from the current
624 /// token until the end of the statement; the current token on exit will be
625 /// either the EndOfStatement or EOF.
626 StringRef parseStringToEndOfStatement() override;
627
628 bool parseTextItem(std::string &Data);
629
630 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
631 MCBinaryExpr::Opcode &Kind);
632
633 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
634 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
635 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
636
637 // Generic (target and platform independent) directive parsing.
638 enum DirectiveKind {
639 DK_NO_DIRECTIVE, // Placeholder
640 DK_HANDLER_DIRECTIVE,
641 DK_ASSIGN,
642 DK_EQU,
643 DK_TEXTEQU,
644 DK_ASCII,
645 DK_ASCIZ,
646 DK_STRING,
647 DK_BYTE,
648 DK_SBYTE,
649 DK_WORD,
650 DK_SWORD,
651 DK_DWORD,
652 DK_SDWORD,
653 DK_FWORD,
654 DK_QWORD,
655 DK_SQWORD,
656 DK_DB,
657 DK_DD,
658 DK_DF,
659 DK_DQ,
660 DK_DW,
661 DK_REAL4,
662 DK_REAL8,
663 DK_REAL10,
664 DK_ALIGN,
665 DK_EVEN,
666 DK_ORG,
667 DK_ENDR,
668 DK_EXTERN,
669 DK_PUBLIC,
670 DK_COMM,
671 DK_COMMENT,
672 DK_INCLUDE,
673 DK_REPEAT,
674 DK_WHILE,
675 DK_FOR,
676 DK_FORC,
677 DK_IF,
678 DK_IFE,
679 DK_IFB,
680 DK_IFNB,
681 DK_IFDEF,
682 DK_IFNDEF,
683 DK_IFDIF,
684 DK_IFDIFI,
685 DK_IFIDN,
686 DK_IFIDNI,
687 DK_ELSEIF,
688 DK_ELSEIFE,
689 DK_ELSEIFB,
690 DK_ELSEIFNB,
691 DK_ELSEIFDEF,
692 DK_ELSEIFNDEF,
693 DK_ELSEIFDIF,
694 DK_ELSEIFDIFI,
695 DK_ELSEIFIDN,
696 DK_ELSEIFIDNI,
697 DK_ELSE,
698 DK_ENDIF,
699
700 DK_MACRO,
701 DK_EXITM,
702 DK_ENDM,
703 DK_PURGE,
704 DK_ERR,
705 DK_ERRB,
706 DK_ERRNB,
707 DK_ERRDEF,
708 DK_ERRNDEF,
709 DK_ERRDIF,
710 DK_ERRDIFI,
711 DK_ERRIDN,
712 DK_ERRIDNI,
713 DK_ERRE,
714 DK_ERRNZ,
715 DK_ECHO,
716 DK_STRUCT,
717 DK_UNION,
718 DK_ENDS,
719 DK_END,
720 DK_PUSHFRAME,
721 DK_PUSHREG,
722 DK_SAVEREG,
723 DK_SAVEXMM128,
724 DK_SETFRAME,
725 DK_RADIX,
726 };
727
728 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
729 /// class.
730 StringMap<DirectiveKind> DirectiveKindMap;
731
732 bool isMacroLikeDirective();
733
734 // Generic (target and platform independent) directive parsing.
735 enum BuiltinSymbol {
736 BI_NO_SYMBOL, // Placeholder
737 BI_DATE,
738 BI_TIME,
739 BI_VERSION,
740 BI_FILECUR,
741 BI_FILENAME,
742 BI_LINE,
743 BI_CURSEG,
744 BI_CPU,
745 BI_INTERFACE,
746 BI_CODE,
747 BI_DATA,
748 BI_FARDATA,
749 BI_WORDSIZE,
750 BI_CODESIZE,
751 BI_DATASIZE,
752 BI_MODEL,
753 BI_STACK,
754 };
755
756 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
757 /// class.
758 StringMap<BuiltinSymbol> BuiltinSymbolMap;
759
760 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
761
762 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
763 SMLoc StartLoc);
764
765 // Generic (target and platform independent) directive parsing.
766 enum BuiltinFunction {
767 BI_NO_FUNCTION, // Placeholder
768 BI_CATSTR,
769 };
770
771 /// Maps builtin name --> BuiltinFunction enum, for builtins handled by this
772 /// class.
773 StringMap<BuiltinFunction> BuiltinFunctionMap;
774
775 bool evaluateBuiltinMacroFunction(BuiltinFunction Function, StringRef Name,
776 std::string &Res);
777
778 // ".ascii", ".asciz", ".string"
779 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
780
781 // "byte", "word", ...
782 bool emitIntValue(const MCExpr *Value, unsigned Size);
783 bool parseScalarInitializer(unsigned Size,
784 SmallVectorImpl<const MCExpr *> &Values,
785 unsigned StringPadLength = 0);
786 bool parseScalarInstList(
787 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
788 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
789 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
790 bool addIntegralField(StringRef Name, unsigned Size);
791 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
792 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
793 StringRef Name, SMLoc NameLoc);
794
795 // "real4", "real8", "real10"
796 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
797 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
798 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
799 size_t Size);
800 bool parseRealInstList(
801 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
802 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
803 bool parseDirectiveNamedRealValue(StringRef TypeName,
804 const fltSemantics &Semantics,
805 unsigned Size, StringRef Name,
806 SMLoc NameLoc);
807
808 bool parseOptionalAngleBracketOpen();
809 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
810
811 bool parseFieldInitializer(const FieldInfo &Field,
812 FieldInitializer &Initializer);
813 bool parseFieldInitializer(const FieldInfo &Field,
814 const IntFieldInfo &Contents,
815 FieldInitializer &Initializer);
816 bool parseFieldInitializer(const FieldInfo &Field,
817 const RealFieldInfo &Contents,
818 FieldInitializer &Initializer);
819 bool parseFieldInitializer(const FieldInfo &Field,
820 const StructFieldInfo &Contents,
821 FieldInitializer &Initializer);
822
823 bool parseStructInitializer(const StructInfo &Structure,
824 StructInitializer &Initializer);
825 bool parseStructInstList(
826 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
827 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
828
829 bool emitFieldValue(const FieldInfo &Field);
830 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
831 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
832 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
833
834 bool emitFieldInitializer(const FieldInfo &Field,
835 const FieldInitializer &Initializer);
836 bool emitFieldInitializer(const FieldInfo &Field,
837 const IntFieldInfo &Contents,
838 const IntFieldInfo &Initializer);
839 bool emitFieldInitializer(const FieldInfo &Field,
840 const RealFieldInfo &Contents,
841 const RealFieldInfo &Initializer);
842 bool emitFieldInitializer(const FieldInfo &Field,
843 const StructFieldInfo &Contents,
844 const StructFieldInfo &Initializer);
845
846 bool emitStructInitializer(const StructInfo &Structure,
847 const StructInitializer &Initializer);
848
849 // User-defined types (structs, unions):
850 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
851 bool addStructField(StringRef Name, const StructInfo &Structure);
852 bool parseDirectiveStructValue(const StructInfo &Structure,
853 StringRef Directive, SMLoc DirLoc);
854 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
855 StringRef Directive, SMLoc DirLoc,
856 StringRef Name);
857
858 // "=", "equ", "textequ"
859 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
860 DirectiveKind DirKind, SMLoc NameLoc);
861
862 bool parseDirectiveOrg(); // "org"
863
864 bool emitAlignTo(int64_t Alignment);
865 bool parseDirectiveAlign(); // "align"
866 bool parseDirectiveEven(); // "even"
867
868 // macro directives
869 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
870 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
871 std::string &Value);
872 bool parseDirectiveEndMacro(StringRef Directive);
873 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
874
875 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
876 StringRef Name, SMLoc NameLoc);
877 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
878 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
879 bool parseDirectiveNestedEnds();
880
881 bool parseDirectiveExtern();
882
883 /// Parse a directive like ".globl" which accepts a single symbol (which
884 /// should be a label or an external).
885 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
886
887 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
888
889 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
890
891 bool parseDirectiveInclude(); // "include"
892
893 // "if" or "ife"
894 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
895 // "ifb" or "ifnb", depending on ExpectBlank.
896 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
897 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
898 // CaseInsensitive.
899 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
900 bool CaseInsensitive);
901 // "ifdef" or "ifndef", depending on expect_defined
902 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
903 // "elseif" or "elseife"
904 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
905 // "elseifb" or "elseifnb", depending on ExpectBlank.
906 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
907 // ".elseifdef" or ".elseifndef", depending on expect_defined
908 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
909 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
910 // ExpectEqual and CaseInsensitive.
911 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
912 bool CaseInsensitive);
913 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
914 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
915 bool parseEscapedString(std::string &Data) override;
916 bool parseAngleBracketString(std::string &Data) override;
917
918 // Macro-like directives
919 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
920 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
921 raw_svector_ostream &OS);
922 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
923 SMLoc ExitLoc, raw_svector_ostream &OS);
924 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
925 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
926 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
927 bool parseDirectiveWhile(SMLoc DirectiveLoc);
928
929 // "_emit" or "__emit"
930 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
931 size_t Len);
932
933 // "align"
934 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
935
936 // "end"
937 bool parseDirectiveEnd(SMLoc DirectiveLoc);
938
939 // ".err"
940 bool parseDirectiveError(SMLoc DirectiveLoc);
941 // ".errb" or ".errnb", depending on ExpectBlank.
942 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".errdef" or ".errndef", depending on ExpectDefined.
944 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
945 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
946 // and CaseInsensitive.
947 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
948 bool CaseInsensitive);
949 // ".erre" or ".errnz", depending on ExpectZero.
950 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
951
952 // ".radix"
953 bool parseDirectiveRadix(SMLoc DirectiveLoc);
954
955 // "echo"
956 bool parseDirectiveEcho(SMLoc DirectiveLoc);
957
958 void initializeDirectiveKindMap();
959 void initializeBuiltinSymbolMaps();
960 };
961
962 } // end anonymous namespace
963
namespace llvm {

// Command-line option object defined in another translation unit and only
// declared here. NOTE(review): presumably the limit on nested macro
// instantiations -- confirm at the definition.
extern cl::opt<unsigned> AsmMacroMaxNestingDepth;

// Factory for the COFF platform parser extension; the MasmParser constructor
// stores the returned object in PlatformParser.
extern MCAsmParserExtension *createCOFFMasmParser();

} // end namespace llvm

// NOTE(review): presumably the address space used when none is given; no use
// is visible in this part of the file.
enum { DEFAULT_ADDRSPACE = 0 };
973
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)974 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
975 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
976 : MCAsmParser(Ctx, Out, SM, MAI), CurBuffer(CB ? CB : SM.getMainFileID()),
977 TM(TM) {
978 HadError = false;
979 // Save the old handler.
980 SavedDiagHandler = SrcMgr.getDiagHandler();
981 SavedDiagContext = SrcMgr.getDiagContext();
982 // Set our own handler which calls the saved handler.
983 SrcMgr.setDiagHandler(DiagHandler, this);
984 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
985 EndStatementAtEOFStack.push_back(true);
986
987 // Initialize the platform / file format parser.
988 switch (Ctx.getObjectFileType()) {
989 case MCContext::IsCOFF:
990 PlatformParser.reset(createCOFFMasmParser());
991 break;
992 default:
993 report_fatal_error("llvm-ml currently supports only COFF output.");
994 break;
995 }
996
997 initializeDirectiveKindMap();
998 PlatformParser->Initialize(*this);
999 initializeBuiltinSymbolMaps();
1000
1001 NumOfMacroInstantiations = 0;
1002 }
1003
/// Tear down the parser and reinstate the diagnostic handler and context that
/// were saved in SavedDiagHandler / SavedDiagContext.
MasmParser::~MasmParser() {
  // Macro instantiations may only still be active if an error was recorded.
  assert((HadError || ActiveMacros.empty()) &&
         "Unexpected active macro instantiation!");

  // Restore the saved diagnostics handler and context for use during
  // finalization.
  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
}
1012
printMacroInstantiations()1013 void MasmParser::printMacroInstantiations() {
1014 // Print the active macro instantiation stack.
1015 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1016 it = ActiveMacros.rbegin(),
1017 ie = ActiveMacros.rend();
1018 it != ie; ++it)
1019 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1020 "while in macro instantiation");
1021 }
1022
/// Print \p Msg as a note at \p L (optionally highlighting \p Range),
/// followed by the active macro instantiation stack.
void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
  // Flush pending errors first so they are printed before this note.
  printPendingErrors();
  printMessage(L, SourceMgr::DK_Note, Msg, Range);
  printMacroInstantiations();
}
1028
Warning(SMLoc L,const Twine & Msg,SMRange Range)1029 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1030 if (getTargetParser().getTargetOptions().MCNoWarn)
1031 return false;
1032 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1033 return Error(L, Msg, Range);
1034 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1035 printMacroInstantiations();
1036 return false;
1037 }
1038
/// Print \p Msg as an error at \p L (optionally highlighting \p Range),
/// followed by the active macro instantiation stack, and record in HadError
/// that an error occurred.
///
/// \returns true, always.
bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
  HadError = true;
  printMessage(L, SourceMgr::DK_Error, Msg, Range);
  printMacroInstantiations();
  return true;
}
1045
enterIncludeFile(const std::string & Filename)1046 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1047 std::string IncludedFile;
1048 unsigned NewBuf =
1049 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1050 if (!NewBuf)
1051 return true;
1052
1053 CurBuffer = NewBuf;
1054 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1055 EndStatementAtEOFStack.push_back(true);
1056 return false;
1057 }
1058
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1059 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1060 bool EndStatementAtEOF) {
1061 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1062 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1063 Loc.getPointer(), EndStatementAtEOF);
1064 }
1065
/// Try to expand the identifier in the current token.
///
/// The identifier is looked up by its lowercased spelling, in this order: a
/// macro function followed by '(', a built-in symbol, a built-in macro
/// function, and a text-valued variable.
///
/// \returns false when the lexer was moved onto expanded text (or a macro
/// function invocation was attempted), true when there was nothing to expand
/// or a built-in macro function could not be parsed or evaluated.
bool MasmParser::expandMacros() {
  const AsmToken &Tok = getTok();
  const std::string IDLower = Tok.getIdentifier().lower();

  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
    // This is a macro function invocation; expand it in place.
    // Copy what is needed out of Tok before advancing the lexer.
    const SMLoc MacroLoc = Tok.getLoc();
    const StringRef MacroId = Tok.getIdentifier();
    Lexer.Lex();
    if (handleMacroInvocation(M, MacroLoc)) {
      // The invocation failed: push back an Error token carrying the macro
      // name and lex again so that it is picked up next.
      Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
      Lexer.Lex();
    }
    return false;
  }

  // Replacement text, set only if one of the lookups below succeeds.
  std::optional<std::string> ExpandedValue;

  if (auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
      BuiltinIt != BuiltinSymbolMap.end()) {
    ExpandedValue =
        evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
  } else if (auto BuiltinFuncIt = BuiltinFunctionMap.find(IDLower);
             BuiltinFuncIt != BuiltinFunctionMap.end()) {
    // Built-in macro function: parse its name, then evaluate it into Res.
    StringRef Name;
    if (parseIdentifier(Name)) {
      return true;
    }
    std::string Res;
    if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), Name, Res)) {
      return true;
    }
    ExpandedValue = Res;
  } else if (auto VarIt = Variables.find(IDLower);
             VarIt != Variables.end() && VarIt->getValue().IsText) {
    // Only text variables are expanded here.
    ExpandedValue = VarIt->getValue().TextValue;
  }

  if (!ExpandedValue)
    return true;
  // Put the replacement text in its own source buffer.
  std::unique_ptr<MemoryBuffer> Instantiation =
      MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");

  // Jump to the macro instantiation and prime the lexer.
  CurBuffer =
      SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
                  /*EndStatementAtEOF=*/false);
  // The end of the expansion buffer must not end the enclosing statement.
  EndStatementAtEOFStack.push_back(false);
  Lexer.Lex();
  return false;
}
1119
/// Advance to the next token and return it.
///
/// Along the way this reports a pending lexer error, forwards comments to the
/// streamer when the target asks for them to be preserved, expands macros in
/// identifiers when \p ExpandNextToken is ExpandMacros, skips backslash
/// line continuations, and returns to the including buffer when an included
/// buffer reaches its end.
const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
  // Report the error carried by the token we are about to leave.
  if (Lexer.getTok().is(AsmToken::Error))
    Error(Lexer.getErrLoc(), Lexer.getErr());
  bool StartOfStatement = false;

  // If the current token is an end of statement, it may carry a comment.
  if (getTok().is(AsmToken::EndOfStatement)) {
    // If this is a line comment (the token text does not start with a line
    // break), output it.
    if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
        getTok().getString().front() != '\r' && MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(getTok().getString()));
    // The token lexed next begins a new statement.
    StartOfStatement = true;
  }

  const AsmToken *tok = &Lexer.Lex();

  // Keep expanding while the current token is an identifier; expandMacros()
  // returns true once there is nothing (more) to expand.
  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
    if (StartOfStatement) {
      AsmToken NextTok;
      MutableArrayRef<AsmToken> Buf(NextTok);
      size_t ReadCount = Lexer.peekTokens(Buf);
      if (ReadCount && NextTok.is(AsmToken::Identifier) &&
          (NextTok.getString().equals_insensitive("equ") ||
           NextTok.getString().equals_insensitive("textequ"))) {
        // This looks like an EQU or TEXTEQU directive; don't expand the
        // identifier, allowing for redefinitions.
        break;
      }
    }
    if (expandMacros())
      break;
  }

  // Parse comments here to be deferred until end of next statement.
  while (tok->is(AsmToken::Comment)) {
    if (MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(tok->getString()));
    tok = &Lexer.Lex();
  }

  // Recognize and bypass line continuations.
  while (tok->is(AsmToken::BackSlash) &&
         peekTok().is(AsmToken::EndOfStatement)) {
    // Eat both the backslash and the end of statement.
    Lexer.Lex();
    tok = &Lexer.Lex();
  }

  if (tok->is(AsmToken::Eof)) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      return Lex();
    }
    // No parent: this is the end of all input, so the stack must now be
    // empty.
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  return *tok;
}
1183
/// Return the token that follows the current one.
///
/// \p ShouldSkipSpace is forwarded to the lexer's peekTokens(). If nothing
/// could be peeked and the current buffer has a parent include location,
/// lexing is moved back to that location and the peek is retried there, so
/// this function can change the current buffer.
///
/// NOTE(review): when nothing could be peeked and there is no parent include,
/// control falls through to assert(ReadCount == 1) with ReadCount == 0, after
/// EndStatementAtEOFStack has been popped. Confirm whether that path is
/// reachable and intended.
const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
  AsmToken Tok;

  // Single-element buffer that peekTokens() fills in.
  MutableArrayRef<AsmToken> Buf(Tok);
  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);

  if (ReadCount == 0) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      return peekTok(ShouldSkipSpace);
    }
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  assert(ReadCount == 1);
  return Tok;
}
1206
/// Parse the whole input, statement by statement.
///
/// \param NoInitialTextSection when false, the streamer's initial sections
///        are set up before parsing.
/// \param NoFinalize when true, undefined directional labels are not
///        diagnosed and the output stream is not finished.
/// \returns true if this parser or its MCContext recorded an error.
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
  // Create the initial section, if requested.
  if (!NoInitialTextSection)
    Out.initSections(false, getTargetParser().getSTI());

  // Prime the lexer.
  Lex();

  HadError = false;
  // Snapshot of the conditional-assembly state, compared after parsing to
  // detect unbalanced conditionals.
  AsmCond StartingCondState = TheCondState;
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // While we have input, parse each statement. An Eof inside an included
  // buffer (one with a parent include location) is not the end of input.
  while (Lexer.isNot(AsmToken::Eof) ||
         SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
    // Skip through the EOF at the end of an inclusion.
    if (Lexer.is(AsmToken::Eof))
      Lex();

    ParseStatementInfo Info(&AsmStrRewrites);
    bool HasError = parseStatement(Info, nullptr);

    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
    // for printing ErrMsg via Lex() only if no (presumably better) parser error
    // exists.
    if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error))
      Lex();

    // parseStatement returned true so may need to emit an error.
    printPendingErrors();

    // Skipping to the next line if needed.
    if (HasError && !getLexer().justConsumedEOL())
      eatToEndOfStatement();
  }

  printPendingErrors();

  // All errors should have been emitted.
  assert(!hasPendingError() && "unexpected error from parseStatement");

  // The conditional state must be back where it was when parsing started.
  if (TheCondState.TheCond != StartingCondState.TheCond ||
      TheCondState.Ignore != StartingCondState.Ignore)
    printError(getTok().getLoc(), "unmatched .ifs or .elses");

  // Check to see that all assembler local symbols were actually defined.
  // Targets that don't do subsections via symbols may not want this, though,
  // so conservatively exclude them. Only do this if we're finalizing, though,
  // as otherwise we won't necessarily have seen everything yet.
  if (!NoFinalize) {
    // Temporary symbols like the ones for directional jumps don't go in the
    // symbol table. They also need to be diagnosed in all (final) cases.
    // Each DirLabels entry is (location, cpp-hash info, symbol).
    for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
      if (std::get<2>(LocSym)->isUndefined()) {
        // Reset the state of any "# line file" directives we've seen to the
        // context as it was at the diagnostic site.
        CppHashInfo = std::get<1>(LocSym);
        printError(std::get<0>(LocSym), "directional label undefined");
      }
    }
  }

  // Finalize the output stream if there are no errors and if the client wants
  // us to.
  if (!HadError && !NoFinalize)
    Out.finish(Lexer.getLoc());

  return HadError || getContext().hadError();
}
1276
checkForValidSection()1277 bool MasmParser::checkForValidSection() {
1278 if (!ParsingMSInlineAsm && !(getStreamer().getCurrentFragment() &&
1279 getStreamer().getCurrentSectionOnly())) {
1280 Out.initSections(false, getTargetParser().getSTI());
1281 return Error(getTok().getLoc(),
1282 "expected section directive before assembly directive");
1283 }
1284 return false;
1285 }
1286
1287 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1288 void MasmParser::eatToEndOfStatement() {
1289 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1290 if (Lexer.is(AsmToken::Eof)) {
1291 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1292 if (ParentIncludeLoc == SMLoc()) {
1293 break;
1294 }
1295
1296 EndStatementAtEOFStack.pop_back();
1297 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1298 }
1299
1300 Lexer.Lex();
1301 }
1302
1303 // Eat EOL.
1304 if (Lexer.is(AsmToken::EndOfStatement))
1305 Lexer.Lex();
1306 }
1307
1308 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1309 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1310 SmallVector<StringRef, 1> Refs;
1311 const char *Start = getTok().getLoc().getPointer();
1312 while (Lexer.isNot(EndTok)) {
1313 if (Lexer.is(AsmToken::Eof)) {
1314 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1315 if (ParentIncludeLoc == SMLoc()) {
1316 break;
1317 }
1318 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1319
1320 EndStatementAtEOFStack.pop_back();
1321 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1322 Lexer.Lex();
1323 Start = getTok().getLoc().getPointer();
1324 } else {
1325 Lexer.Lex();
1326 }
1327 }
1328 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1329 return Refs;
1330 }
1331
parseStringTo(AsmToken::TokenKind EndTok)1332 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1333 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1334 std::string Str;
1335 for (StringRef S : Refs) {
1336 Str.append(S.str());
1337 }
1338 return Str;
1339 }
1340
parseStringToEndOfStatement()1341 StringRef MasmParser::parseStringToEndOfStatement() {
1342 const char *Start = getTok().getLoc().getPointer();
1343
1344 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1345 Lexer.Lex();
1346
1347 const char *End = getTok().getLoc().getPointer();
1348 return StringRef(Start, End - Start);
1349 }
1350
1351 /// Parse a paren expression and return it.
1352 /// NOTE: This assumes the leading '(' has already been consumed.
1353 ///
1354 /// parenexpr ::= expr)
1355 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1356 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1357 if (parseExpression(Res))
1358 return true;
1359 EndLoc = Lexer.getTok().getEndLoc();
1360 return parseRParen();
1361 }
1362
1363 /// Parse a bracket expression and return it.
1364 /// NOTE: This assumes the leading '[' has already been consumed.
1365 ///
1366 /// bracketexpr ::= expr]
1367 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1368 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1369 if (parseExpression(Res))
1370 return true;
1371 EndLoc = getTok().getEndLoc();
1372 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1373 return true;
1374 return false;
1375 }
1376
/// Parse a primary expression and return it.
///  primaryexpr ::= (parenexpr
///  primaryexpr ::= [bracketexpr   (only if the platform parser allows it)
///  primaryexpr ::= symbol
///  primaryexpr ::= number
///  primaryexpr ::= '.'
///  primaryexpr ::= !,~,+,-,'not' primaryexpr
///  primaryexpr ::= string
///     (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// \param Res receives the parsed expression on success.
/// \param EndLoc receives an end location on the paths that compute one; the
///        string, "not" and "@b"/"@f" paths do not assign it themselves.
/// \param TypeInfo if non-null, receives the type found for a symbol
///        reference; operands of unary operators are parsed without it.
/// \returns true on error, false on success.
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
                                  AsmTypeInfo *TypeInfo) {
  SMLoc FirstTokenLoc = getLexer().getLoc();
  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
  switch (FirstTokenKind) {
  default:
    return TokError("unknown token in expression");
  // If we have an error assume that we've already handled it.
  case AsmToken::Error:
    return true;
  case AsmToken::Exclaim:
    // Logical not of the following primary expression.
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Dollar:
  case AsmToken::At:
  case AsmToken::Identifier: {
    StringRef Identifier;
    if (parseIdentifier(Identifier)) {
      // We may have failed but $ may be a valid token.
      if (getTok().is(AsmToken::Dollar)) {
        if (Lexer.getMAI().getDollarIsPC()) {
          Lex();
          // This is a '$' reference, which references the current PC.  Emit a
          // temporary label to the streamer and refer to it.
          MCSymbol *Sym = Ctx.createTempSymbol();
          Out.emitLabel(Sym);
          Res = MCSymbolRefExpr::create(Sym, getContext());
          EndLoc = FirstTokenLoc;
          return false;
        }
        return Error(FirstTokenLoc, "invalid token in expression");
      }
      // Any other failure falls through with an empty Identifier, which is
      // rejected by the empty-name check below.
    }
    // Parse named bitwise negation.
    if (Identifier.equals_insensitive("not")) {
      if (parsePrimaryExpr(Res, EndLoc, nullptr))
        return true;
      Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
      return false;
    }
    // Parse directional local label references.
    if (Identifier.equals_insensitive("@b") ||
        Identifier.equals_insensitive("@f")) {
      bool Before = Identifier.equals_insensitive("@b");
      MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
      // A backward reference needs a label that has already been defined.
      if (Before && Sym->isUndefined())
        return Error(FirstTokenLoc, "Expected @@ label before @B reference");
      Res = MCSymbolRefExpr::create(Sym, getContext());
      return false;
    }

    EndLoc = SMLoc::getFromPointer(Identifier.end());

    // This is a symbol reference.
    StringRef SymbolName = Identifier;
    if (SymbolName.empty())
      return Error(getLexer().getLoc(), "expected a symbol reference");

    // Find the field offset if used.
    AsmFieldInfo Info;
    // Split "name.rest" at the first '.'; without a '.' nothing is looked up.
    auto Split = SymbolName.split('.');
    if (Split.second.empty()) {
    } else {
      SymbolName = Split.first;
      if (lookUpField(SymbolName, Split.second, Info)) {
        // The first lookup reported failure: retry with the first component
        // of the remainder as the base and what follows it as the member.
        std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
        StringRef Base = BaseMember.first, Member = BaseMember.second;
        lookUpField(Base, Member, Info);
      } else if (Structs.count(SymbolName.lower())) {
        // This is actually a reference to a field offset.
        Res = MCConstantExpr::create(Info.Offset, getContext());
        return false;
      }
    }

    MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
    if (!Sym) {
      // If this is a built-in numeric value, treat it as a constant.
      auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
      const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
                                       ? BI_NO_SYMBOL
                                       : BuiltinIt->getValue();
      if (Symbol != BI_NO_SYMBOL) {
        const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
        if (Value) {
          Res = Value;
          return false;
        }
      }

      // Variables use case-insensitive symbol names; if this is a variable, we
      // find the symbol using its canonical name.
      auto VarIt = Variables.find(SymbolName.lower());
      if (VarIt != Variables.end())
        SymbolName = VarIt->second.Name;
      Sym = getContext().parseSymbol(SymbolName);
    }

    // If this is an absolute variable reference, substitute it now to preserve
    // semantics in the face of reassignment.
    if (Sym->isVariable()) {
      auto V = Sym->getVariableValue();
      // Constants are always inlined; target expressions decide themselves.
      bool DoInline = isa<MCConstantExpr>(V);
      if (auto TV = dyn_cast<MCTargetExpr>(V))
        DoInline = TV->inlineAssignedExpr();
      if (DoInline) {
        Res = Sym->getVariableValue();
        return false;
      }
    }

    // Otherwise create a symbol ref.
    const MCExpr *SymRef =
        MCSymbolRefExpr::create(Sym, getContext(), FirstTokenLoc);
    if (Info.Offset) {
      // Add the field offset found above to the symbol reference.
      Res = MCBinaryExpr::create(
          MCBinaryExpr::Add, SymRef,
          MCConstantExpr::create(Info.Offset, getContext()), getContext());
    } else {
      Res = SymRef;
    }
    if (TypeInfo) {
      // If the field lookup produced no type, fall back to the type recorded
      // for the full (lowercased) identifier, if any.
      if (Info.Type.Name.empty()) {
        auto TypeIt = KnownType.find(Identifier.lower());
        if (TypeIt != KnownType.end()) {
          Info.Type = TypeIt->second;
        }
      }

      *TypeInfo = Info.Type;
    }
    return false;
  }
  case AsmToken::BigNum:
    return TokError("literal value out of range for directive");
  case AsmToken::Integer: {
    int64_t IntVal = getTok().getIntVal();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::String: {
    // MASM strings (used as constants) are interpreted as big-endian base-256.
    SMLoc ValueLoc = getTok().getLoc();
    std::string Value;
    if (parseEscapedString(Value))
      return true;
    // At most eight characters fit in the 64-bit value built below.
    if (Value.size() > 8)
      return Error(ValueLoc, "literal value out of range");
    uint64_t IntValue = 0;
    for (const unsigned char CharVal : Value)
      IntValue = (IntValue << 8) | CharVal;
    Res = MCConstantExpr::create(IntValue, getContext());
    return false;
  }
  case AsmToken::Real: {
    // The constant is the bit pattern of the literal parsed as an IEEE
    // double.
    APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::Dot: {
    // This is a '.' reference, which references the current PC.  Emit a
    // temporary label to the streamer and refer to it.
    MCSymbol *Sym = Ctx.createTempSymbol();
    Out.emitLabel(Sym);
    Res = MCSymbolRefExpr::create(Sym, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat the '.'.
    return false;
  }
  case AsmToken::LParen:
    Lex(); // Eat the '('.
    return parseParenExpr(Res, EndLoc);
  case AsmToken::LBrac:
    if (!PlatformParser->HasBracketExpressions())
      return TokError("brackets expression not supported on this target");
    Lex(); // Eat the '['.
    return parseBracketExpr(Res, EndLoc);
  case AsmToken::Minus:
    // Unary minus.
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Plus:
    // Unary plus.
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Tilde:
    // Bitwise not.
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
    return false;
  }
}
1590
parseExpression(const MCExpr * & Res)1591 bool MasmParser::parseExpression(const MCExpr *&Res) {
1592 SMLoc EndLoc;
1593 return parseExpression(Res, EndLoc);
1594 }
1595
1596 /// This function checks if the next token is <string> type or arithmetic.
1597 /// string that begin with character '<' must end with character '>'.
1598 /// otherwise it is arithmetics.
1599 /// If the function returns a 'true' value,
1600 /// the End argument will be filled with the last location pointed to the '>'
1601 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1602 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1603 assert((StrLoc.getPointer() != nullptr) &&
1604 "Argument to the function cannot be a NULL value");
1605 const char *CharPtr = StrLoc.getPointer();
1606 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1607 (*CharPtr != '\0')) {
1608 if (*CharPtr == '!')
1609 CharPtr++;
1610 CharPtr++;
1611 }
1612 if (*CharPtr == '>') {
1613 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1614 return true;
1615 }
1616 return false;
1617 }
1618
1619 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1620 static std::string angleBracketString(StringRef BracketContents) {
1621 std::string Res;
1622 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1623 if (BracketContents[Pos] == '!')
1624 Pos++;
1625 Res += BracketContents[Pos];
1626 }
1627 return Res;
1628 }
1629
1630 /// Parse an expression and return it.
1631 ///
1632 /// expr ::= expr &&,|| expr -> lowest.
1633 /// expr ::= expr |,^,&,! expr
1634 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1635 /// expr ::= expr <<,>> expr
1636 /// expr ::= expr +,- expr
1637 /// expr ::= expr *,/,% expr -> highest.
1638 /// expr ::= primaryexpr
1639 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1640 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1641 // Parse the expression.
1642 Res = nullptr;
1643 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1644 parseBinOpRHS(1, Res, EndLoc))
1645 return true;
1646
1647 // Try to constant fold it up front, if possible. Do not exploit
1648 // assembler here.
1649 int64_t Value;
1650 if (Res->evaluateAsAbsolute(Value))
1651 Res = MCConstantExpr::create(Value, getContext());
1652
1653 return false;
1654 }
1655
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1656 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1657 Res = nullptr;
1658 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1659 }
1660
parseAbsoluteExpression(int64_t & Res)1661 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1662 const MCExpr *Expr;
1663
1664 SMLoc StartLoc = Lexer.getLoc();
1665 if (parseExpression(Expr))
1666 return true;
1667
1668 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1669 return Error(StartLoc, "expected absolute expression");
1670
1671 return false;
1672 }
1673
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1674 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1675 MCBinaryExpr::Opcode &Kind,
1676 bool ShouldUseLogicalShr,
1677 bool EndExpressionAtGreater) {
1678 switch (K) {
1679 default:
1680 return 0; // not a binop.
1681
1682 // Lowest Precedence: &&, ||
1683 case AsmToken::AmpAmp:
1684 Kind = MCBinaryExpr::LAnd;
1685 return 2;
1686 case AsmToken::PipePipe:
1687 Kind = MCBinaryExpr::LOr;
1688 return 1;
1689
1690 // Low Precedence: ==, !=, <>, <, <=, >, >=
1691 case AsmToken::EqualEqual:
1692 Kind = MCBinaryExpr::EQ;
1693 return 3;
1694 case AsmToken::ExclaimEqual:
1695 case AsmToken::LessGreater:
1696 Kind = MCBinaryExpr::NE;
1697 return 3;
1698 case AsmToken::Less:
1699 Kind = MCBinaryExpr::LT;
1700 return 3;
1701 case AsmToken::LessEqual:
1702 Kind = MCBinaryExpr::LTE;
1703 return 3;
1704 case AsmToken::Greater:
1705 if (EndExpressionAtGreater)
1706 return 0;
1707 Kind = MCBinaryExpr::GT;
1708 return 3;
1709 case AsmToken::GreaterEqual:
1710 Kind = MCBinaryExpr::GTE;
1711 return 3;
1712
1713 // Low Intermediate Precedence: +, -
1714 case AsmToken::Plus:
1715 Kind = MCBinaryExpr::Add;
1716 return 4;
1717 case AsmToken::Minus:
1718 Kind = MCBinaryExpr::Sub;
1719 return 4;
1720
1721 // High Intermediate Precedence: |, &, ^
1722 case AsmToken::Pipe:
1723 Kind = MCBinaryExpr::Or;
1724 return 5;
1725 case AsmToken::Caret:
1726 Kind = MCBinaryExpr::Xor;
1727 return 5;
1728 case AsmToken::Amp:
1729 Kind = MCBinaryExpr::And;
1730 return 5;
1731
1732 // Highest Precedence: *, /, %, <<, >>
1733 case AsmToken::Star:
1734 Kind = MCBinaryExpr::Mul;
1735 return 6;
1736 case AsmToken::Slash:
1737 Kind = MCBinaryExpr::Div;
1738 return 6;
1739 case AsmToken::Percent:
1740 Kind = MCBinaryExpr::Mod;
1741 return 6;
1742 case AsmToken::LessLess:
1743 Kind = MCBinaryExpr::Shl;
1744 return 6;
1745 case AsmToken::GreaterGreater:
1746 if (EndExpressionAtGreater)
1747 return 0;
1748 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
1749 return 6;
1750 }
1751 }
1752
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)1753 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
1754 MCBinaryExpr::Opcode &Kind) {
1755 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
1756 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
1757 AngleBracketDepth > 0);
1758 }
1759
1760 /// Parse all binary operators with precedence >= 'Precedence'.
1761 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)1762 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
1763 SMLoc &EndLoc) {
1764 SMLoc StartLoc = Lexer.getLoc();
1765 while (true) {
1766 AsmToken::TokenKind TokKind = Lexer.getKind();
1767 if (Lexer.getKind() == AsmToken::Identifier) {
1768 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
1769 .CaseLower("and", AsmToken::Amp)
1770 .CaseLower("not", AsmToken::Exclaim)
1771 .CaseLower("or", AsmToken::Pipe)
1772 .CaseLower("xor", AsmToken::Caret)
1773 .CaseLower("shl", AsmToken::LessLess)
1774 .CaseLower("shr", AsmToken::GreaterGreater)
1775 .CaseLower("eq", AsmToken::EqualEqual)
1776 .CaseLower("ne", AsmToken::ExclaimEqual)
1777 .CaseLower("lt", AsmToken::Less)
1778 .CaseLower("le", AsmToken::LessEqual)
1779 .CaseLower("gt", AsmToken::Greater)
1780 .CaseLower("ge", AsmToken::GreaterEqual)
1781 .Default(TokKind);
1782 }
1783 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
1784 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
1785
1786 // If the next token is lower precedence than we are allowed to eat, return
1787 // successfully with what we ate already.
1788 if (TokPrec < Precedence)
1789 return false;
1790
1791 Lex();
1792
1793 // Eat the next primary expression.
1794 const MCExpr *RHS;
1795 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
1796 return true;
1797
1798 // If BinOp binds less tightly with RHS than the operator after RHS, let
1799 // the pending operator take RHS as its LHS.
1800 MCBinaryExpr::Opcode Dummy;
1801 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
1802 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
1803 return true;
1804
1805 // Merge LHS and RHS according to operator.
1806 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
1807 }
1808 }
1809
/// ParseStatement:
///   ::= % statement
///   ::= EndOfStatement
///   ::= Label* Directive ...Operands... EndOfStatement
///   ::= Label* Identifier OperandList* EndOfStatement
///
/// Parses one statement starting at the current token and dispatches it, in
/// this order: blank statement, '%' expansion, cpp hash-line comment,
/// conditional-assembly directives, label, macro invocation, prefix directive
/// ("DIRECTIVE args"), structure-typed data, infix directive
/// ("name DIRECTIVE args"), MS inline-asm pseudo-ops, and finally a target
/// instruction.
///
/// \param Info Per-statement state. This function writes Info.ExitValue (for
///             EXITM/ENDM), appends to Info.AsmRewrites (MS inline asm), fills
///             Info.ParsedOperands and Info.ParseError, and passes Info.Opcode
///             to the instruction matcher.
/// \param SI   Optional semantic callback; only consulted to rewrite label
///             names while parsing MS inline asm.
/// \returns true if an error was reported while handling the statement,
///          false otherwise.
bool MasmParser::parseStatement(ParseStatementInfo &Info,
                                MCAsmParserSemaCallback *SI) {
  assert(!hasPendingError() && "parseStatement started with pending error");
  // Eat initial spaces and comments.
  while (Lexer.is(AsmToken::Space))
    Lex();
  if (Lexer.is(AsmToken::EndOfStatement)) {
    // An EndOfStatement token whose text is empty or starts with a line break
    // is a genuinely blank line; record it in the output. Otherwise the token
    // carries a line comment, which we can drop safely.
    if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
        getTok().getString().front() == '\n')
      Out.addBlankLine();
    Lex();
    return false;
  }

  // If preceded by an expansion operator, first expand all text macros and
  // macro functions.
  if (getTok().is(AsmToken::Percent)) {
    SMLoc ExpansionLoc = getTok().getLoc();
    if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
      return true;
  }

  // Statements always start with an identifier, unless we're dealing with a
  // processor directive (.386, .686, etc.) that lexes as a real.
  // ID keeps a copy of the first token; it is needed later for the label
  // check, for UnLex, and for the target parser.
  AsmToken ID = getTok();
  SMLoc IDLoc = ID.getLoc();
  StringRef IDVal;
  if (Lexer.is(AsmToken::HashDirective))
    return parseCppHashLineFilenameComment(IDLoc);
  if (Lexer.is(AsmToken::Dot)) {
    // Treat '.' as a valid identifier in this context.
    Lex();
    IDVal = ".";
  } else if (Lexer.is(AsmToken::Real)) {
    // Treat ".<number>" as a valid identifier in this context.
    IDVal = getTok().getString();
    Lex(); // always eat a token
    if (!IDVal.starts_with("."))
      return Error(IDLoc, "unexpected token at start of statement");
  } else if (parseIdentifier(IDVal, StartOfStatement)) {
    // Not an identifier. This is only an error in active code; inside an
    // inactive conditional block the statement is skipped further below, so
    // continue with an empty name.
    if (!TheCondState.Ignore) {
      Lex(); // always eat a token
      return Error(IDLoc, "unexpected token at start of statement");
    }
    IDVal = "";
  }

  // Handle conditional assembly here before checking for skipping.  We
  // have to do this so that .endif isn't skipped in a ".if 0" block for
  // example.
  // Directive names are looked up by their lower-cased spelling.
  StringMap<DirectiveKind>::const_iterator DirKindIt =
      DirectiveKindMap.find(IDVal.lower());
  DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
                              ? DK_NO_DIRECTIVE
                              : DirKindIt->getValue();
  switch (DirKind) {
  default:
    break;
  case DK_IF:
  case DK_IFE:
    return parseDirectiveIf(IDLoc, DirKind);
  case DK_IFB:
    return parseDirectiveIfb(IDLoc, true);
  case DK_IFNB:
    return parseDirectiveIfb(IDLoc, false);
  case DK_IFDEF:
    return parseDirectiveIfdef(IDLoc, true);
  case DK_IFNDEF:
    return parseDirectiveIfdef(IDLoc, false);
  case DK_IFDIF:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
                               /*CaseInsensitive=*/false);
  case DK_IFDIFI:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
                               /*CaseInsensitive=*/true);
  case DK_IFIDN:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
                               /*CaseInsensitive=*/false);
  case DK_IFIDNI:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
                               /*CaseInsensitive=*/true);
  case DK_ELSEIF:
  case DK_ELSEIFE:
    return parseDirectiveElseIf(IDLoc, DirKind);
  case DK_ELSEIFB:
    return parseDirectiveElseIfb(IDLoc, true);
  case DK_ELSEIFNB:
    return parseDirectiveElseIfb(IDLoc, false);
  case DK_ELSEIFDEF:
    return parseDirectiveElseIfdef(IDLoc, true);
  case DK_ELSEIFNDEF:
    return parseDirectiveElseIfdef(IDLoc, false);
  case DK_ELSEIFDIF:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
                                   /*CaseInsensitive=*/false);
  case DK_ELSEIFDIFI:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
                                   /*CaseInsensitive=*/true);
  case DK_ELSEIFIDN:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
                                   /*CaseInsensitive=*/false);
  case DK_ELSEIFIDNI:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
                                   /*CaseInsensitive=*/true);
  case DK_ELSE:
    return parseDirectiveElse(IDLoc);
  case DK_ENDIF:
    return parseDirectiveEndIf(IDLoc);
  }

  // Ignore the statement if in the middle of inactive conditional
  // (e.g. ".if 0").
  if (TheCondState.Ignore) {
    eatToEndOfStatement();
    return false;
  }

  // FIXME: Recurse on local labels?

  // Check for a label.
  //   ::= identifier ':'
  //   ::= number ':'
  // The target parser gets a veto on whether the leading token may be a label.
  if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
    if (checkForValidSection())
      return true;

    // identifier ':'   -> Label.
    Lex();

    // Diagnose attempt to use '.' as a label.
    if (IDVal == ".")
      return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");

    // Diagnose attempt to use a variable as a label.
    //
    // FIXME: Diagnostics. Note the location of the definition as a label.
    // FIXME: This doesn't diagnose assignment to a symbol which has been
    // implicitly marked as external.
    MCSymbol *Sym;
    if (ParsingMSInlineAsm && SI) {
      // In MS inline asm the front end supplies an internal name for the
      // label; record a rewrite covering the original spelling and use the
      // internal name from here on.
      StringRef RewrittenLabel =
          SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
      assert(!RewrittenLabel.empty() &&
             "We should have an internal name here.");
      Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
                                     RewrittenLabel);
      IDVal = RewrittenLabel;
    }
    // Handle directional local labels
    if (IDVal == "@@") {
      Sym = Ctx.createDirectionalLocalSymbol(0);
    } else {
      Sym = getContext().parseSymbol(IDVal);
    }

    // End of Labels should be treated as end of line for lexing
    // purposes but that information is not available to the Lexer who
    // does not understand Labels. This may cause us to see a Hash
    // here instead of a preprocessor line comment.
    if (getTok().is(AsmToken::Hash)) {
      // Collect the rest of the line as comment text and replace it with a
      // single EndOfStatement token carrying that text.
      std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
      Lexer.Lex();
      Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
    }

    // Consume any end of statement token, if present, to avoid spurious
    // addBlankLine() calls.
    if (getTok().is(AsmToken::EndOfStatement)) {
      Lex();
    }

    // Emit the label. Nothing is emitted while the target parser is parsing
    // MS inline asm.
    if (!getTargetParser().isParsingMSInlineAsm())
      Out.emitLabel(Sym, IDLoc);
    return false;
  }

  // If macros are enabled, check to see if this is a macro instantiation.
  if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
    // A '(' directly after the macro name means the arguments are terminated
    // by ')' instead of by the end of the statement.
    AsmToken::TokenKind ArgumentEndTok = parseOptionalToken(AsmToken::LParen)
                                             ? AsmToken::RParen
                                             : AsmToken::EndOfStatement;
    return handleMacroEntry(M, IDLoc, ArgumentEndTok);
  }

  // Otherwise, we have a normal instruction or directive.

  if (DirKind != DK_NO_DIRECTIVE) {
    // There are several entities interested in parsing directives:
    //
    // 1. Asm parser extensions. For example, platform-specific parsers
    //    (like the ELF parser) register themselves as extensions.
    // 2. The target-specific assembly parser. Some directives are target
    //    specific or may potentially behave differently on certain targets.
    // 3. The generic directive parser implemented by this class. These are
    //    all the directives that behave in a target and platform independent
    //    manner, or at least have a default behavior that's shared between
    //    all targets and platforms.

    // Special-case handling of structure-end directives at higher priority,
    // since ENDS is overloaded as a segment-end directive.
    // A bare "ENDS" (nothing else on the line) with more than one structure
    // in progress closes the innermost nested structure.
    if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
        getTok().is(AsmToken::EndOfStatement)) {
      return parseDirectiveNestedEnds();
    }

    // First, check the extension directive map to see if any extension has
    // registered itself to parse this directive.
    std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
        ExtensionDirectiveMap.lookup(IDVal.lower());
    if (Handler.first)
      return (*Handler.second)(Handler.first, IDVal, IDLoc);

    // Next, let the target-specific assembly parser try.
    // NOTE(review): when the statement did not start with an Identifier token
    // (e.g. the '.' and ".<number>" forms accepted above), this returns
    // success without consuming the rest of the statement and without
    // reaching the generic directive switch below -- confirm this is intended.
    if (ID.isNot(AsmToken::Identifier))
      return false;

    ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
    assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
           "Should only return Failure iff there was an error");
    if (TPDirectiveReturn.isFailure())
      return true;
    if (TPDirectiveReturn.isSuccess())
      return false;

    // Finally, if no one else is interested in this directive, it must be
    // generic and familiar to this class.
    switch (DirKind) {
    default:
      break;
    case DK_ASCII:
      return parseDirectiveAscii(IDVal, false);
    case DK_ASCIZ:
    case DK_STRING:
      return parseDirectiveAscii(IDVal, true);
    // Data definition directives; the integer argument is the element size
    // in bytes.
    case DK_BYTE:
    case DK_SBYTE:
    case DK_DB:
      return parseDirectiveValue(IDVal, 1);
    case DK_WORD:
    case DK_SWORD:
    case DK_DW:
      return parseDirectiveValue(IDVal, 2);
    case DK_DWORD:
    case DK_SDWORD:
    case DK_DD:
      return parseDirectiveValue(IDVal, 4);
    case DK_FWORD:
    case DK_DF:
      return parseDirectiveValue(IDVal, 6);
    case DK_QWORD:
    case DK_SQWORD:
    case DK_DQ:
      return parseDirectiveValue(IDVal, 8);
    case DK_REAL4:
      return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
    case DK_REAL8:
      return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
    case DK_REAL10:
      return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
    case DK_STRUCT:
    case DK_UNION:
      return parseDirectiveNestedStruct(IDVal, DirKind);
    case DK_ENDS:
      return parseDirectiveNestedEnds();
    case DK_ALIGN:
      return parseDirectiveAlign();
    case DK_EVEN:
      return parseDirectiveEven();
    case DK_ORG:
      return parseDirectiveOrg();
    case DK_EXTERN:
      return parseDirectiveExtern();
    case DK_PUBLIC:
      return parseDirectiveSymbolAttribute(MCSA_Global);
    case DK_COMM:
      return parseDirectiveComm(/*IsLocal=*/false);
    case DK_COMMENT:
      return parseDirectiveComment(IDLoc);
    case DK_INCLUDE:
      return parseDirectiveInclude();
    case DK_REPEAT:
      return parseDirectiveRepeat(IDLoc, IDVal);
    case DK_WHILE:
      return parseDirectiveWhile(IDLoc);
    case DK_FOR:
      return parseDirectiveFor(IDLoc, IDVal);
    case DK_FORC:
      return parseDirectiveForc(IDLoc, IDVal);
    case DK_EXITM:
      // Give Info.ExitValue a value before handing it to the EXITM parser.
      Info.ExitValue = "";
      return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
    case DK_ENDM:
      // ENDM also sets Info.ExitValue, to an empty string.
      Info.ExitValue = "";
      return parseDirectiveEndMacro(IDVal);
    case DK_PURGE:
      return parseDirectivePurgeMacro(IDLoc);
    case DK_END:
      return parseDirectiveEnd(IDLoc);
    case DK_ERR:
      return parseDirectiveError(IDLoc);
    case DK_ERRB:
      return parseDirectiveErrorIfb(IDLoc, true);
    case DK_ERRNB:
      return parseDirectiveErrorIfb(IDLoc, false);
    case DK_ERRDEF:
      return parseDirectiveErrorIfdef(IDLoc, true);
    case DK_ERRNDEF:
      return parseDirectiveErrorIfdef(IDLoc, false);
    case DK_ERRDIF:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
                                      /*CaseInsensitive=*/false);
    case DK_ERRDIFI:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
                                      /*CaseInsensitive=*/true);
    case DK_ERRIDN:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
                                      /*CaseInsensitive=*/false);
    case DK_ERRIDNI:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
                                      /*CaseInsensitive=*/true);
    case DK_ERRE:
      return parseDirectiveErrorIfe(IDLoc, true);
    case DK_ERRNZ:
      return parseDirectiveErrorIfe(IDLoc, false);
    case DK_RADIX:
      return parseDirectiveRadix(IDLoc);
    case DK_ECHO:
      return parseDirectiveEcho(IDLoc);
    }

    // The name is in DirectiveKindMap, but it is not valid in prefix position
    // and nobody claimed it.
    return Error(IDLoc, "unknown directive");
  }

  // We also check if this is allocating memory with user-defined type.
  auto IDIt = Structs.find(IDVal.lower());
  if (IDIt != Structs.end())
    return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
                                     IDLoc);

  // Non-conditional Microsoft directives sometimes follow their first argument.
  // nextTok is the token right after the leading identifier (the candidate
  // infix directive); afterNextTok is one further token of lookahead.
  const AsmToken nextTok = getTok();
  const StringRef nextVal = nextTok.getString();
  const SMLoc nextLoc = nextTok.getLoc();

  const AsmToken afterNextTok = peekTok();

  // There are several entities interested in parsing infix directives:
  //
  // 1. Asm parser extensions. For example, platform-specific parsers
  //    (like the ELF parser) register themselves as extensions.
  // 2. The generic directive parser implemented by this class. These are
  //    all the directives that behave in a target and platform independent
  //    manner, or at least have a default behavior that's shared between
  //    all targets and platforms.

  getTargetParser().flushPendingInstructions(getStreamer());

  // Special-case handling of structure-end directives at higher priority, since
  // ENDS is overloaded as a segment-end directive.
  // "name ENDS" with exactly one structure in progress closes that structure.
  if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
    Lex();
    return parseDirectiveEnds(IDVal, IDLoc);
  }

  // First, check the extension directive map to see if any extension has
  // registered itself to parse this directive.
  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
      ExtensionDirectiveMap.lookup(nextVal.lower());
  if (Handler.first) {
    // Consume the directive token, then push the leading identifier back so
    // that it is the next token the extension handler sees.
    Lex();
    Lexer.UnLex(ID);
    return (*Handler.second)(Handler.first, nextVal, nextLoc);
  }

  // If no one else is interested in this directive, it must be
  // generic and familiar to this class.
  DirKindIt = DirectiveKindMap.find(nextVal.lower());
  DirKind = (DirKindIt == DirectiveKindMap.end())
                ? DK_NO_DIRECTIVE
                : DirKindIt->getValue();
  switch (DirKind) {
  default:
    break;
  case DK_ASSIGN:
  case DK_EQU:
  case DK_TEXTEQU:
    Lex();
    return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
  // For the size keywords below, "<op> BYTE PTR ..." is an instruction operand
  // rather than a named data definition; in that case break out of the switch
  // and fall through to instruction parsing.
  case DK_BYTE:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SBYTE:
  case DK_DB:
    Lex();
    return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
  case DK_WORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SWORD:
  case DK_DW:
    Lex();
    return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
  case DK_DWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SDWORD:
  case DK_DD:
    Lex();
    return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
  case DK_FWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_DF:
    Lex();
    return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
  case DK_QWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SQWORD:
  case DK_DQ:
    Lex();
    return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
  case DK_REAL4:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
                                        IDVal, IDLoc);
  case DK_REAL8:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
                                        IDVal, IDLoc);
  case DK_REAL10:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
                                        10, IDVal, IDLoc);
  case DK_STRUCT:
  case DK_UNION:
    Lex();
    return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
  case DK_ENDS:
    Lex();
    return parseDirectiveEnds(IDVal, IDLoc);
  case DK_MACRO:
    Lex();
    return parseDirectiveMacro(IDVal, IDLoc);
  }

  // Finally, we check if this is allocating a variable with user-defined type.
  auto NextIt = Structs.find(nextVal.lower());
  if (NextIt != Structs.end()) {
    Lex();
    return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
                                          nextVal, nextLoc, IDVal);
  }

  // __asm _emit or __asm __emit
  if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
                             IDVal == "_EMIT" || IDVal == "__EMIT"))
    return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());

  // __asm align
  if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
    return parseDirectiveMSAlign(IDLoc, Info);

  // __asm even: only a rewrite is recorded here; parsing then continues as for
  // an ordinary instruction.
  // NOTE(review): the length 4 presumably is the length of "even" -- confirm.
  if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
    Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
  if (checkForValidSection())
    return true;

  // Canonicalize the opcode to lower case.
  std::string OpcodeStr = IDVal.lower();
  ParseInstructionInfo IInfo(Info.AsmRewrites);
  bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
                                                          Info.ParsedOperands);
  Info.ParseError = ParseHadError;

  // Dump the parsed representation, if requested.
  if (getShowParsedOperands()) {
    SmallString<256> Str;
    raw_svector_ostream OS(Str);
    OS << "parsed instruction: [";
    for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
      if (i != 0)
        OS << ", ";
      Info.ParsedOperands[i]->print(OS, MAI);
    }
    OS << "]";

    printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
  }

  // Fail even if ParseInstruction erroneously returns false.
  if (hasPendingError() || ParseHadError)
    return true;

  // If parsing succeeded, match the instruction.
  // (ParseHadError is necessarily false here because of the return above, so
  // this condition always holds.)
  if (!ParseHadError) {
    uint64_t ErrorInfo;
    if (getTargetParser().matchAndEmitInstruction(
            IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
            getTargetParser().isParsingMSInlineAsm()))
      return true;
  }
  return false;
}
2341
2342 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2343 bool MasmParser::parseCurlyBlockScope(
2344 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2345 // Identify curly brace marking block start/end.
2346 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2347 return false;
2348
2349 SMLoc StartLoc = Lexer.getLoc();
2350 Lex(); // Eat the brace.
2351 if (Lexer.is(AsmToken::EndOfStatement))
2352 Lex(); // Eat EndOfStatement following the brace.
2353
2354 // Erase the block start/end brace from the output asm string.
2355 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2356 StartLoc.getPointer());
2357 return true;
2358 }
2359
2360 /// parseCppHashLineFilenameComment as this:
2361 /// ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2362 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2363 Lex(); // Eat the hash token.
2364 // Lexer only ever emits HashDirective if it fully formed if it's
2365 // done the checking already so this is an internal error.
2366 assert(getTok().is(AsmToken::Integer) &&
2367 "Lexing Cpp line comment: Expected Integer");
2368 int64_t LineNumber = getTok().getIntVal();
2369 Lex();
2370 assert(getTok().is(AsmToken::String) &&
2371 "Lexing Cpp line comment: Expected String");
2372 StringRef Filename = getTok().getString();
2373 Lex();
2374
2375 // Get rid of the enclosing quotes.
2376 Filename = Filename.substr(1, Filename.size() - 2);
2377
2378 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2379 // and possibly DWARF file info.
2380 CppHashInfo.Loc = L;
2381 CppHashInfo.Filename = Filename;
2382 CppHashInfo.LineNumber = LineNumber;
2383 CppHashInfo.Buf = CurBuffer;
2384 if (FirstCppHashFilename.empty())
2385 FirstCppHashFilename = Filename;
2386 return false;
2387 }
2388
2389 /// will use the last parsed cpp hash line filename comment
2390 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2391 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2392 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2393 raw_ostream &OS = errs();
2394
2395 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2396 SMLoc DiagLoc = Diag.getLoc();
2397 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2398 unsigned CppHashBuf =
2399 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2400
2401 // Like SourceMgr::printMessage() we need to print the include stack if any
2402 // before printing the message.
2403 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2404 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2405 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2406 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2407 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2408 }
2409
2410 // If we have not parsed a cpp hash line filename comment or the source
2411 // manager changed or buffer changed (like in a nested include) then just
2412 // print the normal diagnostic using its Filename and LineNo.
2413 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2414 DiagBuf != CppHashBuf) {
2415 if (Parser->SavedDiagHandler)
2416 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2417 else
2418 Diag.print(nullptr, OS);
2419 return;
2420 }
2421
2422 // Use the CppHashFilename and calculate a line number based on the
2423 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2424 // for the diagnostic.
2425 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2426
2427 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2428 int CppHashLocLineNo =
2429 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2430 int LineNo =
2431 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2432
2433 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2434 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2435 Diag.getLineContents(), Diag.getRanges());
2436
2437 if (Parser->SavedDiagHandler)
2438 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2439 else
2440 NewDiag.print(nullptr, OS);
2441 }
2442
2443 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2444 // not accept '.'.
isMacroParameterChar(char C)2445 static bool isMacroParameterChar(char C) {
2446 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2447 }
2448
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2449 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2450 ArrayRef<MCAsmMacroParameter> Parameters,
2451 ArrayRef<MCAsmMacroArgument> A,
2452 const std::vector<std::string> &Locals, SMLoc L) {
2453 unsigned NParameters = Parameters.size();
2454 if (NParameters != A.size())
2455 return Error(L, "Wrong number of arguments");
2456 StringMap<std::string> LocalSymbols;
2457 std::string Name;
2458 Name.reserve(6);
2459 for (StringRef Local : Locals) {
2460 raw_string_ostream LocalName(Name);
2461 LocalName << "??"
2462 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2463 LocalSymbols.insert({Local, Name});
2464 Name.clear();
2465 }
2466
2467 std::optional<char> CurrentQuote;
2468 while (!Body.empty()) {
2469 // Scan for the next substitution.
2470 std::size_t End = Body.size(), Pos = 0;
2471 std::size_t IdentifierPos = End;
2472 for (; Pos != End; ++Pos) {
2473 // Find the next possible macro parameter, including preceding a '&'
2474 // inside quotes.
2475 if (Body[Pos] == '&')
2476 break;
2477 if (isMacroParameterChar(Body[Pos])) {
2478 if (!CurrentQuote)
2479 break;
2480 if (IdentifierPos == End)
2481 IdentifierPos = Pos;
2482 } else {
2483 IdentifierPos = End;
2484 }
2485
2486 // Track quotation status
2487 if (!CurrentQuote) {
2488 if (Body[Pos] == '\'' || Body[Pos] == '"')
2489 CurrentQuote = Body[Pos];
2490 } else if (Body[Pos] == CurrentQuote) {
2491 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2492 // Escaped quote, and quotes aren't identifier chars; skip
2493 ++Pos;
2494 continue;
2495 } else {
2496 CurrentQuote.reset();
2497 }
2498 }
2499 }
2500 if (IdentifierPos != End) {
2501 // We've recognized an identifier before an apostrophe inside quotes;
2502 // check once to see if we can expand it.
2503 Pos = IdentifierPos;
2504 IdentifierPos = End;
2505 }
2506
2507 // Add the prefix.
2508 OS << Body.slice(0, Pos);
2509
2510 // Check if we reached the end.
2511 if (Pos == End)
2512 break;
2513
2514 unsigned I = Pos;
2515 bool InitialAmpersand = (Body[I] == '&');
2516 if (InitialAmpersand) {
2517 ++I;
2518 ++Pos;
2519 }
2520 while (I < End && isMacroParameterChar(Body[I]))
2521 ++I;
2522
2523 const char *Begin = Body.data() + Pos;
2524 StringRef Argument(Begin, I - Pos);
2525 const std::string ArgumentLower = Argument.lower();
2526 unsigned Index = 0;
2527
2528 for (; Index < NParameters; ++Index)
2529 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2530 break;
2531
2532 if (Index == NParameters) {
2533 if (InitialAmpersand)
2534 OS << '&';
2535 auto it = LocalSymbols.find(ArgumentLower);
2536 if (it != LocalSymbols.end())
2537 OS << it->second;
2538 else
2539 OS << Argument;
2540 Pos = I;
2541 } else {
2542 for (const AsmToken &Token : A[Index]) {
2543 // In MASM, you can write '%expr'.
2544 // The prefix '%' evaluates the expression 'expr'
2545 // and uses the result as a string (e.g. replace %(1+2) with the
2546 // string "3").
2547 // Here, we identify the integer token which is the result of the
2548 // absolute expression evaluation and replace it with its string
2549 // representation.
2550 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2551 // Emit an integer value to the buffer.
2552 OS << Token.getIntVal();
2553 else
2554 OS << Token.getString();
2555 }
2556
2557 Pos += Argument.size();
2558 if (Pos < End && Body[Pos] == '&') {
2559 ++Pos;
2560 }
2561 }
2562 // Update the scan point.
2563 Body = Body.substr(Pos);
2564 }
2565
2566 return false;
2567 }
2568
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)2569 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
2570 MCAsmMacroArgument &MA,
2571 AsmToken::TokenKind EndTok) {
2572 if (MP && MP->Vararg) {
2573 if (Lexer.isNot(EndTok)) {
2574 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
2575 for (StringRef S : Str) {
2576 MA.emplace_back(AsmToken::String, S);
2577 }
2578 }
2579 return false;
2580 }
2581
2582 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
2583 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
2584 const char *StrChar = StrLoc.getPointer() + 1;
2585 const char *EndChar = EndLoc.getPointer() - 1;
2586 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
2587 /// Eat from '<' to '>'.
2588 Lex();
2589 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
2590 return false;
2591 }
2592
2593 unsigned ParenLevel = 0;
2594
2595 while (true) {
2596 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
2597 return TokError("unexpected token");
2598
2599 if (ParenLevel == 0 && Lexer.is(AsmToken::Comma))
2600 break;
2601
2602 // handleMacroEntry relies on not advancing the lexer here
2603 // to be able to fill in the remaining default parameter values
2604 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
2605 break;
2606
2607 // Adjust the current parentheses level.
2608 if (Lexer.is(AsmToken::LParen))
2609 ++ParenLevel;
2610 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
2611 --ParenLevel;
2612
2613 // Append the token to the current argument list.
2614 MA.push_back(getTok());
2615 Lex();
2616 }
2617
2618 if (ParenLevel != 0)
2619 return TokError("unbalanced parentheses in argument");
2620
2621 if (MA.empty() && MP) {
2622 if (MP->Required) {
2623 return TokError("missing value for required parameter '" + MP->Name +
2624 "'");
2625 } else {
2626 MA = MP->Value;
2627 }
2628 }
2629 return false;
2630 }
2631
2632 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)2633 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
2634 MCAsmMacroArguments &A,
2635 AsmToken::TokenKind EndTok) {
2636 const unsigned NParameters = M ? M->Parameters.size() : 0;
2637 bool NamedParametersFound = false;
2638 SmallVector<SMLoc, 4> FALocs;
2639
2640 A.resize(NParameters);
2641 FALocs.resize(NParameters);
2642
2643 // Parse two kinds of macro invocations:
2644 // - macros defined without any parameters accept an arbitrary number of them
2645 // - macros defined with parameters accept at most that many of them
2646 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
2647 ++Parameter) {
2648 SMLoc IDLoc = Lexer.getLoc();
2649 MCAsmMacroParameter FA;
2650
2651 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
2652 if (parseIdentifier(FA.Name))
2653 return Error(IDLoc, "invalid argument identifier for formal argument");
2654
2655 if (Lexer.isNot(AsmToken::Equal))
2656 return TokError("expected '=' after formal parameter identifier");
2657
2658 Lex();
2659
2660 NamedParametersFound = true;
2661 }
2662
2663 if (NamedParametersFound && FA.Name.empty())
2664 return Error(IDLoc, "cannot mix positional and keyword arguments");
2665
2666 unsigned PI = Parameter;
2667 if (!FA.Name.empty()) {
2668 assert(M && "expected macro to be defined");
2669 unsigned FAI = 0;
2670 for (FAI = 0; FAI < NParameters; ++FAI)
2671 if (M->Parameters[FAI].Name == FA.Name)
2672 break;
2673
2674 if (FAI >= NParameters) {
2675 return Error(IDLoc, "parameter named '" + FA.Name +
2676 "' does not exist for macro '" + M->Name + "'");
2677 }
2678 PI = FAI;
2679 }
2680 const MCAsmMacroParameter *MP = nullptr;
2681 if (M && PI < NParameters)
2682 MP = &M->Parameters[PI];
2683
2684 SMLoc StrLoc = Lexer.getLoc();
2685 SMLoc EndLoc;
2686 if (Lexer.is(AsmToken::Percent)) {
2687 const MCExpr *AbsoluteExp;
2688 int64_t Value;
2689 /// Eat '%'.
2690 Lex();
2691 if (parseExpression(AbsoluteExp, EndLoc))
2692 return false;
2693 if (!AbsoluteExp->evaluateAsAbsolute(Value,
2694 getStreamer().getAssemblerPtr()))
2695 return Error(StrLoc, "expected absolute expression");
2696 const char *StrChar = StrLoc.getPointer();
2697 const char *EndChar = EndLoc.getPointer();
2698 AsmToken newToken(AsmToken::Integer,
2699 StringRef(StrChar, EndChar - StrChar), Value);
2700 FA.Value.push_back(newToken);
2701 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
2702 if (M)
2703 return addErrorSuffix(" in '" + M->Name + "' macro");
2704 else
2705 return true;
2706 }
2707
2708 if (!FA.Value.empty()) {
2709 if (A.size() <= PI)
2710 A.resize(PI + 1);
2711 A[PI] = FA.Value;
2712
2713 if (FALocs.size() <= PI)
2714 FALocs.resize(PI + 1);
2715
2716 FALocs[PI] = Lexer.getLoc();
2717 }
2718
2719 // At the end of the statement, fill in remaining arguments that have
2720 // default values. If there aren't any, then the next argument is
2721 // required but missing
2722 if (Lexer.is(EndTok)) {
2723 bool Failure = false;
2724 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
2725 if (A[FAI].empty()) {
2726 if (M->Parameters[FAI].Required) {
2727 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
2728 "missing value for required parameter "
2729 "'" +
2730 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
2731 Failure = true;
2732 }
2733
2734 if (!M->Parameters[FAI].Value.empty())
2735 A[FAI] = M->Parameters[FAI].Value;
2736 }
2737 }
2738 return Failure;
2739 }
2740
2741 if (Lexer.is(AsmToken::Comma))
2742 Lex();
2743 }
2744
2745 return TokError("too many positional arguments");
2746 }
2747
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)2748 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
2749 AsmToken::TokenKind ArgumentEndTok) {
2750 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
2751 // eliminate this, although we should protect against infinite loops.
2752 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
2753 if (ActiveMacros.size() == MaxNestingDepth) {
2754 std::ostringstream MaxNestingDepthError;
2755 MaxNestingDepthError << "macros cannot be nested more than "
2756 << MaxNestingDepth << " levels deep."
2757 << " Use -asm-macro-max-nesting-depth to increase "
2758 "this limit.";
2759 return TokError(MaxNestingDepthError.str());
2760 }
2761
2762 MCAsmMacroArguments A;
2763 if (parseMacroArguments(M, A, ArgumentEndTok) || parseToken(ArgumentEndTok))
2764 return true;
2765
2766 // Macro instantiation is lexical, unfortunately. We construct a new buffer
2767 // to hold the macro body with substitutions.
2768 SmallString<256> Buf;
2769 StringRef Body = M->Body;
2770 raw_svector_ostream OS(Buf);
2771
2772 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
2773 return true;
2774
2775 // We include the endm in the buffer as our cue to exit the macro
2776 // instantiation.
2777 OS << "endm\n";
2778
2779 std::unique_ptr<MemoryBuffer> Instantiation =
2780 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
2781
2782 // Create the macro instantiation object and add to the current macro
2783 // instantiation stack.
2784 MacroInstantiation *MI = new MacroInstantiation{
2785 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
2786 ActiveMacros.push_back(MI);
2787
2788 ++NumOfMacroInstantiations;
2789
2790 // Jump to the macro instantiation and prime the lexer.
2791 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
2792 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
2793 EndStatementAtEOFStack.push_back(true);
2794 Lex();
2795
2796 return false;
2797 }
2798
handleMacroExit()2799 void MasmParser::handleMacroExit() {
2800 // Jump to the token we should return to, and consume it.
2801 EndStatementAtEOFStack.pop_back();
2802 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
2803 EndStatementAtEOFStack.back());
2804 Lex();
2805
2806 // Pop the instantiation entry.
2807 delete ActiveMacros.back();
2808 ActiveMacros.pop_back();
2809 }
2810
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)2811 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
2812 if (!M->IsFunction)
2813 return Error(NameLoc, "cannot invoke macro procedure as function");
2814
2815 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
2816 "' requires arguments in parentheses") ||
2817 handleMacroEntry(M, NameLoc, AsmToken::RParen))
2818 return true;
2819
2820 // Parse all statements in the macro, retrieving the exit value when it ends.
2821 std::string ExitValue;
2822 SmallVector<AsmRewrite, 4> AsmStrRewrites;
2823 while (Lexer.isNot(AsmToken::Eof)) {
2824 ParseStatementInfo Info(&AsmStrRewrites);
2825 bool HasError = parseStatement(Info, nullptr);
2826
2827 if (!HasError && Info.ExitValue) {
2828 ExitValue = std::move(*Info.ExitValue);
2829 break;
2830 }
2831
2832 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
2833 // for printing ErrMsg via Lex() only if no (presumably better) parser error
2834 // exists.
2835 if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error))
2836 Lex();
2837
2838 // parseStatement returned true so may need to emit an error.
2839 printPendingErrors();
2840
2841 // Skipping to the next line if needed.
2842 if (HasError && !getLexer().justConsumedEOL())
2843 eatToEndOfStatement();
2844 }
2845
2846 // Exit values may require lexing, unfortunately. We construct a new buffer to
2847 // hold the exit value.
2848 std::unique_ptr<MemoryBuffer> MacroValue =
2849 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
2850
2851 // Jump from this location to the instantiated exit value, and prime the
2852 // lexer.
2853 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
2854 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
2855 /*EndStatementAtEOF=*/false);
2856 EndStatementAtEOFStack.push_back(false);
2857 Lex();
2858
2859 return false;
2860 }
2861
2862 /// parseIdentifier:
2863 /// ::= identifier
2864 /// ::= string
parseIdentifier(StringRef & Res,IdentifierPositionKind Position)2865 bool MasmParser::parseIdentifier(StringRef &Res,
2866 IdentifierPositionKind Position) {
2867 // The assembler has relaxed rules for accepting identifiers, in particular we
2868 // allow things like '.globl $foo' and '.def @feat.00', which would normally
2869 // be separate tokens. At this level, we have already lexed so we cannot
2870 // (currently) handle this as a context dependent token, instead we detect
2871 // adjacent tokens and return the combined identifier.
2872 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
2873 SMLoc PrefixLoc = getLexer().getLoc();
2874
2875 // Consume the prefix character, and check for a following identifier.
2876
2877 AsmToken nextTok = peekTok(false);
2878
2879 if (nextTok.isNot(AsmToken::Identifier))
2880 return true;
2881
2882 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
2883 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
2884 return true;
2885
2886 // eat $ or @
2887 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
2888 // Construct the joined identifier and consume the token.
2889 Res =
2890 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
2891 Lex(); // Parser Lex to maintain invariants.
2892 return false;
2893 }
2894
2895 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
2896 return true;
2897
2898 Res = getTok().getIdentifier();
2899
2900 // Consume the identifier token - but if parsing certain directives, avoid
2901 // lexical expansion of the next token.
2902 ExpandKind ExpandNextToken = ExpandMacros;
2903 if (Position == StartOfStatement &&
2904 StringSwitch<bool>(Res)
2905 .CaseLower("echo", true)
2906 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
2907 .Default(false)) {
2908 ExpandNextToken = DoNotExpandMacros;
2909 }
2910 Lex(ExpandNextToken);
2911
2912 return false;
2913 }
2914
2915 /// parseDirectiveEquate:
2916 /// ::= name "=" expression
2917 /// | name "equ" expression (not redefinable)
2918 /// | name "equ" text-list
2919 /// | name "textequ" text-list (redefinability unspecified)
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind,SMLoc NameLoc)2920 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
2921 DirectiveKind DirKind, SMLoc NameLoc) {
2922 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
2923 if (BuiltinIt != BuiltinSymbolMap.end())
2924 return Error(NameLoc, "cannot redefine a built-in symbol");
2925
2926 Variable &Var = Variables[Name.lower()];
2927 if (Var.Name.empty()) {
2928 Var.Name = Name;
2929 }
2930
2931 SMLoc StartLoc = Lexer.getLoc();
2932 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
2933 // "equ" and "textequ" both allow text expressions.
2934 std::string Value;
2935 std::string TextItem;
2936 if (!parseTextItem(TextItem)) {
2937 Value += TextItem;
2938
2939 // Accept a text-list, not just one text-item.
2940 auto parseItem = [&]() -> bool {
2941 if (parseTextItem(TextItem))
2942 return TokError("expected text item");
2943 Value += TextItem;
2944 return false;
2945 };
2946 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
2947 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2948
2949 if (!Var.IsText || Var.TextValue != Value) {
2950 switch (Var.Redefinable) {
2951 case Variable::NOT_REDEFINABLE:
2952 return Error(getTok().getLoc(), "invalid variable redefinition");
2953 case Variable::WARN_ON_REDEFINITION:
2954 if (Warning(NameLoc, "redefining '" + Name +
2955 "', already defined on the command line")) {
2956 return true;
2957 }
2958 break;
2959 default:
2960 break;
2961 }
2962 }
2963 Var.IsText = true;
2964 Var.TextValue = Value;
2965 Var.Redefinable = Variable::REDEFINABLE;
2966
2967 return false;
2968 }
2969 }
2970 if (DirKind == DK_TEXTEQU)
2971 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
2972
2973 // Parse as expression assignment.
2974 const MCExpr *Expr;
2975 SMLoc EndLoc;
2976 if (parseExpression(Expr, EndLoc))
2977 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
2978 StringRef ExprAsString = StringRef(
2979 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
2980
2981 int64_t Value;
2982 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
2983 if (DirKind == DK_ASSIGN)
2984 return Error(
2985 StartLoc,
2986 "expected absolute expression; not all symbols have known values",
2987 {StartLoc, EndLoc});
2988
2989 // Not an absolute expression; define as a text replacement.
2990 if (!Var.IsText || Var.TextValue != ExprAsString) {
2991 switch (Var.Redefinable) {
2992 case Variable::NOT_REDEFINABLE:
2993 return Error(getTok().getLoc(), "invalid variable redefinition");
2994 case Variable::WARN_ON_REDEFINITION:
2995 if (Warning(NameLoc, "redefining '" + Name +
2996 "', already defined on the command line")) {
2997 return true;
2998 }
2999 break;
3000 default:
3001 break;
3002 }
3003 }
3004
3005 Var.IsText = true;
3006 Var.TextValue = ExprAsString.str();
3007 Var.Redefinable = Variable::REDEFINABLE;
3008
3009 return false;
3010 }
3011
3012 auto *Sym = getContext().parseSymbol(Var.Name);
3013 const MCConstantExpr *PrevValue =
3014 Sym->isVariable()
3015 ? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue())
3016 : nullptr;
3017 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3018 switch (Var.Redefinable) {
3019 case Variable::NOT_REDEFINABLE:
3020 return Error(getTok().getLoc(), "invalid variable redefinition");
3021 case Variable::WARN_ON_REDEFINITION:
3022 if (Warning(NameLoc, "redefining '" + Name +
3023 "', already defined on the command line")) {
3024 return true;
3025 }
3026 break;
3027 default:
3028 break;
3029 }
3030 }
3031
3032 Var.IsText = false;
3033 Var.TextValue.clear();
3034 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3035 : Variable::NOT_REDEFINABLE;
3036
3037 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3038 Sym->setVariableValue(Expr);
3039 Sym->setExternal(false);
3040
3041 return false;
3042 }
3043
parseEscapedString(std::string & Data)3044 bool MasmParser::parseEscapedString(std::string &Data) {
3045 if (check(getTok().isNot(AsmToken::String), "expected string"))
3046 return true;
3047
3048 Data = "";
3049 char Quote = getTok().getString().front();
3050 StringRef Str = getTok().getStringContents();
3051 Data.reserve(Str.size());
3052 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3053 Data.push_back(Str[i]);
3054 if (Str[i] == Quote) {
3055 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3056 // If we're escaping the string's trailing delimiter, we're definitely
3057 // missing a quotation mark.
3058 if (i + 1 == Str.size())
3059 return Error(getTok().getLoc(), "missing quotation mark in string");
3060 if (Str[i + 1] == Quote)
3061 ++i;
3062 }
3063 }
3064
3065 Lex();
3066 return false;
3067 }
3068
parseAngleBracketString(std::string & Data)3069 bool MasmParser::parseAngleBracketString(std::string &Data) {
3070 SMLoc EndLoc, StartLoc = getTok().getLoc();
3071 if (isAngleBracketString(StartLoc, EndLoc)) {
3072 const char *StartChar = StartLoc.getPointer() + 1;
3073 const char *EndChar = EndLoc.getPointer() - 1;
3074 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3075 // Eat from '<' to '>'.
3076 Lex();
3077
3078 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3079 return false;
3080 }
3081 return true;
3082 }
3083
3084 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3085 bool MasmParser::parseTextItem(std::string &Data) {
3086 switch (getTok().getKind()) {
3087 default:
3088 return true;
3089 case AsmToken::Percent: {
3090 int64_t Res;
3091 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3092 return true;
3093 Data = std::to_string(Res);
3094 return false;
3095 }
3096 case AsmToken::Less:
3097 case AsmToken::LessEqual:
3098 case AsmToken::LessLess:
3099 case AsmToken::LessGreater:
3100 return parseAngleBracketString(Data);
3101 case AsmToken::Identifier: {
3102 // This must be a text macro; we need to expand it accordingly.
3103 StringRef ID;
3104 SMLoc StartLoc = getTok().getLoc();
3105 if (parseIdentifier(ID))
3106 return true;
3107 Data = ID.str();
3108
3109 bool Expanded = false;
3110 while (true) {
3111 // Try to resolve as a built-in text macro
3112 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3113 if (BuiltinIt != BuiltinSymbolMap.end()) {
3114 std::optional<std::string> BuiltinText =
3115 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3116 if (!BuiltinText) {
3117 // Not a text macro; break without substituting
3118 break;
3119 }
3120 Data = std::move(*BuiltinText);
3121 ID = StringRef(Data);
3122 Expanded = true;
3123 continue;
3124 }
3125
3126 // Try to resolve as a built-in macro function
3127 auto BuiltinFuncIt = BuiltinFunctionMap.find(ID.lower());
3128 if (BuiltinFuncIt != BuiltinFunctionMap.end()) {
3129 Data.clear();
3130 if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), ID, Data)) {
3131 return true;
3132 }
3133 ID = StringRef(Data);
3134 Expanded = true;
3135 continue;
3136 }
3137
3138 // Try to resolve as a variable text macro
3139 auto VarIt = Variables.find(ID.lower());
3140 if (VarIt != Variables.end()) {
3141 const Variable &Var = VarIt->getValue();
3142 if (!Var.IsText) {
3143 // Not a text macro; break without substituting
3144 break;
3145 }
3146 Data = Var.TextValue;
3147 ID = StringRef(Data);
3148 Expanded = true;
3149 continue;
3150 }
3151
3152 break;
3153 }
3154
3155 if (!Expanded) {
3156 // Not a text macro; not usable in TextItem context. Since we haven't used
3157 // the token, put it back for better error recovery.
3158 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3159 return true;
3160 }
3161 return false;
3162 }
3163 }
3164 llvm_unreachable("unhandled token kind");
3165 }
3166
3167 /// parseDirectiveAscii:
3168 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3169 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3170 auto parseOp = [&]() -> bool {
3171 std::string Data;
3172 if (checkForValidSection() || parseEscapedString(Data))
3173 return true;
3174 getStreamer().emitBytes(Data);
3175 if (ZeroTerminated)
3176 getStreamer().emitBytes(StringRef("\0", 1));
3177 return false;
3178 };
3179
3180 if (parseMany(parseOp))
3181 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3182 return false;
3183 }
3184
emitIntValue(const MCExpr * Value,unsigned Size)3185 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3186 // Special case constant expressions to match code generator.
3187 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3188 assert(Size <= 8 && "Invalid size");
3189 int64_t IntValue = MCE->getValue();
3190 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3191 return Error(MCE->getLoc(), "out of range literal value");
3192 getStreamer().emitIntValue(IntValue, Size);
3193 } else {
3194 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3195 if (MSE && MSE->getSymbol().getName() == "?") {
3196 // ? initializer; treat as 0.
3197 getStreamer().emitIntValue(0, Size);
3198 } else {
3199 getStreamer().emitValue(Value, Size, Value->getLoc());
3200 }
3201 }
3202 return false;
3203 }
3204
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3205 bool MasmParser::parseScalarInitializer(unsigned Size,
3206 SmallVectorImpl<const MCExpr *> &Values,
3207 unsigned StringPadLength) {
3208 if (Size == 1 && getTok().is(AsmToken::String)) {
3209 std::string Value;
3210 if (parseEscapedString(Value))
3211 return true;
3212 // Treat each character as an initializer.
3213 for (const unsigned char CharVal : Value)
3214 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3215
3216 // Pad the string with spaces to the specified length.
3217 for (size_t i = Value.size(); i < StringPadLength; ++i)
3218 Values.push_back(MCConstantExpr::create(' ', getContext()));
3219 } else {
3220 const MCExpr *Value;
3221 if (parseExpression(Value))
3222 return true;
3223 if (getTok().is(AsmToken::Identifier) &&
3224 getTok().getString().equals_insensitive("dup")) {
3225 Lex(); // Eat 'dup'.
3226 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3227 if (!MCE)
3228 return Error(Value->getLoc(),
3229 "cannot repeat value a non-constant number of times");
3230 const int64_t Repetitions = MCE->getValue();
3231 if (Repetitions < 0)
3232 return Error(Value->getLoc(),
3233 "cannot repeat value a negative number of times");
3234
3235 SmallVector<const MCExpr *, 1> DuplicatedValues;
3236 if (parseToken(AsmToken::LParen,
3237 "parentheses required for 'dup' contents") ||
3238 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3239 return true;
3240
3241 for (int i = 0; i < Repetitions; ++i)
3242 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3243 } else {
3244 Values.push_back(Value);
3245 }
3246 }
3247 return false;
3248 }
3249
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3250 bool MasmParser::parseScalarInstList(unsigned Size,
3251 SmallVectorImpl<const MCExpr *> &Values,
3252 const AsmToken::TokenKind EndToken) {
3253 while (getTok().isNot(EndToken) &&
3254 (EndToken != AsmToken::Greater ||
3255 getTok().isNot(AsmToken::GreaterGreater))) {
3256 parseScalarInitializer(Size, Values);
3257
3258 // If we see a comma, continue, and allow line continuation.
3259 if (!parseOptionalToken(AsmToken::Comma))
3260 break;
3261 parseOptionalToken(AsmToken::EndOfStatement);
3262 }
3263 return false;
3264 }
3265
emitIntegralValues(unsigned Size,unsigned * Count)3266 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3267 SmallVector<const MCExpr *, 1> Values;
3268 if (checkForValidSection() || parseScalarInstList(Size, Values))
3269 return true;
3270
3271 for (const auto *Value : Values) {
3272 emitIntValue(Value, Size);
3273 }
3274 if (Count)
3275 *Count = Values.size();
3276 return false;
3277 }
3278
3279 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3280 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3281 StructInfo &Struct = StructInProgress.back();
3282 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3283 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3284
3285 Field.Type = Size;
3286
3287 if (parseScalarInstList(Size, IntInfo.Values))
3288 return true;
3289
3290 Field.SizeOf = Field.Type * IntInfo.Values.size();
3291 Field.LengthOf = IntInfo.Values.size();
3292 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3293 if (!Struct.IsUnion) {
3294 Struct.NextOffset = FieldEnd;
3295 }
3296 Struct.Size = std::max(Struct.Size, FieldEnd);
3297 return false;
3298 }
3299
3300 /// parseDirectiveValue
3301 /// ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3302 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3303 if (StructInProgress.empty()) {
3304 // Initialize data value.
3305 if (emitIntegralValues(Size))
3306 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3307 } else if (addIntegralField("", Size)) {
3308 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3309 }
3310
3311 return false;
3312 }
3313
3314 /// parseDirectiveNamedValue
3315 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3316 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3317 StringRef Name, SMLoc NameLoc) {
3318 if (StructInProgress.empty()) {
3319 // Initialize named data value.
3320 MCSymbol *Sym = getContext().parseSymbol(Name);
3321 getStreamer().emitLabel(Sym);
3322 unsigned Count;
3323 if (emitIntegralValues(Size, &Count))
3324 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3325
3326 AsmTypeInfo Type;
3327 Type.Name = TypeName;
3328 Type.Size = Size * Count;
3329 Type.ElementSize = Size;
3330 Type.Length = Count;
3331 KnownType[Name.lower()] = Type;
3332 } else if (addIntegralField(Name, Size)) {
3333 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3334 }
3335
3336 return false;
3337 }
3338
parseRealValue(const fltSemantics & Semantics,APInt & Res)3339 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
3340 // We don't truly support arithmetic on floating point expressions, so we
3341 // have to manually parse unary prefixes.
3342 bool IsNeg = false;
3343 SMLoc SignLoc;
3344 if (getLexer().is(AsmToken::Minus)) {
3345 SignLoc = getLexer().getLoc();
3346 Lexer.Lex();
3347 IsNeg = true;
3348 } else if (getLexer().is(AsmToken::Plus)) {
3349 SignLoc = getLexer().getLoc();
3350 Lexer.Lex();
3351 }
3352
3353 if (Lexer.is(AsmToken::Error))
3354 return TokError(Lexer.getErr());
3355 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
3356 Lexer.isNot(AsmToken::Identifier))
3357 return TokError("unexpected token in directive");
3358
3359 // Convert to an APFloat.
3360 APFloat Value(Semantics);
3361 StringRef IDVal = getTok().getString();
3362 if (getLexer().is(AsmToken::Identifier)) {
3363 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
3364 Value = APFloat::getInf(Semantics);
3365 else if (IDVal.equals_insensitive("nan"))
3366 Value = APFloat::getNaN(Semantics, false, ~0);
3367 else if (IDVal.equals_insensitive("?"))
3368 Value = APFloat::getZero(Semantics);
3369 else
3370 return TokError("invalid floating point literal");
3371 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
3372 // MASM hexadecimal floating-point literal; no APFloat conversion needed.
3373 // To match ML64.exe, ignore the initial sign.
3374 unsigned SizeInBits = Value.getSizeInBits(Semantics);
3375 if (SizeInBits != (IDVal.size() << 2))
3376 return TokError("invalid floating point literal");
3377
3378 // Consume the numeric token.
3379 Lex();
3380
3381 Res = APInt(SizeInBits, IDVal, 16);
3382 if (SignLoc.isValid())
3383 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
3384 return false;
3385 } else if (errorToBool(
3386 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
3387 .takeError())) {
3388 return TokError("invalid floating point literal");
3389 }
3390 if (IsNeg)
3391 Value.changeSign();
3392
3393 // Consume the numeric token.
3394 Lex();
3395
3396 Res = Value.bitcastToAPInt();
3397
3398 return false;
3399 }
3400
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3401 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3402 SmallVectorImpl<APInt> &ValuesAsInt,
3403 const AsmToken::TokenKind EndToken) {
3404 while (getTok().isNot(EndToken) ||
3405 (EndToken == AsmToken::Greater &&
3406 getTok().isNot(AsmToken::GreaterGreater))) {
3407 const AsmToken NextTok = peekTok();
3408 if (NextTok.is(AsmToken::Identifier) &&
3409 NextTok.getString().equals_insensitive("dup")) {
3410 const MCExpr *Value;
3411 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3412 return true;
3413 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3414 if (!MCE)
3415 return Error(Value->getLoc(),
3416 "cannot repeat value a non-constant number of times");
3417 const int64_t Repetitions = MCE->getValue();
3418 if (Repetitions < 0)
3419 return Error(Value->getLoc(),
3420 "cannot repeat value a negative number of times");
3421
3422 SmallVector<APInt, 1> DuplicatedValues;
3423 if (parseToken(AsmToken::LParen,
3424 "parentheses required for 'dup' contents") ||
3425 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3426 return true;
3427
3428 for (int i = 0; i < Repetitions; ++i)
3429 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3430 } else {
3431 APInt AsInt;
3432 if (parseRealValue(Semantics, AsInt))
3433 return true;
3434 ValuesAsInt.push_back(AsInt);
3435 }
3436
3437 // Continue if we see a comma. (Also, allow line continuation.)
3438 if (!parseOptionalToken(AsmToken::Comma))
3439 break;
3440 parseOptionalToken(AsmToken::EndOfStatement);
3441 }
3442
3443 return false;
3444 }
3445
3446 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3447 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3448 unsigned *Count) {
3449 if (checkForValidSection())
3450 return true;
3451
3452 SmallVector<APInt, 1> ValuesAsInt;
3453 if (parseRealInstList(Semantics, ValuesAsInt))
3454 return true;
3455
3456 for (const APInt &AsInt : ValuesAsInt) {
3457 getStreamer().emitIntValue(AsInt);
3458 }
3459 if (Count)
3460 *Count = ValuesAsInt.size();
3461 return false;
3462 }
3463
3464 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3465 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3466 size_t Size) {
3467 StructInfo &Struct = StructInProgress.back();
3468 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3469 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3470
3471 Field.SizeOf = 0;
3472
3473 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3474 return true;
3475
3476 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3477 Field.LengthOf = RealInfo.AsIntValues.size();
3478 Field.SizeOf = Field.Type * Field.LengthOf;
3479
3480 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3481 if (!Struct.IsUnion) {
3482 Struct.NextOffset = FieldEnd;
3483 }
3484 Struct.Size = std::max(Struct.Size, FieldEnd);
3485 return false;
3486 }
3487
3488 /// parseDirectiveRealValue
3489 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3490 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3491 const fltSemantics &Semantics,
3492 size_t Size) {
3493 if (StructInProgress.empty()) {
3494 // Initialize data value.
3495 if (emitRealValues(Semantics))
3496 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3497 } else if (addRealField("", Semantics, Size)) {
3498 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3499 }
3500 return false;
3501 }
3502
3503 /// parseDirectiveNamedRealValue
3504 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3505 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3506 const fltSemantics &Semantics,
3507 unsigned Size, StringRef Name,
3508 SMLoc NameLoc) {
3509 if (StructInProgress.empty()) {
3510 // Initialize named data value.
3511 MCSymbol *Sym = getContext().parseSymbol(Name);
3512 getStreamer().emitLabel(Sym);
3513 unsigned Count;
3514 if (emitRealValues(Semantics, &Count))
3515 return addErrorSuffix(" in '" + TypeName + "' directive");
3516
3517 AsmTypeInfo Type;
3518 Type.Name = TypeName;
3519 Type.Size = Size * Count;
3520 Type.ElementSize = Size;
3521 Type.Length = Count;
3522 KnownType[Name.lower()] = Type;
3523 } else if (addRealField(Name, Semantics, Size)) {
3524 return addErrorSuffix(" in '" + TypeName + "' directive");
3525 }
3526 return false;
3527 }
3528
parseOptionalAngleBracketOpen()3529 bool MasmParser::parseOptionalAngleBracketOpen() {
3530 const AsmToken Tok = getTok();
3531 if (parseOptionalToken(AsmToken::LessLess)) {
3532 AngleBracketDepth++;
3533 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
3534 return true;
3535 } else if (parseOptionalToken(AsmToken::LessGreater)) {
3536 AngleBracketDepth++;
3537 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3538 return true;
3539 } else if (parseOptionalToken(AsmToken::Less)) {
3540 AngleBracketDepth++;
3541 return true;
3542 }
3543
3544 return false;
3545 }
3546
parseAngleBracketClose(const Twine & Msg)3547 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
3548 const AsmToken Tok = getTok();
3549 if (parseOptionalToken(AsmToken::GreaterGreater)) {
3550 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
3551 } else if (parseToken(AsmToken::Greater, Msg)) {
3552 return true;
3553 }
3554 AngleBracketDepth--;
3555 return false;
3556 }
3557
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)3558 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3559 const IntFieldInfo &Contents,
3560 FieldInitializer &Initializer) {
3561 SMLoc Loc = getTok().getLoc();
3562
3563 SmallVector<const MCExpr *, 1> Values;
3564 if (parseOptionalToken(AsmToken::LCurly)) {
3565 if (Field.LengthOf == 1 && Field.Type > 1)
3566 return Error(Loc, "Cannot initialize scalar field with array value");
3567 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
3568 parseToken(AsmToken::RCurly))
3569 return true;
3570 } else if (parseOptionalAngleBracketOpen()) {
3571 if (Field.LengthOf == 1 && Field.Type > 1)
3572 return Error(Loc, "Cannot initialize scalar field with array value");
3573 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
3574 parseAngleBracketClose())
3575 return true;
3576 } else if (Field.LengthOf > 1 && Field.Type > 1) {
3577 return Error(Loc, "Cannot initialize array field with scalar value");
3578 } else if (parseScalarInitializer(Field.Type, Values,
3579 /*StringPadLength=*/Field.LengthOf)) {
3580 return true;
3581 }
3582
3583 if (Values.size() > Field.LengthOf) {
3584 return Error(Loc, "Initializer too long for field; expected at most " +
3585 std::to_string(Field.LengthOf) + " elements, got " +
3586 std::to_string(Values.size()));
3587 }
3588 // Default-initialize all remaining values.
3589 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
3590
3591 Initializer = FieldInitializer(std::move(Values));
3592 return false;
3593 }
3594
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)3595 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3596 const RealFieldInfo &Contents,
3597 FieldInitializer &Initializer) {
3598 const fltSemantics *Semantics;
3599 switch (Field.Type) {
3600 case 4:
3601 Semantics = &APFloat::IEEEsingle();
3602 break;
3603 case 8:
3604 Semantics = &APFloat::IEEEdouble();
3605 break;
3606 case 10:
3607 Semantics = &APFloat::x87DoubleExtended();
3608 break;
3609 default:
3610 llvm_unreachable("unknown real field type");
3611 }
3612
3613 SMLoc Loc = getTok().getLoc();
3614
3615 SmallVector<APInt, 1> AsIntValues;
3616 if (parseOptionalToken(AsmToken::LCurly)) {
3617 if (Field.LengthOf == 1)
3618 return Error(Loc, "Cannot initialize scalar field with array value");
3619 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
3620 parseToken(AsmToken::RCurly))
3621 return true;
3622 } else if (parseOptionalAngleBracketOpen()) {
3623 if (Field.LengthOf == 1)
3624 return Error(Loc, "Cannot initialize scalar field with array value");
3625 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
3626 parseAngleBracketClose())
3627 return true;
3628 } else if (Field.LengthOf > 1) {
3629 return Error(Loc, "Cannot initialize array field with scalar value");
3630 } else {
3631 AsIntValues.emplace_back();
3632 if (parseRealValue(*Semantics, AsIntValues.back()))
3633 return true;
3634 }
3635
3636 if (AsIntValues.size() > Field.LengthOf) {
3637 return Error(Loc, "Initializer too long for field; expected at most " +
3638 std::to_string(Field.LengthOf) + " elements, got " +
3639 std::to_string(AsIntValues.size()));
3640 }
3641 // Default-initialize all remaining values.
3642 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
3643 Contents.AsIntValues.end());
3644
3645 Initializer = FieldInitializer(std::move(AsIntValues));
3646 return false;
3647 }
3648
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)3649 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3650 const StructFieldInfo &Contents,
3651 FieldInitializer &Initializer) {
3652 SMLoc Loc = getTok().getLoc();
3653
3654 std::vector<StructInitializer> Initializers;
3655 if (Field.LengthOf > 1) {
3656 if (parseOptionalToken(AsmToken::LCurly)) {
3657 if (parseStructInstList(Contents.Structure, Initializers,
3658 AsmToken::RCurly) ||
3659 parseToken(AsmToken::RCurly))
3660 return true;
3661 } else if (parseOptionalAngleBracketOpen()) {
3662 if (parseStructInstList(Contents.Structure, Initializers,
3663 AsmToken::Greater) ||
3664 parseAngleBracketClose())
3665 return true;
3666 } else {
3667 return Error(Loc, "Cannot initialize array field with scalar value");
3668 }
3669 } else {
3670 Initializers.emplace_back();
3671 if (parseStructInitializer(Contents.Structure, Initializers.back()))
3672 return true;
3673 }
3674
3675 if (Initializers.size() > Field.LengthOf) {
3676 return Error(Loc, "Initializer too long for field; expected at most " +
3677 std::to_string(Field.LengthOf) + " elements, got " +
3678 std::to_string(Initializers.size()));
3679 }
3680 // Default-initialize all remaining values.
3681 llvm::append_range(Initializers, llvm::drop_begin(Contents.Initializers,
3682 Initializers.size()));
3683
3684 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
3685 return false;
3686 }
3687
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)3688 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
3689 FieldInitializer &Initializer) {
3690 switch (Field.Contents.FT) {
3691 case FT_INTEGRAL:
3692 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
3693 case FT_REAL:
3694 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
3695 case FT_STRUCT:
3696 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
3697 }
3698 llvm_unreachable("Unhandled FieldType enum");
3699 }
3700
parseStructInitializer(const StructInfo & Structure,StructInitializer & Initializer)3701 bool MasmParser::parseStructInitializer(const StructInfo &Structure,
3702 StructInitializer &Initializer) {
3703 const AsmToken FirstToken = getTok();
3704
3705 std::optional<AsmToken::TokenKind> EndToken;
3706 if (parseOptionalToken(AsmToken::LCurly)) {
3707 EndToken = AsmToken::RCurly;
3708 } else if (parseOptionalAngleBracketOpen()) {
3709 EndToken = AsmToken::Greater;
3710 AngleBracketDepth++;
3711 } else if (FirstToken.is(AsmToken::Identifier) &&
3712 FirstToken.getString() == "?") {
3713 // ? initializer; leave EndToken uninitialized to treat as empty.
3714 if (parseToken(AsmToken::Identifier))
3715 return true;
3716 } else {
3717 return Error(FirstToken.getLoc(), "Expected struct initializer");
3718 }
3719
3720 auto &FieldInitializers = Initializer.FieldInitializers;
3721 size_t FieldIndex = 0;
3722 if (EndToken) {
3723 // Initialize all fields with given initializers.
3724 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
3725 const FieldInfo &Field = Structure.Fields[FieldIndex++];
3726 if (parseOptionalToken(AsmToken::Comma)) {
3727 // Empty initializer; use the default and continue. (Also, allow line
3728 // continuation.)
3729 FieldInitializers.push_back(Field.Contents);
3730 parseOptionalToken(AsmToken::EndOfStatement);
3731 continue;
3732 }
3733 FieldInitializers.emplace_back(Field.Contents.FT);
3734 if (parseFieldInitializer(Field, FieldInitializers.back()))
3735 return true;
3736
3737 // Continue if we see a comma. (Also, allow line continuation.)
3738 SMLoc CommaLoc = getTok().getLoc();
3739 if (!parseOptionalToken(AsmToken::Comma))
3740 break;
3741 if (FieldIndex == Structure.Fields.size())
3742 return Error(CommaLoc, "'" + Structure.Name +
3743 "' initializer initializes too many fields");
3744 parseOptionalToken(AsmToken::EndOfStatement);
3745 }
3746 }
3747 // Default-initialize all remaining fields.
3748 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
3749 FieldInitializers.push_back(Field.Contents);
3750
3751 if (EndToken) {
3752 if (*EndToken == AsmToken::Greater)
3753 return parseAngleBracketClose();
3754
3755 return parseToken(*EndToken);
3756 }
3757
3758 return false;
3759 }
3760
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)3761 bool MasmParser::parseStructInstList(
3762 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
3763 const AsmToken::TokenKind EndToken) {
3764 while (getTok().isNot(EndToken) ||
3765 (EndToken == AsmToken::Greater &&
3766 getTok().isNot(AsmToken::GreaterGreater))) {
3767 const AsmToken NextTok = peekTok();
3768 if (NextTok.is(AsmToken::Identifier) &&
3769 NextTok.getString().equals_insensitive("dup")) {
3770 const MCExpr *Value;
3771 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3772 return true;
3773 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3774 if (!MCE)
3775 return Error(Value->getLoc(),
3776 "cannot repeat value a non-constant number of times");
3777 const int64_t Repetitions = MCE->getValue();
3778 if (Repetitions < 0)
3779 return Error(Value->getLoc(),
3780 "cannot repeat value a negative number of times");
3781
3782 std::vector<StructInitializer> DuplicatedValues;
3783 if (parseToken(AsmToken::LParen,
3784 "parentheses required for 'dup' contents") ||
3785 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
3786 return true;
3787
3788 for (int i = 0; i < Repetitions; ++i)
3789 llvm::append_range(Initializers, DuplicatedValues);
3790 } else {
3791 Initializers.emplace_back();
3792 if (parseStructInitializer(Structure, Initializers.back()))
3793 return true;
3794 }
3795
3796 // Continue if we see a comma. (Also, allow line continuation.)
3797 if (!parseOptionalToken(AsmToken::Comma))
3798 break;
3799 parseOptionalToken(AsmToken::EndOfStatement);
3800 }
3801
3802 return false;
3803 }
3804
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)3805 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3806 const IntFieldInfo &Contents) {
3807 // Default-initialize all values.
3808 for (const MCExpr *Value : Contents.Values) {
3809 if (emitIntValue(Value, Field.Type))
3810 return true;
3811 }
3812 return false;
3813 }
3814
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)3815 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3816 const RealFieldInfo &Contents) {
3817 for (const APInt &AsInt : Contents.AsIntValues) {
3818 getStreamer().emitIntValue(AsInt.getLimitedValue(),
3819 AsInt.getBitWidth() / 8);
3820 }
3821 return false;
3822 }
3823
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)3824 bool MasmParser::emitFieldValue(const FieldInfo &Field,
3825 const StructFieldInfo &Contents) {
3826 for (const auto &Initializer : Contents.Initializers) {
3827 size_t Index = 0, Offset = 0;
3828 for (const auto &SubField : Contents.Structure.Fields) {
3829 getStreamer().emitZeros(SubField.Offset - Offset);
3830 Offset = SubField.Offset + SubField.SizeOf;
3831 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
3832 }
3833 }
3834 return false;
3835 }
3836
emitFieldValue(const FieldInfo & Field)3837 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
3838 switch (Field.Contents.FT) {
3839 case FT_INTEGRAL:
3840 return emitFieldValue(Field, Field.Contents.IntInfo);
3841 case FT_REAL:
3842 return emitFieldValue(Field, Field.Contents.RealInfo);
3843 case FT_STRUCT:
3844 return emitFieldValue(Field, Field.Contents.StructInfo);
3845 }
3846 llvm_unreachable("Unhandled FieldType enum");
3847 }
3848
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)3849 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3850 const IntFieldInfo &Contents,
3851 const IntFieldInfo &Initializer) {
3852 for (const auto &Value : Initializer.Values) {
3853 if (emitIntValue(Value, Field.Type))
3854 return true;
3855 }
3856 // Default-initialize all remaining values.
3857 for (const auto &Value :
3858 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
3859 if (emitIntValue(Value, Field.Type))
3860 return true;
3861 }
3862 return false;
3863 }
3864
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)3865 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3866 const RealFieldInfo &Contents,
3867 const RealFieldInfo &Initializer) {
3868 for (const auto &AsInt : Initializer.AsIntValues) {
3869 getStreamer().emitIntValue(AsInt.getLimitedValue(),
3870 AsInt.getBitWidth() / 8);
3871 }
3872 // Default-initialize all remaining values.
3873 for (const auto &AsInt :
3874 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
3875 getStreamer().emitIntValue(AsInt.getLimitedValue(),
3876 AsInt.getBitWidth() / 8);
3877 }
3878 return false;
3879 }
3880
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)3881 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3882 const StructFieldInfo &Contents,
3883 const StructFieldInfo &Initializer) {
3884 for (const auto &Init : Initializer.Initializers) {
3885 if (emitStructInitializer(Contents.Structure, Init))
3886 return true;
3887 }
3888 // Default-initialize all remaining values.
3889 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
3890 Initializer.Initializers.size())) {
3891 if (emitStructInitializer(Contents.Structure, Init))
3892 return true;
3893 }
3894 return false;
3895 }
3896
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)3897 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
3898 const FieldInitializer &Initializer) {
3899 switch (Field.Contents.FT) {
3900 case FT_INTEGRAL:
3901 return emitFieldInitializer(Field, Field.Contents.IntInfo,
3902 Initializer.IntInfo);
3903 case FT_REAL:
3904 return emitFieldInitializer(Field, Field.Contents.RealInfo,
3905 Initializer.RealInfo);
3906 case FT_STRUCT:
3907 return emitFieldInitializer(Field, Field.Contents.StructInfo,
3908 Initializer.StructInfo);
3909 }
3910 llvm_unreachable("Unhandled FieldType enum");
3911 }
3912
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)3913 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
3914 const StructInitializer &Initializer) {
3915 if (!Structure.Initializable)
3916 return Error(getLexer().getLoc(),
3917 "cannot initialize a value of type '" + Structure.Name +
3918 "'; 'org' was used in the type's declaration");
3919 size_t Index = 0, Offset = 0;
3920 for (const auto &Init : Initializer.FieldInitializers) {
3921 const auto &Field = Structure.Fields[Index++];
3922 getStreamer().emitZeros(Field.Offset - Offset);
3923 Offset = Field.Offset + Field.SizeOf;
3924 if (emitFieldInitializer(Field, Init))
3925 return true;
3926 }
3927 // Default-initialize all remaining fields.
3928 for (const auto &Field : llvm::drop_begin(
3929 Structure.Fields, Initializer.FieldInitializers.size())) {
3930 getStreamer().emitZeros(Field.Offset - Offset);
3931 Offset = Field.Offset + Field.SizeOf;
3932 if (emitFieldValue(Field))
3933 return true;
3934 }
3935 // Add final padding.
3936 if (Offset != Structure.Size)
3937 getStreamer().emitZeros(Structure.Size - Offset);
3938 return false;
3939 }
3940
3941 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)3942 bool MasmParser::emitStructValues(const StructInfo &Structure,
3943 unsigned *Count) {
3944 std::vector<StructInitializer> Initializers;
3945 if (parseStructInstList(Structure, Initializers))
3946 return true;
3947
3948 for (const auto &Initializer : Initializers) {
3949 if (emitStructInitializer(Structure, Initializer))
3950 return true;
3951 }
3952
3953 if (Count)
3954 *Count = Initializers.size();
3955 return false;
3956 }
3957
3958 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)3959 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
3960 StructInfo &OwningStruct = StructInProgress.back();
3961 FieldInfo &Field =
3962 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
3963 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
3964
3965 StructInfo.Structure = Structure;
3966 Field.Type = Structure.Size;
3967
3968 if (parseStructInstList(Structure, StructInfo.Initializers))
3969 return true;
3970
3971 Field.LengthOf = StructInfo.Initializers.size();
3972 Field.SizeOf = Field.Type * Field.LengthOf;
3973
3974 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3975 if (!OwningStruct.IsUnion) {
3976 OwningStruct.NextOffset = FieldEnd;
3977 }
3978 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
3979
3980 return false;
3981 }
3982
3983 /// parseDirectiveStructValue
3984 /// ::= struct-id (<struct-initializer> | {struct-initializer})
3985 /// [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)3986 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
3987 StringRef Directive, SMLoc DirLoc) {
3988 if (StructInProgress.empty()) {
3989 if (emitStructValues(Structure))
3990 return true;
3991 } else if (addStructField("", Structure)) {
3992 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
3993 }
3994
3995 return false;
3996 }
3997
3998 /// parseDirectiveNamedValue
3999 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4000 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4001 StringRef Directive,
4002 SMLoc DirLoc, StringRef Name) {
4003 if (StructInProgress.empty()) {
4004 // Initialize named data value.
4005 MCSymbol *Sym = getContext().parseSymbol(Name);
4006 getStreamer().emitLabel(Sym);
4007 unsigned Count;
4008 if (emitStructValues(Structure, &Count))
4009 return true;
4010 AsmTypeInfo Type;
4011 Type.Name = Structure.Name;
4012 Type.Size = Structure.Size * Count;
4013 Type.ElementSize = Structure.Size;
4014 Type.Length = Count;
4015 KnownType[Name.lower()] = Type;
4016 } else if (addStructField(Name, Structure)) {
4017 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4018 }
4019
4020 return false;
4021 }
4022
4023 /// parseDirectiveStruct
4024 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4025 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4026 /// <name> ENDS
4027 ////// dataDir = data declaration
4028 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4029 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4030 DirectiveKind DirKind, StringRef Name,
4031 SMLoc NameLoc) {
4032 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4033 // anyway, so all field accesses must be qualified.
4034 AsmToken NextTok = getTok();
4035 int64_t AlignmentValue = 1;
4036 if (NextTok.isNot(AsmToken::Comma) &&
4037 NextTok.isNot(AsmToken::EndOfStatement) &&
4038 parseAbsoluteExpression(AlignmentValue)) {
4039 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4040 "' directive");
4041 }
4042 if (!isPowerOf2_64(AlignmentValue)) {
4043 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4044 std::to_string(AlignmentValue));
4045 }
4046
4047 StringRef Qualifier;
4048 SMLoc QualifierLoc;
4049 if (parseOptionalToken(AsmToken::Comma)) {
4050 QualifierLoc = getTok().getLoc();
4051 if (parseIdentifier(Qualifier))
4052 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4053 if (!Qualifier.equals_insensitive("nonunique"))
4054 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4055 Twine(Directive) +
4056 "' directive; expected none or NONUNIQUE");
4057 }
4058
4059 if (parseEOL())
4060 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4061
4062 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4063 return false;
4064 }
4065
4066 /// parseDirectiveNestedStruct
4067 /// ::= (STRUC | STRUCT | UNION) [name]
4068 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4069 /// ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4070 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4071 DirectiveKind DirKind) {
4072 if (StructInProgress.empty())
4073 return TokError("missing name in top-level '" + Twine(Directive) +
4074 "' directive");
4075
4076 StringRef Name;
4077 if (getTok().is(AsmToken::Identifier)) {
4078 Name = getTok().getIdentifier();
4079 parseToken(AsmToken::Identifier);
4080 }
4081 if (parseEOL())
4082 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4083
4084 // Reserve space to ensure Alignment doesn't get invalidated when
4085 // StructInProgress grows.
4086 StructInProgress.reserve(StructInProgress.size() + 1);
4087 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4088 StructInProgress.back().Alignment);
4089 return false;
4090 }
4091
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4092 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4093 if (StructInProgress.empty())
4094 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4095 if (StructInProgress.size() > 1)
4096 return Error(NameLoc, "unexpected name in nested ENDS directive");
4097 if (StructInProgress.back().Name.compare_insensitive(Name))
4098 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4099 StructInProgress.back().Name + "'");
4100 StructInfo Structure = StructInProgress.pop_back_val();
4101 // Pad to make the structure's size divisible by the smaller of its alignment
4102 // and the size of its largest field.
4103 Structure.Size = llvm::alignTo(
4104 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4105 Structs[Name.lower()] = Structure;
4106
4107 if (parseEOL())
4108 return addErrorSuffix(" in ENDS directive");
4109
4110 return false;
4111 }
4112
parseDirectiveNestedEnds()4113 bool MasmParser::parseDirectiveNestedEnds() {
4114 if (StructInProgress.empty())
4115 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4116 if (StructInProgress.size() == 1)
4117 return TokError("missing name in top-level ENDS directive");
4118
4119 if (parseEOL())
4120 return addErrorSuffix(" in nested ENDS directive");
4121
4122 StructInfo Structure = StructInProgress.pop_back_val();
4123 // Pad to make the structure's size divisible by its alignment.
4124 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4125
4126 StructInfo &ParentStruct = StructInProgress.back();
4127 if (Structure.Name.empty()) {
4128 // Anonymous substructures' fields are addressed as if they belong to the
4129 // parent structure - so we transfer them to the parent here.
4130 const size_t OldFields = ParentStruct.Fields.size();
4131 ParentStruct.Fields.insert(
4132 ParentStruct.Fields.end(),
4133 std::make_move_iterator(Structure.Fields.begin()),
4134 std::make_move_iterator(Structure.Fields.end()));
4135 for (const auto &FieldByName : Structure.FieldsByName) {
4136 ParentStruct.FieldsByName[FieldByName.getKey()] =
4137 FieldByName.getValue() + OldFields;
4138 }
4139
4140 unsigned FirstFieldOffset = 0;
4141 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4142 FirstFieldOffset = llvm::alignTo(
4143 ParentStruct.NextOffset,
4144 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4145 }
4146
4147 if (ParentStruct.IsUnion) {
4148 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4149 } else {
4150 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4151 Field.Offset += FirstFieldOffset;
4152
4153 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4154 if (!ParentStruct.IsUnion) {
4155 ParentStruct.NextOffset = StructureEnd;
4156 }
4157 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4158 }
4159 } else {
4160 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4161 Structure.AlignmentSize);
4162 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4163 Field.Type = Structure.Size;
4164 Field.LengthOf = 1;
4165 Field.SizeOf = Structure.Size;
4166
4167 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4168 if (!ParentStruct.IsUnion) {
4169 ParentStruct.NextOffset = StructureEnd;
4170 }
4171 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4172
4173 StructInfo.Structure = Structure;
4174 StructInfo.Initializers.emplace_back();
4175 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4176 for (const auto &SubField : Structure.Fields) {
4177 FieldInitializers.push_back(SubField.Contents);
4178 }
4179 }
4180
4181 return false;
4182 }
4183
4184 /// parseDirectiveOrg
4185 /// ::= org expression
parseDirectiveOrg()4186 bool MasmParser::parseDirectiveOrg() {
4187 const MCExpr *Offset;
4188 SMLoc OffsetLoc = Lexer.getLoc();
4189 if (checkForValidSection() || parseExpression(Offset))
4190 return true;
4191 if (parseEOL())
4192 return addErrorSuffix(" in 'org' directive");
4193
4194 if (StructInProgress.empty()) {
4195 // Not in a struct; change the offset for the next instruction or data
4196 if (checkForValidSection())
4197 return addErrorSuffix(" in 'org' directive");
4198
4199 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4200 } else {
4201 // Offset the next field of this struct
4202 StructInfo &Structure = StructInProgress.back();
4203 int64_t OffsetRes;
4204 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4205 return Error(OffsetLoc,
4206 "expected absolute expression in 'org' directive");
4207 if (OffsetRes < 0)
4208 return Error(
4209 OffsetLoc,
4210 "expected non-negative value in struct's 'org' directive; was " +
4211 std::to_string(OffsetRes));
4212 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4213
4214 // ORG-affected structures cannot be initialized
4215 Structure.Initializable = false;
4216 }
4217
4218 return false;
4219 }
4220
emitAlignTo(int64_t Alignment)4221 bool MasmParser::emitAlignTo(int64_t Alignment) {
4222 if (StructInProgress.empty()) {
4223 // Not in a struct; align the next instruction or data
4224 if (checkForValidSection())
4225 return true;
4226
4227 // Check whether we should use optimal code alignment for this align
4228 // directive.
4229 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4230 assert(Section && "must have section to emit alignment");
4231 if (Section->useCodeAlign()) {
4232 getStreamer().emitCodeAlignment(Align(Alignment),
4233 &getTargetParser().getSTI(),
4234 /*MaxBytesToEmit=*/0);
4235 } else {
4236 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4237 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4238 /*ValueSize=*/1,
4239 /*MaxBytesToEmit=*/0);
4240 }
4241 } else {
4242 // Align the next field of this struct
4243 StructInfo &Structure = StructInProgress.back();
4244 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4245 }
4246
4247 return false;
4248 }
4249
4250 /// parseDirectiveAlign
4251 /// ::= align expression
parseDirectiveAlign()4252 bool MasmParser::parseDirectiveAlign() {
4253 SMLoc AlignmentLoc = getLexer().getLoc();
4254 int64_t Alignment;
4255
4256 // Ignore empty 'align' directives.
4257 if (getTok().is(AsmToken::EndOfStatement)) {
4258 return Warning(AlignmentLoc,
4259 "align directive with no operand is ignored") &&
4260 parseEOL();
4261 }
4262 if (parseAbsoluteExpression(Alignment) || parseEOL())
4263 return addErrorSuffix(" in align directive");
4264
4265 // Always emit an alignment here even if we throw an error.
4266 bool ReturnVal = false;
4267
4268 // Reject alignments that aren't either a power of two or zero, for ML.exe
4269 // compatibility. Alignment of zero is silently rounded up to one.
4270 if (Alignment == 0)
4271 Alignment = 1;
4272 if (!isPowerOf2_64(Alignment))
4273 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4274 std::to_string(Alignment));
4275
4276 if (emitAlignTo(Alignment))
4277 ReturnVal |= addErrorSuffix(" in align directive");
4278
4279 return ReturnVal;
4280 }
4281
4282 /// parseDirectiveEven
4283 /// ::= even
parseDirectiveEven()4284 bool MasmParser::parseDirectiveEven() {
4285 if (parseEOL() || emitAlignTo(2))
4286 return addErrorSuffix(" in even directive");
4287
4288 return false;
4289 }
4290
4291 /// parseDirectiveMacro
4292 /// ::= name macro [parameters]
4293 /// ["LOCAL" identifiers]
4294 /// parameters ::= parameter [, parameter]*
4295 /// parameter ::= name ":" qualifier
4296 /// qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)4297 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
4298 MCAsmMacroParameters Parameters;
4299 while (getLexer().isNot(AsmToken::EndOfStatement)) {
4300 if (!Parameters.empty() && Parameters.back().Vararg)
4301 return Error(Lexer.getLoc(),
4302 "Vararg parameter '" + Parameters.back().Name +
4303 "' should be last in the list of parameters");
4304
4305 MCAsmMacroParameter Parameter;
4306 if (parseIdentifier(Parameter.Name))
4307 return TokError("expected identifier in 'macro' directive");
4308
4309 // Emit an error if two (or more) named parameters share the same name.
4310 for (const MCAsmMacroParameter& CurrParam : Parameters)
4311 if (CurrParam.Name.equals_insensitive(Parameter.Name))
4312 return TokError("macro '" + Name + "' has multiple parameters"
4313 " named '" + Parameter.Name + "'");
4314
4315 if (Lexer.is(AsmToken::Colon)) {
4316 Lex(); // consume ':'
4317
4318 if (parseOptionalToken(AsmToken::Equal)) {
4319 // Default value
4320 SMLoc ParamLoc;
4321
4322 ParamLoc = Lexer.getLoc();
4323 if (parseMacroArgument(nullptr, Parameter.Value))
4324 return true;
4325 } else {
4326 SMLoc QualLoc;
4327 StringRef Qualifier;
4328
4329 QualLoc = Lexer.getLoc();
4330 if (parseIdentifier(Qualifier))
4331 return Error(QualLoc, "missing parameter qualifier for "
4332 "'" +
4333 Parameter.Name + "' in macro '" + Name +
4334 "'");
4335
4336 if (Qualifier.equals_insensitive("req"))
4337 Parameter.Required = true;
4338 else if (Qualifier.equals_insensitive("vararg"))
4339 Parameter.Vararg = true;
4340 else
4341 return Error(QualLoc,
4342 Qualifier + " is not a valid parameter qualifier for '" +
4343 Parameter.Name + "' in macro '" + Name + "'");
4344 }
4345 }
4346
4347 Parameters.push_back(std::move(Parameter));
4348
4349 if (getLexer().is(AsmToken::Comma))
4350 Lex();
4351 }
4352
4353 // Eat just the end of statement.
4354 Lexer.Lex();
4355
4356 std::vector<std::string> Locals;
4357 if (getTok().is(AsmToken::Identifier) &&
4358 getTok().getIdentifier().equals_insensitive("local")) {
4359 Lex(); // Eat the LOCAL directive.
4360
4361 StringRef ID;
4362 while (true) {
4363 if (parseIdentifier(ID))
4364 return true;
4365 Locals.push_back(ID.lower());
4366
4367 // If we see a comma, continue (and allow line continuation).
4368 if (!parseOptionalToken(AsmToken::Comma))
4369 break;
4370 parseOptionalToken(AsmToken::EndOfStatement);
4371 }
4372 }
4373
4374 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
4375 AsmToken EndToken, StartToken = getTok();
4376 unsigned MacroDepth = 0;
4377 bool IsMacroFunction = false;
4378 // Lex the macro definition.
4379 while (true) {
4380 // Ignore Lexing errors in macros.
4381 while (Lexer.is(AsmToken::Error)) {
4382 Lexer.Lex();
4383 }
4384
4385 // Check whether we have reached the end of the file.
4386 if (getLexer().is(AsmToken::Eof))
4387 return Error(NameLoc, "no matching 'endm' in definition");
4388
4389 // Otherwise, check whether we have reached the 'endm'... and determine if
4390 // this is a macro function.
4391 if (getLexer().is(AsmToken::Identifier)) {
4392 if (getTok().getIdentifier().equals_insensitive("endm")) {
4393 if (MacroDepth == 0) { // Outermost macro.
4394 EndToken = getTok();
4395 Lexer.Lex();
4396 if (getLexer().isNot(AsmToken::EndOfStatement))
4397 return TokError("unexpected token in '" + EndToken.getIdentifier() +
4398 "' directive");
4399 break;
4400 } else {
4401 // Otherwise we just found the end of an inner macro.
4402 --MacroDepth;
4403 }
4404 } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
4405 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
4406 IsMacroFunction = true;
4407 }
4408 } else if (isMacroLikeDirective()) {
4409 // We allow nested macros. Those aren't instantiated until the
4410 // outermost macro is expanded so just ignore them for now.
4411 ++MacroDepth;
4412 }
4413 }
4414
4415 // Otherwise, scan til the end of the statement.
4416 eatToEndOfStatement();
4417 }
4418
4419 if (getContext().lookupMacro(Name.lower())) {
4420 return Error(NameLoc, "macro '" + Name + "' is already defined");
4421 }
4422
4423 const char *BodyStart = StartToken.getLoc().getPointer();
4424 const char *BodyEnd = EndToken.getLoc().getPointer();
4425 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
4426 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
4427 IsMacroFunction);
4428 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
4429 Macro.dump());
4430 getContext().defineMacro(Name.lower(), std::move(Macro));
4431 return false;
4432 }
4433
4434 /// parseDirectiveExitMacro
4435 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)4436 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
4437 StringRef Directive,
4438 std::string &Value) {
4439 SMLoc EndLoc = getTok().getLoc();
4440 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
4441 return Error(EndLoc,
4442 "unable to parse text item in '" + Directive + "' directive");
4443 eatToEndOfStatement();
4444
4445 if (!isInsideMacroInstantiation())
4446 return TokError("unexpected '" + Directive + "' in file, "
4447 "no current macro definition");
4448
4449 // Exit all conditionals that are active in the current macro.
4450 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
4451 TheCondState = TheCondStack.back();
4452 TheCondStack.pop_back();
4453 }
4454
4455 handleMacroExit();
4456 return false;
4457 }
4458
4459 /// parseDirectiveEndMacro
4460 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)4461 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
4462 if (getLexer().isNot(AsmToken::EndOfStatement))
4463 return TokError("unexpected token in '" + Directive + "' directive");
4464
4465 // If we are inside a macro instantiation, terminate the current
4466 // instantiation.
4467 if (isInsideMacroInstantiation()) {
4468 handleMacroExit();
4469 return false;
4470 }
4471
4472 // Otherwise, this .endmacro is a stray entry in the file; well formed
4473 // .endmacro directives are handled during the macro definition parsing.
4474 return TokError("unexpected '" + Directive + "' in file, "
4475 "no current macro definition");
4476 }
4477
4478 /// parseDirectivePurgeMacro
4479 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)4480 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
4481 StringRef Name;
4482 while (true) {
4483 SMLoc NameLoc;
4484 if (parseTokenLoc(NameLoc) ||
4485 check(parseIdentifier(Name), NameLoc,
4486 "expected identifier in 'purge' directive"))
4487 return true;
4488
4489 DEBUG_WITH_TYPE("asm-macros", dbgs()
4490 << "Un-defining macro: " << Name << "\n");
4491 if (!getContext().lookupMacro(Name.lower()))
4492 return Error(NameLoc, "macro '" + Name + "' is not defined");
4493 getContext().undefineMacro(Name.lower());
4494
4495 if (!parseOptionalToken(AsmToken::Comma))
4496 break;
4497 parseOptionalToken(AsmToken::EndOfStatement);
4498 }
4499
4500 return false;
4501 }
4502
parseDirectiveExtern()4503 bool MasmParser::parseDirectiveExtern() {
4504 // .extern is the default - but we still need to take any provided type info.
4505 auto parseOp = [&]() -> bool {
4506 MCSymbol *Sym;
4507 SMLoc NameLoc = getTok().getLoc();
4508 if (parseSymbol(Sym))
4509 return Error(NameLoc, "expected name");
4510 if (parseToken(AsmToken::Colon))
4511 return true;
4512
4513 StringRef TypeName;
4514 SMLoc TypeLoc = getTok().getLoc();
4515 if (parseIdentifier(TypeName))
4516 return Error(TypeLoc, "expected type");
4517 if (!TypeName.equals_insensitive("proc")) {
4518 AsmTypeInfo Type;
4519 if (lookUpType(TypeName, Type))
4520 return Error(TypeLoc, "unrecognized type");
4521 KnownType[Sym->getName().lower()] = Type;
4522 }
4523
4524 Sym->setExternal(true);
4525 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
4526
4527 return false;
4528 };
4529
4530 if (parseMany(parseOp))
4531 return addErrorSuffix(" in directive 'extern'");
4532 return false;
4533 }
4534
4535 /// parseDirectiveSymbolAttribute
4536 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)4537 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
4538 auto parseOp = [&]() -> bool {
4539 SMLoc Loc = getTok().getLoc();
4540 MCSymbol *Sym;
4541 if (parseSymbol(Sym))
4542 return Error(Loc, "expected identifier");
4543
4544 // Assembler local symbols don't make any sense here. Complain loudly.
4545 if (Sym->isTemporary())
4546 return Error(Loc, "non-local symbol required");
4547
4548 if (!getStreamer().emitSymbolAttribute(Sym, Attr))
4549 return Error(Loc, "unable to emit symbol attribute");
4550 return false;
4551 };
4552
4553 if (parseMany(parseOp))
4554 return addErrorSuffix(" in directive");
4555 return false;
4556 }
4557
4558 /// parseDirectiveComm
4559 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)4560 bool MasmParser::parseDirectiveComm(bool IsLocal) {
4561 if (checkForValidSection())
4562 return true;
4563
4564 SMLoc IDLoc = getLexer().getLoc();
4565 MCSymbol *Sym;
4566 if (parseSymbol(Sym))
4567 return TokError("expected identifier in directive");
4568
4569 if (getLexer().isNot(AsmToken::Comma))
4570 return TokError("unexpected token in directive");
4571 Lex();
4572
4573 int64_t Size;
4574 SMLoc SizeLoc = getLexer().getLoc();
4575 if (parseAbsoluteExpression(Size))
4576 return true;
4577
4578 int64_t Pow2Alignment = 0;
4579 SMLoc Pow2AlignmentLoc;
4580 if (getLexer().is(AsmToken::Comma)) {
4581 Lex();
4582 Pow2AlignmentLoc = getLexer().getLoc();
4583 if (parseAbsoluteExpression(Pow2Alignment))
4584 return true;
4585
4586 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
4587 if (IsLocal && LCOMM == LCOMM::NoAlignment)
4588 return Error(Pow2AlignmentLoc, "alignment not supported on this target");
4589
4590 // If this target takes alignments in bytes (not log) validate and convert.
4591 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
4592 (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
4593 if (!isPowerOf2_64(Pow2Alignment))
4594 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
4595 Pow2Alignment = Log2_64(Pow2Alignment);
4596 }
4597 }
4598
4599 if (parseEOL())
4600 return true;
4601
4602 // NOTE: a size of zero for a .comm should create a undefined symbol
4603 // but a size of .lcomm creates a bss symbol of size zero.
4604 if (Size < 0)
4605 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
4606 "be less than zero");
4607
4608 // NOTE: The alignment in the directive is a power of 2 value, the assembler
4609 // may internally end up wanting an alignment in bytes.
4610 // FIXME: Diagnose overflow.
4611 if (Pow2Alignment < 0)
4612 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
4613 "alignment, can't be less than zero");
4614
4615 Sym->redefineIfPossible();
4616 if (!Sym->isUndefined())
4617 return Error(IDLoc, "invalid symbol redefinition");
4618
4619 // Create the Symbol as a common or local common with Size and Pow2Alignment.
4620 if (IsLocal) {
4621 getStreamer().emitLocalCommonSymbol(Sym, Size,
4622 Align(1ULL << Pow2Alignment));
4623 return false;
4624 }
4625
4626 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
4627 return false;
4628 }
4629
4630 /// parseDirectiveComment
4631 /// ::= comment delimiter [[text]]
4632 /// [[text]]
4633 /// [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)4634 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
4635 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
4636 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
4637 assert(DelimiterEnd != std::string::npos);
4638 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
4639 if (Delimiter.empty())
4640 return Error(DirectiveLoc, "no delimiter in 'comment' directive");
4641 do {
4642 if (getTok().is(AsmToken::Eof))
4643 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
4644 Lex(); // eat end of statement
4645 } while (
4646 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
4647 return parseEOL();
4648 }
4649
4650 /// parseDirectiveInclude
4651 /// ::= include <filename>
4652 /// | include filename
parseDirectiveInclude()4653 bool MasmParser::parseDirectiveInclude() {
4654 // Allow the strings to have escaped octal character sequence.
4655 std::string Filename;
4656 SMLoc IncludeLoc = getTok().getLoc();
4657
4658 if (parseAngleBracketString(Filename))
4659 Filename = parseStringTo(AsmToken::EndOfStatement);
4660 if (check(Filename.empty(), "missing filename in 'include' directive") ||
4661 check(getTok().isNot(AsmToken::EndOfStatement),
4662 "unexpected token in 'include' directive") ||
4663 // Attempt to switch the lexer to the included file before consuming the
4664 // end of statement to avoid losing it when we switch.
4665 check(enterIncludeFile(Filename), IncludeLoc,
4666 "Could not find include file '" + Filename + "'"))
4667 return true;
4668
4669 return false;
4670 }
4671
4672 /// parseDirectiveIf
4673 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)4674 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
4675 TheCondStack.push_back(TheCondState);
4676 TheCondState.TheCond = AsmCond::IfCond;
4677 if (TheCondState.Ignore) {
4678 eatToEndOfStatement();
4679 } else {
4680 int64_t ExprValue;
4681 if (parseAbsoluteExpression(ExprValue) || parseEOL())
4682 return true;
4683
4684 switch (DirKind) {
4685 default:
4686 llvm_unreachable("unsupported directive");
4687 case DK_IF:
4688 break;
4689 case DK_IFE:
4690 ExprValue = ExprValue == 0;
4691 break;
4692 }
4693
4694 TheCondState.CondMet = ExprValue;
4695 TheCondState.Ignore = !TheCondState.CondMet;
4696 }
4697
4698 return false;
4699 }
4700
4701 /// parseDirectiveIfb
4702 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)4703 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
4704 TheCondStack.push_back(TheCondState);
4705 TheCondState.TheCond = AsmCond::IfCond;
4706
4707 if (TheCondState.Ignore) {
4708 eatToEndOfStatement();
4709 } else {
4710 std::string Str;
4711 if (parseTextItem(Str))
4712 return TokError("expected text item parameter for 'ifb' directive");
4713
4714 if (parseEOL())
4715 return true;
4716
4717 TheCondState.CondMet = ExpectBlank == Str.empty();
4718 TheCondState.Ignore = !TheCondState.CondMet;
4719 }
4720
4721 return false;
4722 }
4723
4724 /// parseDirectiveIfidn
4725 /// ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)4726 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
4727 bool CaseInsensitive) {
4728 std::string String1, String2;
4729
4730 if (parseTextItem(String1)) {
4731 if (ExpectEqual)
4732 return TokError("expected text item parameter for 'ifidn' directive");
4733 return TokError("expected text item parameter for 'ifdif' directive");
4734 }
4735
4736 if (Lexer.isNot(AsmToken::Comma)) {
4737 if (ExpectEqual)
4738 return TokError(
4739 "expected comma after first string for 'ifidn' directive");
4740 return TokError("expected comma after first string for 'ifdif' directive");
4741 }
4742 Lex();
4743
4744 if (parseTextItem(String2)) {
4745 if (ExpectEqual)
4746 return TokError("expected text item parameter for 'ifidn' directive");
4747 return TokError("expected text item parameter for 'ifdif' directive");
4748 }
4749
4750 TheCondStack.push_back(TheCondState);
4751 TheCondState.TheCond = AsmCond::IfCond;
4752 if (CaseInsensitive)
4753 TheCondState.CondMet =
4754 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
4755 else
4756 TheCondState.CondMet = ExpectEqual == (String1 == String2);
4757 TheCondState.Ignore = !TheCondState.CondMet;
4758
4759 return false;
4760 }
4761
4762 /// parseDirectiveIfdef
4763 /// ::= ifdef symbol
4764 /// | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)4765 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
4766 TheCondStack.push_back(TheCondState);
4767 TheCondState.TheCond = AsmCond::IfCond;
4768
4769 if (TheCondState.Ignore) {
4770 eatToEndOfStatement();
4771 } else {
4772 bool is_defined = false;
4773 MCRegister Reg;
4774 SMLoc StartLoc, EndLoc;
4775 is_defined =
4776 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
4777 if (!is_defined) {
4778 StringRef Name;
4779 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
4780 parseEOL())
4781 return true;
4782
4783 if (BuiltinSymbolMap.contains(Name.lower())) {
4784 is_defined = true;
4785 } else if (Variables.contains(Name.lower())) {
4786 is_defined = true;
4787 } else {
4788 MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
4789 is_defined = (Sym && !Sym->isUndefined());
4790 }
4791 }
4792
4793 TheCondState.CondMet = (is_defined == expect_defined);
4794 TheCondState.Ignore = !TheCondState.CondMet;
4795 }
4796
4797 return false;
4798 }
4799
4800 /// parseDirectiveElseIf
4801 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)4802 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
4803 DirectiveKind DirKind) {
4804 if (TheCondState.TheCond != AsmCond::IfCond &&
4805 TheCondState.TheCond != AsmCond::ElseIfCond)
4806 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
4807 " .if or an .elseif");
4808 TheCondState.TheCond = AsmCond::ElseIfCond;
4809
4810 bool LastIgnoreState = false;
4811 if (!TheCondStack.empty())
4812 LastIgnoreState = TheCondStack.back().Ignore;
4813 if (LastIgnoreState || TheCondState.CondMet) {
4814 TheCondState.Ignore = true;
4815 eatToEndOfStatement();
4816 } else {
4817 int64_t ExprValue;
4818 if (parseAbsoluteExpression(ExprValue))
4819 return true;
4820
4821 if (parseEOL())
4822 return true;
4823
4824 switch (DirKind) {
4825 default:
4826 llvm_unreachable("unsupported directive");
4827 case DK_ELSEIF:
4828 break;
4829 case DK_ELSEIFE:
4830 ExprValue = ExprValue == 0;
4831 break;
4832 }
4833
4834 TheCondState.CondMet = ExprValue;
4835 TheCondState.Ignore = !TheCondState.CondMet;
4836 }
4837
4838 return false;
4839 }
4840
4841 /// parseDirectiveElseIfb
4842 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)4843 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
4844 if (TheCondState.TheCond != AsmCond::IfCond &&
4845 TheCondState.TheCond != AsmCond::ElseIfCond)
4846 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4847 " if or an elseif");
4848 TheCondState.TheCond = AsmCond::ElseIfCond;
4849
4850 bool LastIgnoreState = false;
4851 if (!TheCondStack.empty())
4852 LastIgnoreState = TheCondStack.back().Ignore;
4853 if (LastIgnoreState || TheCondState.CondMet) {
4854 TheCondState.Ignore = true;
4855 eatToEndOfStatement();
4856 } else {
4857 std::string Str;
4858 if (parseTextItem(Str)) {
4859 if (ExpectBlank)
4860 return TokError("expected text item parameter for 'elseifb' directive");
4861 return TokError("expected text item parameter for 'elseifnb' directive");
4862 }
4863
4864 if (parseEOL())
4865 return true;
4866
4867 TheCondState.CondMet = ExpectBlank == Str.empty();
4868 TheCondState.Ignore = !TheCondState.CondMet;
4869 }
4870
4871 return false;
4872 }
4873
4874 /// parseDirectiveElseIfdef
4875 /// ::= elseifdef symbol
4876 /// | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)4877 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
4878 bool expect_defined) {
4879 if (TheCondState.TheCond != AsmCond::IfCond &&
4880 TheCondState.TheCond != AsmCond::ElseIfCond)
4881 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4882 " if or an elseif");
4883 TheCondState.TheCond = AsmCond::ElseIfCond;
4884
4885 bool LastIgnoreState = false;
4886 if (!TheCondStack.empty())
4887 LastIgnoreState = TheCondStack.back().Ignore;
4888 if (LastIgnoreState || TheCondState.CondMet) {
4889 TheCondState.Ignore = true;
4890 eatToEndOfStatement();
4891 } else {
4892 bool is_defined = false;
4893 MCRegister Reg;
4894 SMLoc StartLoc, EndLoc;
4895 is_defined =
4896 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
4897 if (!is_defined) {
4898 StringRef Name;
4899 if (check(parseIdentifier(Name),
4900 "expected identifier after 'elseifdef'") ||
4901 parseEOL())
4902 return true;
4903
4904 if (BuiltinSymbolMap.contains(Name.lower())) {
4905 is_defined = true;
4906 } else if (Variables.contains(Name.lower())) {
4907 is_defined = true;
4908 } else {
4909 MCSymbol *Sym = getContext().lookupSymbol(Name);
4910 is_defined = (Sym && !Sym->isUndefined());
4911 }
4912 }
4913
4914 TheCondState.CondMet = (is_defined == expect_defined);
4915 TheCondState.Ignore = !TheCondState.CondMet;
4916 }
4917
4918 return false;
4919 }
4920
4921 /// parseDirectiveElseIfidn
4922 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)4923 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
4924 bool CaseInsensitive) {
4925 if (TheCondState.TheCond != AsmCond::IfCond &&
4926 TheCondState.TheCond != AsmCond::ElseIfCond)
4927 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
4928 " if or an elseif");
4929 TheCondState.TheCond = AsmCond::ElseIfCond;
4930
4931 bool LastIgnoreState = false;
4932 if (!TheCondStack.empty())
4933 LastIgnoreState = TheCondStack.back().Ignore;
4934 if (LastIgnoreState || TheCondState.CondMet) {
4935 TheCondState.Ignore = true;
4936 eatToEndOfStatement();
4937 } else {
4938 std::string String1, String2;
4939
4940 if (parseTextItem(String1)) {
4941 if (ExpectEqual)
4942 return TokError(
4943 "expected text item parameter for 'elseifidn' directive");
4944 return TokError("expected text item parameter for 'elseifdif' directive");
4945 }
4946
4947 if (Lexer.isNot(AsmToken::Comma)) {
4948 if (ExpectEqual)
4949 return TokError(
4950 "expected comma after first string for 'elseifidn' directive");
4951 return TokError(
4952 "expected comma after first string for 'elseifdif' directive");
4953 }
4954 Lex();
4955
4956 if (parseTextItem(String2)) {
4957 if (ExpectEqual)
4958 return TokError(
4959 "expected text item parameter for 'elseifidn' directive");
4960 return TokError("expected text item parameter for 'elseifdif' directive");
4961 }
4962
4963 if (CaseInsensitive)
4964 TheCondState.CondMet =
4965 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
4966 else
4967 TheCondState.CondMet = ExpectEqual == (String1 == String2);
4968 TheCondState.Ignore = !TheCondState.CondMet;
4969 }
4970
4971 return false;
4972 }
4973
4974 /// parseDirectiveElse
4975 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)4976 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
4977 if (parseEOL())
4978 return true;
4979
4980 if (TheCondState.TheCond != AsmCond::IfCond &&
4981 TheCondState.TheCond != AsmCond::ElseIfCond)
4982 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
4983 " or an elseif");
4984 TheCondState.TheCond = AsmCond::ElseCond;
4985 bool LastIgnoreState = false;
4986 if (!TheCondStack.empty())
4987 LastIgnoreState = TheCondStack.back().Ignore;
4988 if (LastIgnoreState || TheCondState.CondMet)
4989 TheCondState.Ignore = true;
4990 else
4991 TheCondState.Ignore = false;
4992
4993 return false;
4994 }
4995
4996 /// parseDirectiveEnd
4997 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)4998 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
4999 if (parseEOL())
5000 return true;
5001
5002 while (Lexer.isNot(AsmToken::Eof))
5003 Lexer.Lex();
5004
5005 return false;
5006 }
5007
5008 /// parseDirectiveError
5009 /// ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)5010 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
5011 if (!TheCondStack.empty()) {
5012 if (TheCondStack.back().Ignore) {
5013 eatToEndOfStatement();
5014 return false;
5015 }
5016 }
5017
5018 std::string Message = ".err directive invoked in source file";
5019 if (Lexer.isNot(AsmToken::EndOfStatement))
5020 Message = parseStringTo(AsmToken::EndOfStatement);
5021 Lex();
5022
5023 return Error(DirectiveLoc, Message);
5024 }
5025
5026 /// parseDirectiveErrorIfb
5027 /// ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)5028 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
5029 if (!TheCondStack.empty()) {
5030 if (TheCondStack.back().Ignore) {
5031 eatToEndOfStatement();
5032 return false;
5033 }
5034 }
5035
5036 std::string Text;
5037 if (parseTextItem(Text))
5038 return Error(getTok().getLoc(), "missing text item in '.errb' directive");
5039
5040 std::string Message = ".errb directive invoked in source file";
5041 if (Lexer.isNot(AsmToken::EndOfStatement)) {
5042 if (parseToken(AsmToken::Comma))
5043 return addErrorSuffix(" in '.errb' directive");
5044 Message = parseStringTo(AsmToken::EndOfStatement);
5045 }
5046 Lex();
5047
5048 if (Text.empty() == ExpectBlank)
5049 return Error(DirectiveLoc, Message);
5050 return false;
5051 }
5052
5053 /// parseDirectiveErrorIfdef
5054 /// ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)5055 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
5056 bool ExpectDefined) {
5057 if (!TheCondStack.empty()) {
5058 if (TheCondStack.back().Ignore) {
5059 eatToEndOfStatement();
5060 return false;
5061 }
5062 }
5063
5064 bool IsDefined = false;
5065 MCRegister Reg;
5066 SMLoc StartLoc, EndLoc;
5067 IsDefined =
5068 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
5069 if (!IsDefined) {
5070 StringRef Name;
5071 if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
5072 return true;
5073
5074 if (BuiltinSymbolMap.contains(Name.lower())) {
5075 IsDefined = true;
5076 } else if (Variables.contains(Name.lower())) {
5077 IsDefined = true;
5078 } else {
5079 MCSymbol *Sym = getContext().lookupSymbol(Name);
5080 IsDefined = (Sym && !Sym->isUndefined());
5081 }
5082 }
5083
5084 std::string Message = ".errdef directive invoked in source file";
5085 if (Lexer.isNot(AsmToken::EndOfStatement)) {
5086 if (parseToken(AsmToken::Comma))
5087 return addErrorSuffix(" in '.errdef' directive");
5088 Message = parseStringTo(AsmToken::EndOfStatement);
5089 }
5090 Lex();
5091
5092 if (IsDefined == ExpectDefined)
5093 return Error(DirectiveLoc, Message);
5094 return false;
5095 }
5096
5097 /// parseDirectiveErrorIfidn
5098 /// ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)5099 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
5100 bool CaseInsensitive) {
5101 if (!TheCondStack.empty()) {
5102 if (TheCondStack.back().Ignore) {
5103 eatToEndOfStatement();
5104 return false;
5105 }
5106 }
5107
5108 std::string String1, String2;
5109
5110 if (parseTextItem(String1)) {
5111 if (ExpectEqual)
5112 return TokError("expected string parameter for '.erridn' directive");
5113 return TokError("expected string parameter for '.errdif' directive");
5114 }
5115
5116 if (Lexer.isNot(AsmToken::Comma)) {
5117 if (ExpectEqual)
5118 return TokError(
5119 "expected comma after first string for '.erridn' directive");
5120 return TokError(
5121 "expected comma after first string for '.errdif' directive");
5122 }
5123 Lex();
5124
5125 if (parseTextItem(String2)) {
5126 if (ExpectEqual)
5127 return TokError("expected string parameter for '.erridn' directive");
5128 return TokError("expected string parameter for '.errdif' directive");
5129 }
5130
5131 std::string Message;
5132 if (ExpectEqual)
5133 Message = ".erridn directive invoked in source file";
5134 else
5135 Message = ".errdif directive invoked in source file";
5136 if (Lexer.isNot(AsmToken::EndOfStatement)) {
5137 if (parseToken(AsmToken::Comma))
5138 return addErrorSuffix(" in '.erridn' directive");
5139 Message = parseStringTo(AsmToken::EndOfStatement);
5140 }
5141 Lex();
5142
5143 if (CaseInsensitive)
5144 TheCondState.CondMet =
5145 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
5146 else
5147 TheCondState.CondMet = ExpectEqual == (String1 == String2);
5148 TheCondState.Ignore = !TheCondState.CondMet;
5149
5150 if ((CaseInsensitive &&
5151 ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
5152 (ExpectEqual == (String1 == String2)))
5153 return Error(DirectiveLoc, Message);
5154 return false;
5155 }
5156
5157 /// parseDirectiveErrorIfe
5158 /// ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)5159 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
5160 if (!TheCondStack.empty()) {
5161 if (TheCondStack.back().Ignore) {
5162 eatToEndOfStatement();
5163 return false;
5164 }
5165 }
5166
5167 int64_t ExprValue;
5168 if (parseAbsoluteExpression(ExprValue))
5169 return addErrorSuffix(" in '.erre' directive");
5170
5171 std::string Message = ".erre directive invoked in source file";
5172 if (Lexer.isNot(AsmToken::EndOfStatement)) {
5173 if (parseToken(AsmToken::Comma))
5174 return addErrorSuffix(" in '.erre' directive");
5175 Message = parseStringTo(AsmToken::EndOfStatement);
5176 }
5177 Lex();
5178
5179 if ((ExprValue == 0) == ExpectZero)
5180 return Error(DirectiveLoc, Message);
5181 return false;
5182 }
5183
5184 /// parseDirectiveEndIf
5185 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)5186 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
5187 if (parseEOL())
5188 return true;
5189
5190 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
5191 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
5192 "an .if or .else");
5193 if (!TheCondStack.empty()) {
5194 TheCondState = TheCondStack.back();
5195 TheCondStack.pop_back();
5196 }
5197
5198 return false;
5199 }
5200
initializeDirectiveKindMap()5201 void MasmParser::initializeDirectiveKindMap() {
5202 DirectiveKindMap["="] = DK_ASSIGN;
5203 DirectiveKindMap["equ"] = DK_EQU;
5204 DirectiveKindMap["textequ"] = DK_TEXTEQU;
5205 // DirectiveKindMap[".ascii"] = DK_ASCII;
5206 // DirectiveKindMap[".asciz"] = DK_ASCIZ;
5207 // DirectiveKindMap[".string"] = DK_STRING;
5208 DirectiveKindMap["byte"] = DK_BYTE;
5209 DirectiveKindMap["sbyte"] = DK_SBYTE;
5210 DirectiveKindMap["word"] = DK_WORD;
5211 DirectiveKindMap["sword"] = DK_SWORD;
5212 DirectiveKindMap["dword"] = DK_DWORD;
5213 DirectiveKindMap["sdword"] = DK_SDWORD;
5214 DirectiveKindMap["fword"] = DK_FWORD;
5215 DirectiveKindMap["qword"] = DK_QWORD;
5216 DirectiveKindMap["sqword"] = DK_SQWORD;
5217 DirectiveKindMap["real4"] = DK_REAL4;
5218 DirectiveKindMap["real8"] = DK_REAL8;
5219 DirectiveKindMap["real10"] = DK_REAL10;
5220 DirectiveKindMap["align"] = DK_ALIGN;
5221 DirectiveKindMap["even"] = DK_EVEN;
5222 DirectiveKindMap["org"] = DK_ORG;
5223 DirectiveKindMap["extern"] = DK_EXTERN;
5224 DirectiveKindMap["extrn"] = DK_EXTERN;
5225 DirectiveKindMap["public"] = DK_PUBLIC;
5226 // DirectiveKindMap[".comm"] = DK_COMM;
5227 DirectiveKindMap["comment"] = DK_COMMENT;
5228 DirectiveKindMap["include"] = DK_INCLUDE;
5229 DirectiveKindMap["repeat"] = DK_REPEAT;
5230 DirectiveKindMap["rept"] = DK_REPEAT;
5231 DirectiveKindMap["while"] = DK_WHILE;
5232 DirectiveKindMap["for"] = DK_FOR;
5233 DirectiveKindMap["irp"] = DK_FOR;
5234 DirectiveKindMap["forc"] = DK_FORC;
5235 DirectiveKindMap["irpc"] = DK_FORC;
5236 DirectiveKindMap["if"] = DK_IF;
5237 DirectiveKindMap["ife"] = DK_IFE;
5238 DirectiveKindMap["ifb"] = DK_IFB;
5239 DirectiveKindMap["ifnb"] = DK_IFNB;
5240 DirectiveKindMap["ifdef"] = DK_IFDEF;
5241 DirectiveKindMap["ifndef"] = DK_IFNDEF;
5242 DirectiveKindMap["ifdif"] = DK_IFDIF;
5243 DirectiveKindMap["ifdifi"] = DK_IFDIFI;
5244 DirectiveKindMap["ifidn"] = DK_IFIDN;
5245 DirectiveKindMap["ifidni"] = DK_IFIDNI;
5246 DirectiveKindMap["elseif"] = DK_ELSEIF;
5247 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
5248 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
5249 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
5250 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
5251 DirectiveKindMap["else"] = DK_ELSE;
5252 DirectiveKindMap["end"] = DK_END;
5253 DirectiveKindMap["endif"] = DK_ENDIF;
5254 // DirectiveKindMap[".file"] = DK_FILE;
5255 // DirectiveKindMap[".line"] = DK_LINE;
5256 // DirectiveKindMap[".loc"] = DK_LOC;
5257 // DirectiveKindMap[".stabs"] = DK_STABS;
5258 // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
5259 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
5260 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
5261 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
5262 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
5263 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
5264 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
5265 // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
5266 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
5267 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
5268 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
5269 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
5270 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
5271 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
5272 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
5273 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
5274 // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
5275 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
5276 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
5277 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
5278 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
5279 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
5280 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
5281 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
5282 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
5283 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
5284 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
5285 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
5286 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
5287 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
5288 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
5289 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
5290 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
5291 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
5292 // DirectiveKindMap[".cfi_val_offset"] = DK_CFI_VAL_OFFSET;
5293 DirectiveKindMap["macro"] = DK_MACRO;
5294 DirectiveKindMap["exitm"] = DK_EXITM;
5295 DirectiveKindMap["endm"] = DK_ENDM;
5296 DirectiveKindMap["purge"] = DK_PURGE;
5297 DirectiveKindMap[".err"] = DK_ERR;
5298 DirectiveKindMap[".errb"] = DK_ERRB;
5299 DirectiveKindMap[".errnb"] = DK_ERRNB;
5300 DirectiveKindMap[".errdef"] = DK_ERRDEF;
5301 DirectiveKindMap[".errndef"] = DK_ERRNDEF;
5302 DirectiveKindMap[".errdif"] = DK_ERRDIF;
5303 DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
5304 DirectiveKindMap[".erridn"] = DK_ERRIDN;
5305 DirectiveKindMap[".erridni"] = DK_ERRIDNI;
5306 DirectiveKindMap[".erre"] = DK_ERRE;
5307 DirectiveKindMap[".errnz"] = DK_ERRNZ;
5308 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
5309 DirectiveKindMap[".pushreg"] = DK_PUSHREG;
5310 DirectiveKindMap[".savereg"] = DK_SAVEREG;
5311 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
5312 DirectiveKindMap[".setframe"] = DK_SETFRAME;
5313 DirectiveKindMap[".radix"] = DK_RADIX;
5314 DirectiveKindMap["db"] = DK_DB;
5315 DirectiveKindMap["dd"] = DK_DD;
5316 DirectiveKindMap["df"] = DK_DF;
5317 DirectiveKindMap["dq"] = DK_DQ;
5318 DirectiveKindMap["dw"] = DK_DW;
5319 DirectiveKindMap["echo"] = DK_ECHO;
5320 DirectiveKindMap["struc"] = DK_STRUCT;
5321 DirectiveKindMap["struct"] = DK_STRUCT;
5322 DirectiveKindMap["union"] = DK_UNION;
5323 DirectiveKindMap["ends"] = DK_ENDS;
5324 }
5325
isMacroLikeDirective()5326 bool MasmParser::isMacroLikeDirective() {
5327 if (getLexer().is(AsmToken::Identifier)) {
5328 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
5329 .CasesLower("repeat", "rept", true)
5330 .CaseLower("while", true)
5331 .CasesLower("for", "irp", true)
5332 .CasesLower("forc", "irpc", true)
5333 .Default(false);
5334 if (IsMacroLike)
5335 return true;
5336 }
5337 if (peekTok().is(AsmToken::Identifier) &&
5338 peekTok().getIdentifier().equals_insensitive("macro"))
5339 return true;
5340
5341 return false;
5342 }
5343
parseMacroLikeBody(SMLoc DirectiveLoc)5344 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
5345 AsmToken EndToken, StartToken = getTok();
5346
5347 unsigned NestLevel = 0;
5348 while (true) {
5349 // Check whether we have reached the end of the file.
5350 if (getLexer().is(AsmToken::Eof)) {
5351 printError(DirectiveLoc, "no matching 'endm' in definition");
5352 return nullptr;
5353 }
5354
5355 if (isMacroLikeDirective())
5356 ++NestLevel;
5357
5358 // Otherwise, check whether we have reached the endm.
5359 if (Lexer.is(AsmToken::Identifier) &&
5360 getTok().getIdentifier().equals_insensitive("endm")) {
5361 if (NestLevel == 0) {
5362 EndToken = getTok();
5363 Lex();
5364 if (Lexer.isNot(AsmToken::EndOfStatement)) {
5365 printError(getTok().getLoc(), "unexpected token in 'endm' directive");
5366 return nullptr;
5367 }
5368 break;
5369 }
5370 --NestLevel;
5371 }
5372
5373 // Otherwise, scan till the end of the statement.
5374 eatToEndOfStatement();
5375 }
5376
5377 const char *BodyStart = StartToken.getLoc().getPointer();
5378 const char *BodyEnd = EndToken.getLoc().getPointer();
5379 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5380
5381 // We Are Anonymous.
5382 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
5383 return &MacroLikeBodies.back();
5384 }
5385
expandStatement(SMLoc Loc)5386 bool MasmParser::expandStatement(SMLoc Loc) {
5387 std::string Body = parseStringTo(AsmToken::EndOfStatement);
5388 SMLoc EndLoc = getTok().getLoc();
5389
5390 MCAsmMacroParameters Parameters;
5391 MCAsmMacroArguments Arguments;
5392
5393 StringMap<std::string> BuiltinValues;
5394 for (const auto &S : BuiltinSymbolMap) {
5395 const BuiltinSymbol &Sym = S.getValue();
5396 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
5397 BuiltinValues[S.getKey().lower()] = std::move(*Text);
5398 }
5399 }
5400 for (const auto &B : BuiltinValues) {
5401 MCAsmMacroParameter P;
5402 MCAsmMacroArgument A;
5403 P.Name = B.getKey();
5404 P.Required = true;
5405 A.push_back(AsmToken(AsmToken::String, B.getValue()));
5406
5407 Parameters.push_back(std::move(P));
5408 Arguments.push_back(std::move(A));
5409 }
5410
5411 for (const auto &V : Variables) {
5412 const Variable &Var = V.getValue();
5413 if (Var.IsText) {
5414 MCAsmMacroParameter P;
5415 MCAsmMacroArgument A;
5416 P.Name = Var.Name;
5417 P.Required = true;
5418 A.push_back(AsmToken(AsmToken::String, Var.TextValue));
5419
5420 Parameters.push_back(std::move(P));
5421 Arguments.push_back(std::move(A));
5422 }
5423 }
5424 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
5425 MCAsmMacro M = MacroLikeBodies.back();
5426
5427 // Expand the statement in a new buffer.
5428 SmallString<80> Buf;
5429 raw_svector_ostream OS(Buf);
5430 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
5431 return true;
5432 std::unique_ptr<MemoryBuffer> Expansion =
5433 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
5434
5435 // Jump to the expanded statement and prime the lexer.
5436 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
5437 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
5438 EndStatementAtEOFStack.push_back(false);
5439 Lex();
5440 return false;
5441 }
5442
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)5443 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
5444 raw_svector_ostream &OS) {
5445 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
5446 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)5447 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
5448 SMLoc ExitLoc,
5449 raw_svector_ostream &OS) {
5450 OS << "endm\n";
5451
5452 std::unique_ptr<MemoryBuffer> Instantiation =
5453 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
5454
5455 // Create the macro instantiation object and add to the current macro
5456 // instantiation stack.
5457 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
5458 ExitLoc, TheCondStack.size()};
5459 ActiveMacros.push_back(MI);
5460
5461 // Jump to the macro instantiation and prime the lexer.
5462 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
5463 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
5464 EndStatementAtEOFStack.push_back(true);
5465 Lex();
5466 }
5467
5468 /// parseDirectiveRepeat
5469 /// ::= ("repeat" | "rept") count
5470 /// body
5471 /// endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)5472 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
5473 const MCExpr *CountExpr;
5474 SMLoc CountLoc = getTok().getLoc();
5475 if (parseExpression(CountExpr))
5476 return true;
5477
5478 int64_t Count;
5479 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
5480 return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
5481 }
5482
5483 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
5484 return true;
5485
5486 // Lex the repeat definition.
5487 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5488 if (!M)
5489 return true;
5490
5491 // Macro instantiation is lexical, unfortunately. We construct a new buffer
5492 // to hold the macro body with substitutions.
5493 SmallString<256> Buf;
5494 raw_svector_ostream OS(Buf);
5495 while (Count--) {
5496 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
5497 return true;
5498 }
5499 instantiateMacroLikeBody(M, DirectiveLoc, OS);
5500
5501 return false;
5502 }
5503
5504 /// parseDirectiveWhile
5505 /// ::= "while" expression
5506 /// body
5507 /// endm
parseDirectiveWhile(SMLoc DirectiveLoc)5508 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
5509 const MCExpr *CondExpr;
5510 SMLoc CondLoc = getTok().getLoc();
5511 if (parseExpression(CondExpr))
5512 return true;
5513
5514 // Lex the repeat definition.
5515 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5516 if (!M)
5517 return true;
5518
5519 // Macro instantiation is lexical, unfortunately. We construct a new buffer
5520 // to hold the macro body with substitutions.
5521 SmallString<256> Buf;
5522 raw_svector_ostream OS(Buf);
5523 int64_t Condition;
5524 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
5525 return Error(CondLoc, "expected absolute expression in 'while' directive");
5526 if (Condition) {
5527 // Instantiate the macro, then resume at this directive to recheck the
5528 // condition.
5529 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc()))
5530 return true;
5531 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
5532 }
5533
5534 return false;
5535 }
5536
5537 /// parseDirectiveFor
5538 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
5539 /// body
5540 /// endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)5541 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
5542 MCAsmMacroParameter Parameter;
5543 MCAsmMacroArguments A;
5544 if (check(parseIdentifier(Parameter.Name),
5545 "expected identifier in '" + Dir + "' directive"))
5546 return true;
5547
5548 // Parse optional qualifier (default value, or "req")
5549 if (parseOptionalToken(AsmToken::Colon)) {
5550 if (parseOptionalToken(AsmToken::Equal)) {
5551 // Default value
5552 SMLoc ParamLoc;
5553
5554 ParamLoc = Lexer.getLoc();
5555 if (parseMacroArgument(nullptr, Parameter.Value))
5556 return true;
5557 } else {
5558 SMLoc QualLoc;
5559 StringRef Qualifier;
5560
5561 QualLoc = Lexer.getLoc();
5562 if (parseIdentifier(Qualifier))
5563 return Error(QualLoc, "missing parameter qualifier for "
5564 "'" +
5565 Parameter.Name + "' in '" + Dir +
5566 "' directive");
5567
5568 if (Qualifier.equals_insensitive("req"))
5569 Parameter.Required = true;
5570 else
5571 return Error(QualLoc,
5572 Qualifier + " is not a valid parameter qualifier for '" +
5573 Parameter.Name + "' in '" + Dir + "' directive");
5574 }
5575 }
5576
5577 if (parseToken(AsmToken::Comma,
5578 "expected comma in '" + Dir + "' directive") ||
5579 parseToken(AsmToken::Less,
5580 "values in '" + Dir +
5581 "' directive must be enclosed in angle brackets"))
5582 return true;
5583
5584 while (true) {
5585 A.emplace_back();
5586 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
5587 return addErrorSuffix(" in arguments for '" + Dir + "' directive");
5588
5589 // If we see a comma, continue, and allow line continuation.
5590 if (!parseOptionalToken(AsmToken::Comma))
5591 break;
5592 parseOptionalToken(AsmToken::EndOfStatement);
5593 }
5594
5595 if (parseToken(AsmToken::Greater,
5596 "values in '" + Dir +
5597 "' directive must be enclosed in angle brackets") ||
5598 parseEOL())
5599 return true;
5600
5601 // Lex the for definition.
5602 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5603 if (!M)
5604 return true;
5605
5606 // Macro instantiation is lexical, unfortunately. We construct a new buffer
5607 // to hold the macro body with substitutions.
5608 SmallString<256> Buf;
5609 raw_svector_ostream OS(Buf);
5610
5611 for (const MCAsmMacroArgument &Arg : A) {
5612 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
5613 return true;
5614 }
5615
5616 instantiateMacroLikeBody(M, DirectiveLoc, OS);
5617
5618 return false;
5619 }
5620
5621 /// parseDirectiveForc
5622 /// ::= ("forc" | "irpc") symbol, <string>
5623 /// body
5624 /// endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)5625 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
5626 MCAsmMacroParameter Parameter;
5627
5628 std::string Argument;
5629 if (check(parseIdentifier(Parameter.Name),
5630 "expected identifier in '" + Directive + "' directive") ||
5631 parseToken(AsmToken::Comma,
5632 "expected comma in '" + Directive + "' directive"))
5633 return true;
5634 if (parseAngleBracketString(Argument)) {
5635 // Match ml64.exe; treat all characters to end of statement as a string,
5636 // ignoring comment markers, then discard anything following a space (using
5637 // the C locale).
5638 Argument = parseStringTo(AsmToken::EndOfStatement);
5639 if (getTok().is(AsmToken::EndOfStatement))
5640 Argument += getTok().getString();
5641 size_t End = 0;
5642 for (; End < Argument.size(); ++End) {
5643 if (isSpace(Argument[End]))
5644 break;
5645 }
5646 Argument.resize(End);
5647 }
5648 if (parseEOL())
5649 return true;
5650
5651 // Lex the irpc definition.
5652 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
5653 if (!M)
5654 return true;
5655
5656 // Macro instantiation is lexical, unfortunately. We construct a new buffer
5657 // to hold the macro body with substitutions.
5658 SmallString<256> Buf;
5659 raw_svector_ostream OS(Buf);
5660
5661 StringRef Values(Argument);
5662 for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
5663 MCAsmMacroArgument Arg;
5664 Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1));
5665
5666 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
5667 return true;
5668 }
5669
5670 instantiateMacroLikeBody(M, DirectiveLoc, OS);
5671
5672 return false;
5673 }
5674
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)5675 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
5676 size_t Len) {
5677 const MCExpr *Value;
5678 SMLoc ExprLoc = getLexer().getLoc();
5679 if (parseExpression(Value))
5680 return true;
5681 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
5682 if (!MCE)
5683 return Error(ExprLoc, "unexpected expression in _emit");
5684 uint64_t IntValue = MCE->getValue();
5685 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
5686 return Error(ExprLoc, "literal value out of range for directive");
5687
5688 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
5689 return false;
5690 }
5691
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)5692 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
5693 const MCExpr *Value;
5694 SMLoc ExprLoc = getLexer().getLoc();
5695 if (parseExpression(Value))
5696 return true;
5697 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
5698 if (!MCE)
5699 return Error(ExprLoc, "unexpected expression in align");
5700 uint64_t IntValue = MCE->getValue();
5701 if (!isPowerOf2_64(IntValue))
5702 return Error(ExprLoc, "literal value not a power of two greater then zero");
5703
5704 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
5705 return false;
5706 }
5707
parseDirectiveRadix(SMLoc DirectiveLoc)5708 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
5709 const SMLoc Loc = getLexer().getLoc();
5710 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
5711 StringRef RadixString = StringRef(RadixStringRaw).trim();
5712 unsigned Radix;
5713 if (RadixString.getAsInteger(10, Radix)) {
5714 return Error(Loc,
5715 "radix must be a decimal number in the range 2 to 16; was " +
5716 RadixString);
5717 }
5718 if (Radix < 2 || Radix > 16)
5719 return Error(Loc, "radix must be in the range 2 to 16; was " +
5720 std::to_string(Radix));
5721 getLexer().setMasmDefaultRadix(Radix);
5722 return false;
5723 }
5724
5725 /// parseDirectiveEcho
5726 /// ::= "echo" message
parseDirectiveEcho(SMLoc DirectiveLoc)5727 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
5728 std::string Message = parseStringTo(AsmToken::EndOfStatement);
5729 llvm::outs() << Message;
5730 if (!StringRef(Message).ends_with("\n"))
5731 llvm::outs() << '\n';
5732 return false;
5733 }
5734
5735 // We are comparing pointers, but the pointers are relative to a single string.
5736 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)5737 static int rewritesSort(const AsmRewrite *AsmRewriteA,
5738 const AsmRewrite *AsmRewriteB) {
5739 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
5740 return -1;
5741 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
5742 return 1;
5743
5744 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
5745 // rewrite to the same location. Make sure the SizeDirective rewrite is
5746 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
5747 // ensures the sort algorithm is stable.
5748 if (AsmRewritePrecedence[AsmRewriteA->Kind] >
5749 AsmRewritePrecedence[AsmRewriteB->Kind])
5750 return -1;
5751
5752 if (AsmRewritePrecedence[AsmRewriteA->Kind] <
5753 AsmRewritePrecedence[AsmRewriteB->Kind])
5754 return 1;
5755 llvm_unreachable("Unstable rewrite sort.");
5756 }
5757
defineMacro(StringRef Name,StringRef Value)5758 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
5759 Variable &Var = Variables[Name.lower()];
5760 if (Var.Name.empty()) {
5761 Var.Name = Name;
5762 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
5763 return Error(SMLoc(), "invalid variable redefinition");
5764 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
5765 Warning(SMLoc(), "redefining '" + Name +
5766 "', already defined on the command line")) {
5767 return true;
5768 }
5769 Var.Redefinable = Variable::WARN_ON_REDEFINITION;
5770 Var.IsText = true;
5771 Var.TextValue = Value.str();
5772 return false;
5773 }
5774
lookUpField(StringRef Name,AsmFieldInfo & Info) const5775 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
5776 const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
5777 const StringRef Base = BaseMember.first, Member = BaseMember.second;
5778 return lookUpField(Base, Member, Info);
5779 }
5780
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const5781 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
5782 AsmFieldInfo &Info) const {
5783 if (Base.empty())
5784 return true;
5785
5786 AsmFieldInfo BaseInfo;
5787 if (Base.contains('.') && !lookUpField(Base, BaseInfo))
5788 Base = BaseInfo.Type.Name;
5789
5790 auto StructIt = Structs.find(Base.lower());
5791 auto TypeIt = KnownType.find(Base.lower());
5792 if (TypeIt != KnownType.end()) {
5793 StructIt = Structs.find(TypeIt->second.Name.lower());
5794 }
5795 if (StructIt != Structs.end())
5796 return lookUpField(StructIt->second, Member, Info);
5797
5798 return true;
5799 }
5800
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const5801 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
5802 AsmFieldInfo &Info) const {
5803 if (Member.empty()) {
5804 Info.Type.Name = Structure.Name;
5805 Info.Type.Size = Structure.Size;
5806 Info.Type.ElementSize = Structure.Size;
5807 Info.Type.Length = 1;
5808 return false;
5809 }
5810
5811 std::pair<StringRef, StringRef> Split = Member.split('.');
5812 const StringRef FieldName = Split.first, FieldMember = Split.second;
5813
5814 auto StructIt = Structs.find(FieldName.lower());
5815 if (StructIt != Structs.end())
5816 return lookUpField(StructIt->second, FieldMember, Info);
5817
5818 auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
5819 if (FieldIt == Structure.FieldsByName.end())
5820 return true;
5821
5822 const FieldInfo &Field = Structure.Fields[FieldIt->second];
5823 if (FieldMember.empty()) {
5824 Info.Offset += Field.Offset;
5825 Info.Type.Size = Field.SizeOf;
5826 Info.Type.ElementSize = Field.Type;
5827 Info.Type.Length = Field.LengthOf;
5828 if (Field.Contents.FT == FT_STRUCT)
5829 Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
5830 else
5831 Info.Type.Name = "";
5832 return false;
5833 }
5834
5835 if (Field.Contents.FT != FT_STRUCT)
5836 return true;
5837 const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
5838
5839 if (lookUpField(StructInfo.Structure, FieldMember, Info))
5840 return true;
5841
5842 Info.Offset += Field.Offset;
5843 return false;
5844 }
5845
lookUpType(StringRef Name,AsmTypeInfo & Info) const5846 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
5847 unsigned Size = StringSwitch<unsigned>(Name)
5848 .CasesLower("byte", "db", "sbyte", 1)
5849 .CasesLower("word", "dw", "sword", 2)
5850 .CasesLower("dword", "dd", "sdword", 4)
5851 .CasesLower("fword", "df", 6)
5852 .CasesLower("qword", "dq", "sqword", 8)
5853 .CaseLower("real4", 4)
5854 .CaseLower("real8", 8)
5855 .CaseLower("real10", 10)
5856 .Default(0);
5857 if (Size) {
5858 Info.Name = Name;
5859 Info.ElementSize = Size;
5860 Info.Length = 1;
5861 Info.Size = Size;
5862 return false;
5863 }
5864
5865 auto StructIt = Structs.find(Name.lower());
5866 if (StructIt != Structs.end()) {
5867 const StructInfo &Structure = StructIt->second;
5868 Info.Name = Name;
5869 Info.ElementSize = Structure.Size;
5870 Info.Length = 1;
5871 Info.Size = Structure.Size;
5872 return false;
5873 }
5874
5875 return true;
5876 }
5877
/// Parse a block of MS-style inline assembly and produce the pieces needed to
/// build an IR inline-asm call.
///
/// Every statement is parsed with \p SI as the semantic callback. For each
/// parsed instruction the operands are classified as clobbers, outputs or
/// inputs, and rewrites are recorded against the original text. Afterwards the
/// rewrites are sorted and applied to the main source buffer to build the
/// final assembly string.
///
/// \param AsmString   [out] the rewritten assembly string.
/// \param NumOutputs  [out] number of output operands collected.
/// \param NumInputs   [out] number of input operands collected.
/// \param OpDecls     [out] (declaration, needs-address-of) pairs, outputs
///                    first, then inputs.
/// \param Constraints [out] constraint strings, in the same order as OpDecls.
/// \param Clobbers    [out] printed names of the unique clobbered registers.
/// \param MII         instruction info used to look up each parsed opcode.
/// \param IP          instruction printer used to print register names.
/// \param SI          semantic callback handed to parseStatement.
/// \returns true if any statement failed to parse, false otherwise.
bool MasmParser::parseMSInlineAsm(
    std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
    SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
    SmallVectorImpl<std::string> &Constraints,
    SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
    MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
  // Inputs and outputs are gathered separately and merged (outputs first)
  // once all statements have been parsed.
  SmallVector<void *, 4> InputDecls;
  SmallVector<void *, 4> OutputDecls;
  SmallVector<bool, 4> InputDeclsAddressOf;
  SmallVector<bool, 4> OutputDeclsAddressOf;
  SmallVector<std::string, 4> InputConstraints;
  SmallVector<std::string, 4> OutputConstraints;
  SmallVector<MCRegister, 4> ClobberRegs;

  // Rewrites to apply to the original text; filled in by the statement parser
  // (through ParseStatementInfo) and by the operand loop below.
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // Prime the lexer.
  Lex();

  // While we have input, parse each statement.
  // InputIdx/OutputIdx number the $N placeholders emitted further down.
  unsigned InputIdx = 0;
  unsigned OutputIdx = 0;
  while (getLexer().isNot(AsmToken::Eof)) {
    // Parse curly braces marking block start/end.
    if (parseCurlyBlockScope(AsmStrRewrites))
      continue;

    ParseStatementInfo Info(&AsmStrRewrites);
    bool StatementErr = parseStatement(Info, &SI);

    if (StatementErr || Info.ParseError) {
      // Emit pending errors if any exist.
      printPendingErrors();
      return true;
    }

    // No pending error should exist here.
    assert(!hasPendingError() && "unexpected error from parseStatement");

    // An opcode of ~0U means the statement did not yield an instruction to
    // inspect, so there are no operands to classify.
    if (Info.Opcode == ~0U)
      continue;

    const MCInstrDesc &Desc = MII->get(Info.Opcode);

    // Build the list of clobbers, outputs and inputs.
    // Operand 0 is skipped; iteration starts at index 1.
    for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
      MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];

      // Register operand.
      if (Operand.isReg() && !Operand.needAddressOf() &&
          !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) {
        unsigned NumDefs = Desc.getNumDefs();
        // Clobber.
        // Only registers in a def position of the instruction are recorded.
        if (NumDefs && Operand.getMCOperandNum() < NumDefs)
          ClobberRegs.push_back(Operand.getReg());
        continue;
      }

      // Expr/Input or Output.
      StringRef SymName = Operand.getSymName();
      if (SymName.empty())
        continue;

      // Operands without a declaration are not exposed as inputs/outputs.
      void *OpDecl = Operand.getOpDecl();
      if (!OpDecl)
        continue;

      StringRef Constraint = Operand.getConstraint();
      if (Operand.isImm()) {
        // Offset as immediate.
        if (Operand.isOffsetOfLocal())
          Constraint = "r";
        else
          Constraint = "i";
      }

      // Only the first explicit operand of a storing instruction is treated
      // as an output; everything else is an input.
      bool isOutput = (i == 1) && Desc.mayStore();
      SMLoc Start = SMLoc::getFromPointer(SymName.data());
      if (isOutput) {
        // Outputs come first in the merged operand list, so each output
        // shifts the index of the first input by one.
        ++InputIdx;
        OutputDecls.push_back(OpDecl);
        OutputDeclsAddressOf.push_back(Operand.needAddressOf());
        OutputConstraints.push_back(("=" + Constraint).str());
        AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
      } else {
        InputDecls.push_back(OpDecl);
        InputDeclsAddressOf.push_back(Operand.needAddressOf());
        InputConstraints.push_back(Constraint.str());
        if (Desc.operands()[i - 1].isBranchTarget())
          AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
        else
          AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
      }
    }

    // Consider implicit defs to be clobbers. Think of cpuid and push.
    llvm::append_range(ClobberRegs, Desc.implicit_defs());
  }

  // Set the number of Outputs and Inputs.
  NumOutputs = OutputDecls.size();
  NumInputs = InputDecls.size();

  // Set the unique clobbers.
  array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
  ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
  Clobbers.assign(ClobberRegs.size(), std::string());
  for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
    raw_string_ostream OS(Clobbers[I]);
    IP->printRegName(OS, ClobberRegs[I]);
  }

  // Merge the various outputs and inputs. Output are expected first.
  if (NumOutputs || NumInputs) {
    unsigned NumExprs = NumOutputs + NumInputs;
    OpDecls.resize(NumExprs);
    Constraints.resize(NumExprs);
    for (unsigned i = 0; i < NumOutputs; ++i) {
      OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
      Constraints[i] = OutputConstraints[i];
    }
    for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
      OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
      Constraints[j] = InputConstraints[i];
    }
  }

  // Build the IR assembly string.
  // AsmStart advances through the main file's text as rewrites are applied;
  // text between rewrites is copied through unchanged.
  std::string AsmStringIR;
  raw_string_ostream OS(AsmStringIR);
  StringRef ASMString =
      SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
  const char *AsmStart = ASMString.begin();
  const char *AsmEnd = ASMString.end();
  array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
  for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
    const AsmRewrite &AR = *I;
    // Check if this has already been covered by another rewrite...
    if (AR.Done)
      continue;
    AsmRewriteKind Kind = AR.Kind;

    const char *Loc = AR.Loc.getPointer();
    assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");

    // Emit everything up to the immediate/expression.
    if (unsigned Len = Loc - AsmStart)
      OS << StringRef(AsmStart, Len);

    // Skip the original expression.
    if (Kind == AOK_Skip) {
      AsmStart = Loc + AR.Len;
      continue;
    }

    // Extra characters of original text to drop beyond AR.Len (only set by
    // AOK_Align below).
    unsigned AdditionalSkip = 0;
    // Rewrite expressions in $N notation.
    switch (Kind) {
    default:
      break;
    case AOK_IntelExpr:
      assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
      if (AR.IntelExp.NeedBracs)
        OS << "[";
      if (AR.IntelExp.hasBaseReg())
        OS << AR.IntelExp.BaseReg;
      if (AR.IntelExp.hasIndexReg())
        OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
           << AR.IntelExp.IndexReg;
      if (AR.IntelExp.Scale > 1)
        OS << " * $$" << AR.IntelExp.Scale;
      if (AR.IntelExp.hasOffset()) {
        if (AR.IntelExp.hasRegs())
          OS << " + ";
        // Fuse this rewrite with a rewrite of the offset name, if present.
        // The search starts at the current rewrite and matches an input
        // rewrite at exactly the offset name's location and length.
        StringRef OffsetName = AR.IntelExp.OffsetName;
        SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
        size_t OffsetLen = OffsetName.size();
        auto rewrite_it = std::find_if(
            I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
              return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
                     (FusingAR.Kind == AOK_Input ||
                      FusingAR.Kind == AOK_CallInput);
            });
        if (rewrite_it == AsmStrRewrites.end()) {
          OS << "offset " << OffsetName;
        } else if (rewrite_it->Kind == AOK_CallInput) {
          OS << "${" << InputIdx++ << ":P}";
          // Mark the fused rewrite so it is not emitted a second time.
          rewrite_it->Done = true;
        } else {
          OS << '$' << InputIdx++;
          rewrite_it->Done = true;
        }
      }
      if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
        OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
      if (AR.IntelExp.NeedBracs)
        OS << "]";
      break;
    case AOK_Label:
      OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
      break;
    case AOK_Input:
      OS << '$' << InputIdx++;
      break;
    case AOK_CallInput:
      OS << "${" << InputIdx++ << ":P}";
      break;
    case AOK_Output:
      OS << '$' << OutputIdx++;
      break;
    case AOK_SizeDirective:
      // AR.Val is the operand size in bits; unlisted sizes emit nothing.
      switch (AR.Val) {
      default: break;
      case 8:  OS << "byte ptr "; break;
      case 16: OS << "word ptr "; break;
      case 32: OS << "dword ptr "; break;
      case 64: OS << "qword ptr "; break;
      case 80: OS << "xword ptr "; break;
      case 128: OS << "xmmword ptr "; break;
      case 256: OS << "ymmword ptr "; break;
      }
      break;
    case AOK_Emit:
      OS << ".byte";
      break;
    case AOK_Align: {
      // MS alignment directives are measured in bytes. If the native assembler
      // measures alignment in bytes, we can pass it straight through.
      OS << ".align";
      if (getContext().getAsmInfo()->getAlignmentIsInBytes())
        break;

      // Alignment is in log2 form, so print that instead and skip the original
      // immediate.
      unsigned Val = AR.Val;
      OS << ' ' << Val;
      assert(Val < 10 && "Expected alignment less then 2^10.");
      // NOTE(review): presumably the width of the original decimal literal
      // (1-3 digits for 2^Val with Val < 10) plus its separating space --
      // confirm.
      AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
      break;
    }
    case AOK_EVEN:
      OS << ".even";
      break;
    case AOK_EndOfStatement:
      OS << "\n\t";
      break;
    }

    // Skip the original expression.
    AsmStart = Loc + AR.Len + AdditionalSkip;
  }

  // Emit the remainder of the asm string.
  if (AsmStart != AsmEnd)
    OS << StringRef(AsmStart, AsmEnd - AsmStart);

  AsmString = OS.str();
  return false;
}
6138
initializeBuiltinSymbolMaps()6139 void MasmParser::initializeBuiltinSymbolMaps() {
6140 // Numeric built-ins (supported in all versions)
6141 BuiltinSymbolMap["@version"] = BI_VERSION;
6142 BuiltinSymbolMap["@line"] = BI_LINE;
6143
6144 // Text built-ins (supported in all versions)
6145 BuiltinSymbolMap["@date"] = BI_DATE;
6146 BuiltinSymbolMap["@time"] = BI_TIME;
6147 BuiltinSymbolMap["@filecur"] = BI_FILECUR;
6148 BuiltinSymbolMap["@filename"] = BI_FILENAME;
6149 BuiltinSymbolMap["@curseg"] = BI_CURSEG;
6150
6151 // Function built-ins (supported in all versions)
6152 BuiltinFunctionMap["@catstr"] = BI_CATSTR;
6153
6154 // Some built-ins exist only for MASM32 (32-bit x86)
6155 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
6156 Triple::x86) {
6157 // Numeric built-ins
6158 // BuiltinSymbolMap["@cpu"] = BI_CPU;
6159 // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
6160 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
6161 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
6162 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
6163 // BuiltinSymbolMap["@model"] = BI_MODEL;
6164
6165 // Text built-ins
6166 // BuiltinSymbolMap["@code"] = BI_CODE;
6167 // BuiltinSymbolMap["@data"] = BI_DATA;
6168 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
6169 // BuiltinSymbolMap["@stack"] = BI_STACK;
6170 }
6171 }
6172
evaluateBuiltinValue(BuiltinSymbol Symbol,SMLoc StartLoc)6173 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
6174 SMLoc StartLoc) {
6175 switch (Symbol) {
6176 default:
6177 return nullptr;
6178 case BI_VERSION:
6179 // Match a recent version of ML.EXE.
6180 return MCConstantExpr::create(1427, getContext());
6181 case BI_LINE: {
6182 int64_t Line;
6183 if (ActiveMacros.empty())
6184 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
6185 else
6186 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
6187 ActiveMacros.front()->ExitBuffer);
6188 return MCConstantExpr::create(Line, getContext());
6189 }
6190 }
6191 llvm_unreachable("unhandled built-in symbol");
6192 }
6193
6194 std::optional<std::string>
evaluateBuiltinTextMacro(BuiltinSymbol Symbol,SMLoc StartLoc)6195 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
6196 switch (Symbol) {
6197 default:
6198 return {};
6199 case BI_DATE: {
6200 // Current local date, formatted MM/DD/YY
6201 char TmpBuffer[sizeof("mm/dd/yy")];
6202 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
6203 return std::string(TmpBuffer, Len);
6204 }
6205 case BI_TIME: {
6206 // Current local time, formatted HH:MM:SS (24-hour clock)
6207 char TmpBuffer[sizeof("hh:mm:ss")];
6208 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
6209 return std::string(TmpBuffer, Len);
6210 }
6211 case BI_FILECUR:
6212 return SrcMgr
6213 .getMemoryBuffer(
6214 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
6215 ->getBufferIdentifier()
6216 .str();
6217 case BI_FILENAME:
6218 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
6219 ->getBufferIdentifier())
6220 .upper();
6221 case BI_CURSEG:
6222 return getStreamer().getCurrentSectionOnly()->getName().str();
6223 }
6224 llvm_unreachable("unhandled built-in symbol");
6225 }
6226
evaluateBuiltinMacroFunction(BuiltinFunction Function,StringRef Name,std::string & Res)6227 bool MasmParser::evaluateBuiltinMacroFunction(BuiltinFunction Function,
6228 StringRef Name,
6229 std::string &Res) {
6230 if (parseToken(AsmToken::LParen, "invoking macro function '" + Name +
6231 "' requires arguments in parentheses")) {
6232 return true;
6233 }
6234
6235 MCAsmMacroParameters P;
6236 switch (Function) {
6237 default:
6238 return true;
6239 case BI_CATSTR:
6240 break;
6241 }
6242 MCAsmMacro M(Name, "", P, {}, true);
6243
6244 MCAsmMacroArguments A;
6245 if (parseMacroArguments(&M, A, AsmToken::RParen) || parseRParen()) {
6246 return true;
6247 }
6248
6249 switch (Function) {
6250 default:
6251 llvm_unreachable("unhandled built-in function");
6252 case BI_CATSTR: {
6253 for (const MCAsmMacroArgument &Arg : A) {
6254 for (const AsmToken &Tok : Arg) {
6255 if (Tok.is(AsmToken::String)) {
6256 Res.append(Tok.getStringContents());
6257 } else {
6258 Res.append(Tok.getString());
6259 }
6260 }
6261 }
6262 return false;
6263 }
6264 }
6265 llvm_unreachable("unhandled built-in function");
6266 return true;
6267 }
6268
6269 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)6270 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
6271 MCStreamer &Out, const MCAsmInfo &MAI,
6272 struct tm TM, unsigned CB) {
6273 return new MasmParser(SM, C, Out, MAI, TM, CB);
6274 }
6275