1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the parser for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/ADT/APFloat.h"
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/ADT/Twine.h"
25 #include "llvm/BinaryFormat/Dwarf.h"
26 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeView.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCDirectives.h"
31 #include "llvm/MC/MCDwarf.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCInstPrinter.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrInfo.h"
36 #include "llvm/MC/MCParser/AsmCond.h"
37 #include "llvm/MC/MCParser/AsmLexer.h"
38 #include "llvm/MC/MCParser/MCAsmLexer.h"
39 #include "llvm/MC/MCParser/MCAsmParser.h"
40 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
41 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
42 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
43 #include "llvm/MC/MCRegisterInfo.h"
44 #include "llvm/MC/MCSection.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSubtargetInfo.h"
47 #include "llvm/MC/MCSymbol.h"
48 #include "llvm/MC/MCTargetOptions.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/Format.h"
53 #include "llvm/Support/MD5.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/MemoryBuffer.h"
56 #include "llvm/Support/Path.h"
57 #include "llvm/Support/SMLoc.h"
58 #include "llvm/Support/SourceMgr.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <climits>
63 #include <cstddef>
64 #include <cstdint>
65 #include <ctime>
66 #include <deque>
67 #include <memory>
68 #include <optional>
69 #include <sstream>
70 #include <string>
71 #include <tuple>
72 #include <utility>
73 #include <vector>
74
75 using namespace llvm;
76
77 namespace {
78
79 /// Helper types for tracking macro definitions.
80 typedef std::vector<AsmToken> MCAsmMacroArgument;
81 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
82
/// Helper class for storing information about an active macro instantiation.
///
/// Plain data record: it only remembers where the instantiation happened and
/// where parsing has to continue once the instantiation is finished.
struct MacroInstantiation {
  /// The location of the instantiation.
  SMLoc InstantiationLoc;

  /// The buffer where parsing should resume upon instantiation completion.
  unsigned ExitBuffer;

  /// The location where parsing should resume upon instantiation completion.
  SMLoc ExitLoc;

  /// The depth of TheCondStack at the start of the instantiation.
  size_t CondStackDepth;
};
97
98 struct ParseStatementInfo {
99 /// The parsed operands from the last parsed statement.
100 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
101
102 /// The opcode from the last parsed instruction.
103 unsigned Opcode = ~0U;
104
105 /// Was there an error parsing the inline assembly?
106 bool ParseError = false;
107
108 /// The value associated with a macro exit.
109 std::optional<std::string> ExitValue;
110
111 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
112
113 ParseStatementInfo() = delete;
ParseStatementInfo__anon60b61cd60111::ParseStatementInfo114 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
115 : AsmRewrites(rewrites) {}
116 };
117
/// Discriminator for the kind of payload a struct field (and its initializer)
/// carries; FieldInitializer uses it to select the active union member.
enum FieldType {
  FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
  FT_REAL,     // Initializer: real number, stored as an APInt.
  FT_STRUCT    // Initializer: struct initializer, stored recursively.
};
123
struct FieldInfo;

/// Description of a struct or union type: layout bookkeeping plus the list of
/// fields added so far.
struct StructInfo {
  StringRef Name;
  // When set, addField() does not advance NextOffset.
  bool IsUnion = false;
  bool Initializable = true;
  // Upper bound applied to every field's alignment in addField().
  unsigned Alignment = 0;
  // Largest FieldAlignmentSize passed to addField() so far.
  unsigned AlignmentSize = 0;
  // Offset that addField() aligns to obtain the next field's offset.
  unsigned NextOffset = 0;
  unsigned Size = 0;
  std::vector<FieldInfo> Fields;
  // Maps a lower-cased field name to the field's index in Fields.
  StringMap<size_t> FieldsByName;

  /// Appends a new field of kind \p FT and returns a reference to it.
  /// \p FieldName may be empty, in which case the field is not registered in
  /// FieldsByName. The returned reference refers to an element of Fields.
  FieldInfo &addField(StringRef FieldName, FieldType FT,
                      unsigned FieldAlignmentSize);

  StructInfo() = default;
  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
};
142
143 // FIXME: This should probably use a class hierarchy, raw pointers between the
144 // objects, and dynamic type resolution instead of a union. On the other hand,
145 // ownership then becomes much more complicated; the obvious thing would be to
146 // use BumpPtrAllocator, but the lack of a destructor makes that messy.
147
148 struct StructInitializer;
149 struct IntFieldInfo {
150 SmallVector<const MCExpr *, 1> Values;
151
152 IntFieldInfo() = default;
IntFieldInfo__anon60b61cd60111::IntFieldInfo153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
IntFieldInfo__anon60b61cd60111::IntFieldInfo154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
155 };
156 struct RealFieldInfo {
157 SmallVector<APInt, 1> AsIntValues;
158
159 RealFieldInfo() = default;
RealFieldInfo__anon60b61cd60111::RealFieldInfo160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
RealFieldInfo__anon60b61cd60111::RealFieldInfo161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
162 };
/// Payload of an FT_STRUCT field: a list of struct initializers together with
/// the description of the struct type they belong to.
struct StructFieldInfo {
  std::vector<StructInitializer> Initializers;
  StructInfo Structure;

  StructFieldInfo() = default;
  // Defined out of line, after StructInitializer is complete.
  StructFieldInfo(std::vector<StructInitializer> V, StructInfo S);
};
170
/// Tagged union holding the initializer of one field. FT records which of the
/// three anonymous-union members is currently alive; the special member
/// functions below construct and destroy the matching member by hand.
class FieldInitializer {
public:
  FieldType FT;
  union {
    IntFieldInfo IntInfo;
    RealFieldInfo RealInfo;
    StructFieldInfo StructInfo;
  };

  ~FieldInitializer();
  /// Creates an empty initializer of the given kind.
  FieldInitializer(FieldType FT);

  /// Each of these sets FT from the argument type (integral, real, struct).
  FieldInitializer(SmallVector<const MCExpr *, 1> &&Values);
  FieldInitializer(SmallVector<APInt, 1> &&AsIntValues);
  // The elaborated name "struct StructInfo" is required here because the
  // union member named StructInfo hides the type name inside this class.
  FieldInitializer(std::vector<StructInitializer> &&Initializers,
                   struct StructInfo Structure);

  FieldInitializer(const FieldInitializer &Initializer);
  FieldInitializer(FieldInitializer &&Initializer);

  FieldInitializer &operator=(const FieldInitializer &Initializer);
  FieldInitializer &operator=(FieldInitializer &&Initializer);
};
194
/// Initializer for a whole struct value: a list of field initializers.
/// NOTE(review): presumably one entry per field of the struct, in field
/// order - confirm against parseStructInitializer.
struct StructInitializer {
  std::vector<FieldInitializer> FieldInitializers;
};
198
/// One field of a struct or union: its placement and size information plus
/// the initializer stored in Contents.
struct FieldInfo {
  // Offset of the field within the containing STRUCT.
  unsigned Offset = 0;

  // Total size of the field (= LengthOf * Type).
  unsigned SizeOf = 0;

  // Number of elements in the field (1 if scalar, >1 if an array).
  unsigned LengthOf = 0;

  // Size of a single entry in this field, in bytes ("type" in MASM standards).
  unsigned Type = 0;

  // Initializer payload; its kind is fixed by the FieldType given at
  // construction.
  FieldInitializer Contents;

  // Intentionally usable from emplace_back(FieldType), see
  // StructInfo::addField.
  FieldInfo(FieldType FT) : Contents(FT) {}
};
216
StructFieldInfo(std::vector<StructInitializer> V,StructInfo S)217 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V,
218 StructInfo S) {
219 Initializers = std::move(V);
220 Structure = S;
221 }
222
/// Creates an empty struct (or union, when \p Union is set) description with
/// the given name and alignment value; every other member keeps its in-class
/// default.
StructInfo::StructInfo(StringRef StructName, bool Union,
                       unsigned AlignmentValue)
    : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {}
226
addField(StringRef FieldName,FieldType FT,unsigned FieldAlignmentSize)227 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT,
228 unsigned FieldAlignmentSize) {
229 if (!FieldName.empty())
230 FieldsByName[FieldName.lower()] = Fields.size();
231 Fields.emplace_back(FT);
232 FieldInfo &Field = Fields.back();
233 Field.Offset =
234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize));
235 if (!IsUnion) {
236 NextOffset = std::max(NextOffset, Field.Offset);
237 }
238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize);
239 return Field;
240 }
241
~FieldInitializer()242 FieldInitializer::~FieldInitializer() {
243 switch (FT) {
244 case FT_INTEGRAL:
245 IntInfo.~IntFieldInfo();
246 break;
247 case FT_REAL:
248 RealInfo.~RealFieldInfo();
249 break;
250 case FT_STRUCT:
251 StructInfo.~StructFieldInfo();
252 break;
253 }
254 }
255
/// Creates an empty initializer of kind \p FT by default-constructing the
/// matching union member in place.
FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
  switch (FT) {
  case FT_INTEGRAL:
    new (&IntInfo) IntFieldInfo();
    break;
  case FT_REAL:
    new (&RealInfo) RealFieldInfo();
    break;
  case FT_STRUCT:
    new (&StructInfo) StructFieldInfo();
    break;
  }
}
269
/// Creates an FT_INTEGRAL initializer that takes over \p Values.
FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
    : FT(FT_INTEGRAL) {
  new (&IntInfo) IntFieldInfo(std::move(Values));
}
274
/// Creates an FT_REAL initializer that takes over \p AsIntValues.
FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
    : FT(FT_REAL) {
  new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
}
279
FieldInitializer(std::vector<StructInitializer> && Initializers,struct StructInfo Structure)280 FieldInitializer::FieldInitializer(
281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure)
282 : FT(FT_STRUCT) {
283 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure);
284 }
285
/// Copy constructor: adopts the kind of \p Initializer and copy-constructs
/// the corresponding union member in place.
FieldInitializer::FieldInitializer(const FieldInitializer &Initializer)
    : FT(Initializer.FT) {
  switch (FT) {
  case FT_INTEGRAL:
    new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
    break;
  case FT_REAL:
    new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
    break;
  case FT_STRUCT:
    new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
    break;
  }
}
300
FieldInitializer(FieldInitializer && Initializer)301 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer)
302 : FT(Initializer.FT) {
303 switch (FT) {
304 case FT_INTEGRAL:
305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo);
306 break;
307 case FT_REAL:
308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo);
309 break;
310 case FT_STRUCT:
311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo);
312 break;
313 }
314 }
315
316 FieldInitializer &
operator =(const FieldInitializer & Initializer)317 FieldInitializer::operator=(const FieldInitializer &Initializer) {
318 if (FT != Initializer.FT) {
319 switch (FT) {
320 case FT_INTEGRAL:
321 IntInfo.~IntFieldInfo();
322 break;
323 case FT_REAL:
324 RealInfo.~RealFieldInfo();
325 break;
326 case FT_STRUCT:
327 StructInfo.~StructFieldInfo();
328 break;
329 }
330 }
331 FT = Initializer.FT;
332 switch (FT) {
333 case FT_INTEGRAL:
334 IntInfo = Initializer.IntInfo;
335 break;
336 case FT_REAL:
337 RealInfo = Initializer.RealInfo;
338 break;
339 case FT_STRUCT:
340 StructInfo = Initializer.StructInfo;
341 break;
342 }
343 return *this;
344 }
345
operator =(FieldInitializer && Initializer)346 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) {
347 if (FT != Initializer.FT) {
348 switch (FT) {
349 case FT_INTEGRAL:
350 IntInfo.~IntFieldInfo();
351 break;
352 case FT_REAL:
353 RealInfo.~RealFieldInfo();
354 break;
355 case FT_STRUCT:
356 StructInfo.~StructFieldInfo();
357 break;
358 }
359 }
360 FT = Initializer.FT;
361 switch (FT) {
362 case FT_INTEGRAL:
363 IntInfo = Initializer.IntInfo;
364 break;
365 case FT_REAL:
366 RealInfo = Initializer.RealInfo;
367 break;
368 case FT_STRUCT:
369 StructInfo = Initializer.StructInfo;
370 break;
371 }
372 return *this;
373 }
374
375 /// The concrete assembly parser instance.
376 // Note that this is a full MCAsmParser, not an MCAsmParserExtension!
377 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc.
378 class MasmParser : public MCAsmParser {
379 private:
380 AsmLexer Lexer;
381 MCContext &Ctx;
382 MCStreamer &Out;
383 const MCAsmInfo &MAI;
384 SourceMgr &SrcMgr;
385 SourceMgr::DiagHandlerTy SavedDiagHandler;
386 void *SavedDiagContext;
387 std::unique_ptr<MCAsmParserExtension> PlatformParser;
388
389 /// This is the current buffer index we're lexing from as managed by the
390 /// SourceMgr object.
391 unsigned CurBuffer;
392
393 /// time of assembly
394 struct tm TM;
395
396 BitVector EndStatementAtEOFStack;
397
398 AsmCond TheCondState;
399 std::vector<AsmCond> TheCondStack;
400
401 /// maps directive names to handler methods in parser
402 /// extensions. Extensions register themselves in this map by calling
403 /// addDirectiveHandler.
404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
405
  /// An assembly-time variable.
  struct Variable {
    /// Redefinition policy for the variable.
    enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE };

    StringRef Name;
    RedefinableKind Redefinable = REDEFINABLE;
    // NOTE(review): presumably TextValue is only meaningful when IsText is
    // set - confirm against the users of Variables.
    bool IsText = false;
    std::string TextValue;
  };
  /// Maps assembly-time variable names to variables.
  StringMap<Variable> Variables;
416
417 /// Stack of active struct definitions.
418 SmallVector<StructInfo, 1> StructInProgress;
419
420 /// Maps struct tags to struct definitions.
421 StringMap<StructInfo> Structs;
422
423 /// Maps data location names to types.
424 StringMap<AsmTypeInfo> KnownType;
425
426 /// Stack of active macro instantiations.
427 std::vector<MacroInstantiation*> ActiveMacros;
428
429 /// List of bodies of anonymous macros.
430 std::deque<MCAsmMacro> MacroLikeBodies;
431
432 /// Keeps track of how many .macro's have been instantiated.
433 unsigned NumOfMacroInstantiations;
434
435 /// The values from the last parsed cpp hash file line comment if any.
436 struct CppHashInfoTy {
437 StringRef Filename;
438 int64_t LineNumber;
439 SMLoc Loc;
440 unsigned Buf;
CppHashInfoTy__anon60b61cd60111::MasmParser::CppHashInfoTy441 CppHashInfoTy() : LineNumber(0), Buf(0) {}
442 };
443 CppHashInfoTy CppHashInfo;
444
445 /// The filename from the first cpp hash file line comment, if any.
446 StringRef FirstCppHashFilename;
447
448 /// List of forward directional labels for diagnosis at the end.
449 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
450
  /// AssemblerDialect. ~0U means unset value and use value provided by MAI.
  /// Defaults to 1U, meaning Intel.
453 unsigned AssemblerDialect = 1U;
454
455 /// is Darwin compatibility enabled?
456 bool IsDarwin = false;
457
458 /// Are we parsing ms-style inline assembly?
459 bool ParsingMSInlineAsm = false;
460
461 /// Did we already inform the user about inconsistent MD5 usage?
462 bool ReportedInconsistentMD5 = false;
463
464 // Current <...> expression depth.
465 unsigned AngleBracketDepth = 0U;
466
467 // Number of locals defined.
468 uint16_t LocalCounter = 0;
469
470 public:
471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0);
473 MasmParser(const MasmParser &) = delete;
474 MasmParser &operator=(const MasmParser &) = delete;
475 ~MasmParser() override;
476
477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
478
addDirectiveHandler(StringRef Directive,ExtensionDirectiveHandler Handler)479 void addDirectiveHandler(StringRef Directive,
480 ExtensionDirectiveHandler Handler) override {
481 ExtensionDirectiveMap[Directive] = Handler;
482 if (!DirectiveKindMap.contains(Directive)) {
483 DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
484 }
485 }
486
addAliasForDirective(StringRef Directive,StringRef Alias)487 void addAliasForDirective(StringRef Directive, StringRef Alias) override {
488 DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
489 }
490
491 /// @name MCAsmParser Interface
492 /// {
493
  // Accessors for the objects this parser was set up with.
  SourceMgr &getSourceManager() override { return SrcMgr; }
  MCAsmLexer &getLexer() override { return Lexer; }
  MCContext &getContext() override { return Ctx; }
  MCStreamer &getStreamer() override { return Out; }

  /// Convenience shortcut for the CodeView context owned by the MCContext.
  CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
500
getAssemblerDialect()501 unsigned getAssemblerDialect() override {
502 if (AssemblerDialect == ~0U)
503 return MAI.getAssemblerDialect();
504 else
505 return AssemblerDialect;
506 }
  /// Overrides the assembler dialect. Passing ~0U makes
  /// getAssemblerDialect() fall back to the value provided by MAI.
  void setAssemblerDialect(unsigned i) override {
    AssemblerDialect = i;
  }
510
511 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override;
512 bool Warning(SMLoc L, const Twine &Msg,
513 SMRange Range = std::nullopt) override;
514 bool printError(SMLoc L, const Twine &Msg,
515 SMRange Range = std::nullopt) override;
516
  /// Argument for Lex(ExpandKind); the parameterless Lex() passes
  /// ExpandMacros.
  enum ExpandKind { ExpandMacros, DoNotExpandMacros };
  const AsmToken &Lex(ExpandKind ExpandNextToken);
  /// MCAsmParser interface entry point; forwards to Lex(ExpandMacros).
  const AsmToken &Lex() override { return Lex(ExpandMacros); }
520
  /// Records whether MS-style inline assembly is being parsed and passes the
  /// same flag on to the lexer's MASM-integer setting.
  void setParsingMSInlineAsm(bool V) override {
    ParsingMSInlineAsm = V;
    // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
    // hex integer literals.
    Lexer.setLexMasmIntegers(V);
  }
  /// Returns the flag last stored by setParsingMSInlineAsm().
  bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }

  /// This parser always reports that it is parsing MASM.
  bool isParsingMasm() const override { return true; }
530
531 bool defineMacro(StringRef Name, StringRef Value) override;
532
533 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override;
534 bool lookUpField(StringRef Base, StringRef Member,
535 AsmFieldInfo &Info) const override;
536
537 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
538
539 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
540 unsigned &NumInputs,
541 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
542 SmallVectorImpl<std::string> &Constraints,
543 SmallVectorImpl<std::string> &Clobbers,
544 const MCInstrInfo *MII, const MCInstPrinter *IP,
545 MCAsmParserSemaCallback &SI) override;
546
547 bool parseExpression(const MCExpr *&Res);
548 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
549 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
550 AsmTypeInfo *TypeInfo) override;
551 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override;
552 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
553 SMLoc &EndLoc) override;
554 bool parseAbsoluteExpression(int64_t &Res) override;
555
556 /// Parse a floating point expression using the float \p Semantics
557 /// and set \p Res to the value.
558 bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
559
  /// Parse an identifier or string (as a quoted identifier)
  /// and set \p Res to the identifier contents.
  ///
  /// \p Position tells the parser where the identifier appears; the
  /// single-argument MCAsmParser override uses StandardPosition.
  enum IdentifierPositionKind { StandardPosition, StartOfStatement };
  bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position);
  bool parseIdentifier(StringRef &Res) override {
    return parseIdentifier(Res, StandardPosition);
  }
567 void eatToEndOfStatement() override;
568
569 bool checkForValidSection() override;
570
571 /// }
572
573 private:
574 bool expandMacros();
575 const AsmToken peekTok(bool ShouldSkipSpace = true);
576
577 bool parseStatement(ParseStatementInfo &Info,
578 MCAsmParserSemaCallback *SI);
579 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
580 bool parseCppHashLineFilenameComment(SMLoc L);
581
582 bool expandMacro(raw_svector_ostream &OS, StringRef Body,
583 ArrayRef<MCAsmMacroParameter> Parameters,
584 ArrayRef<MCAsmMacroArgument> A,
585 const std::vector<std::string> &Locals, SMLoc L);
586
  /// Are we inside a macro instantiation? True while the ActiveMacros stack
  /// has at least one entry.
  bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
589
590 /// Handle entry to macro instantiation.
591 ///
592 /// \param M The macro.
593 /// \param NameLoc Instantiation location.
594 bool handleMacroEntry(
595 const MCAsmMacro *M, SMLoc NameLoc,
596 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement);
597
598 /// Handle invocation of macro function.
599 ///
600 /// \param M The macro.
601 /// \param NameLoc Invocation location.
602 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc);
603
604 /// Handle exit from macro instantiation.
605 void handleMacroExit();
606
607 /// Extract AsmTokens for a macro argument.
608 bool
609 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA,
610 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
611
612 /// Parse all macro arguments for a given macro.
613 bool
614 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A,
615 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement);
616
617 void printMacroInstantiations();
618
619 bool expandStatement(SMLoc Loc);
620
printMessage(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Msg,SMRange Range=std::nullopt) const621 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
622 SMRange Range = std::nullopt) const {
623 ArrayRef<SMRange> Ranges(Range);
624 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
625 }
626 static void DiagHandler(const SMDiagnostic &Diag, void *Context);
627
628 bool lookUpField(const StructInfo &Structure, StringRef Member,
629 AsmFieldInfo &Info) const;
630
631 /// Should we emit DWARF describing this assembler source? (Returns false if
632 /// the source has .file directives, which means we don't want to generate
633 /// info describing the assembler source itself.)
634 bool enabledGenDwarfForAssembly();
635
636 /// Enter the specified file. This returns true on failure.
637 bool enterIncludeFile(const std::string &Filename);
638
639 /// Reset the current lexer position to that given by \p Loc. The
640 /// current token is not set; clients should ensure Lex() is called
641 /// subsequently.
642 ///
643 /// \param InBuffer If not 0, should be the known buffer id that contains the
644 /// location.
645 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0,
646 bool EndStatementAtEOF = true);
647
648 /// Parse up to a token of kind \p EndTok and return the contents from the
649 /// current token up to (but not including) this token; the current token on
650 /// exit will be either this kind or EOF. Reads through instantiated macro
651 /// functions and text macros.
652 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok);
653 std::string parseStringTo(AsmToken::TokenKind EndTok);
654
655 /// Parse up to the end of statement and return the contents from the current
656 /// token until the end of the statement; the current token on exit will be
657 /// either the EndOfStatement or EOF.
658 StringRef parseStringToEndOfStatement() override;
659
660 bool parseTextItem(std::string &Data);
661
662 unsigned getBinOpPrecedence(AsmToken::TokenKind K,
663 MCBinaryExpr::Opcode &Kind);
664
665 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
666 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
667 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
668
669 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
670
671 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
672 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
673
  // Generic (target and platform independent) directive parsing.
  //
  // One enumerator per directive kind known to this parser; DirectiveKindMap
  // maps directive names to these values. The group headings below are
  // orientation aids derived from the enumerator names and the handler
  // declarations in this class.
  enum DirectiveKind {
    DK_NO_DIRECTIVE, // Placeholder
    // Kind given by addDirectiveHandler() to names that had no kind yet.
    DK_HANDLER_DIRECTIVE,
    // Equates (see parseDirectiveEquate).
    DK_ASSIGN,
    DK_EQU,
    DK_TEXTEQU,
    // String data (see parseDirectiveAscii).
    DK_ASCII,
    DK_ASCIZ,
    DK_STRING,
    // Integral data.
    DK_BYTE,
    DK_SBYTE,
    DK_WORD,
    DK_SWORD,
    DK_DWORD,
    DK_SDWORD,
    DK_FWORD,
    DK_QWORD,
    DK_SQWORD,
    DK_DB,
    DK_DD,
    DK_DF,
    DK_DQ,
    DK_DW,
    // Real-number data.
    DK_REAL4,
    DK_REAL8,
    DK_REAL10,
    // Alignment and location.
    DK_ALIGN,
    DK_EVEN,
    DK_ORG,
    DK_ENDR,
    // Symbols, comments and includes.
    DK_EXTERN,
    DK_PUBLIC,
    DK_COMM,
    DK_COMMENT,
    DK_INCLUDE,
    // Loops.
    DK_REPEAT,
    DK_WHILE,
    DK_FOR,
    DK_FORC,
    // Conditional assembly.
    DK_IF,
    DK_IFE,
    DK_IFB,
    DK_IFNB,
    DK_IFDEF,
    DK_IFNDEF,
    DK_IFDIF,
    DK_IFDIFI,
    DK_IFIDN,
    DK_IFIDNI,
    DK_ELSEIF,
    DK_ELSEIFE,
    DK_ELSEIFB,
    DK_ELSEIFNB,
    DK_ELSEIFDEF,
    DK_ELSEIFNDEF,
    DK_ELSEIFDIF,
    DK_ELSEIFDIFI,
    DK_ELSEIFIDN,
    DK_ELSEIFIDNI,
    DK_ELSE,
    DK_ENDIF,
    // ".file", ".line", ".loc", ".stabs".
    DK_FILE,
    DK_LINE,
    DK_LOC,
    DK_STABS,
    // CodeView (".cv_*") directives.
    DK_CV_FILE,
    DK_CV_FUNC_ID,
    DK_CV_INLINE_SITE_ID,
    DK_CV_LOC,
    DK_CV_LINETABLE,
    DK_CV_INLINE_LINETABLE,
    DK_CV_DEF_RANGE,
    DK_CV_STRINGTABLE,
    DK_CV_STRING,
    DK_CV_FILECHECKSUMS,
    DK_CV_FILECHECKSUM_OFFSET,
    DK_CV_FPO_DATA,
    // ".cfi" directives.
    DK_CFI_SECTIONS,
    DK_CFI_STARTPROC,
    DK_CFI_ENDPROC,
    DK_CFI_DEF_CFA,
    DK_CFI_DEF_CFA_OFFSET,
    DK_CFI_ADJUST_CFA_OFFSET,
    DK_CFI_DEF_CFA_REGISTER,
    DK_CFI_OFFSET,
    DK_CFI_REL_OFFSET,
    DK_CFI_PERSONALITY,
    DK_CFI_LSDA,
    DK_CFI_REMEMBER_STATE,
    DK_CFI_RESTORE_STATE,
    DK_CFI_SAME_VALUE,
    DK_CFI_RESTORE,
    DK_CFI_ESCAPE,
    DK_CFI_RETURN_COLUMN,
    DK_CFI_SIGNAL_FRAME,
    DK_CFI_UNDEFINED,
    DK_CFI_REGISTER,
    DK_CFI_WINDOW_SAVE,
    DK_CFI_B_KEY_FRAME,
    // Macro directives.
    DK_MACRO,
    DK_EXITM,
    DK_ENDM,
    DK_PURGE,
    // Error and message directives.
    DK_ERR,
    DK_ERRB,
    DK_ERRNB,
    DK_ERRDEF,
    DK_ERRNDEF,
    DK_ERRDIF,
    DK_ERRDIFI,
    DK_ERRIDN,
    DK_ERRIDNI,
    DK_ERRE,
    DK_ERRNZ,
    DK_ECHO,
    // Struct and union definitions.
    DK_STRUCT,
    DK_UNION,
    DK_ENDS,
    DK_END,
    // Remaining directives.
    DK_PUSHFRAME,
    DK_PUSHREG,
    DK_SAVEREG,
    DK_SAVEXMM128,
    DK_SETFRAME,
    DK_RADIX,
  };
801
802 /// Maps directive name --> DirectiveKind enum, for directives parsed by this
803 /// class.
804 StringMap<DirectiveKind> DirectiveKindMap;
805
806 bool isMacroLikeDirective();
807
  // Codeview def_range type parsing.
  //
  // CVDefRangeTypeMap maps def_range type names to these values; the
  // zero-valued enumerator is only a placeholder.
  enum CVDefRangeType {
    CVDR_DEFRANGE = 0, // Placeholder
    CVDR_DEFRANGE_REGISTER,
    CVDR_DEFRANGE_FRAMEPOINTER_REL,
    CVDR_DEFRANGE_SUBFIELD_REGISTER,
    CVDR_DEFRANGE_REGISTER_REL
  };
816
817 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview
818 /// def_range types parsed by this class.
819 StringMap<CVDefRangeType> CVDefRangeTypeMap;
820
  // Builtin symbols handled by this class. BuiltinSymbolMap maps builtin
  // names to these values; they are consumed by evaluateBuiltinValue() and
  // evaluateBuiltinTextMacro().
  enum BuiltinSymbol {
    BI_NO_SYMBOL, // Placeholder
    BI_DATE,
    BI_TIME,
    BI_VERSION,
    BI_FILECUR,
    BI_FILENAME,
    BI_LINE,
    BI_CURSEG,
    BI_CPU,
    BI_INTERFACE,
    BI_CODE,
    BI_DATA,
    BI_FARDATA,
    BI_WORDSIZE,
    BI_CODESIZE,
    BI_DATASIZE,
    BI_MODEL,
    BI_STACK,
  };
842
843 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this
844 /// class.
845 StringMap<BuiltinSymbol> BuiltinSymbolMap;
846
847 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc);
848
849 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol,
850 SMLoc StartLoc);
851
852 // ".ascii", ".asciz", ".string"
853 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
854
855 // "byte", "word", ...
856 bool emitIntValue(const MCExpr *Value, unsigned Size);
857 bool parseScalarInitializer(unsigned Size,
858 SmallVectorImpl<const MCExpr *> &Values,
859 unsigned StringPadLength = 0);
860 bool parseScalarInstList(
861 unsigned Size, SmallVectorImpl<const MCExpr *> &Values,
862 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
863 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr);
864 bool addIntegralField(StringRef Name, unsigned Size);
865 bool parseDirectiveValue(StringRef IDVal, unsigned Size);
866 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
867 StringRef Name, SMLoc NameLoc);
868
869 // "real4", "real8", "real10"
870 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr);
871 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size);
872 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics,
873 size_t Size);
874 bool parseRealInstList(
875 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values,
876 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
877 bool parseDirectiveNamedRealValue(StringRef TypeName,
878 const fltSemantics &Semantics,
879 unsigned Size, StringRef Name,
880 SMLoc NameLoc);
881
882 bool parseOptionalAngleBracketOpen();
883 bool parseAngleBracketClose(const Twine &Msg = "expected '>'");
884
885 bool parseFieldInitializer(const FieldInfo &Field,
886 FieldInitializer &Initializer);
887 bool parseFieldInitializer(const FieldInfo &Field,
888 const IntFieldInfo &Contents,
889 FieldInitializer &Initializer);
890 bool parseFieldInitializer(const FieldInfo &Field,
891 const RealFieldInfo &Contents,
892 FieldInitializer &Initializer);
893 bool parseFieldInitializer(const FieldInfo &Field,
894 const StructFieldInfo &Contents,
895 FieldInitializer &Initializer);
896
897 bool parseStructInitializer(const StructInfo &Structure,
898 StructInitializer &Initializer);
899 bool parseStructInstList(
900 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
901 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement);
902
903 bool emitFieldValue(const FieldInfo &Field);
904 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents);
905 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents);
906 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents);
907
908 bool emitFieldInitializer(const FieldInfo &Field,
909 const FieldInitializer &Initializer);
910 bool emitFieldInitializer(const FieldInfo &Field,
911 const IntFieldInfo &Contents,
912 const IntFieldInfo &Initializer);
913 bool emitFieldInitializer(const FieldInfo &Field,
914 const RealFieldInfo &Contents,
915 const RealFieldInfo &Initializer);
916 bool emitFieldInitializer(const FieldInfo &Field,
917 const StructFieldInfo &Contents,
918 const StructFieldInfo &Initializer);
919
920 bool emitStructInitializer(const StructInfo &Structure,
921 const StructInitializer &Initializer);
922
923 // User-defined types (structs, unions):
924 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr);
925 bool addStructField(StringRef Name, const StructInfo &Structure);
926 bool parseDirectiveStructValue(const StructInfo &Structure,
927 StringRef Directive, SMLoc DirLoc);
928 bool parseDirectiveNamedStructValue(const StructInfo &Structure,
929 StringRef Directive, SMLoc DirLoc,
930 StringRef Name);
931
932 // "=", "equ", "textequ"
933 bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
934 DirectiveKind DirKind, SMLoc NameLoc);
935
936 bool parseDirectiveOrg(); // "org"
937
938 bool emitAlignTo(int64_t Alignment);
939 bool parseDirectiveAlign(); // "align"
940 bool parseDirectiveEven(); // "even"
941
942 // ".file", ".line", ".loc", ".stabs"
943 bool parseDirectiveFile(SMLoc DirectiveLoc);
944 bool parseDirectiveLine();
945 bool parseDirectiveLoc();
946 bool parseDirectiveStabs();
947
948 // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
949 // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
950 bool parseDirectiveCVFile();
951 bool parseDirectiveCVFuncId();
952 bool parseDirectiveCVInlineSiteId();
953 bool parseDirectiveCVLoc();
954 bool parseDirectiveCVLinetable();
955 bool parseDirectiveCVInlineLinetable();
956 bool parseDirectiveCVDefRange();
957 bool parseDirectiveCVString();
958 bool parseDirectiveCVStringTable();
959 bool parseDirectiveCVFileChecksums();
960 bool parseDirectiveCVFileChecksumOffset();
961 bool parseDirectiveCVFPOData();
962
963 // .cfi directives
964 bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
965 bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
966 bool parseDirectiveCFISections();
967 bool parseDirectiveCFIStartProc();
968 bool parseDirectiveCFIEndProc();
969 bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
970 bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
971 bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
972 bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
973 bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
974 bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
975 bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
976 bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
977 bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
978 bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
979 bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
980 bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
981 bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
982 bool parseDirectiveCFISignalFrame();
983 bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
984
985 // macro directives
986 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
987 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
988 std::string &Value);
989 bool parseDirectiveEndMacro(StringRef Directive);
990 bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);
991
992 bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
993 StringRef Name, SMLoc NameLoc);
994 bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
995 bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
996 bool parseDirectiveNestedEnds();
997
998 bool parseDirectiveExtern();
999
1000 /// Parse a directive like ".globl" which accepts a single symbol (which
1001 /// should be a label or an external).
1002 bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);
1003
1004 bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
1005
1006 bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"
1007
1008 bool parseDirectiveInclude(); // "include"
1009
1010 // "if" or "ife"
1011 bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1012 // "ifb" or "ifnb", depending on ExpectBlank.
1013 bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1014 // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
1015 // CaseInsensitive.
1016 bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1017 bool CaseInsensitive);
1018 // "ifdef" or "ifndef", depending on expect_defined
1019 bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
1020 // "elseif" or "elseife"
1021 bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
1022 // "elseifb" or "elseifnb", depending on ExpectBlank.
1023 bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
1024 // ".elseifdef" or ".elseifndef", depending on expect_defined
1025 bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
1026 // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
1027 // ExpectEqual and CaseInsensitive.
1028 bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1029 bool CaseInsensitive);
1030 bool parseDirectiveElse(SMLoc DirectiveLoc); // "else"
1031 bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"
1032 bool parseEscapedString(std::string &Data) override;
1033 bool parseAngleBracketString(std::string &Data) override;
1034
1035 // Macro-like directives
1036 MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
1037 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1038 raw_svector_ostream &OS);
1039 void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
1040 SMLoc ExitLoc, raw_svector_ostream &OS);
1041 bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
1042 bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
1043 bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
1044 bool parseDirectiveWhile(SMLoc DirectiveLoc);
1045
1046 // "_emit" or "__emit"
1047 bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
1048 size_t Len);
1049
1050 // "align"
1051 bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);
1052
1053 // "end"
1054 bool parseDirectiveEnd(SMLoc DirectiveLoc);
1055
1056 // ".err"
1057 bool parseDirectiveError(SMLoc DirectiveLoc);
1058 // ".errb" or ".errnb", depending on ExpectBlank.
1059 bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".errdef" or ".errndef", depending on ExpectDefined.
1061 bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
1062 // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
1063 // and CaseInsensitive.
1064 bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
1065 bool CaseInsensitive);
1066 // ".erre" or ".errnz", depending on ExpectZero.
1067 bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);
1068
1069 // ".radix"
1070 bool parseDirectiveRadix(SMLoc DirectiveLoc);
1071
1072 // "echo"
1073 bool parseDirectiveEcho(SMLoc DirectiveLoc);
1074
1075 void initializeDirectiveKindMap();
1076 void initializeCVDefRangeTypeMap();
1077 void initializeBuiltinSymbolMap();
1078 };
1079
1080 } // end anonymous namespace
1081
namespace llvm {

// Command-line option that is only declared here; its definition lives in
// another translation unit.
// NOTE(review): presumably limits how deeply macro instantiations may nest --
// confirm at the definition.
extern cl::opt<unsigned> AsmMacroMaxNestingDepth;

// Factory for the COFF parser extension. The MasmParser constructor installs
// the returned object as its platform parser when the object file type is
// COFF.
extern MCAsmParserExtension *createCOFFMasmParser();

} // end namespace llvm
1089
// Named constant with value 0.
// NOTE(review): presumably the default address space for emitted values; no
// use is visible in this part of the file -- confirm against its users.
enum { DEFAULT_ADDRSPACE = 0 };
1091
MasmParser(SourceMgr & SM,MCContext & Ctx,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)1092 MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
1093 const MCAsmInfo &MAI, struct tm TM, unsigned CB)
1094 : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
1095 CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
1096 HadError = false;
1097 // Save the old handler.
1098 SavedDiagHandler = SrcMgr.getDiagHandler();
1099 SavedDiagContext = SrcMgr.getDiagContext();
1100 // Set our own handler which calls the saved handler.
1101 SrcMgr.setDiagHandler(DiagHandler, this);
1102 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1103 EndStatementAtEOFStack.push_back(true);
1104
1105 // Initialize the platform / file format parser.
1106 switch (Ctx.getObjectFileType()) {
1107 case MCContext::IsCOFF:
1108 PlatformParser.reset(createCOFFMasmParser());
1109 break;
1110 default:
1111 report_fatal_error("llvm-ml currently supports only COFF output.");
1112 break;
1113 }
1114
1115 initializeDirectiveKindMap();
1116 PlatformParser->Initialize(*this);
1117 initializeCVDefRangeTypeMap();
1118 initializeBuiltinSymbolMap();
1119
1120 NumOfMacroInstantiations = 0;
1121 }
1122
/// Tear down the parser.
///
/// Asserts that no macro instantiation is still active unless an error was
/// reported, then puts back the diagnostic handler and context that were saved
/// when this parser installed its own.
MasmParser::~MasmParser() {
  assert((HadError || ActiveMacros.empty()) &&
         "Unexpected active macro instantiation!");

  // Restore the saved diagnostics handler and context for use during
  // finalization.
  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
}
1131
printMacroInstantiations()1132 void MasmParser::printMacroInstantiations() {
1133 // Print the active macro instantiation stack.
1134 for (std::vector<MacroInstantiation *>::const_reverse_iterator
1135 it = ActiveMacros.rbegin(),
1136 ie = ActiveMacros.rend();
1137 it != ie; ++it)
1138 printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
1139 "while in macro instantiation");
1140 }
1141
/// Report \p Msg as a note attached to location \p L, passing \p Range along
/// to the message printer.
///
/// Pending errors are printed first so they appear before the note, and the
/// active macro instantiation stack is printed after it.
void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
  printPendingErrors();
  printMessage(L, SourceMgr::DK_Note, Msg, Range);
  printMacroInstantiations();
}
1147
Warning(SMLoc L,const Twine & Msg,SMRange Range)1148 bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
1149 if (getTargetParser().getTargetOptions().MCNoWarn)
1150 return false;
1151 if (getTargetParser().getTargetOptions().MCFatalWarnings)
1152 return Error(L, Msg, Range);
1153 printMessage(L, SourceMgr::DK_Warning, Msg, Range);
1154 printMacroInstantiations();
1155 return false;
1156 }
1157
/// Print \p Msg as an error at \p L (with \p Range passed to the printer),
/// followed by the active macro instantiation stack.
///
/// Sets HadError before printing.
/// \returns true, always.
bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
  HadError = true;
  printMessage(L, SourceMgr::DK_Error, Msg, Range);
  printMacroInstantiations();
  return true;
}
1164
enterIncludeFile(const std::string & Filename)1165 bool MasmParser::enterIncludeFile(const std::string &Filename) {
1166 std::string IncludedFile;
1167 unsigned NewBuf =
1168 SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
1169 if (!NewBuf)
1170 return true;
1171
1172 CurBuffer = NewBuf;
1173 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
1174 EndStatementAtEOFStack.push_back(true);
1175 return false;
1176 }
1177
jumpToLoc(SMLoc Loc,unsigned InBuffer,bool EndStatementAtEOF)1178 void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
1179 bool EndStatementAtEOF) {
1180 CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
1181 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
1182 Loc.getPointer(), EndStatementAtEOF);
1183 }
1184
expandMacros()1185 bool MasmParser::expandMacros() {
1186 const AsmToken &Tok = getTok();
1187 const std::string IDLower = Tok.getIdentifier().lower();
1188
1189 const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
1190 if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
1191 // This is a macro function invocation; expand it in place.
1192 const SMLoc MacroLoc = Tok.getLoc();
1193 const StringRef MacroId = Tok.getIdentifier();
1194 Lexer.Lex();
1195 if (handleMacroInvocation(M, MacroLoc)) {
1196 Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
1197 Lexer.Lex();
1198 }
1199 return false;
1200 }
1201
1202 std::optional<std::string> ExpandedValue;
1203 auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
1204 if (BuiltinIt != BuiltinSymbolMap.end()) {
1205 ExpandedValue =
1206 evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
1207 } else {
1208 auto VarIt = Variables.find(IDLower);
1209 if (VarIt != Variables.end() && VarIt->getValue().IsText) {
1210 ExpandedValue = VarIt->getValue().TextValue;
1211 }
1212 }
1213
1214 if (!ExpandedValue)
1215 return true;
1216 std::unique_ptr<MemoryBuffer> Instantiation =
1217 MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");
1218
1219 // Jump to the macro instantiation and prime the lexer.
1220 CurBuffer =
1221 SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
1222 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
1223 /*EndStatementAtEOF=*/false);
1224 EndStatementAtEOFStack.push_back(false);
1225 Lexer.Lex();
1226 return false;
1227 }
1228
/// Advance to the next token and return a reference to it.
///
/// Besides lexing, this function:
///  - reports the lexer's error if the current token is an error token;
///  - forwards comments to the streamer when the target asks for assembly
///    comments to be preserved;
///  - when \p ExpandNextToken is ExpandMacros, repeatedly expands identifier
///    tokens via expandMacros(), except for an identifier at the start of a
///    statement that is followed by "equ" or "textequ";
///  - skips backslash + end-of-statement line continuations;
///  - on EOF, resumes the parent buffer if the current buffer has a parent
///    include location, otherwise pops the last EndStatementAtEOFStack entry.
const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
  if (Lexer.getTok().is(AsmToken::Error))
    Error(Lexer.getErrLoc(), Lexer.getErr());

  // If the current token is an end of statement with a comment in it...
  if (getTok().is(AsmToken::EndOfStatement)) {
    // ...and that text is a line comment (non-empty and not starting with a
    // newline character), output it.
    if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
        getTok().getString().front() != '\r' && MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(getTok().getString()));
  }

  const AsmToken *tok = &Lexer.Lex();
  bool StartOfStatement = Lexer.isAtStartOfStatement();

  // Keep expanding while the token is an identifier. expandMacros() returns
  // true when it found nothing to expand, which ends the loop.
  // NOTE(review): tok is not reassigned inside this loop; it presumably keeps
  // designating the lexer's current token after expandMacros() lexes --
  // confirm against the lexer's token storage.
  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
    if (StartOfStatement) {
      AsmToken NextTok;
      MutableArrayRef<AsmToken> Buf(NextTok);
      size_t ReadCount = Lexer.peekTokens(Buf);
      if (ReadCount && NextTok.is(AsmToken::Identifier) &&
          (NextTok.getString().equals_insensitive("equ") ||
           NextTok.getString().equals_insensitive("textequ"))) {
        // This looks like an EQU or TEXTEQU directive; don't expand the
        // identifier, allowing for redefinitions.
        break;
      }
    }
    if (expandMacros())
      break;
  }

  // Parse comments here to be deferred until end of next statement.
  while (tok->is(AsmToken::Comment)) {
    if (MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(tok->getString()));
    tok = &Lexer.Lex();
  }

  // Recognize and bypass line continuations.
  while (tok->is(AsmToken::BackSlash) &&
         peekTok().is(AsmToken::EndOfStatement)) {
    // Eat both the backslash and the end of statement.
    Lexer.Lex();
    tok = &Lexer.Lex();
  }

  if (tok->is(AsmToken::Eof)) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      // Continue lexing in the parent buffer (with the default expansion
      // kind of Lex()).
      return Lex();
    }
    // No parent buffer: this was the last open buffer, so its entry must be
    // the only one left on the stack.
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  return *tok;
}
1291
/// Return a copy of the token that follows the current one, without
/// consuming it.
///
/// \p ShouldSkipSpace is forwarded to the lexer's peekTokens(). If no token
/// could be read and the current buffer has a parent include location, the
/// parser switches to the parent buffer and peeks there instead. Note that
/// this path changes parser state (CurBuffer, EndStatementAtEOFStack and the
/// lexer's buffer) even though the function is a "peek".
const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
  AsmToken Tok;

  // Single-element buffer: ask the lexer for exactly one lookahead token.
  MutableArrayRef<AsmToken> Buf(Tok);
  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);

  if (ReadCount == 0) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      return peekTok(ShouldSkipSpace);
    }
    // NOTE(review): with no parent include, control falls through to the
    // assert below with ReadCount still 0, after having popped the stack --
    // confirm whether this path is reachable/intended in asserts builds.
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  assert(ReadCount == 1);
  return Tok;
}
1314
enabledGenDwarfForAssembly()1315 bool MasmParser::enabledGenDwarfForAssembly() {
1316 // Check whether the user specified -g.
1317 if (!getContext().getGenDwarfForAssembly())
1318 return false;
1319 // If we haven't encountered any .file directives (which would imply that
1320 // the assembler source was produced with debug info already) then emit one
1321 // describing the assembler source file itself.
1322 if (getContext().getGenDwarfFileNumber() == 0) {
1323 // Use the first #line directive for this, if any. It's preprocessed, so
1324 // there is no checksum, and of course no source directive.
1325 if (!FirstCppHashFilename.empty())
1326 getContext().setMCLineTableRootFile(
1327 /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
1328 /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
1329 const MCDwarfFile &RootFile =
1330 getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
1331 getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
1332 /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
1333 RootFile.Checksum, RootFile.Source));
1334 }
1335 return true;
1336 }
1337
/// Parse the whole input and drive the streamer.
///
/// \param NoInitialTextSection when false, the streamer's initial sections
///        are created before parsing starts.
/// \param NoFinalize when true, the end-of-parse symbol checks are skipped
///        and the output stream is not finished.
/// \returns true if this parser or its context reported an error.
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
  // Create the initial section, if requested.
  if (!NoInitialTextSection)
    Out.initSections(false, getTargetParser().getSTI());

  // Prime the lexer.
  Lex();

  HadError = false;
  // Snapshot the conditional-assembly state so an unbalanced conditional can
  // be detected once parsing is done.
  AsmCond StartingCondState = TheCondState;
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // If we are generating dwarf for assembly source files save the initial text
  // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
  // emitting any actual debug info yet and haven't had a chance to parse any
  // embedded .file directives.)
  if (getContext().getGenDwarfForAssembly()) {
    MCSection *Sec = getStreamer().getCurrentSectionOnly();
    if (!Sec->getBeginSymbol()) {
      MCSymbol *SectionStartSym = getContext().createTempSymbol();
      getStreamer().emitLabel(SectionStartSym);
      Sec->setBeginSymbol(SectionStartSym);
    }
    bool InsertResult = getContext().addGenDwarfSection(Sec);
    assert(InsertResult && ".text section should not have debug info yet");
    // Silence the unused-variable warning in builds without asserts.
    (void)InsertResult;
  }

  getTargetParser().onBeginOfFile();

  // While we have input, parse each statement. An EOF token only ends the
  // loop when the current buffer has no parent include location.
  while (Lexer.isNot(AsmToken::Eof) ||
         SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
    // Skip through the EOF at the end of an inclusion.
    if (Lexer.is(AsmToken::Eof))
      Lex();

    ParseStatementInfo Info(&AsmStrRewrites);
    bool Parsed = parseStatement(Info, nullptr);

    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
    // for printing ErrMsg via Lex() only if no (presumably better) parser error
    // exists.
    if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
      Lex();
    }

    // parseStatement returned true so may need to emit an error.
    printPendingErrors();

    // Skipping to the next line if needed.
    if (Parsed && !getLexer().isAtStartOfStatement())
      eatToEndOfStatement();
  }

  getTargetParser().onEndOfFile();
  printPendingErrors();

  // All errors should have been emitted.
  assert(!hasPendingError() && "unexpected error from parseStatement");

  getTargetParser().flushPendingInstructions(getStreamer());

  // The conditional state must match the snapshot taken before parsing.
  if (TheCondState.TheCond != StartingCondState.TheCond ||
      TheCondState.Ignore != StartingCondState.Ignore)
    printError(getTok().getLoc(), "unmatched .ifs or .elses");
  // Check to see there are no empty DwarfFile slots.
  const auto &LineTables = getContext().getMCDwarfLineTables();
  if (!LineTables.empty()) {
    unsigned Index = 0;
    // Only the first line table is inspected; slot 0 is allowed to be empty.
    for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
      if (File.Name.empty() && Index != 0)
        printError(getTok().getLoc(), "unassigned file number: " +
                                          Twine(Index) +
                                          " for .file directives");
      ++Index;
    }
  }

  // Check to see that all assembler local symbols were actually defined.
  // Targets that don't do subsections via symbols may not want this, though,
  // so conservatively exclude them. Only do this if we're finalizing, though,
  // as otherwise we won't necessarily have seen everything yet.
  if (!NoFinalize) {
    if (MAI.hasSubsectionsViaSymbols()) {
      for (const auto &TableEntry : getContext().getSymbols()) {
        MCSymbol *Sym = TableEntry.getValue().Symbol;
        // Variable symbols may not be marked as defined, so check those
        // explicitly. If we know it's a variable, we have a definition for
        // the purposes of this check.
        if (Sym && Sym->isTemporary() && !Sym->isVariable() &&
            !Sym->isDefined())
          // FIXME: We would really like to refer back to where the symbol was
          // first referenced for a source location. We need to add something
          // to track that. Currently, we just point to the end of the file.
          printError(getTok().getLoc(), "assembler local symbol '" +
                                            Sym->getName() + "' not defined");
      }
    }

    // Temporary symbols like the ones for directional jumps don't go in the
    // symbol table. They also need to be diagnosed in all (final) cases.
    for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
      if (std::get<2>(LocSym)->isUndefined()) {
        // Reset the state of any "# line file" directives we've seen to the
        // context as it was at the diagnostic site.
        CppHashInfo = std::get<1>(LocSym);
        printError(std::get<0>(LocSym), "directional label undefined");
      }
    }
  }

  // Finalize the output stream if there are no errors and if the client wants
  // us to.
  if (!HadError && !NoFinalize)
    Out.finish(Lexer.getLoc());

  return HadError || getContext().hadError();
}
1457
checkForValidSection()1458 bool MasmParser::checkForValidSection() {
1459 if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
1460 Out.initSections(false, getTargetParser().getSTI());
1461 return Error(getTok().getLoc(),
1462 "expected section directive before assembly directive");
1463 }
1464 return false;
1465 }
1466
1467 /// Throw away the rest of the line for testing purposes.
eatToEndOfStatement()1468 void MasmParser::eatToEndOfStatement() {
1469 while (Lexer.isNot(AsmToken::EndOfStatement)) {
1470 if (Lexer.is(AsmToken::Eof)) {
1471 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1472 if (ParentIncludeLoc == SMLoc()) {
1473 break;
1474 }
1475
1476 EndStatementAtEOFStack.pop_back();
1477 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1478 }
1479
1480 Lexer.Lex();
1481 }
1482
1483 // Eat EOL.
1484 if (Lexer.is(AsmToken::EndOfStatement))
1485 Lexer.Lex();
1486 }
1487
1488 SmallVector<StringRef, 1>
parseStringRefsTo(AsmToken::TokenKind EndTok)1489 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
1490 SmallVector<StringRef, 1> Refs;
1491 const char *Start = getTok().getLoc().getPointer();
1492 while (Lexer.isNot(EndTok)) {
1493 if (Lexer.is(AsmToken::Eof)) {
1494 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
1495 if (ParentIncludeLoc == SMLoc()) {
1496 break;
1497 }
1498 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1499
1500 EndStatementAtEOFStack.pop_back();
1501 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
1502 Lexer.Lex();
1503 Start = getTok().getLoc().getPointer();
1504 } else {
1505 Lexer.Lex();
1506 }
1507 }
1508 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
1509 return Refs;
1510 }
1511
parseStringTo(AsmToken::TokenKind EndTok)1512 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
1513 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
1514 std::string Str;
1515 for (StringRef S : Refs) {
1516 Str.append(S.str());
1517 }
1518 return Str;
1519 }
1520
parseStringToEndOfStatement()1521 StringRef MasmParser::parseStringToEndOfStatement() {
1522 const char *Start = getTok().getLoc().getPointer();
1523
1524 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
1525 Lexer.Lex();
1526
1527 const char *End = getTok().getLoc().getPointer();
1528 return StringRef(Start, End - Start);
1529 }
1530
1531 /// Parse a paren expression and return it.
1532 /// NOTE: This assumes the leading '(' has already been consumed.
1533 ///
1534 /// parenexpr ::= expr)
1535 ///
parseParenExpr(const MCExpr * & Res,SMLoc & EndLoc)1536 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1537 if (parseExpression(Res))
1538 return true;
1539 EndLoc = Lexer.getTok().getEndLoc();
1540 return parseRParen();
1541 }
1542
1543 /// Parse a bracket expression and return it.
1544 /// NOTE: This assumes the leading '[' has already been consumed.
1545 ///
1546 /// bracketexpr ::= expr]
1547 ///
parseBracketExpr(const MCExpr * & Res,SMLoc & EndLoc)1548 bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
1549 if (parseExpression(Res))
1550 return true;
1551 EndLoc = getTok().getEndLoc();
1552 if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
1553 return true;
1554 return false;
1555 }
1556
/// Parse a primary expression and return it.
///  primaryexpr ::= (parenexpr
///  primaryexpr ::= [bracketexpr   (only if the platform parser allows it)
///  primaryexpr ::= symbol
///  primaryexpr ::= number
///  primaryexpr ::= '.'
///  primaryexpr ::= '$'            (only if the target treats '$' as the PC)
///  primaryexpr ::= !,~,+,-,'not' primaryexpr
///  primaryexpr ::= string
///          (a string is interpreted as a 64-bit number in big-endian base-256)
///  primaryexpr ::= %op(expr)      (MIPS-style unary operators)
///
/// \param Res      receives the parsed expression on success.
/// \param EndLoc   updated on most, but not all, successful paths with a
///                 location at the end of the parsed expression.
/// \param TypeInfo optional; when non-null it is filled in for plain symbol
///                 references. Nested operands of unary operators are parsed
///                 with a null TypeInfo.
/// \returns true on error, false on success.
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
                                  AsmTypeInfo *TypeInfo) {
  SMLoc FirstTokenLoc = getLexer().getLoc();
  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
  switch (FirstTokenKind) {
  default:
    return TokError("unknown token in expression");
  // If we have an error assume that we've already handled it.
  case AsmToken::Error:
    return true;
  case AsmToken::Exclaim:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Dollar:
  case AsmToken::At:
  case AsmToken::Identifier: {
    StringRef Identifier;
    if (parseIdentifier(Identifier)) {
      // We may have failed but $ may be a valid token.
      if (getTok().is(AsmToken::Dollar)) {
        if (Lexer.getMAI().getDollarIsPC()) {
          Lex();
          // This is a '$' reference, which references the current PC.  Emit a
          // temporary label to the streamer and refer to it.
          MCSymbol *Sym = Ctx.createTempSymbol();
          Out.emitLabel(Sym);
          Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
                                        getContext());
          EndLoc = FirstTokenLoc;
          return false;
        }
        return Error(FirstTokenLoc, "invalid token in expression");
      }
    }
    // Parse named bitwise negation.
    if (Identifier.equals_insensitive("not")) {
      if (parsePrimaryExpr(Res, EndLoc, nullptr))
        return true;
      Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
      return false;
    }
    // Parse directional local label references.
    if (Identifier.equals_insensitive("@b") ||
        Identifier.equals_insensitive("@f")) {
      bool Before = Identifier.equals_insensitive("@b");
      MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
      // A backward reference needs a label that has already been defined.
      if (Before && Sym->isUndefined())
        return Error(FirstTokenLoc, "Expected @@ label before @B reference");
      Res = MCSymbolRefExpr::create(Sym, getContext());
      return false;
    }
    // Parse symbol variant. Depending on the target, the variant is either
    // the part of the identifier after '@' or a parenthesized name following
    // the identifier.
    std::pair<StringRef, StringRef> Split;
    if (!MAI.useParensForSymbolVariant()) {
      Split = Identifier.split('@');
    } else if (Lexer.is(AsmToken::LParen)) {
      Lex(); // eat '('.
      StringRef VName;
      parseIdentifier(VName);
      // eat ')'.
      if (parseToken(AsmToken::RParen,
                     "unexpected token in variant, expected ')'"))
        return true;
      Split = std::make_pair(Identifier, VName);
    }

    EndLoc = SMLoc::getFromPointer(Identifier.end());

    // This is a symbol reference.
    StringRef SymbolName = Identifier;
    if (SymbolName.empty())
      return Error(getLexer().getLoc(), "expected a symbol reference");

    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;

    // Look up the symbol variant if used.
    if (!Split.second.empty()) {
      Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
      if (Variant != MCSymbolRefExpr::VK_Invalid) {
        SymbolName = Split.first;
      } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
        // Not a known variant, but '@' may be part of the name on this
        // target: keep the full identifier as the symbol name.
        Variant = MCSymbolRefExpr::VK_None;
      } else {
        return Error(SMLoc::getFromPointer(Split.second.begin()),
                     "invalid variant '" + Split.second + "'");
      }
    }

    // Find the field offset if used.
    AsmFieldInfo Info;
    Split = SymbolName.split('.');
    if (Split.second.empty()) {
    } else {
      SymbolName = Split.first;
      if (lookUpField(SymbolName, Split.second, Info)) {
        // The first lookup returned true; retry treating the first component
        // after the dot as the base and the rest as the member. The result
        // of this second lookup is not checked.
        std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
        StringRef Base = BaseMember.first, Member = BaseMember.second;
        lookUpField(Base, Member, Info);
      } else if (Structs.count(SymbolName.lower())) {
        // This is actually a reference to a field offset.
        Res = MCConstantExpr::create(Info.Offset, getContext());
        return false;
      }
    }

    MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
    if (!Sym) {
      // If this is a built-in numeric value, treat it as a constant.
      auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
      const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
                                       ? BI_NO_SYMBOL
                                       : BuiltinIt->getValue();
      if (Symbol != BI_NO_SYMBOL) {
        const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
        if (Value) {
          Res = Value;
          return false;
        }
      }

      // Variables use case-insensitive symbol names; if this is a variable, we
      // find the symbol using its canonical name.
      auto VarIt = Variables.find(SymbolName.lower());
      if (VarIt != Variables.end())
        SymbolName = VarIt->second.Name;
      Sym = getContext().getOrCreateSymbol(SymbolName);
    }

    // If this is an absolute variable reference, substitute it now to preserve
    // semantics in the face of reassignment.
    if (Sym->isVariable()) {
      auto V = Sym->getVariableValue(/*SetUsed=*/false);
      bool DoInline = isa<MCConstantExpr>(V) && !Variant;
      // Target expressions decide for themselves whether to be inlined.
      if (auto TV = dyn_cast<MCTargetExpr>(V))
        DoInline = TV->inlineAssignedExpr();
      if (DoInline) {
        if (Variant)
          return Error(EndLoc, "unexpected modifier on variable reference");
        Res = Sym->getVariableValue(/*SetUsed=*/false);
        return false;
      }
    }

    // Otherwise create a symbol ref.
    const MCExpr *SymRef =
        MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
    if (Info.Offset) {
      // Fold a non-zero field offset into the reference.
      Res = MCBinaryExpr::create(
          MCBinaryExpr::Add, SymRef,
          MCConstantExpr::create(Info.Offset, getContext()), getContext());
    } else {
      Res = SymRef;
    }
    if (TypeInfo) {
      // If the field lookup produced no type, fall back to the type recorded
      // for the (lower-cased) identifier, if any.
      if (Info.Type.Name.empty()) {
        auto TypeIt = KnownType.find(Identifier.lower());
        if (TypeIt != KnownType.end()) {
          Info.Type = TypeIt->second;
        }
      }

      *TypeInfo = Info.Type;
    }
    return false;
  }
  case AsmToken::BigNum:
    return TokError("literal value out of range for directive");
  case AsmToken::Integer: {
    int64_t IntVal = getTok().getIntVal();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::String: {
    // MASM strings (used as constants) are interpreted as big-endian base-256.
    SMLoc ValueLoc = getTok().getLoc();
    std::string Value;
    if (parseEscapedString(Value))
      return true;
    // More than 8 characters would not fit in the 64-bit result.
    if (Value.size() > 8)
      return Error(ValueLoc, "literal value out of range");
    uint64_t IntValue = 0;
    for (const unsigned char CharVal : Value)
      IntValue = (IntValue << 8) | CharVal;
    Res = MCConstantExpr::create(IntValue, getContext());
    return false;
  }
  case AsmToken::Real: {
    // The literal is parsed as an IEEE double and its bit pattern is used as
    // the integer constant.
    APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::Dot: {
    // This is a '.' reference, which references the current PC.  Emit a
    // temporary label to the streamer and refer to it.
    MCSymbol *Sym = Ctx.createTempSymbol();
    Out.emitLabel(Sym);
    Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat identifier.
    return false;
  }
  case AsmToken::LParen:
    Lex(); // Eat the '('.
    return parseParenExpr(Res, EndLoc);
  case AsmToken::LBrac:
    if (!PlatformParser->HasBracketExpressions())
      return TokError("brackets expression not supported on this target");
    Lex(); // Eat the '['.
    return parseBracketExpr(Res, EndLoc);
  case AsmToken::Minus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Plus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Tilde:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
    return false;
  // MIPS unary expression operators. The lexer won't generate these tokens if
  // MCAsmInfo::HasMipsExpressions is false for the target.
  case AsmToken::PercentCall16:
  case AsmToken::PercentCall_Hi:
  case AsmToken::PercentCall_Lo:
  case AsmToken::PercentDtprel_Hi:
  case AsmToken::PercentDtprel_Lo:
  case AsmToken::PercentGot:
  case AsmToken::PercentGot_Disp:
  case AsmToken::PercentGot_Hi:
  case AsmToken::PercentGot_Lo:
  case AsmToken::PercentGot_Ofst:
  case AsmToken::PercentGot_Page:
  case AsmToken::PercentGottprel:
  case AsmToken::PercentGp_Rel:
  case AsmToken::PercentHi:
  case AsmToken::PercentHigher:
  case AsmToken::PercentHighest:
  case AsmToken::PercentLo:
  case AsmToken::PercentNeg:
  case AsmToken::PercentPcrel_Hi:
  case AsmToken::PercentPcrel_Lo:
  case AsmToken::PercentTlsgd:
  case AsmToken::PercentTlsldm:
  case AsmToken::PercentTprel_Hi:
  case AsmToken::PercentTprel_Lo:
    Lex(); // Eat the operator.
    if (Lexer.isNot(AsmToken::LParen))
      return TokError("expected '(' after operator");
    Lex(); // Eat the operator.
    if (parseExpression(Res, EndLoc))
      return true;
    if (parseRParen())
      return true;
    // The target builds the expression; a null result is reported as failure.
    Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
    return !Res;
  }
}
1838
parseExpression(const MCExpr * & Res)1839 bool MasmParser::parseExpression(const MCExpr *&Res) {
1840 SMLoc EndLoc;
1841 return parseExpression(Res, EndLoc);
1842 }
1843
1844 /// This function checks if the next token is <string> type or arithmetic.
1845 /// string that begin with character '<' must end with character '>'.
1846 /// otherwise it is arithmetics.
1847 /// If the function returns a 'true' value,
1848 /// the End argument will be filled with the last location pointed to the '>'
1849 /// character.
isAngleBracketString(SMLoc & StrLoc,SMLoc & EndLoc)1850 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) {
1851 assert((StrLoc.getPointer() != nullptr) &&
1852 "Argument to the function cannot be a NULL value");
1853 const char *CharPtr = StrLoc.getPointer();
1854 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') &&
1855 (*CharPtr != '\0')) {
1856 if (*CharPtr == '!')
1857 CharPtr++;
1858 CharPtr++;
1859 }
1860 if (*CharPtr == '>') {
1861 EndLoc = StrLoc.getFromPointer(CharPtr + 1);
1862 return true;
1863 }
1864 return false;
1865 }
1866
1867 /// creating a string without the escape characters '!'.
angleBracketString(StringRef BracketContents)1868 static std::string angleBracketString(StringRef BracketContents) {
1869 std::string Res;
1870 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) {
1871 if (BracketContents[Pos] == '!')
1872 Pos++;
1873 Res += BracketContents[Pos];
1874 }
1875 return Res;
1876 }
1877
1878 /// Parse an expression and return it.
1879 ///
1880 /// expr ::= expr &&,|| expr -> lowest.
1881 /// expr ::= expr |,^,&,! expr
1882 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr
1883 /// expr ::= expr <<,>> expr
1884 /// expr ::= expr +,- expr
1885 /// expr ::= expr *,/,% expr -> highest.
1886 /// expr ::= primaryexpr
1887 ///
parseExpression(const MCExpr * & Res,SMLoc & EndLoc)1888 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1889 // Parse the expression.
1890 Res = nullptr;
1891 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) ||
1892 parseBinOpRHS(1, Res, EndLoc))
1893 return true;
1894
1895 // Try to constant fold it up front, if possible. Do not exploit
1896 // assembler here.
1897 int64_t Value;
1898 if (Res->evaluateAsAbsolute(Value))
1899 Res = MCConstantExpr::create(Value, getContext());
1900
1901 return false;
1902 }
1903
parseParenExpression(const MCExpr * & Res,SMLoc & EndLoc)1904 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
1905 Res = nullptr;
1906 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
1907 }
1908
parseParenExprOfDepth(unsigned ParenDepth,const MCExpr * & Res,SMLoc & EndLoc)1909 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
1910 SMLoc &EndLoc) {
1911 if (parseParenExpr(Res, EndLoc))
1912 return true;
1913
1914 for (; ParenDepth > 0; --ParenDepth) {
1915 if (parseBinOpRHS(1, Res, EndLoc))
1916 return true;
1917
1918 // We don't Lex() the last RParen.
1919 // This is the same behavior as parseParenExpression().
1920 if (ParenDepth - 1 > 0) {
1921 EndLoc = getTok().getEndLoc();
1922 if (parseRParen())
1923 return true;
1924 }
1925 }
1926 return false;
1927 }
1928
parseAbsoluteExpression(int64_t & Res)1929 bool MasmParser::parseAbsoluteExpression(int64_t &Res) {
1930 const MCExpr *Expr;
1931
1932 SMLoc StartLoc = Lexer.getLoc();
1933 if (parseExpression(Expr))
1934 return true;
1935
1936 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1937 return Error(StartLoc, "expected absolute expression");
1938
1939 return false;
1940 }
1941
getGNUBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind,bool ShouldUseLogicalShr,bool EndExpressionAtGreater)1942 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K,
1943 MCBinaryExpr::Opcode &Kind,
1944 bool ShouldUseLogicalShr,
1945 bool EndExpressionAtGreater) {
1946 switch (K) {
1947 default:
1948 return 0; // not a binop.
1949
1950 // Lowest Precedence: &&, ||
1951 case AsmToken::AmpAmp:
1952 Kind = MCBinaryExpr::LAnd;
1953 return 2;
1954 case AsmToken::PipePipe:
1955 Kind = MCBinaryExpr::LOr;
1956 return 1;
1957
1958 // Low Precedence: ==, !=, <>, <, <=, >, >=
1959 case AsmToken::EqualEqual:
1960 Kind = MCBinaryExpr::EQ;
1961 return 3;
1962 case AsmToken::ExclaimEqual:
1963 case AsmToken::LessGreater:
1964 Kind = MCBinaryExpr::NE;
1965 return 3;
1966 case AsmToken::Less:
1967 Kind = MCBinaryExpr::LT;
1968 return 3;
1969 case AsmToken::LessEqual:
1970 Kind = MCBinaryExpr::LTE;
1971 return 3;
1972 case AsmToken::Greater:
1973 if (EndExpressionAtGreater)
1974 return 0;
1975 Kind = MCBinaryExpr::GT;
1976 return 3;
1977 case AsmToken::GreaterEqual:
1978 Kind = MCBinaryExpr::GTE;
1979 return 3;
1980
1981 // Low Intermediate Precedence: +, -
1982 case AsmToken::Plus:
1983 Kind = MCBinaryExpr::Add;
1984 return 4;
1985 case AsmToken::Minus:
1986 Kind = MCBinaryExpr::Sub;
1987 return 4;
1988
1989 // High Intermediate Precedence: |, &, ^
1990 case AsmToken::Pipe:
1991 Kind = MCBinaryExpr::Or;
1992 return 5;
1993 case AsmToken::Caret:
1994 Kind = MCBinaryExpr::Xor;
1995 return 5;
1996 case AsmToken::Amp:
1997 Kind = MCBinaryExpr::And;
1998 return 5;
1999
2000 // Highest Precedence: *, /, %, <<, >>
2001 case AsmToken::Star:
2002 Kind = MCBinaryExpr::Mul;
2003 return 6;
2004 case AsmToken::Slash:
2005 Kind = MCBinaryExpr::Div;
2006 return 6;
2007 case AsmToken::Percent:
2008 Kind = MCBinaryExpr::Mod;
2009 return 6;
2010 case AsmToken::LessLess:
2011 Kind = MCBinaryExpr::Shl;
2012 return 6;
2013 case AsmToken::GreaterGreater:
2014 if (EndExpressionAtGreater)
2015 return 0;
2016 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr;
2017 return 6;
2018 }
2019 }
2020
getBinOpPrecedence(AsmToken::TokenKind K,MCBinaryExpr::Opcode & Kind)2021 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K,
2022 MCBinaryExpr::Opcode &Kind) {
2023 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr();
2024 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr,
2025 AngleBracketDepth > 0);
2026 }
2027
2028 /// Parse all binary operators with precedence >= 'Precedence'.
2029 /// Res contains the LHS of the expression on input.
parseBinOpRHS(unsigned Precedence,const MCExpr * & Res,SMLoc & EndLoc)2030 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
2031 SMLoc &EndLoc) {
2032 SMLoc StartLoc = Lexer.getLoc();
2033 while (true) {
2034 AsmToken::TokenKind TokKind = Lexer.getKind();
2035 if (Lexer.getKind() == AsmToken::Identifier) {
2036 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString())
2037 .CaseLower("and", AsmToken::Amp)
2038 .CaseLower("not", AsmToken::Exclaim)
2039 .CaseLower("or", AsmToken::Pipe)
2040 .CaseLower("xor", AsmToken::Caret)
2041 .CaseLower("shl", AsmToken::LessLess)
2042 .CaseLower("shr", AsmToken::GreaterGreater)
2043 .CaseLower("eq", AsmToken::EqualEqual)
2044 .CaseLower("ne", AsmToken::ExclaimEqual)
2045 .CaseLower("lt", AsmToken::Less)
2046 .CaseLower("le", AsmToken::LessEqual)
2047 .CaseLower("gt", AsmToken::Greater)
2048 .CaseLower("ge", AsmToken::GreaterEqual)
2049 .Default(TokKind);
2050 }
2051 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
2052 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind);
2053
2054 // If the next token is lower precedence than we are allowed to eat, return
2055 // successfully with what we ate already.
2056 if (TokPrec < Precedence)
2057 return false;
2058
2059 Lex();
2060
2061 // Eat the next primary expression.
2062 const MCExpr *RHS;
2063 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc))
2064 return true;
2065
2066 // If BinOp binds less tightly with RHS than the operator after RHS, let
2067 // the pending operator take RHS as its LHS.
2068 MCBinaryExpr::Opcode Dummy;
2069 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
2070 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc))
2071 return true;
2072
2073 // Merge LHS and RHS according to operator.
2074 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
2075 }
2076 }
2077
2078 /// ParseStatement:
2079 /// ::= % statement
2080 /// ::= EndOfStatement
2081 /// ::= Label* Directive ...Operands... EndOfStatement
2082 /// ::= Label* Identifier OperandList* EndOfStatement
parseStatement(ParseStatementInfo & Info,MCAsmParserSemaCallback * SI)2083 bool MasmParser::parseStatement(ParseStatementInfo &Info,
2084 MCAsmParserSemaCallback *SI) {
2085 assert(!hasPendingError() && "parseStatement started with pending error");
2086 // Eat initial spaces and comments.
2087 while (Lexer.is(AsmToken::Space))
2088 Lex();
2089 if (Lexer.is(AsmToken::EndOfStatement)) {
2090 // If this is a line comment we can drop it safely.
2091 if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
2092 getTok().getString().front() == '\n')
2093 Out.addBlankLine();
2094 Lex();
2095 return false;
2096 }
2097
2098 // If preceded by an expansion operator, first expand all text macros and
2099 // macro functions.
2100 if (getTok().is(AsmToken::Percent)) {
2101 SMLoc ExpansionLoc = getTok().getLoc();
2102 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
2103 return true;
2104 }
2105
2106 // Statements always start with an identifier, unless we're dealing with a
2107 // processor directive (.386, .686, etc.) that lexes as a real.
2108 AsmToken ID = getTok();
2109 SMLoc IDLoc = ID.getLoc();
2110 StringRef IDVal;
2111 if (Lexer.is(AsmToken::HashDirective))
2112 return parseCppHashLineFilenameComment(IDLoc);
2113 if (Lexer.is(AsmToken::Dot)) {
2114 // Treat '.' as a valid identifier in this context.
2115 Lex();
2116 IDVal = ".";
2117 } else if (Lexer.is(AsmToken::Real)) {
2118 // Treat ".<number>" as a valid identifier in this context.
2119 IDVal = getTok().getString();
2120 Lex(); // always eat a token
2121 if (!IDVal.starts_with("."))
2122 return Error(IDLoc, "unexpected token at start of statement");
2123 } else if (parseIdentifier(IDVal, StartOfStatement)) {
2124 if (!TheCondState.Ignore) {
2125 Lex(); // always eat a token
2126 return Error(IDLoc, "unexpected token at start of statement");
2127 }
2128 IDVal = "";
2129 }
2130
2131 // Handle conditional assembly here before checking for skipping. We
2132 // have to do this so that .endif isn't skipped in a ".if 0" block for
2133 // example.
2134 StringMap<DirectiveKind>::const_iterator DirKindIt =
2135 DirectiveKindMap.find(IDVal.lower());
2136 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
2137 ? DK_NO_DIRECTIVE
2138 : DirKindIt->getValue();
2139 switch (DirKind) {
2140 default:
2141 break;
2142 case DK_IF:
2143 case DK_IFE:
2144 return parseDirectiveIf(IDLoc, DirKind);
2145 case DK_IFB:
2146 return parseDirectiveIfb(IDLoc, true);
2147 case DK_IFNB:
2148 return parseDirectiveIfb(IDLoc, false);
2149 case DK_IFDEF:
2150 return parseDirectiveIfdef(IDLoc, true);
2151 case DK_IFNDEF:
2152 return parseDirectiveIfdef(IDLoc, false);
2153 case DK_IFDIF:
2154 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2155 /*CaseInsensitive=*/false);
2156 case DK_IFDIFI:
2157 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
2158 /*CaseInsensitive=*/true);
2159 case DK_IFIDN:
2160 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2161 /*CaseInsensitive=*/false);
2162 case DK_IFIDNI:
2163 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
2164 /*CaseInsensitive=*/true);
2165 case DK_ELSEIF:
2166 case DK_ELSEIFE:
2167 return parseDirectiveElseIf(IDLoc, DirKind);
2168 case DK_ELSEIFB:
2169 return parseDirectiveElseIfb(IDLoc, true);
2170 case DK_ELSEIFNB:
2171 return parseDirectiveElseIfb(IDLoc, false);
2172 case DK_ELSEIFDEF:
2173 return parseDirectiveElseIfdef(IDLoc, true);
2174 case DK_ELSEIFNDEF:
2175 return parseDirectiveElseIfdef(IDLoc, false);
2176 case DK_ELSEIFDIF:
2177 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2178 /*CaseInsensitive=*/false);
2179 case DK_ELSEIFDIFI:
2180 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
2181 /*CaseInsensitive=*/true);
2182 case DK_ELSEIFIDN:
2183 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2184 /*CaseInsensitive=*/false);
2185 case DK_ELSEIFIDNI:
2186 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
2187 /*CaseInsensitive=*/true);
2188 case DK_ELSE:
2189 return parseDirectiveElse(IDLoc);
2190 case DK_ENDIF:
2191 return parseDirectiveEndIf(IDLoc);
2192 }
2193
2194 // Ignore the statement if in the middle of inactive conditional
2195 // (e.g. ".if 0").
2196 if (TheCondState.Ignore) {
2197 eatToEndOfStatement();
2198 return false;
2199 }
2200
2201 // FIXME: Recurse on local labels?
2202
2203 // Check for a label.
2204 // ::= identifier ':'
2205 // ::= number ':'
2206 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
2207 if (checkForValidSection())
2208 return true;
2209
2210 // identifier ':' -> Label.
2211 Lex();
2212
2213 // Diagnose attempt to use '.' as a label.
2214 if (IDVal == ".")
2215 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");
2216
2217 // Diagnose attempt to use a variable as a label.
2218 //
2219 // FIXME: Diagnostics. Note the location of the definition as a label.
2220 // FIXME: This doesn't diagnose assignment to a symbol which has been
2221 // implicitly marked as external.
2222 MCSymbol *Sym;
2223 if (ParsingMSInlineAsm && SI) {
2224 StringRef RewrittenLabel =
2225 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
2226 assert(!RewrittenLabel.empty() &&
2227 "We should have an internal name here.");
2228 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
2229 RewrittenLabel);
2230 IDVal = RewrittenLabel;
2231 }
2232 // Handle directional local labels
2233 if (IDVal == "@@") {
2234 Sym = Ctx.createDirectionalLocalSymbol(0);
2235 } else {
2236 Sym = getContext().getOrCreateSymbol(IDVal);
2237 }
2238
2239 // End of Labels should be treated as end of line for lexing
2240 // purposes but that information is not available to the Lexer who
2241 // does not understand Labels. This may cause us to see a Hash
2242 // here instead of a preprocessor line comment.
2243 if (getTok().is(AsmToken::Hash)) {
2244 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
2245 Lexer.Lex();
2246 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
2247 }
2248
2249 // Consume any end of statement token, if present, to avoid spurious
2250 // addBlankLine calls().
2251 if (getTok().is(AsmToken::EndOfStatement)) {
2252 Lex();
2253 }
2254
2255 getTargetParser().doBeforeLabelEmit(Sym, IDLoc);
2256
2257 // Emit the label.
2258 if (!getTargetParser().isParsingMSInlineAsm())
2259 Out.emitLabel(Sym, IDLoc);
2260
2261 // If we are generating dwarf for assembly source files then gather the
2262 // info to make a dwarf label entry for this label if needed.
2263 if (enabledGenDwarfForAssembly())
2264 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
2265 IDLoc);
2266
2267 getTargetParser().onLabelParsed(Sym);
2268
2269 return false;
2270 }
2271
2272 // If macros are enabled, check to see if this is a macro instantiation.
2273 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
2274 return handleMacroEntry(M, IDLoc);
2275 }
2276
2277 // Otherwise, we have a normal instruction or directive.
2278
2279 if (DirKind != DK_NO_DIRECTIVE) {
2280 // There are several entities interested in parsing directives:
2281 //
2282 // 1. Asm parser extensions. For example, platform-specific parsers
2283 // (like the ELF parser) register themselves as extensions.
2284 // 2. The target-specific assembly parser. Some directives are target
2285 // specific or may potentially behave differently on certain targets.
2286 // 3. The generic directive parser implemented by this class. These are
2287 // all the directives that behave in a target and platform independent
2288 // manner, or at least have a default behavior that's shared between
2289 // all targets and platforms.
2290
2291 getTargetParser().flushPendingInstructions(getStreamer());
2292
2293 // Special-case handling of structure-end directives at higher priority,
2294 // since ENDS is overloaded as a segment-end directive.
2295 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
2296 getTok().is(AsmToken::EndOfStatement)) {
2297 return parseDirectiveNestedEnds();
2298 }
2299
2300 // First, check the extension directive map to see if any extension has
2301 // registered itself to parse this directive.
2302 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2303 ExtensionDirectiveMap.lookup(IDVal.lower());
2304 if (Handler.first)
2305 return (*Handler.second)(Handler.first, IDVal, IDLoc);
2306
2307 // Next, let the target-specific assembly parser try.
2308 if (ID.isNot(AsmToken::Identifier))
2309 return false;
2310
2311 ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
2312 assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
2313 "Should only return Failure iff there was an error");
2314 if (TPDirectiveReturn.isFailure())
2315 return true;
2316 if (TPDirectiveReturn.isSuccess())
2317 return false;
2318
2319 // Finally, if no one else is interested in this directive, it must be
2320 // generic and familiar to this class.
2321 switch (DirKind) {
2322 default:
2323 break;
2324 case DK_ASCII:
2325 return parseDirectiveAscii(IDVal, false);
2326 case DK_ASCIZ:
2327 case DK_STRING:
2328 return parseDirectiveAscii(IDVal, true);
2329 case DK_BYTE:
2330 case DK_SBYTE:
2331 case DK_DB:
2332 return parseDirectiveValue(IDVal, 1);
2333 case DK_WORD:
2334 case DK_SWORD:
2335 case DK_DW:
2336 return parseDirectiveValue(IDVal, 2);
2337 case DK_DWORD:
2338 case DK_SDWORD:
2339 case DK_DD:
2340 return parseDirectiveValue(IDVal, 4);
2341 case DK_FWORD:
2342 case DK_DF:
2343 return parseDirectiveValue(IDVal, 6);
2344 case DK_QWORD:
2345 case DK_SQWORD:
2346 case DK_DQ:
2347 return parseDirectiveValue(IDVal, 8);
2348 case DK_REAL4:
2349 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
2350 case DK_REAL8:
2351 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
2352 case DK_REAL10:
2353 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
2354 case DK_STRUCT:
2355 case DK_UNION:
2356 return parseDirectiveNestedStruct(IDVal, DirKind);
2357 case DK_ENDS:
2358 return parseDirectiveNestedEnds();
2359 case DK_ALIGN:
2360 return parseDirectiveAlign();
2361 case DK_EVEN:
2362 return parseDirectiveEven();
2363 case DK_ORG:
2364 return parseDirectiveOrg();
2365 case DK_EXTERN:
2366 return parseDirectiveExtern();
2367 case DK_PUBLIC:
2368 return parseDirectiveSymbolAttribute(MCSA_Global);
2369 case DK_COMM:
2370 return parseDirectiveComm(/*IsLocal=*/false);
2371 case DK_COMMENT:
2372 return parseDirectiveComment(IDLoc);
2373 case DK_INCLUDE:
2374 return parseDirectiveInclude();
2375 case DK_REPEAT:
2376 return parseDirectiveRepeat(IDLoc, IDVal);
2377 case DK_WHILE:
2378 return parseDirectiveWhile(IDLoc);
2379 case DK_FOR:
2380 return parseDirectiveFor(IDLoc, IDVal);
2381 case DK_FORC:
2382 return parseDirectiveForc(IDLoc, IDVal);
2383 case DK_FILE:
2384 return parseDirectiveFile(IDLoc);
2385 case DK_LINE:
2386 return parseDirectiveLine();
2387 case DK_LOC:
2388 return parseDirectiveLoc();
2389 case DK_STABS:
2390 return parseDirectiveStabs();
2391 case DK_CV_FILE:
2392 return parseDirectiveCVFile();
2393 case DK_CV_FUNC_ID:
2394 return parseDirectiveCVFuncId();
2395 case DK_CV_INLINE_SITE_ID:
2396 return parseDirectiveCVInlineSiteId();
2397 case DK_CV_LOC:
2398 return parseDirectiveCVLoc();
2399 case DK_CV_LINETABLE:
2400 return parseDirectiveCVLinetable();
2401 case DK_CV_INLINE_LINETABLE:
2402 return parseDirectiveCVInlineLinetable();
2403 case DK_CV_DEF_RANGE:
2404 return parseDirectiveCVDefRange();
2405 case DK_CV_STRING:
2406 return parseDirectiveCVString();
2407 case DK_CV_STRINGTABLE:
2408 return parseDirectiveCVStringTable();
2409 case DK_CV_FILECHECKSUMS:
2410 return parseDirectiveCVFileChecksums();
2411 case DK_CV_FILECHECKSUM_OFFSET:
2412 return parseDirectiveCVFileChecksumOffset();
2413 case DK_CV_FPO_DATA:
2414 return parseDirectiveCVFPOData();
2415 case DK_CFI_SECTIONS:
2416 return parseDirectiveCFISections();
2417 case DK_CFI_STARTPROC:
2418 return parseDirectiveCFIStartProc();
2419 case DK_CFI_ENDPROC:
2420 return parseDirectiveCFIEndProc();
2421 case DK_CFI_DEF_CFA:
2422 return parseDirectiveCFIDefCfa(IDLoc);
2423 case DK_CFI_DEF_CFA_OFFSET:
2424 return parseDirectiveCFIDefCfaOffset(IDLoc);
2425 case DK_CFI_ADJUST_CFA_OFFSET:
2426 return parseDirectiveCFIAdjustCfaOffset(IDLoc);
2427 case DK_CFI_DEF_CFA_REGISTER:
2428 return parseDirectiveCFIDefCfaRegister(IDLoc);
2429 case DK_CFI_OFFSET:
2430 return parseDirectiveCFIOffset(IDLoc);
2431 case DK_CFI_REL_OFFSET:
2432 return parseDirectiveCFIRelOffset(IDLoc);
2433 case DK_CFI_PERSONALITY:
2434 return parseDirectiveCFIPersonalityOrLsda(true);
2435 case DK_CFI_LSDA:
2436 return parseDirectiveCFIPersonalityOrLsda(false);
2437 case DK_CFI_REMEMBER_STATE:
2438 return parseDirectiveCFIRememberState(IDLoc);
2439 case DK_CFI_RESTORE_STATE:
2440 return parseDirectiveCFIRestoreState(IDLoc);
2441 case DK_CFI_SAME_VALUE:
2442 return parseDirectiveCFISameValue(IDLoc);
2443 case DK_CFI_RESTORE:
2444 return parseDirectiveCFIRestore(IDLoc);
2445 case DK_CFI_ESCAPE:
2446 return parseDirectiveCFIEscape(IDLoc);
2447 case DK_CFI_RETURN_COLUMN:
2448 return parseDirectiveCFIReturnColumn(IDLoc);
2449 case DK_CFI_SIGNAL_FRAME:
2450 return parseDirectiveCFISignalFrame();
2451 case DK_CFI_UNDEFINED:
2452 return parseDirectiveCFIUndefined(IDLoc);
2453 case DK_CFI_REGISTER:
2454 return parseDirectiveCFIRegister(IDLoc);
2455 case DK_CFI_WINDOW_SAVE:
2456 return parseDirectiveCFIWindowSave(IDLoc);
2457 case DK_EXITM:
2458 Info.ExitValue = "";
2459 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
2460 case DK_ENDM:
2461 Info.ExitValue = "";
2462 return parseDirectiveEndMacro(IDVal);
2463 case DK_PURGE:
2464 return parseDirectivePurgeMacro(IDLoc);
2465 case DK_END:
2466 return parseDirectiveEnd(IDLoc);
2467 case DK_ERR:
2468 return parseDirectiveError(IDLoc);
2469 case DK_ERRB:
2470 return parseDirectiveErrorIfb(IDLoc, true);
2471 case DK_ERRNB:
2472 return parseDirectiveErrorIfb(IDLoc, false);
2473 case DK_ERRDEF:
2474 return parseDirectiveErrorIfdef(IDLoc, true);
2475 case DK_ERRNDEF:
2476 return parseDirectiveErrorIfdef(IDLoc, false);
2477 case DK_ERRDIF:
2478 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2479 /*CaseInsensitive=*/false);
2480 case DK_ERRDIFI:
2481 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
2482 /*CaseInsensitive=*/true);
2483 case DK_ERRIDN:
2484 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2485 /*CaseInsensitive=*/false);
2486 case DK_ERRIDNI:
2487 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
2488 /*CaseInsensitive=*/true);
2489 case DK_ERRE:
2490 return parseDirectiveErrorIfe(IDLoc, true);
2491 case DK_ERRNZ:
2492 return parseDirectiveErrorIfe(IDLoc, false);
2493 case DK_RADIX:
2494 return parseDirectiveRadix(IDLoc);
2495 case DK_ECHO:
2496 return parseDirectiveEcho(IDLoc);
2497 }
2498
2499 return Error(IDLoc, "unknown directive");
2500 }
2501
2502 // We also check if this is allocating memory with user-defined type.
2503 auto IDIt = Structs.find(IDVal.lower());
2504 if (IDIt != Structs.end())
2505 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
2506 IDLoc);
2507
2508 // Non-conditional Microsoft directives sometimes follow their first argument.
2509 const AsmToken nextTok = getTok();
2510 const StringRef nextVal = nextTok.getString();
2511 const SMLoc nextLoc = nextTok.getLoc();
2512
2513 const AsmToken afterNextTok = peekTok();
2514
2515 // There are several entities interested in parsing infix directives:
2516 //
2517 // 1. Asm parser extensions. For example, platform-specific parsers
2518 // (like the ELF parser) register themselves as extensions.
2519 // 2. The generic directive parser implemented by this class. These are
2520 // all the directives that behave in a target and platform independent
2521 // manner, or at least have a default behavior that's shared between
2522 // all targets and platforms.
2523
2524 getTargetParser().flushPendingInstructions(getStreamer());
2525
2526 // Special-case handling of structure-end directives at higher priority, since
2527 // ENDS is overloaded as a segment-end directive.
2528 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
2529 Lex();
2530 return parseDirectiveEnds(IDVal, IDLoc);
2531 }
2532
2533 // First, check the extension directive map to see if any extension has
2534 // registered itself to parse this directive.
2535 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
2536 ExtensionDirectiveMap.lookup(nextVal.lower());
2537 if (Handler.first) {
2538 Lex();
2539 Lexer.UnLex(ID);
2540 return (*Handler.second)(Handler.first, nextVal, nextLoc);
2541 }
2542
2543 // If no one else is interested in this directive, it must be
2544 // generic and familiar to this class.
2545 DirKindIt = DirectiveKindMap.find(nextVal.lower());
2546 DirKind = (DirKindIt == DirectiveKindMap.end())
2547 ? DK_NO_DIRECTIVE
2548 : DirKindIt->getValue();
2549 switch (DirKind) {
2550 default:
2551 break;
2552 case DK_ASSIGN:
2553 case DK_EQU:
2554 case DK_TEXTEQU:
2555 Lex();
2556 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
2557 case DK_BYTE:
2558 if (afterNextTok.is(AsmToken::Identifier) &&
2559 afterNextTok.getString().equals_insensitive("ptr")) {
2560 // Size directive; part of an instruction.
2561 break;
2562 }
2563 [[fallthrough]];
2564 case DK_SBYTE:
2565 case DK_DB:
2566 Lex();
2567 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
2568 case DK_WORD:
2569 if (afterNextTok.is(AsmToken::Identifier) &&
2570 afterNextTok.getString().equals_insensitive("ptr")) {
2571 // Size directive; part of an instruction.
2572 break;
2573 }
2574 [[fallthrough]];
2575 case DK_SWORD:
2576 case DK_DW:
2577 Lex();
2578 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
2579 case DK_DWORD:
2580 if (afterNextTok.is(AsmToken::Identifier) &&
2581 afterNextTok.getString().equals_insensitive("ptr")) {
2582 // Size directive; part of an instruction.
2583 break;
2584 }
2585 [[fallthrough]];
2586 case DK_SDWORD:
2587 case DK_DD:
2588 Lex();
2589 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
2590 case DK_FWORD:
2591 if (afterNextTok.is(AsmToken::Identifier) &&
2592 afterNextTok.getString().equals_insensitive("ptr")) {
2593 // Size directive; part of an instruction.
2594 break;
2595 }
2596 [[fallthrough]];
2597 case DK_DF:
2598 Lex();
2599 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
2600 case DK_QWORD:
2601 if (afterNextTok.is(AsmToken::Identifier) &&
2602 afterNextTok.getString().equals_insensitive("ptr")) {
2603 // Size directive; part of an instruction.
2604 break;
2605 }
2606 [[fallthrough]];
2607 case DK_SQWORD:
2608 case DK_DQ:
2609 Lex();
2610 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
2611 case DK_REAL4:
2612 Lex();
2613 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
2614 IDVal, IDLoc);
2615 case DK_REAL8:
2616 Lex();
2617 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
2618 IDVal, IDLoc);
2619 case DK_REAL10:
2620 Lex();
2621 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
2622 10, IDVal, IDLoc);
2623 case DK_STRUCT:
2624 case DK_UNION:
2625 Lex();
2626 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
2627 case DK_ENDS:
2628 Lex();
2629 return parseDirectiveEnds(IDVal, IDLoc);
2630 case DK_MACRO:
2631 Lex();
2632 return parseDirectiveMacro(IDVal, IDLoc);
2633 }
2634
2635 // Finally, we check if this is allocating a variable with user-defined type.
2636 auto NextIt = Structs.find(nextVal.lower());
2637 if (NextIt != Structs.end()) {
2638 Lex();
2639 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
2640 nextVal, nextLoc, IDVal);
2641 }
2642
2643 // __asm _emit or __asm __emit
2644 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
2645 IDVal == "_EMIT" || IDVal == "__EMIT"))
2646 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());
2647
2648 // __asm align
2649 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
2650 return parseDirectiveMSAlign(IDLoc, Info);
2651
2652 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
2653 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
2654 if (checkForValidSection())
2655 return true;
2656
2657 // Canonicalize the opcode to lower case.
2658 std::string OpcodeStr = IDVal.lower();
2659 ParseInstructionInfo IInfo(Info.AsmRewrites);
2660 bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
2661 Info.ParsedOperands);
2662 Info.ParseError = ParseHadError;
2663
2664 // Dump the parsed representation, if requested.
2665 if (getShowParsedOperands()) {
2666 SmallString<256> Str;
2667 raw_svector_ostream OS(Str);
2668 OS << "parsed instruction: [";
2669 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
2670 if (i != 0)
2671 OS << ", ";
2672 Info.ParsedOperands[i]->print(OS);
2673 }
2674 OS << "]";
2675
2676 printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
2677 }
2678
2679 // Fail even if ParseInstruction erroneously returns false.
2680 if (hasPendingError() || ParseHadError)
2681 return true;
2682
2683 // If we are generating dwarf for the current section then generate a .loc
2684 // directive for the instruction.
2685 if (!ParseHadError && enabledGenDwarfForAssembly() &&
2686 getContext().getGenDwarfSectionSyms().count(
2687 getStreamer().getCurrentSectionOnly())) {
2688 unsigned Line;
2689 if (ActiveMacros.empty())
2690 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
2691 else
2692 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
2693 ActiveMacros.front()->ExitBuffer);
2694
2695 // If we previously parsed a cpp hash file line comment then make sure the
2696 // current Dwarf File is for the CppHashFilename if not then emit the
2697 // Dwarf File table for it and adjust the line number for the .loc.
2698 if (!CppHashInfo.Filename.empty()) {
2699 unsigned FileNumber = getStreamer().emitDwarfFileDirective(
2700 0, StringRef(), CppHashInfo.Filename);
2701 getContext().setGenDwarfFileNumber(FileNumber);
2702
2703 unsigned CppHashLocLineNo =
2704 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
2705 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
2706 }
2707
2708 getStreamer().emitDwarfLocDirective(
2709 getContext().getGenDwarfFileNumber(), Line, 0,
2710 DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0,
2711 StringRef());
2712 }
2713
2714 // If parsing succeeded, match the instruction.
2715 if (!ParseHadError) {
2716 uint64_t ErrorInfo;
2717 if (getTargetParser().MatchAndEmitInstruction(
2718 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
2719 getTargetParser().isParsingMSInlineAsm()))
2720 return true;
2721 }
2722 return false;
2723 }
2724
2725 // Parse and erase curly braces marking block start/end.
parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> & AsmStrRewrites)2726 bool MasmParser::parseCurlyBlockScope(
2727 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
2728 // Identify curly brace marking block start/end.
2729 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
2730 return false;
2731
2732 SMLoc StartLoc = Lexer.getLoc();
2733 Lex(); // Eat the brace.
2734 if (Lexer.is(AsmToken::EndOfStatement))
2735 Lex(); // Eat EndOfStatement following the brace.
2736
2737 // Erase the block start/end brace from the output asm string.
2738 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
2739 StartLoc.getPointer());
2740 return true;
2741 }
2742
2743 /// parseCppHashLineFilenameComment as this:
2744 /// ::= # number "filename"
parseCppHashLineFilenameComment(SMLoc L)2745 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
2746 Lex(); // Eat the hash token.
2747 // Lexer only ever emits HashDirective if it fully formed if it's
2748 // done the checking already so this is an internal error.
2749 assert(getTok().is(AsmToken::Integer) &&
2750 "Lexing Cpp line comment: Expected Integer");
2751 int64_t LineNumber = getTok().getIntVal();
2752 Lex();
2753 assert(getTok().is(AsmToken::String) &&
2754 "Lexing Cpp line comment: Expected String");
2755 StringRef Filename = getTok().getString();
2756 Lex();
2757
2758 // Get rid of the enclosing quotes.
2759 Filename = Filename.substr(1, Filename.size() - 2);
2760
2761 // Save the SMLoc, Filename and LineNumber for later use by diagnostics
2762 // and possibly DWARF file info.
2763 CppHashInfo.Loc = L;
2764 CppHashInfo.Filename = Filename;
2765 CppHashInfo.LineNumber = LineNumber;
2766 CppHashInfo.Buf = CurBuffer;
2767 if (FirstCppHashFilename.empty())
2768 FirstCppHashFilename = Filename;
2769 return false;
2770 }
2771
2772 /// will use the last parsed cpp hash line filename comment
2773 /// for the Filename and LineNo if any in the diagnostic.
DiagHandler(const SMDiagnostic & Diag,void * Context)2774 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
2775 const MasmParser *Parser = static_cast<const MasmParser *>(Context);
2776 raw_ostream &OS = errs();
2777
2778 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
2779 SMLoc DiagLoc = Diag.getLoc();
2780 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2781 unsigned CppHashBuf =
2782 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);
2783
2784 // Like SourceMgr::printMessage() we need to print the include stack if any
2785 // before printing the message.
2786 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
2787 if (!Parser->SavedDiagHandler && DiagCurBuffer &&
2788 DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
2789 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
2790 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
2791 }
2792
2793 // If we have not parsed a cpp hash line filename comment or the source
2794 // manager changed or buffer changed (like in a nested include) then just
2795 // print the normal diagnostic using its Filename and LineNo.
2796 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
2797 DiagBuf != CppHashBuf) {
2798 if (Parser->SavedDiagHandler)
2799 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
2800 else
2801 Diag.print(nullptr, OS);
2802 return;
2803 }
2804
2805 // Use the CppHashFilename and calculate a line number based on the
2806 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
2807 // for the diagnostic.
2808 const std::string &Filename = std::string(Parser->CppHashInfo.Filename);
2809
2810 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
2811 int CppHashLocLineNo =
2812 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
2813 int LineNo =
2814 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);
2815
2816 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
2817 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
2818 Diag.getLineContents(), Diag.getRanges());
2819
2820 if (Parser->SavedDiagHandler)
2821 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
2822 else
2823 NewDiag.print(nullptr, OS);
2824 }
2825
2826 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
2827 // not accept '.'.
isMacroParameterChar(char C)2828 static bool isMacroParameterChar(char C) {
2829 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?';
2830 }
2831
expandMacro(raw_svector_ostream & OS,StringRef Body,ArrayRef<MCAsmMacroParameter> Parameters,ArrayRef<MCAsmMacroArgument> A,const std::vector<std::string> & Locals,SMLoc L)2832 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
2833 ArrayRef<MCAsmMacroParameter> Parameters,
2834 ArrayRef<MCAsmMacroArgument> A,
2835 const std::vector<std::string> &Locals, SMLoc L) {
2836 unsigned NParameters = Parameters.size();
2837 if (NParameters != A.size())
2838 return Error(L, "Wrong number of arguments");
2839 StringMap<std::string> LocalSymbols;
2840 std::string Name;
2841 Name.reserve(6);
2842 for (StringRef Local : Locals) {
2843 raw_string_ostream LocalName(Name);
2844 LocalName << "??"
2845 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true);
2846 LocalSymbols.insert({Local, Name});
2847 Name.clear();
2848 }
2849
2850 std::optional<char> CurrentQuote;
2851 while (!Body.empty()) {
2852 // Scan for the next substitution.
2853 std::size_t End = Body.size(), Pos = 0;
2854 std::size_t IdentifierPos = End;
2855 for (; Pos != End; ++Pos) {
2856 // Find the next possible macro parameter, including preceding a '&'
2857 // inside quotes.
2858 if (Body[Pos] == '&')
2859 break;
2860 if (isMacroParameterChar(Body[Pos])) {
2861 if (!CurrentQuote)
2862 break;
2863 if (IdentifierPos == End)
2864 IdentifierPos = Pos;
2865 } else {
2866 IdentifierPos = End;
2867 }
2868
2869 // Track quotation status
2870 if (!CurrentQuote) {
2871 if (Body[Pos] == '\'' || Body[Pos] == '"')
2872 CurrentQuote = Body[Pos];
2873 } else if (Body[Pos] == CurrentQuote) {
2874 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) {
2875 // Escaped quote, and quotes aren't identifier chars; skip
2876 ++Pos;
2877 continue;
2878 } else {
2879 CurrentQuote.reset();
2880 }
2881 }
2882 }
2883 if (IdentifierPos != End) {
2884 // We've recognized an identifier before an apostrophe inside quotes;
2885 // check once to see if we can expand it.
2886 Pos = IdentifierPos;
2887 IdentifierPos = End;
2888 }
2889
2890 // Add the prefix.
2891 OS << Body.slice(0, Pos);
2892
2893 // Check if we reached the end.
2894 if (Pos == End)
2895 break;
2896
2897 unsigned I = Pos;
2898 bool InitialAmpersand = (Body[I] == '&');
2899 if (InitialAmpersand) {
2900 ++I;
2901 ++Pos;
2902 }
2903 while (I < End && isMacroParameterChar(Body[I]))
2904 ++I;
2905
2906 const char *Begin = Body.data() + Pos;
2907 StringRef Argument(Begin, I - Pos);
2908 const std::string ArgumentLower = Argument.lower();
2909 unsigned Index = 0;
2910
2911 for (; Index < NParameters; ++Index)
2912 if (Parameters[Index].Name.equals_insensitive(ArgumentLower))
2913 break;
2914
2915 if (Index == NParameters) {
2916 if (InitialAmpersand)
2917 OS << '&';
2918 auto it = LocalSymbols.find(ArgumentLower);
2919 if (it != LocalSymbols.end())
2920 OS << it->second;
2921 else
2922 OS << Argument;
2923 Pos = I;
2924 } else {
2925 for (const AsmToken &Token : A[Index]) {
2926 // In MASM, you can write '%expr'.
2927 // The prefix '%' evaluates the expression 'expr'
2928 // and uses the result as a string (e.g. replace %(1+2) with the
2929 // string "3").
2930 // Here, we identify the integer token which is the result of the
2931 // absolute expression evaluation and replace it with its string
2932 // representation.
2933 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer))
2934 // Emit an integer value to the buffer.
2935 OS << Token.getIntVal();
2936 else
2937 OS << Token.getString();
2938 }
2939
2940 Pos += Argument.size();
2941 if (Pos < End && Body[Pos] == '&') {
2942 ++Pos;
2943 }
2944 }
2945 // Update the scan point.
2946 Body = Body.substr(Pos);
2947 }
2948
2949 return false;
2950 }
2951
isOperator(AsmToken::TokenKind kind)2952 static bool isOperator(AsmToken::TokenKind kind) {
2953 switch (kind) {
2954 default:
2955 return false;
2956 case AsmToken::Plus:
2957 case AsmToken::Minus:
2958 case AsmToken::Tilde:
2959 case AsmToken::Slash:
2960 case AsmToken::Star:
2961 case AsmToken::Dot:
2962 case AsmToken::Equal:
2963 case AsmToken::EqualEqual:
2964 case AsmToken::Pipe:
2965 case AsmToken::PipePipe:
2966 case AsmToken::Caret:
2967 case AsmToken::Amp:
2968 case AsmToken::AmpAmp:
2969 case AsmToken::Exclaim:
2970 case AsmToken::ExclaimEqual:
2971 case AsmToken::Less:
2972 case AsmToken::LessEqual:
2973 case AsmToken::LessLess:
2974 case AsmToken::LessGreater:
2975 case AsmToken::Greater:
2976 case AsmToken::GreaterEqual:
2977 case AsmToken::GreaterGreater:
2978 return true;
2979 }
2980 }
2981
2982 namespace {
2983
2984 class AsmLexerSkipSpaceRAII {
2985 public:
AsmLexerSkipSpaceRAII(AsmLexer & Lexer,bool SkipSpace)2986 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
2987 Lexer.setSkipSpace(SkipSpace);
2988 }
2989
~AsmLexerSkipSpaceRAII()2990 ~AsmLexerSkipSpaceRAII() {
2991 Lexer.setSkipSpace(true);
2992 }
2993
2994 private:
2995 AsmLexer &Lexer;
2996 };
2997
2998 } // end anonymous namespace
2999
parseMacroArgument(const MCAsmMacroParameter * MP,MCAsmMacroArgument & MA,AsmToken::TokenKind EndTok)3000 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP,
3001 MCAsmMacroArgument &MA,
3002 AsmToken::TokenKind EndTok) {
3003 if (MP && MP->Vararg) {
3004 if (Lexer.isNot(EndTok)) {
3005 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok);
3006 for (StringRef S : Str) {
3007 MA.emplace_back(AsmToken::String, S);
3008 }
3009 }
3010 return false;
3011 }
3012
3013 SMLoc StrLoc = Lexer.getLoc(), EndLoc;
3014 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) {
3015 const char *StrChar = StrLoc.getPointer() + 1;
3016 const char *EndChar = EndLoc.getPointer() - 1;
3017 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3018 /// Eat from '<' to '>'.
3019 Lex();
3020 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar));
3021 return false;
3022 }
3023
3024 unsigned ParenLevel = 0;
3025
3026 // Darwin doesn't use spaces to delmit arguments.
3027 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin);
3028
3029 bool SpaceEaten;
3030
3031 while (true) {
3032 SpaceEaten = false;
3033 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
3034 return TokError("unexpected token");
3035
3036 if (ParenLevel == 0) {
3037 if (Lexer.is(AsmToken::Comma))
3038 break;
3039
3040 if (Lexer.is(AsmToken::Space)) {
3041 SpaceEaten = true;
3042 Lex(); // Eat spaces.
3043 }
3044
3045 // Spaces can delimit parameters, but could also be part an expression.
3046 // If the token after a space is an operator, add the token and the next
3047 // one into this argument
3048 if (!IsDarwin) {
3049 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) {
3050 MA.push_back(getTok());
3051 Lex();
3052
3053 // Whitespace after an operator can be ignored.
3054 if (Lexer.is(AsmToken::Space))
3055 Lex();
3056
3057 continue;
3058 }
3059 }
3060 if (SpaceEaten)
3061 break;
3062 }
3063
3064 // handleMacroEntry relies on not advancing the lexer here
3065 // to be able to fill in the remaining default parameter values
3066 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0))
3067 break;
3068
3069 // Adjust the current parentheses level.
3070 if (Lexer.is(AsmToken::LParen))
3071 ++ParenLevel;
3072 else if (Lexer.is(AsmToken::RParen) && ParenLevel)
3073 --ParenLevel;
3074
3075 // Append the token to the current argument list.
3076 MA.push_back(getTok());
3077 Lex();
3078 }
3079
3080 if (ParenLevel != 0)
3081 return TokError("unbalanced parentheses in argument");
3082
3083 if (MA.empty() && MP) {
3084 if (MP->Required) {
3085 return TokError("missing value for required parameter '" + MP->Name +
3086 "'");
3087 } else {
3088 MA = MP->Value;
3089 }
3090 }
3091 return false;
3092 }
3093
3094 // Parse the macro instantiation arguments.
parseMacroArguments(const MCAsmMacro * M,MCAsmMacroArguments & A,AsmToken::TokenKind EndTok)3095 bool MasmParser::parseMacroArguments(const MCAsmMacro *M,
3096 MCAsmMacroArguments &A,
3097 AsmToken::TokenKind EndTok) {
3098 const unsigned NParameters = M ? M->Parameters.size() : 0;
3099 bool NamedParametersFound = false;
3100 SmallVector<SMLoc, 4> FALocs;
3101
3102 A.resize(NParameters);
3103 FALocs.resize(NParameters);
3104
3105 // Parse two kinds of macro invocations:
3106 // - macros defined without any parameters accept an arbitrary number of them
3107 // - macros defined with parameters accept at most that many of them
3108 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
3109 ++Parameter) {
3110 SMLoc IDLoc = Lexer.getLoc();
3111 MCAsmMacroParameter FA;
3112
3113 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) {
3114 if (parseIdentifier(FA.Name))
3115 return Error(IDLoc, "invalid argument identifier for formal argument");
3116
3117 if (Lexer.isNot(AsmToken::Equal))
3118 return TokError("expected '=' after formal parameter identifier");
3119
3120 Lex();
3121
3122 NamedParametersFound = true;
3123 }
3124
3125 if (NamedParametersFound && FA.Name.empty())
3126 return Error(IDLoc, "cannot mix positional and keyword arguments");
3127
3128 unsigned PI = Parameter;
3129 if (!FA.Name.empty()) {
3130 assert(M && "expected macro to be defined");
3131 unsigned FAI = 0;
3132 for (FAI = 0; FAI < NParameters; ++FAI)
3133 if (M->Parameters[FAI].Name == FA.Name)
3134 break;
3135
3136 if (FAI >= NParameters) {
3137 return Error(IDLoc, "parameter named '" + FA.Name +
3138 "' does not exist for macro '" + M->Name + "'");
3139 }
3140 PI = FAI;
3141 }
3142 const MCAsmMacroParameter *MP = nullptr;
3143 if (M && PI < NParameters)
3144 MP = &M->Parameters[PI];
3145
3146 SMLoc StrLoc = Lexer.getLoc();
3147 SMLoc EndLoc;
3148 if (Lexer.is(AsmToken::Percent)) {
3149 const MCExpr *AbsoluteExp;
3150 int64_t Value;
3151 /// Eat '%'.
3152 Lex();
3153 if (parseExpression(AbsoluteExp, EndLoc))
3154 return false;
3155 if (!AbsoluteExp->evaluateAsAbsolute(Value,
3156 getStreamer().getAssemblerPtr()))
3157 return Error(StrLoc, "expected absolute expression");
3158 const char *StrChar = StrLoc.getPointer();
3159 const char *EndChar = EndLoc.getPointer();
3160 AsmToken newToken(AsmToken::Integer,
3161 StringRef(StrChar, EndChar - StrChar), Value);
3162 FA.Value.push_back(newToken);
3163 } else if (parseMacroArgument(MP, FA.Value, EndTok)) {
3164 if (M)
3165 return addErrorSuffix(" in '" + M->Name + "' macro");
3166 else
3167 return true;
3168 }
3169
3170 if (!FA.Value.empty()) {
3171 if (A.size() <= PI)
3172 A.resize(PI + 1);
3173 A[PI] = FA.Value;
3174
3175 if (FALocs.size() <= PI)
3176 FALocs.resize(PI + 1);
3177
3178 FALocs[PI] = Lexer.getLoc();
3179 }
3180
3181 // At the end of the statement, fill in remaining arguments that have
3182 // default values. If there aren't any, then the next argument is
3183 // required but missing
3184 if (Lexer.is(EndTok)) {
3185 bool Failure = false;
3186 for (unsigned FAI = 0; FAI < NParameters; ++FAI) {
3187 if (A[FAI].empty()) {
3188 if (M->Parameters[FAI].Required) {
3189 Error(FALocs[FAI].isValid() ? FALocs[FAI] : Lexer.getLoc(),
3190 "missing value for required parameter "
3191 "'" +
3192 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'");
3193 Failure = true;
3194 }
3195
3196 if (!M->Parameters[FAI].Value.empty())
3197 A[FAI] = M->Parameters[FAI].Value;
3198 }
3199 }
3200 return Failure;
3201 }
3202
3203 if (Lexer.is(AsmToken::Comma))
3204 Lex();
3205 }
3206
3207 return TokError("too many positional arguments");
3208 }
3209
handleMacroEntry(const MCAsmMacro * M,SMLoc NameLoc,AsmToken::TokenKind ArgumentEndTok)3210 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc,
3211 AsmToken::TokenKind ArgumentEndTok) {
3212 // Arbitrarily limit macro nesting depth (default matches 'as'). We can
3213 // eliminate this, although we should protect against infinite loops.
3214 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
3215 if (ActiveMacros.size() == MaxNestingDepth) {
3216 std::ostringstream MaxNestingDepthError;
3217 MaxNestingDepthError << "macros cannot be nested more than "
3218 << MaxNestingDepth << " levels deep."
3219 << " Use -asm-macro-max-nesting-depth to increase "
3220 "this limit.";
3221 return TokError(MaxNestingDepthError.str());
3222 }
3223
3224 MCAsmMacroArguments A;
3225 if (parseMacroArguments(M, A, ArgumentEndTok))
3226 return true;
3227
3228 // Macro instantiation is lexical, unfortunately. We construct a new buffer
3229 // to hold the macro body with substitutions.
3230 SmallString<256> Buf;
3231 StringRef Body = M->Body;
3232 raw_svector_ostream OS(Buf);
3233
3234 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc()))
3235 return true;
3236
3237 // We include the endm in the buffer as our cue to exit the macro
3238 // instantiation.
3239 OS << "endm\n";
3240
3241 std::unique_ptr<MemoryBuffer> Instantiation =
3242 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
3243
3244 // Create the macro instantiation object and add to the current macro
3245 // instantiation stack.
3246 MacroInstantiation *MI = new MacroInstantiation{
3247 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()};
3248 ActiveMacros.push_back(MI);
3249
3250 ++NumOfMacroInstantiations;
3251
3252 // Jump to the macro instantiation and prime the lexer.
3253 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
3254 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
3255 EndStatementAtEOFStack.push_back(true);
3256 Lex();
3257
3258 return false;
3259 }
3260
handleMacroExit()3261 void MasmParser::handleMacroExit() {
3262 // Jump to the token we should return to, and consume it.
3263 EndStatementAtEOFStack.pop_back();
3264 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer,
3265 EndStatementAtEOFStack.back());
3266 Lex();
3267
3268 // Pop the instantiation entry.
3269 delete ActiveMacros.back();
3270 ActiveMacros.pop_back();
3271 }
3272
handleMacroInvocation(const MCAsmMacro * M,SMLoc NameLoc)3273 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
3274 if (!M->IsFunction)
3275 return Error(NameLoc, "cannot invoke macro procedure as function");
3276
3277 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name +
3278 "' requires arguments in parentheses") ||
3279 handleMacroEntry(M, NameLoc, AsmToken::RParen))
3280 return true;
3281
3282 // Parse all statements in the macro, retrieving the exit value when it ends.
3283 std::string ExitValue;
3284 SmallVector<AsmRewrite, 4> AsmStrRewrites;
3285 while (Lexer.isNot(AsmToken::Eof)) {
3286 ParseStatementInfo Info(&AsmStrRewrites);
3287 bool Parsed = parseStatement(Info, nullptr);
3288
3289 if (!Parsed && Info.ExitValue) {
3290 ExitValue = std::move(*Info.ExitValue);
3291 break;
3292 }
3293
3294 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
3295 // for printing ErrMsg via Lex() only if no (presumably better) parser error
3296 // exists.
3297 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
3298 Lex();
3299 }
3300
3301 // parseStatement returned true so may need to emit an error.
3302 printPendingErrors();
3303
3304 // Skipping to the next line if needed.
3305 if (Parsed && !getLexer().isAtStartOfStatement())
3306 eatToEndOfStatement();
3307 }
3308
3309 // Consume the right-parenthesis on the other side of the arguments.
3310 if (parseRParen())
3311 return true;
3312
3313 // Exit values may require lexing, unfortunately. We construct a new buffer to
3314 // hold the exit value.
3315 std::unique_ptr<MemoryBuffer> MacroValue =
3316 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>");
3317
3318 // Jump from this location to the instantiated exit value, and prime the
3319 // lexer.
3320 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc());
3321 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
3322 /*EndStatementAtEOF=*/false);
3323 EndStatementAtEOFStack.push_back(false);
3324 Lex();
3325
3326 return false;
3327 }
3328
3329 /// parseIdentifier:
3330 /// ::= identifier
3331 /// ::= string
parseIdentifier(StringRef & Res,IdentifierPositionKind Position)3332 bool MasmParser::parseIdentifier(StringRef &Res,
3333 IdentifierPositionKind Position) {
3334 // The assembler has relaxed rules for accepting identifiers, in particular we
3335 // allow things like '.globl $foo' and '.def @feat.00', which would normally
3336 // be separate tokens. At this level, we have already lexed so we cannot
3337 // (currently) handle this as a context dependent token, instead we detect
3338 // adjacent tokens and return the combined identifier.
3339 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) {
3340 SMLoc PrefixLoc = getLexer().getLoc();
3341
3342 // Consume the prefix character, and check for a following identifier.
3343
3344 AsmToken nextTok = peekTok(false);
3345
3346 if (nextTok.isNot(AsmToken::Identifier))
3347 return true;
3348
3349 // We have a '$' or '@' followed by an identifier, make sure they are adjacent.
3350 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer())
3351 return true;
3352
3353 // eat $ or @
3354 Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
3355 // Construct the joined identifier and consume the token.
3356 Res =
3357 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
3358 Lex(); // Parser Lex to maintain invariants.
3359 return false;
3360 }
3361
3362 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String))
3363 return true;
3364
3365 Res = getTok().getIdentifier();
3366
3367 // Consume the identifier token - but if parsing certain directives, avoid
3368 // lexical expansion of the next token.
3369 ExpandKind ExpandNextToken = ExpandMacros;
3370 if (Position == StartOfStatement &&
3371 StringSwitch<bool>(Res)
3372 .CaseLower("echo", true)
3373 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true)
3374 .Default(false)) {
3375 ExpandNextToken = DoNotExpandMacros;
3376 }
3377 Lex(ExpandNextToken);
3378
3379 return false;
3380 }
3381
3382 /// parseDirectiveEquate:
3383 /// ::= name "=" expression
3384 /// | name "equ" expression (not redefinable)
3385 /// | name "equ" text-list
3386 /// | name "textequ" text-list (redefinability unspecified)
parseDirectiveEquate(StringRef IDVal,StringRef Name,DirectiveKind DirKind,SMLoc NameLoc)3387 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name,
3388 DirectiveKind DirKind, SMLoc NameLoc) {
3389 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower());
3390 if (BuiltinIt != BuiltinSymbolMap.end())
3391 return Error(NameLoc, "cannot redefine a built-in symbol");
3392
3393 Variable &Var = Variables[Name.lower()];
3394 if (Var.Name.empty()) {
3395 Var.Name = Name;
3396 }
3397
3398 SMLoc StartLoc = Lexer.getLoc();
3399 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) {
3400 // "equ" and "textequ" both allow text expressions.
3401 std::string Value;
3402 std::string TextItem;
3403 if (!parseTextItem(TextItem)) {
3404 Value += TextItem;
3405
3406 // Accept a text-list, not just one text-item.
3407 auto parseItem = [&]() -> bool {
3408 if (parseTextItem(TextItem))
3409 return TokError("expected text item");
3410 Value += TextItem;
3411 return false;
3412 };
3413 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem))
3414 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3415
3416 if (!Var.IsText || Var.TextValue != Value) {
3417 switch (Var.Redefinable) {
3418 case Variable::NOT_REDEFINABLE:
3419 return Error(getTok().getLoc(), "invalid variable redefinition");
3420 case Variable::WARN_ON_REDEFINITION:
3421 if (Warning(NameLoc, "redefining '" + Name +
3422 "', already defined on the command line")) {
3423 return true;
3424 }
3425 break;
3426 default:
3427 break;
3428 }
3429 }
3430 Var.IsText = true;
3431 Var.TextValue = Value;
3432 Var.Redefinable = Variable::REDEFINABLE;
3433
3434 return false;
3435 }
3436 }
3437 if (DirKind == DK_TEXTEQU)
3438 return TokError("expected <text> in '" + Twine(IDVal) + "' directive");
3439
3440 // Parse as expression assignment.
3441 const MCExpr *Expr;
3442 SMLoc EndLoc;
3443 if (parseExpression(Expr, EndLoc))
3444 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3445 StringRef ExprAsString = StringRef(
3446 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer());
3447
3448 int64_t Value;
3449 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) {
3450 if (DirKind == DK_ASSIGN)
3451 return Error(
3452 StartLoc,
3453 "expected absolute expression; not all symbols have known values",
3454 {StartLoc, EndLoc});
3455
3456 // Not an absolute expression; define as a text replacement.
3457 if (!Var.IsText || Var.TextValue != ExprAsString) {
3458 switch (Var.Redefinable) {
3459 case Variable::NOT_REDEFINABLE:
3460 return Error(getTok().getLoc(), "invalid variable redefinition");
3461 case Variable::WARN_ON_REDEFINITION:
3462 if (Warning(NameLoc, "redefining '" + Name +
3463 "', already defined on the command line")) {
3464 return true;
3465 }
3466 break;
3467 default:
3468 break;
3469 }
3470 }
3471
3472 Var.IsText = true;
3473 Var.TextValue = ExprAsString.str();
3474 Var.Redefinable = Variable::REDEFINABLE;
3475
3476 return false;
3477 }
3478
3479 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name);
3480
3481 const MCConstantExpr *PrevValue =
3482 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>(
3483 Sym->getVariableValue(/*SetUsed=*/false))
3484 : nullptr;
3485 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) {
3486 switch (Var.Redefinable) {
3487 case Variable::NOT_REDEFINABLE:
3488 return Error(getTok().getLoc(), "invalid variable redefinition");
3489 case Variable::WARN_ON_REDEFINITION:
3490 if (Warning(NameLoc, "redefining '" + Name +
3491 "', already defined on the command line")) {
3492 return true;
3493 }
3494 break;
3495 default:
3496 break;
3497 }
3498 }
3499
3500 Var.IsText = false;
3501 Var.TextValue.clear();
3502 Var.Redefinable = (DirKind == DK_ASSIGN) ? Variable::REDEFINABLE
3503 : Variable::NOT_REDEFINABLE;
3504
3505 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE);
3506 Sym->setVariableValue(Expr);
3507 Sym->setExternal(false);
3508
3509 return false;
3510 }
3511
parseEscapedString(std::string & Data)3512 bool MasmParser::parseEscapedString(std::string &Data) {
3513 if (check(getTok().isNot(AsmToken::String), "expected string"))
3514 return true;
3515
3516 Data = "";
3517 char Quote = getTok().getString().front();
3518 StringRef Str = getTok().getStringContents();
3519 Data.reserve(Str.size());
3520 for (size_t i = 0, e = Str.size(); i != e; ++i) {
3521 Data.push_back(Str[i]);
3522 if (Str[i] == Quote) {
3523 // MASM treats doubled delimiting quotes as an escaped delimiting quote.
3524 // If we're escaping the string's trailing delimiter, we're definitely
3525 // missing a quotation mark.
3526 if (i + 1 == Str.size())
3527 return Error(getTok().getLoc(), "missing quotation mark in string");
3528 if (Str[i + 1] == Quote)
3529 ++i;
3530 }
3531 }
3532
3533 Lex();
3534 return false;
3535 }
3536
parseAngleBracketString(std::string & Data)3537 bool MasmParser::parseAngleBracketString(std::string &Data) {
3538 SMLoc EndLoc, StartLoc = getTok().getLoc();
3539 if (isAngleBracketString(StartLoc, EndLoc)) {
3540 const char *StartChar = StartLoc.getPointer() + 1;
3541 const char *EndChar = EndLoc.getPointer() - 1;
3542 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back());
3543 // Eat from '<' to '>'.
3544 Lex();
3545
3546 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar));
3547 return false;
3548 }
3549 return true;
3550 }
3551
3552 /// textItem ::= textLiteral | textMacroID | % constExpr
parseTextItem(std::string & Data)3553 bool MasmParser::parseTextItem(std::string &Data) {
3554 switch (getTok().getKind()) {
3555 default:
3556 return true;
3557 case AsmToken::Percent: {
3558 int64_t Res;
3559 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res))
3560 return true;
3561 Data = std::to_string(Res);
3562 return false;
3563 }
3564 case AsmToken::Less:
3565 case AsmToken::LessEqual:
3566 case AsmToken::LessLess:
3567 case AsmToken::LessGreater:
3568 return parseAngleBracketString(Data);
3569 case AsmToken::Identifier: {
3570 // This must be a text macro; we need to expand it accordingly.
3571 StringRef ID;
3572 SMLoc StartLoc = getTok().getLoc();
3573 if (parseIdentifier(ID))
3574 return true;
3575 Data = ID.str();
3576
3577 bool Expanded = false;
3578 while (true) {
3579 // Try to resolve as a built-in text macro
3580 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower());
3581 if (BuiltinIt != BuiltinSymbolMap.end()) {
3582 std::optional<std::string> BuiltinText =
3583 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc);
3584 if (!BuiltinText) {
3585 // Not a text macro; break without substituting
3586 break;
3587 }
3588 Data = std::move(*BuiltinText);
3589 ID = StringRef(Data);
3590 Expanded = true;
3591 continue;
3592 }
3593
3594 // Try to resolve as a variable text macro
3595 auto VarIt = Variables.find(ID.lower());
3596 if (VarIt != Variables.end()) {
3597 const Variable &Var = VarIt->getValue();
3598 if (!Var.IsText) {
3599 // Not a text macro; break without substituting
3600 break;
3601 }
3602 Data = Var.TextValue;
3603 ID = StringRef(Data);
3604 Expanded = true;
3605 continue;
3606 }
3607
3608 break;
3609 }
3610
3611 if (!Expanded) {
3612 // Not a text macro; not usable in TextItem context. Since we haven't used
3613 // the token, put it back for better error recovery.
3614 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID));
3615 return true;
3616 }
3617 return false;
3618 }
3619 }
3620 llvm_unreachable("unhandled token kind");
3621 }
3622
3623 /// parseDirectiveAscii:
3624 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
parseDirectiveAscii(StringRef IDVal,bool ZeroTerminated)3625 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
3626 auto parseOp = [&]() -> bool {
3627 std::string Data;
3628 if (checkForValidSection() || parseEscapedString(Data))
3629 return true;
3630 getStreamer().emitBytes(Data);
3631 if (ZeroTerminated)
3632 getStreamer().emitBytes(StringRef("\0", 1));
3633 return false;
3634 };
3635
3636 if (parseMany(parseOp))
3637 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3638 return false;
3639 }
3640
emitIntValue(const MCExpr * Value,unsigned Size)3641 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
3642 // Special case constant expressions to match code generator.
3643 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
3644 assert(Size <= 8 && "Invalid size");
3645 int64_t IntValue = MCE->getValue();
3646 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
3647 return Error(MCE->getLoc(), "out of range literal value");
3648 getStreamer().emitIntValue(IntValue, Size);
3649 } else {
3650 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value);
3651 if (MSE && MSE->getSymbol().getName() == "?") {
3652 // ? initializer; treat as 0.
3653 getStreamer().emitIntValue(0, Size);
3654 } else {
3655 getStreamer().emitValue(Value, Size, Value->getLoc());
3656 }
3657 }
3658 return false;
3659 }
3660
parseScalarInitializer(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,unsigned StringPadLength)3661 bool MasmParser::parseScalarInitializer(unsigned Size,
3662 SmallVectorImpl<const MCExpr *> &Values,
3663 unsigned StringPadLength) {
3664 if (Size == 1 && getTok().is(AsmToken::String)) {
3665 std::string Value;
3666 if (parseEscapedString(Value))
3667 return true;
3668 // Treat each character as an initializer.
3669 for (const unsigned char CharVal : Value)
3670 Values.push_back(MCConstantExpr::create(CharVal, getContext()));
3671
3672 // Pad the string with spaces to the specified length.
3673 for (size_t i = Value.size(); i < StringPadLength; ++i)
3674 Values.push_back(MCConstantExpr::create(' ', getContext()));
3675 } else {
3676 const MCExpr *Value;
3677 if (parseExpression(Value))
3678 return true;
3679 if (getTok().is(AsmToken::Identifier) &&
3680 getTok().getString().equals_insensitive("dup")) {
3681 Lex(); // Eat 'dup'.
3682 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3683 if (!MCE)
3684 return Error(Value->getLoc(),
3685 "cannot repeat value a non-constant number of times");
3686 const int64_t Repetitions = MCE->getValue();
3687 if (Repetitions < 0)
3688 return Error(Value->getLoc(),
3689 "cannot repeat value a negative number of times");
3690
3691 SmallVector<const MCExpr *, 1> DuplicatedValues;
3692 if (parseToken(AsmToken::LParen,
3693 "parentheses required for 'dup' contents") ||
3694 parseScalarInstList(Size, DuplicatedValues) || parseRParen())
3695 return true;
3696
3697 for (int i = 0; i < Repetitions; ++i)
3698 Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
3699 } else {
3700 Values.push_back(Value);
3701 }
3702 }
3703 return false;
3704 }
3705
parseScalarInstList(unsigned Size,SmallVectorImpl<const MCExpr * > & Values,const AsmToken::TokenKind EndToken)3706 bool MasmParser::parseScalarInstList(unsigned Size,
3707 SmallVectorImpl<const MCExpr *> &Values,
3708 const AsmToken::TokenKind EndToken) {
3709 while (getTok().isNot(EndToken) &&
3710 (EndToken != AsmToken::Greater ||
3711 getTok().isNot(AsmToken::GreaterGreater))) {
3712 parseScalarInitializer(Size, Values);
3713
3714 // If we see a comma, continue, and allow line continuation.
3715 if (!parseOptionalToken(AsmToken::Comma))
3716 break;
3717 parseOptionalToken(AsmToken::EndOfStatement);
3718 }
3719 return false;
3720 }
3721
emitIntegralValues(unsigned Size,unsigned * Count)3722 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) {
3723 SmallVector<const MCExpr *, 1> Values;
3724 if (checkForValidSection() || parseScalarInstList(Size, Values))
3725 return true;
3726
3727 for (const auto *Value : Values) {
3728 emitIntValue(Value, Size);
3729 }
3730 if (Count)
3731 *Count = Values.size();
3732 return false;
3733 }
3734
3735 // Add a field to the current structure.
addIntegralField(StringRef Name,unsigned Size)3736 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) {
3737 StructInfo &Struct = StructInProgress.back();
3738 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size);
3739 IntFieldInfo &IntInfo = Field.Contents.IntInfo;
3740
3741 Field.Type = Size;
3742
3743 if (parseScalarInstList(Size, IntInfo.Values))
3744 return true;
3745
3746 Field.SizeOf = Field.Type * IntInfo.Values.size();
3747 Field.LengthOf = IntInfo.Values.size();
3748 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3749 if (!Struct.IsUnion) {
3750 Struct.NextOffset = FieldEnd;
3751 }
3752 Struct.Size = std::max(Struct.Size, FieldEnd);
3753 return false;
3754 }
3755
3756 /// parseDirectiveValue
3757 /// ::= (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveValue(StringRef IDVal,unsigned Size)3758 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
3759 if (StructInProgress.empty()) {
3760 // Initialize data value.
3761 if (emitIntegralValues(Size))
3762 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3763 } else if (addIntegralField("", Size)) {
3764 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3765 }
3766
3767 return false;
3768 }
3769
3770 /// parseDirectiveNamedValue
3771 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedValue(StringRef TypeName,unsigned Size,StringRef Name,SMLoc NameLoc)3772 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size,
3773 StringRef Name, SMLoc NameLoc) {
3774 if (StructInProgress.empty()) {
3775 // Initialize named data value.
3776 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3777 getStreamer().emitLabel(Sym);
3778 unsigned Count;
3779 if (emitIntegralValues(Size, &Count))
3780 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3781
3782 AsmTypeInfo Type;
3783 Type.Name = TypeName;
3784 Type.Size = Size * Count;
3785 Type.ElementSize = Size;
3786 Type.Length = Count;
3787 KnownType[Name.lower()] = Type;
3788 } else if (addIntegralField(Name, Size)) {
3789 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive");
3790 }
3791
3792 return false;
3793 }
3794
parseHexOcta(MasmParser & Asm,uint64_t & hi,uint64_t & lo)3795 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
3796 if (Asm.getTok().isNot(AsmToken::Integer) &&
3797 Asm.getTok().isNot(AsmToken::BigNum))
3798 return Asm.TokError("unknown token in expression");
3799 SMLoc ExprLoc = Asm.getTok().getLoc();
3800 APInt IntValue = Asm.getTok().getAPIntVal();
3801 Asm.Lex();
3802 if (!IntValue.isIntN(128))
3803 return Asm.Error(ExprLoc, "out of range literal value");
3804 if (!IntValue.isIntN(64)) {
3805 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
3806 lo = IntValue.getLoBits(64).getZExtValue();
3807 } else {
3808 hi = 0;
3809 lo = IntValue.getZExtValue();
3810 }
3811 return false;
3812 }
3813
/// Parse a single floating-point initializer and store its bit pattern in
/// \p Res.
///
/// Accepted forms, as handled below:
///   - an optional leading '+' or '-';
///   - the identifiers "infinity"/"inf", "nan", or "?" (case-insensitive),
///     where "?" yields zero;
///   - a numeric token whose spelling ends in 'r'/'R', taken as raw
///     hexadecimal bits; it must have exactly one hex digit per four bits of
///     the target format;
///   - any other Integer/Real token, converted with
///     APFloat::convertFromString using round-to-nearest-even.
///
/// \param Semantics floating-point format of the value being parsed.
/// \param Res receives the encoded value as an integer bit pattern.
/// \returns true on error. For a hex literal that carried an explicit sign,
///          returns whatever Warning() returns.
bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
  // We don't truly support arithmetic on floating point expressions, so we
  // have to manually parse unary prefixes.
  bool IsNeg = false;
  // Stays invalid unless an explicit sign token was seen.
  SMLoc SignLoc;
  if (getLexer().is(AsmToken::Minus)) {
    SignLoc = getLexer().getLoc();
    Lexer.Lex();
    IsNeg = true;
  } else if (getLexer().is(AsmToken::Plus)) {
    SignLoc = getLexer().getLoc();
    Lexer.Lex();
  }

  if (Lexer.is(AsmToken::Error))
    return TokError(Lexer.getErr());
  if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
      Lexer.isNot(AsmToken::Identifier))
    return TokError("unexpected token in directive");

  // Convert to an APFloat.
  APFloat Value(Semantics);
  StringRef IDVal = getTok().getString();
  if (getLexer().is(AsmToken::Identifier)) {
    // Named special values; matching ignores case.
    if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf"))
      Value = APFloat::getInf(Semantics);
    else if (IDVal.equals_insensitive("nan"))
      Value = APFloat::getNaN(Semantics, false, ~0);
    else if (IDVal.equals_insensitive("?"))
      Value = APFloat::getZero(Semantics);
    else
      return TokError("invalid floating point literal");
  } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) {
    // MASM hexadecimal floating-point literal; no APFloat conversion needed.
    // To match ML64.exe, ignore the initial sign.
    unsigned SizeInBits = Value.getSizeInBits(Semantics);
    // Each hex digit supplies four bits; the digit count must match the
    // format width exactly.
    if (SizeInBits != (IDVal.size() << 2))
      return TokError("invalid floating point literal");

    // Consume the numeric token.
    Lex();

    Res = APInt(SizeInBits, IDVal, 16);
    if (SignLoc.isValid())
      return Warning(SignLoc, "MASM-style hex floats ignore explicit sign");
    return false;
  } else if (errorToBool(
                 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven)
                     .takeError())) {
    return TokError("invalid floating point literal");
  }
  // Apply a leading '-' to named and decimal values (the hex form returned
  // above without using it).
  if (IsNeg)
    Value.changeSign();

  // Consume the numeric token.
  Lex();

  Res = Value.bitcastToAPInt();

  return false;
}
3875
parseRealInstList(const fltSemantics & Semantics,SmallVectorImpl<APInt> & ValuesAsInt,const AsmToken::TokenKind EndToken)3876 bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
3877 SmallVectorImpl<APInt> &ValuesAsInt,
3878 const AsmToken::TokenKind EndToken) {
3879 while (getTok().isNot(EndToken) ||
3880 (EndToken == AsmToken::Greater &&
3881 getTok().isNot(AsmToken::GreaterGreater))) {
3882 const AsmToken NextTok = peekTok();
3883 if (NextTok.is(AsmToken::Identifier) &&
3884 NextTok.getString().equals_insensitive("dup")) {
3885 const MCExpr *Value;
3886 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
3887 return true;
3888 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
3889 if (!MCE)
3890 return Error(Value->getLoc(),
3891 "cannot repeat value a non-constant number of times");
3892 const int64_t Repetitions = MCE->getValue();
3893 if (Repetitions < 0)
3894 return Error(Value->getLoc(),
3895 "cannot repeat value a negative number of times");
3896
3897 SmallVector<APInt, 1> DuplicatedValues;
3898 if (parseToken(AsmToken::LParen,
3899 "parentheses required for 'dup' contents") ||
3900 parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
3901 return true;
3902
3903 for (int i = 0; i < Repetitions; ++i)
3904 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
3905 } else {
3906 APInt AsInt;
3907 if (parseRealValue(Semantics, AsInt))
3908 return true;
3909 ValuesAsInt.push_back(AsInt);
3910 }
3911
3912 // Continue if we see a comma. (Also, allow line continuation.)
3913 if (!parseOptionalToken(AsmToken::Comma))
3914 break;
3915 parseOptionalToken(AsmToken::EndOfStatement);
3916 }
3917
3918 return false;
3919 }
3920
3921 // Initialize real data values.
emitRealValues(const fltSemantics & Semantics,unsigned * Count)3922 bool MasmParser::emitRealValues(const fltSemantics &Semantics,
3923 unsigned *Count) {
3924 if (checkForValidSection())
3925 return true;
3926
3927 SmallVector<APInt, 1> ValuesAsInt;
3928 if (parseRealInstList(Semantics, ValuesAsInt))
3929 return true;
3930
3931 for (const APInt &AsInt : ValuesAsInt) {
3932 getStreamer().emitIntValue(AsInt);
3933 }
3934 if (Count)
3935 *Count = ValuesAsInt.size();
3936 return false;
3937 }
3938
3939 // Add a real field to the current struct.
addRealField(StringRef Name,const fltSemantics & Semantics,size_t Size)3940 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics,
3941 size_t Size) {
3942 StructInfo &Struct = StructInProgress.back();
3943 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size);
3944 RealFieldInfo &RealInfo = Field.Contents.RealInfo;
3945
3946 Field.SizeOf = 0;
3947
3948 if (parseRealInstList(Semantics, RealInfo.AsIntValues))
3949 return true;
3950
3951 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8;
3952 Field.LengthOf = RealInfo.AsIntValues.size();
3953 Field.SizeOf = Field.Type * Field.LengthOf;
3954
3955 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
3956 if (!Struct.IsUnion) {
3957 Struct.NextOffset = FieldEnd;
3958 }
3959 Struct.Size = std::max(Struct.Size, FieldEnd);
3960 return false;
3961 }
3962
3963 /// parseDirectiveRealValue
3964 /// ::= (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveRealValue(StringRef IDVal,const fltSemantics & Semantics,size_t Size)3965 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
3966 const fltSemantics &Semantics,
3967 size_t Size) {
3968 if (StructInProgress.empty()) {
3969 // Initialize data value.
3970 if (emitRealValues(Semantics))
3971 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3972 } else if (addRealField("", Semantics, Size)) {
3973 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
3974 }
3975 return false;
3976 }
3977
3978 /// parseDirectiveNamedRealValue
3979 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ]
parseDirectiveNamedRealValue(StringRef TypeName,const fltSemantics & Semantics,unsigned Size,StringRef Name,SMLoc NameLoc)3980 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName,
3981 const fltSemantics &Semantics,
3982 unsigned Size, StringRef Name,
3983 SMLoc NameLoc) {
3984 if (StructInProgress.empty()) {
3985 // Initialize named data value.
3986 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
3987 getStreamer().emitLabel(Sym);
3988 unsigned Count;
3989 if (emitRealValues(Semantics, &Count))
3990 return addErrorSuffix(" in '" + TypeName + "' directive");
3991
3992 AsmTypeInfo Type;
3993 Type.Name = TypeName;
3994 Type.Size = Size * Count;
3995 Type.ElementSize = Size;
3996 Type.Length = Count;
3997 KnownType[Name.lower()] = Type;
3998 } else if (addRealField(Name, Semantics, Size)) {
3999 return addErrorSuffix(" in '" + TypeName + "' directive");
4000 }
4001 return false;
4002 }
4003
parseOptionalAngleBracketOpen()4004 bool MasmParser::parseOptionalAngleBracketOpen() {
4005 const AsmToken Tok = getTok();
4006 if (parseOptionalToken(AsmToken::LessLess)) {
4007 AngleBracketDepth++;
4008 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1)));
4009 return true;
4010 } else if (parseOptionalToken(AsmToken::LessGreater)) {
4011 AngleBracketDepth++;
4012 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4013 return true;
4014 } else if (parseOptionalToken(AsmToken::Less)) {
4015 AngleBracketDepth++;
4016 return true;
4017 }
4018
4019 return false;
4020 }
4021
parseAngleBracketClose(const Twine & Msg)4022 bool MasmParser::parseAngleBracketClose(const Twine &Msg) {
4023 const AsmToken Tok = getTok();
4024 if (parseOptionalToken(AsmToken::GreaterGreater)) {
4025 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1)));
4026 } else if (parseToken(AsmToken::Greater, Msg)) {
4027 return true;
4028 }
4029 AngleBracketDepth--;
4030 return false;
4031 }
4032
parseFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,FieldInitializer & Initializer)4033 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4034 const IntFieldInfo &Contents,
4035 FieldInitializer &Initializer) {
4036 SMLoc Loc = getTok().getLoc();
4037
4038 SmallVector<const MCExpr *, 1> Values;
4039 if (parseOptionalToken(AsmToken::LCurly)) {
4040 if (Field.LengthOf == 1 && Field.Type > 1)
4041 return Error(Loc, "Cannot initialize scalar field with array value");
4042 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) ||
4043 parseToken(AsmToken::RCurly))
4044 return true;
4045 } else if (parseOptionalAngleBracketOpen()) {
4046 if (Field.LengthOf == 1 && Field.Type > 1)
4047 return Error(Loc, "Cannot initialize scalar field with array value");
4048 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) ||
4049 parseAngleBracketClose())
4050 return true;
4051 } else if (Field.LengthOf > 1 && Field.Type > 1) {
4052 return Error(Loc, "Cannot initialize array field with scalar value");
4053 } else if (parseScalarInitializer(Field.Type, Values,
4054 /*StringPadLength=*/Field.LengthOf)) {
4055 return true;
4056 }
4057
4058 if (Values.size() > Field.LengthOf) {
4059 return Error(Loc, "Initializer too long for field; expected at most " +
4060 std::to_string(Field.LengthOf) + " elements, got " +
4061 std::to_string(Values.size()));
4062 }
4063 // Default-initialize all remaining values.
4064 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end());
4065
4066 Initializer = FieldInitializer(std::move(Values));
4067 return false;
4068 }
4069
parseFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,FieldInitializer & Initializer)4070 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4071 const RealFieldInfo &Contents,
4072 FieldInitializer &Initializer) {
4073 const fltSemantics *Semantics;
4074 switch (Field.Type) {
4075 case 4:
4076 Semantics = &APFloat::IEEEsingle();
4077 break;
4078 case 8:
4079 Semantics = &APFloat::IEEEdouble();
4080 break;
4081 case 10:
4082 Semantics = &APFloat::x87DoubleExtended();
4083 break;
4084 default:
4085 llvm_unreachable("unknown real field type");
4086 }
4087
4088 SMLoc Loc = getTok().getLoc();
4089
4090 SmallVector<APInt, 1> AsIntValues;
4091 if (parseOptionalToken(AsmToken::LCurly)) {
4092 if (Field.LengthOf == 1)
4093 return Error(Loc, "Cannot initialize scalar field with array value");
4094 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) ||
4095 parseToken(AsmToken::RCurly))
4096 return true;
4097 } else if (parseOptionalAngleBracketOpen()) {
4098 if (Field.LengthOf == 1)
4099 return Error(Loc, "Cannot initialize scalar field with array value");
4100 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) ||
4101 parseAngleBracketClose())
4102 return true;
4103 } else if (Field.LengthOf > 1) {
4104 return Error(Loc, "Cannot initialize array field with scalar value");
4105 } else {
4106 AsIntValues.emplace_back();
4107 if (parseRealValue(*Semantics, AsIntValues.back()))
4108 return true;
4109 }
4110
4111 if (AsIntValues.size() > Field.LengthOf) {
4112 return Error(Loc, "Initializer too long for field; expected at most " +
4113 std::to_string(Field.LengthOf) + " elements, got " +
4114 std::to_string(AsIntValues.size()));
4115 }
4116 // Default-initialize all remaining values.
4117 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(),
4118 Contents.AsIntValues.end());
4119
4120 Initializer = FieldInitializer(std::move(AsIntValues));
4121 return false;
4122 }
4123
parseFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,FieldInitializer & Initializer)4124 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4125 const StructFieldInfo &Contents,
4126 FieldInitializer &Initializer) {
4127 SMLoc Loc = getTok().getLoc();
4128
4129 std::vector<StructInitializer> Initializers;
4130 if (Field.LengthOf > 1) {
4131 if (parseOptionalToken(AsmToken::LCurly)) {
4132 if (parseStructInstList(Contents.Structure, Initializers,
4133 AsmToken::RCurly) ||
4134 parseToken(AsmToken::RCurly))
4135 return true;
4136 } else if (parseOptionalAngleBracketOpen()) {
4137 if (parseStructInstList(Contents.Structure, Initializers,
4138 AsmToken::Greater) ||
4139 parseAngleBracketClose())
4140 return true;
4141 } else {
4142 return Error(Loc, "Cannot initialize array field with scalar value");
4143 }
4144 } else {
4145 Initializers.emplace_back();
4146 if (parseStructInitializer(Contents.Structure, Initializers.back()))
4147 return true;
4148 }
4149
4150 if (Initializers.size() > Field.LengthOf) {
4151 return Error(Loc, "Initializer too long for field; expected at most " +
4152 std::to_string(Field.LengthOf) + " elements, got " +
4153 std::to_string(Initializers.size()));
4154 }
4155 // Default-initialize all remaining values.
4156 Initializers.insert(Initializers.end(),
4157 Contents.Initializers.begin() + Initializers.size(),
4158 Contents.Initializers.end());
4159
4160 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure);
4161 return false;
4162 }
4163
parseFieldInitializer(const FieldInfo & Field,FieldInitializer & Initializer)4164 bool MasmParser::parseFieldInitializer(const FieldInfo &Field,
4165 FieldInitializer &Initializer) {
4166 switch (Field.Contents.FT) {
4167 case FT_INTEGRAL:
4168 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer);
4169 case FT_REAL:
4170 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer);
4171 case FT_STRUCT:
4172 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer);
4173 }
4174 llvm_unreachable("Unhandled FieldType enum");
4175 }
4176
/// Parse one initializer for a value of type \p Structure.
///
/// Three forms are accepted: a `{...}` list, a `<...>` list, or the single
/// identifier "?", which is treated as an empty initializer. Inside a list,
/// fields are initialized in declaration order; an empty slot (a bare comma)
/// takes the field's default. Fields not covered by the list are
/// default-initialized.
///
/// \param Structure the struct type being initialized.
/// \param Initializer receives one FieldInitializer per field of
///        \p Structure on success.
/// \returns true on error.
bool MasmParser::parseStructInitializer(const StructInfo &Structure,
                                        StructInitializer &Initializer) {
  const AsmToken FirstToken = getTok();

  // Set only for the bracketed forms; left empty for "?".
  std::optional<AsmToken::TokenKind> EndToken;
  if (parseOptionalToken(AsmToken::LCurly)) {
    EndToken = AsmToken::RCurly;
  } else if (parseOptionalAngleBracketOpen()) {
    EndToken = AsmToken::Greater;
    // NOTE(review): parseOptionalAngleBracketOpen() already increments
    // AngleBracketDepth on every successful path, and parseAngleBracketClose()
    // decrements it only once, so this extra increment looks unbalanced.
    // Confirm whether it is intentional.
    AngleBracketDepth++;
  } else if (FirstToken.is(AsmToken::Identifier) &&
             FirstToken.getString() == "?") {
    // ? initializer; leave EndToken uninitialized to treat as empty.
    if (parseToken(AsmToken::Identifier))
      return true;
  } else {
    return Error(FirstToken.getLoc(), "Expected struct initializer");
  }

  auto &FieldInitializers = Initializer.FieldInitializers;
  // Index of the next field of Structure that has not been initialized yet.
  size_t FieldIndex = 0;
  if (EndToken) {
    // Initialize all fields with given initializers.
    while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) {
      const FieldInfo &Field = Structure.Fields[FieldIndex++];
      if (parseOptionalToken(AsmToken::Comma)) {
        // Empty initializer; use the default and continue. (Also, allow line
        // continuation.)
        FieldInitializers.push_back(Field.Contents);
        parseOptionalToken(AsmToken::EndOfStatement);
        continue;
      }
      FieldInitializers.emplace_back(Field.Contents.FT);
      if (parseFieldInitializer(Field, FieldInitializers.back()))
        return true;

      // Continue if we see a comma. (Also, allow line continuation.)
      SMLoc CommaLoc = getTok().getLoc();
      if (!parseOptionalToken(AsmToken::Comma))
        break;
      // A comma after the last field's initializer means the list has more
      // entries than the structure has fields.
      if (FieldIndex == Structure.Fields.size())
        return Error(CommaLoc, "'" + Structure.Name +
                                   "' initializer initializes too many fields");
      parseOptionalToken(AsmToken::EndOfStatement);
    }
  }
  // Default-initialize all remaining fields.
  for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex))
    FieldInitializers.push_back(Field.Contents);

  // Consume the closing delimiter that matches the opening one.
  if (EndToken) {
    if (*EndToken == AsmToken::Greater)
      return parseAngleBracketClose();

    return parseToken(*EndToken);
  }

  return false;
}
4236
parseStructInstList(const StructInfo & Structure,std::vector<StructInitializer> & Initializers,const AsmToken::TokenKind EndToken)4237 bool MasmParser::parseStructInstList(
4238 const StructInfo &Structure, std::vector<StructInitializer> &Initializers,
4239 const AsmToken::TokenKind EndToken) {
4240 while (getTok().isNot(EndToken) ||
4241 (EndToken == AsmToken::Greater &&
4242 getTok().isNot(AsmToken::GreaterGreater))) {
4243 const AsmToken NextTok = peekTok();
4244 if (NextTok.is(AsmToken::Identifier) &&
4245 NextTok.getString().equals_insensitive("dup")) {
4246 const MCExpr *Value;
4247 if (parseExpression(Value) || parseToken(AsmToken::Identifier))
4248 return true;
4249 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
4250 if (!MCE)
4251 return Error(Value->getLoc(),
4252 "cannot repeat value a non-constant number of times");
4253 const int64_t Repetitions = MCE->getValue();
4254 if (Repetitions < 0)
4255 return Error(Value->getLoc(),
4256 "cannot repeat value a negative number of times");
4257
4258 std::vector<StructInitializer> DuplicatedValues;
4259 if (parseToken(AsmToken::LParen,
4260 "parentheses required for 'dup' contents") ||
4261 parseStructInstList(Structure, DuplicatedValues) || parseRParen())
4262 return true;
4263
4264 for (int i = 0; i < Repetitions; ++i)
4265 llvm::append_range(Initializers, DuplicatedValues);
4266 } else {
4267 Initializers.emplace_back();
4268 if (parseStructInitializer(Structure, Initializers.back()))
4269 return true;
4270 }
4271
4272 // Continue if we see a comma. (Also, allow line continuation.)
4273 if (!parseOptionalToken(AsmToken::Comma))
4274 break;
4275 parseOptionalToken(AsmToken::EndOfStatement);
4276 }
4277
4278 return false;
4279 }
4280
emitFieldValue(const FieldInfo & Field,const IntFieldInfo & Contents)4281 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4282 const IntFieldInfo &Contents) {
4283 // Default-initialize all values.
4284 for (const MCExpr *Value : Contents.Values) {
4285 if (emitIntValue(Value, Field.Type))
4286 return true;
4287 }
4288 return false;
4289 }
4290
emitFieldValue(const FieldInfo & Field,const RealFieldInfo & Contents)4291 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4292 const RealFieldInfo &Contents) {
4293 for (const APInt &AsInt : Contents.AsIntValues) {
4294 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4295 AsInt.getBitWidth() / 8);
4296 }
4297 return false;
4298 }
4299
emitFieldValue(const FieldInfo & Field,const StructFieldInfo & Contents)4300 bool MasmParser::emitFieldValue(const FieldInfo &Field,
4301 const StructFieldInfo &Contents) {
4302 for (const auto &Initializer : Contents.Initializers) {
4303 size_t Index = 0, Offset = 0;
4304 for (const auto &SubField : Contents.Structure.Fields) {
4305 getStreamer().emitZeros(SubField.Offset - Offset);
4306 Offset = SubField.Offset + SubField.SizeOf;
4307 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]);
4308 }
4309 }
4310 return false;
4311 }
4312
emitFieldValue(const FieldInfo & Field)4313 bool MasmParser::emitFieldValue(const FieldInfo &Field) {
4314 switch (Field.Contents.FT) {
4315 case FT_INTEGRAL:
4316 return emitFieldValue(Field, Field.Contents.IntInfo);
4317 case FT_REAL:
4318 return emitFieldValue(Field, Field.Contents.RealInfo);
4319 case FT_STRUCT:
4320 return emitFieldValue(Field, Field.Contents.StructInfo);
4321 }
4322 llvm_unreachable("Unhandled FieldType enum");
4323 }
4324
emitFieldInitializer(const FieldInfo & Field,const IntFieldInfo & Contents,const IntFieldInfo & Initializer)4325 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4326 const IntFieldInfo &Contents,
4327 const IntFieldInfo &Initializer) {
4328 for (const auto &Value : Initializer.Values) {
4329 if (emitIntValue(Value, Field.Type))
4330 return true;
4331 }
4332 // Default-initialize all remaining values.
4333 for (const auto &Value :
4334 llvm::drop_begin(Contents.Values, Initializer.Values.size())) {
4335 if (emitIntValue(Value, Field.Type))
4336 return true;
4337 }
4338 return false;
4339 }
4340
emitFieldInitializer(const FieldInfo & Field,const RealFieldInfo & Contents,const RealFieldInfo & Initializer)4341 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4342 const RealFieldInfo &Contents,
4343 const RealFieldInfo &Initializer) {
4344 for (const auto &AsInt : Initializer.AsIntValues) {
4345 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4346 AsInt.getBitWidth() / 8);
4347 }
4348 // Default-initialize all remaining values.
4349 for (const auto &AsInt :
4350 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) {
4351 getStreamer().emitIntValue(AsInt.getLimitedValue(),
4352 AsInt.getBitWidth() / 8);
4353 }
4354 return false;
4355 }
4356
emitFieldInitializer(const FieldInfo & Field,const StructFieldInfo & Contents,const StructFieldInfo & Initializer)4357 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4358 const StructFieldInfo &Contents,
4359 const StructFieldInfo &Initializer) {
4360 for (const auto &Init : Initializer.Initializers) {
4361 if (emitStructInitializer(Contents.Structure, Init))
4362 return true;
4363 }
4364 // Default-initialize all remaining values.
4365 for (const auto &Init : llvm::drop_begin(Contents.Initializers,
4366 Initializer.Initializers.size())) {
4367 if (emitStructInitializer(Contents.Structure, Init))
4368 return true;
4369 }
4370 return false;
4371 }
4372
emitFieldInitializer(const FieldInfo & Field,const FieldInitializer & Initializer)4373 bool MasmParser::emitFieldInitializer(const FieldInfo &Field,
4374 const FieldInitializer &Initializer) {
4375 switch (Field.Contents.FT) {
4376 case FT_INTEGRAL:
4377 return emitFieldInitializer(Field, Field.Contents.IntInfo,
4378 Initializer.IntInfo);
4379 case FT_REAL:
4380 return emitFieldInitializer(Field, Field.Contents.RealInfo,
4381 Initializer.RealInfo);
4382 case FT_STRUCT:
4383 return emitFieldInitializer(Field, Field.Contents.StructInfo,
4384 Initializer.StructInfo);
4385 }
4386 llvm_unreachable("Unhandled FieldType enum");
4387 }
4388
emitStructInitializer(const StructInfo & Structure,const StructInitializer & Initializer)4389 bool MasmParser::emitStructInitializer(const StructInfo &Structure,
4390 const StructInitializer &Initializer) {
4391 if (!Structure.Initializable)
4392 return Error(getLexer().getLoc(),
4393 "cannot initialize a value of type '" + Structure.Name +
4394 "'; 'org' was used in the type's declaration");
4395 size_t Index = 0, Offset = 0;
4396 for (const auto &Init : Initializer.FieldInitializers) {
4397 const auto &Field = Structure.Fields[Index++];
4398 getStreamer().emitZeros(Field.Offset - Offset);
4399 Offset = Field.Offset + Field.SizeOf;
4400 if (emitFieldInitializer(Field, Init))
4401 return true;
4402 }
4403 // Default-initialize all remaining fields.
4404 for (const auto &Field : llvm::drop_begin(
4405 Structure.Fields, Initializer.FieldInitializers.size())) {
4406 getStreamer().emitZeros(Field.Offset - Offset);
4407 Offset = Field.Offset + Field.SizeOf;
4408 if (emitFieldValue(Field))
4409 return true;
4410 }
4411 // Add final padding.
4412 if (Offset != Structure.Size)
4413 getStreamer().emitZeros(Structure.Size - Offset);
4414 return false;
4415 }
4416
4417 // Set data values from initializers.
emitStructValues(const StructInfo & Structure,unsigned * Count)4418 bool MasmParser::emitStructValues(const StructInfo &Structure,
4419 unsigned *Count) {
4420 std::vector<StructInitializer> Initializers;
4421 if (parseStructInstList(Structure, Initializers))
4422 return true;
4423
4424 for (const auto &Initializer : Initializers) {
4425 if (emitStructInitializer(Structure, Initializer))
4426 return true;
4427 }
4428
4429 if (Count)
4430 *Count = Initializers.size();
4431 return false;
4432 }
4433
4434 // Declare a field in the current struct.
addStructField(StringRef Name,const StructInfo & Structure)4435 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) {
4436 StructInfo &OwningStruct = StructInProgress.back();
4437 FieldInfo &Field =
4438 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize);
4439 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4440
4441 StructInfo.Structure = Structure;
4442 Field.Type = Structure.Size;
4443
4444 if (parseStructInstList(Structure, StructInfo.Initializers))
4445 return true;
4446
4447 Field.LengthOf = StructInfo.Initializers.size();
4448 Field.SizeOf = Field.Type * Field.LengthOf;
4449
4450 const unsigned FieldEnd = Field.Offset + Field.SizeOf;
4451 if (!OwningStruct.IsUnion) {
4452 OwningStruct.NextOffset = FieldEnd;
4453 }
4454 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd);
4455
4456 return false;
4457 }
4458
4459 /// parseDirectiveStructValue
4460 /// ::= struct-id (<struct-initializer> | {struct-initializer})
4461 /// [, (<struct-initializer> | {struct-initializer})]*
parseDirectiveStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc)4462 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure,
4463 StringRef Directive, SMLoc DirLoc) {
4464 if (StructInProgress.empty()) {
4465 if (emitStructValues(Structure))
4466 return true;
4467 } else if (addStructField("", Structure)) {
4468 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4469 }
4470
4471 return false;
4472 }
4473
4474 /// parseDirectiveNamedValue
4475 /// ::= name (byte | word | ... ) [ expression (, expression)* ]
parseDirectiveNamedStructValue(const StructInfo & Structure,StringRef Directive,SMLoc DirLoc,StringRef Name)4476 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure,
4477 StringRef Directive,
4478 SMLoc DirLoc, StringRef Name) {
4479 if (StructInProgress.empty()) {
4480 // Initialize named data value.
4481 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
4482 getStreamer().emitLabel(Sym);
4483 unsigned Count;
4484 if (emitStructValues(Structure, &Count))
4485 return true;
4486 AsmTypeInfo Type;
4487 Type.Name = Structure.Name;
4488 Type.Size = Structure.Size * Count;
4489 Type.ElementSize = Structure.Size;
4490 Type.Length = Count;
4491 KnownType[Name.lower()] = Type;
4492 } else if (addStructField(Name, Structure)) {
4493 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4494 }
4495
4496 return false;
4497 }
4498
4499 /// parseDirectiveStruct
4500 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE]
4501 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4502 /// <name> ENDS
4503 ////// dataDir = data declaration
4504 ////// offsetDir = EVEN, ORG, ALIGN
parseDirectiveStruct(StringRef Directive,DirectiveKind DirKind,StringRef Name,SMLoc NameLoc)4505 bool MasmParser::parseDirectiveStruct(StringRef Directive,
4506 DirectiveKind DirKind, StringRef Name,
4507 SMLoc NameLoc) {
4508 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS
4509 // anyway, so all field accesses must be qualified.
4510 AsmToken NextTok = getTok();
4511 int64_t AlignmentValue = 1;
4512 if (NextTok.isNot(AsmToken::Comma) &&
4513 NextTok.isNot(AsmToken::EndOfStatement) &&
4514 parseAbsoluteExpression(AlignmentValue)) {
4515 return addErrorSuffix(" in alignment value for '" + Twine(Directive) +
4516 "' directive");
4517 }
4518 if (!isPowerOf2_64(AlignmentValue)) {
4519 return Error(NextTok.getLoc(), "alignment must be a power of two; was " +
4520 std::to_string(AlignmentValue));
4521 }
4522
4523 StringRef Qualifier;
4524 SMLoc QualifierLoc;
4525 if (parseOptionalToken(AsmToken::Comma)) {
4526 QualifierLoc = getTok().getLoc();
4527 if (parseIdentifier(Qualifier))
4528 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4529 if (!Qualifier.equals_insensitive("nonunique"))
4530 return Error(QualifierLoc, "Unrecognized qualifier for '" +
4531 Twine(Directive) +
4532 "' directive; expected none or NONUNIQUE");
4533 }
4534
4535 if (parseEOL())
4536 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4537
4538 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
4539 return false;
4540 }
4541
4542 /// parseDirectiveNestedStruct
4543 /// ::= (STRUC | STRUCT | UNION) [name]
4544 /// (dataDir | generalDir | offsetDir | nestedStruct)+
4545 /// ENDS
parseDirectiveNestedStruct(StringRef Directive,DirectiveKind DirKind)4546 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
4547 DirectiveKind DirKind) {
4548 if (StructInProgress.empty())
4549 return TokError("missing name in top-level '" + Twine(Directive) +
4550 "' directive");
4551
4552 StringRef Name;
4553 if (getTok().is(AsmToken::Identifier)) {
4554 Name = getTok().getIdentifier();
4555 parseToken(AsmToken::Identifier);
4556 }
4557 if (parseEOL())
4558 return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
4559
4560 // Reserve space to ensure Alignment doesn't get invalidated when
4561 // StructInProgress grows.
4562 StructInProgress.reserve(StructInProgress.size() + 1);
4563 StructInProgress.emplace_back(Name, DirKind == DK_UNION,
4564 StructInProgress.back().Alignment);
4565 return false;
4566 }
4567
parseDirectiveEnds(StringRef Name,SMLoc NameLoc)4568 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
4569 if (StructInProgress.empty())
4570 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION");
4571 if (StructInProgress.size() > 1)
4572 return Error(NameLoc, "unexpected name in nested ENDS directive");
4573 if (StructInProgress.back().Name.compare_insensitive(Name))
4574 return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
4575 StructInProgress.back().Name + "'");
4576 StructInfo Structure = StructInProgress.pop_back_val();
4577 // Pad to make the structure's size divisible by the smaller of its alignment
4578 // and the size of its largest field.
4579 Structure.Size = llvm::alignTo(
4580 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
4581 Structs[Name.lower()] = Structure;
4582
4583 if (parseEOL())
4584 return addErrorSuffix(" in ENDS directive");
4585
4586 return false;
4587 }
4588
parseDirectiveNestedEnds()4589 bool MasmParser::parseDirectiveNestedEnds() {
4590 if (StructInProgress.empty())
4591 return TokError("ENDS directive without matching STRUC/STRUCT/UNION");
4592 if (StructInProgress.size() == 1)
4593 return TokError("missing name in top-level ENDS directive");
4594
4595 if (parseEOL())
4596 return addErrorSuffix(" in nested ENDS directive");
4597
4598 StructInfo Structure = StructInProgress.pop_back_val();
4599 // Pad to make the structure's size divisible by its alignment.
4600 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
4601
4602 StructInfo &ParentStruct = StructInProgress.back();
4603 if (Structure.Name.empty()) {
4604 // Anonymous substructures' fields are addressed as if they belong to the
4605 // parent structure - so we transfer them to the parent here.
4606 const size_t OldFields = ParentStruct.Fields.size();
4607 ParentStruct.Fields.insert(
4608 ParentStruct.Fields.end(),
4609 std::make_move_iterator(Structure.Fields.begin()),
4610 std::make_move_iterator(Structure.Fields.end()));
4611 for (const auto &FieldByName : Structure.FieldsByName) {
4612 ParentStruct.FieldsByName[FieldByName.getKey()] =
4613 FieldByName.getValue() + OldFields;
4614 }
4615
4616 unsigned FirstFieldOffset = 0;
4617 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) {
4618 FirstFieldOffset = llvm::alignTo(
4619 ParentStruct.NextOffset,
4620 std::min(ParentStruct.Alignment, Structure.AlignmentSize));
4621 }
4622
4623 if (ParentStruct.IsUnion) {
4624 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
4625 } else {
4626 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields))
4627 Field.Offset += FirstFieldOffset;
4628
4629 const unsigned StructureEnd = FirstFieldOffset + Structure.Size;
4630 if (!ParentStruct.IsUnion) {
4631 ParentStruct.NextOffset = StructureEnd;
4632 }
4633 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4634 }
4635 } else {
4636 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT,
4637 Structure.AlignmentSize);
4638 StructFieldInfo &StructInfo = Field.Contents.StructInfo;
4639 Field.Type = Structure.Size;
4640 Field.LengthOf = 1;
4641 Field.SizeOf = Structure.Size;
4642
4643 const unsigned StructureEnd = Field.Offset + Field.SizeOf;
4644 if (!ParentStruct.IsUnion) {
4645 ParentStruct.NextOffset = StructureEnd;
4646 }
4647 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd);
4648
4649 StructInfo.Structure = Structure;
4650 StructInfo.Initializers.emplace_back();
4651 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
4652 for (const auto &SubField : Structure.Fields) {
4653 FieldInitializers.push_back(SubField.Contents);
4654 }
4655 }
4656
4657 return false;
4658 }
4659
4660 /// parseDirectiveOrg
4661 /// ::= org expression
parseDirectiveOrg()4662 bool MasmParser::parseDirectiveOrg() {
4663 const MCExpr *Offset;
4664 SMLoc OffsetLoc = Lexer.getLoc();
4665 if (checkForValidSection() || parseExpression(Offset))
4666 return true;
4667 if (parseEOL())
4668 return addErrorSuffix(" in 'org' directive");
4669
4670 if (StructInProgress.empty()) {
4671 // Not in a struct; change the offset for the next instruction or data
4672 if (checkForValidSection())
4673 return addErrorSuffix(" in 'org' directive");
4674
4675 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc);
4676 } else {
4677 // Offset the next field of this struct
4678 StructInfo &Structure = StructInProgress.back();
4679 int64_t OffsetRes;
4680 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr()))
4681 return Error(OffsetLoc,
4682 "expected absolute expression in 'org' directive");
4683 if (OffsetRes < 0)
4684 return Error(
4685 OffsetLoc,
4686 "expected non-negative value in struct's 'org' directive; was " +
4687 std::to_string(OffsetRes));
4688 Structure.NextOffset = static_cast<unsigned>(OffsetRes);
4689
4690 // ORG-affected structures cannot be initialized
4691 Structure.Initializable = false;
4692 }
4693
4694 return false;
4695 }
4696
emitAlignTo(int64_t Alignment)4697 bool MasmParser::emitAlignTo(int64_t Alignment) {
4698 if (StructInProgress.empty()) {
4699 // Not in a struct; align the next instruction or data
4700 if (checkForValidSection())
4701 return true;
4702
4703 // Check whether we should use optimal code alignment for this align
4704 // directive.
4705 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4706 assert(Section && "must have section to emit alignment");
4707 if (Section->useCodeAlign()) {
4708 getStreamer().emitCodeAlignment(Align(Alignment),
4709 &getTargetParser().getSTI(),
4710 /*MaxBytesToEmit=*/0);
4711 } else {
4712 // FIXME: Target specific behavior about how the "extra" bytes are filled.
4713 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0,
4714 /*ValueSize=*/1,
4715 /*MaxBytesToEmit=*/0);
4716 }
4717 } else {
4718 // Align the next field of this struct
4719 StructInfo &Structure = StructInProgress.back();
4720 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment);
4721 }
4722
4723 return false;
4724 }
4725
4726 /// parseDirectiveAlign
4727 /// ::= align expression
parseDirectiveAlign()4728 bool MasmParser::parseDirectiveAlign() {
4729 SMLoc AlignmentLoc = getLexer().getLoc();
4730 int64_t Alignment;
4731
4732 // Ignore empty 'align' directives.
4733 if (getTok().is(AsmToken::EndOfStatement)) {
4734 return Warning(AlignmentLoc,
4735 "align directive with no operand is ignored") &&
4736 parseEOL();
4737 }
4738 if (parseAbsoluteExpression(Alignment) || parseEOL())
4739 return addErrorSuffix(" in align directive");
4740
4741 // Always emit an alignment here even if we throw an error.
4742 bool ReturnVal = false;
4743
4744 // Reject alignments that aren't either a power of two or zero, for ML.exe
4745 // compatibility. Alignment of zero is silently rounded up to one.
4746 if (Alignment == 0)
4747 Alignment = 1;
4748 if (!isPowerOf2_64(Alignment))
4749 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " +
4750 std::to_string(Alignment));
4751
4752 if (emitAlignTo(Alignment))
4753 ReturnVal |= addErrorSuffix(" in align directive");
4754
4755 return ReturnVal;
4756 }
4757
4758 /// parseDirectiveEven
4759 /// ::= even
parseDirectiveEven()4760 bool MasmParser::parseDirectiveEven() {
4761 if (parseEOL() || emitAlignTo(2))
4762 return addErrorSuffix(" in even directive");
4763
4764 return false;
4765 }
4766
4767 /// parseDirectiveFile
4768 /// ::= .file filename
4769 /// ::= .file number [directory] filename [md5 checksum] [source source-text]
parseDirectiveFile(SMLoc DirectiveLoc)4770 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
4771 // FIXME: I'm not sure what this is.
4772 int64_t FileNumber = -1;
4773 if (getLexer().is(AsmToken::Integer)) {
4774 FileNumber = getTok().getIntVal();
4775 Lex();
4776
4777 if (FileNumber < 0)
4778 return TokError("negative file number");
4779 }
4780
4781 std::string Path;
4782
4783 // Usually the directory and filename together, otherwise just the directory.
4784 // Allow the strings to have escaped octal character sequence.
4785 if (check(getTok().isNot(AsmToken::String),
4786 "unexpected token in '.file' directive") ||
4787 parseEscapedString(Path))
4788 return true;
4789
4790 StringRef Directory;
4791 StringRef Filename;
4792 std::string FilenameData;
4793 if (getLexer().is(AsmToken::String)) {
4794 if (check(FileNumber == -1,
4795 "explicit path specified, but no file number") ||
4796 parseEscapedString(FilenameData))
4797 return true;
4798 Filename = FilenameData;
4799 Directory = Path;
4800 } else {
4801 Filename = Path;
4802 }
4803
4804 uint64_t MD5Hi, MD5Lo;
4805 bool HasMD5 = false;
4806
4807 std::optional<StringRef> Source;
4808 bool HasSource = false;
4809 std::string SourceString;
4810
4811 while (!parseOptionalToken(AsmToken::EndOfStatement)) {
4812 StringRef Keyword;
4813 if (check(getTok().isNot(AsmToken::Identifier),
4814 "unexpected token in '.file' directive") ||
4815 parseIdentifier(Keyword))
4816 return true;
4817 if (Keyword == "md5") {
4818 HasMD5 = true;
4819 if (check(FileNumber == -1,
4820 "MD5 checksum specified, but no file number") ||
4821 parseHexOcta(*this, MD5Hi, MD5Lo))
4822 return true;
4823 } else if (Keyword == "source") {
4824 HasSource = true;
4825 if (check(FileNumber == -1,
4826 "source specified, but no file number") ||
4827 check(getTok().isNot(AsmToken::String),
4828 "unexpected token in '.file' directive") ||
4829 parseEscapedString(SourceString))
4830 return true;
4831 } else {
4832 return TokError("unexpected token in '.file' directive");
4833 }
4834 }
4835
4836 if (FileNumber == -1) {
4837 // Ignore the directive if there is no number and the target doesn't support
4838 // numberless .file directives. This allows some portability of assembler
4839 // between different object file formats.
4840 if (getContext().getAsmInfo()->hasSingleParameterDotFile())
4841 getStreamer().emitFileDirective(Filename);
4842 } else {
4843 // In case there is a -g option as well as debug info from directive .file,
4844 // we turn off the -g option, directly use the existing debug info instead.
4845 // Throw away any implicit file table for the assembler source.
4846 if (Ctx.getGenDwarfForAssembly()) {
4847 Ctx.getMCDwarfLineTable(0).resetFileTable();
4848 Ctx.setGenDwarfForAssembly(false);
4849 }
4850
4851 std::optional<MD5::MD5Result> CKMem;
4852 if (HasMD5) {
4853 MD5::MD5Result Sum;
4854 for (unsigned i = 0; i != 8; ++i) {
4855 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
4856 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
4857 }
4858 CKMem = Sum;
4859 }
4860 if (HasSource) {
4861 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size()));
4862 memcpy(SourceBuf, SourceString.data(), SourceString.size());
4863 Source = StringRef(SourceBuf, SourceString.size());
4864 }
4865 if (FileNumber == 0) {
4866 if (Ctx.getDwarfVersion() < 5)
4867 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5");
4868 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source);
4869 } else {
4870 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective(
4871 FileNumber, Directory, Filename, CKMem, Source);
4872 if (!FileNumOrErr)
4873 return Error(DirectiveLoc, toString(FileNumOrErr.takeError()));
4874 }
4875 // Alert the user if there are some .file directives with MD5 and some not.
4876 // But only do that once.
4877 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) {
4878 ReportedInconsistentMD5 = true;
4879 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums");
4880 }
4881 }
4882
4883 return false;
4884 }
4885
4886 /// parseDirectiveLine
4887 /// ::= .line [number]
parseDirectiveLine()4888 bool MasmParser::parseDirectiveLine() {
4889 int64_t LineNumber;
4890 if (getLexer().is(AsmToken::Integer)) {
4891 if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
4892 return true;
4893 (void)LineNumber;
4894 // FIXME: Do something with the .line.
4895 }
4896 if (parseEOL())
4897 return true;
4898
4899 return false;
4900 }
4901
4902 /// parseDirectiveLoc
4903 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
4904 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
4905 /// The first number is a file number, must have been previously assigned with
4906 /// a .file directive, the second number is the line number and optionally the
4907 /// third number is a column position (zero if not specified). The remaining
4908 /// optional items are .loc sub-directives.
parseDirectiveLoc()4909 bool MasmParser::parseDirectiveLoc() {
4910 int64_t FileNumber = 0, LineNumber = 0;
4911 SMLoc Loc = getTok().getLoc();
4912 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") ||
4913 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc,
4914 "file number less than one in '.loc' directive") ||
4915 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc,
4916 "unassigned file number in '.loc' directive"))
4917 return true;
4918
4919 // optional
4920 if (getLexer().is(AsmToken::Integer)) {
4921 LineNumber = getTok().getIntVal();
4922 if (LineNumber < 0)
4923 return TokError("line number less than zero in '.loc' directive");
4924 Lex();
4925 }
4926
4927 int64_t ColumnPos = 0;
4928 if (getLexer().is(AsmToken::Integer)) {
4929 ColumnPos = getTok().getIntVal();
4930 if (ColumnPos < 0)
4931 return TokError("column position less than zero in '.loc' directive");
4932 Lex();
4933 }
4934
4935 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags();
4936 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT;
4937 unsigned Isa = 0;
4938 int64_t Discriminator = 0;
4939
4940 auto parseLocOp = [&]() -> bool {
4941 StringRef Name;
4942 SMLoc Loc = getTok().getLoc();
4943 if (parseIdentifier(Name))
4944 return TokError("unexpected token in '.loc' directive");
4945
4946 if (Name == "basic_block")
4947 Flags |= DWARF2_FLAG_BASIC_BLOCK;
4948 else if (Name == "prologue_end")
4949 Flags |= DWARF2_FLAG_PROLOGUE_END;
4950 else if (Name == "epilogue_begin")
4951 Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
4952 else if (Name == "is_stmt") {
4953 Loc = getTok().getLoc();
4954 const MCExpr *Value;
4955 if (parseExpression(Value))
4956 return true;
4957 // The expression must be the constant 0 or 1.
4958 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4959 int Value = MCE->getValue();
4960 if (Value == 0)
4961 Flags &= ~DWARF2_FLAG_IS_STMT;
4962 else if (Value == 1)
4963 Flags |= DWARF2_FLAG_IS_STMT;
4964 else
4965 return Error(Loc, "is_stmt value not 0 or 1");
4966 } else {
4967 return Error(Loc, "is_stmt value not the constant value of 0 or 1");
4968 }
4969 } else if (Name == "isa") {
4970 Loc = getTok().getLoc();
4971 const MCExpr *Value;
4972 if (parseExpression(Value))
4973 return true;
4974 // The expression must be a constant greater or equal to 0.
4975 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
4976 int Value = MCE->getValue();
4977 if (Value < 0)
4978 return Error(Loc, "isa number less than zero");
4979 Isa = Value;
4980 } else {
4981 return Error(Loc, "isa number not a constant value");
4982 }
4983 } else if (Name == "discriminator") {
4984 if (parseAbsoluteExpression(Discriminator))
4985 return true;
4986 } else {
4987 return Error(Loc, "unknown sub-directive in '.loc' directive");
4988 }
4989 return false;
4990 };
4991
4992 if (parseMany(parseLocOp, false /*hasComma*/))
4993 return true;
4994
4995 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
4996 Isa, Discriminator, StringRef());
4997
4998 return false;
4999 }
5000
5001 /// parseDirectiveStabs
5002 /// ::= .stabs string, number, number, number
parseDirectiveStabs()5003 bool MasmParser::parseDirectiveStabs() {
5004 return TokError("unsupported directive '.stabs'");
5005 }
5006
5007 /// parseDirectiveCVFile
5008 /// ::= .cv_file number filename [checksum] [checksumkind]
parseDirectiveCVFile()5009 bool MasmParser::parseDirectiveCVFile() {
5010 SMLoc FileNumberLoc = getTok().getLoc();
5011 int64_t FileNumber;
5012 std::string Filename;
5013 std::string Checksum;
5014 int64_t ChecksumKind = 0;
5015
5016 if (parseIntToken(FileNumber,
5017 "expected file number in '.cv_file' directive") ||
5018 check(FileNumber < 1, FileNumberLoc, "file number less than one") ||
5019 check(getTok().isNot(AsmToken::String),
5020 "unexpected token in '.cv_file' directive") ||
5021 parseEscapedString(Filename))
5022 return true;
5023 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5024 if (check(getTok().isNot(AsmToken::String),
5025 "unexpected token in '.cv_file' directive") ||
5026 parseEscapedString(Checksum) ||
5027 parseIntToken(ChecksumKind,
5028 "expected checksum kind in '.cv_file' directive") ||
5029 parseEOL())
5030 return true;
5031 }
5032
5033 Checksum = fromHex(Checksum);
5034 void *CKMem = Ctx.allocate(Checksum.size(), 1);
5035 memcpy(CKMem, Checksum.data(), Checksum.size());
5036 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
5037 Checksum.size());
5038
5039 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes,
5040 static_cast<uint8_t>(ChecksumKind)))
5041 return Error(FileNumberLoc, "file number already allocated");
5042
5043 return false;
5044 }
5045
parseCVFunctionId(int64_t & FunctionId,StringRef DirectiveName)5046 bool MasmParser::parseCVFunctionId(int64_t &FunctionId,
5047 StringRef DirectiveName) {
5048 SMLoc Loc;
5049 return parseTokenLoc(Loc) ||
5050 parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
5051 "' directive") ||
5052 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
5053 "expected function id within range [0, UINT_MAX)");
5054 }
5055
parseCVFileId(int64_t & FileNumber,StringRef DirectiveName)5056 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
5057 SMLoc Loc;
5058 return parseTokenLoc(Loc) ||
5059 parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
5060 "' directive") ||
5061 check(FileNumber < 1, Loc, "file number less than one in '" +
5062 DirectiveName + "' directive") ||
5063 check(!getCVContext().isValidFileNumber(FileNumber), Loc,
5064 "unassigned file number in '" + DirectiveName + "' directive");
5065 }
5066
5067 /// parseDirectiveCVFuncId
5068 /// ::= .cv_func_id FunctionId
5069 ///
5070 /// Introduces a function ID that can be used with .cv_loc.
parseDirectiveCVFuncId()5071 bool MasmParser::parseDirectiveCVFuncId() {
5072 SMLoc FunctionIdLoc = getTok().getLoc();
5073 int64_t FunctionId;
5074
5075 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL())
5076 return true;
5077
5078 if (!getStreamer().emitCVFuncIdDirective(FunctionId))
5079 return Error(FunctionIdLoc, "function id already allocated");
5080
5081 return false;
5082 }
5083
5084 /// parseDirectiveCVInlineSiteId
5085 /// ::= .cv_inline_site_id FunctionId
5086 /// "within" IAFunc
5087 /// "inlined_at" IAFile IALine [IACol]
5088 ///
5089 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined
5090 /// at" source location information for use in the line table of the caller,
5091 /// whether the caller is a real function or another inlined call site.
parseDirectiveCVInlineSiteId()5092 bool MasmParser::parseDirectiveCVInlineSiteId() {
5093 SMLoc FunctionIdLoc = getTok().getLoc();
5094 int64_t FunctionId;
5095 int64_t IAFunc;
5096 int64_t IAFile;
5097 int64_t IALine;
5098 int64_t IACol = 0;
5099
5100 // FunctionId
5101 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
5102 return true;
5103
5104 // "within"
5105 if (check((getLexer().isNot(AsmToken::Identifier) ||
5106 getTok().getIdentifier() != "within"),
5107 "expected 'within' identifier in '.cv_inline_site_id' directive"))
5108 return true;
5109 Lex();
5110
5111 // IAFunc
5112 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
5113 return true;
5114
5115 // "inlined_at"
5116 if (check((getLexer().isNot(AsmToken::Identifier) ||
5117 getTok().getIdentifier() != "inlined_at"),
5118 "expected 'inlined_at' identifier in '.cv_inline_site_id' "
5119 "directive") )
5120 return true;
5121 Lex();
5122
5123 // IAFile IALine
5124 if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
5125 parseIntToken(IALine, "expected line number after 'inlined_at'"))
5126 return true;
5127
5128 // [IACol]
5129 if (getLexer().is(AsmToken::Integer)) {
5130 IACol = getTok().getIntVal();
5131 Lex();
5132 }
5133
5134 if (parseEOL())
5135 return true;
5136
5137 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
5138 IALine, IACol, FunctionIdLoc))
5139 return Error(FunctionIdLoc, "function id already allocated");
5140
5141 return false;
5142 }
5143
5144 /// parseDirectiveCVLoc
5145 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end]
5146 /// [is_stmt VALUE]
5147 /// The first number is a file number, must have been previously assigned with
5148 /// a .file directive, the second number is the line number and optionally the
5149 /// third number is a column position (zero if not specified). The remaining
5150 /// optional items are .loc sub-directives.
parseDirectiveCVLoc()5151 bool MasmParser::parseDirectiveCVLoc() {
5152 SMLoc DirectiveLoc = getTok().getLoc();
5153 int64_t FunctionId, FileNumber;
5154 if (parseCVFunctionId(FunctionId, ".cv_loc") ||
5155 parseCVFileId(FileNumber, ".cv_loc"))
5156 return true;
5157
5158 int64_t LineNumber = 0;
5159 if (getLexer().is(AsmToken::Integer)) {
5160 LineNumber = getTok().getIntVal();
5161 if (LineNumber < 0)
5162 return TokError("line number less than zero in '.cv_loc' directive");
5163 Lex();
5164 }
5165
5166 int64_t ColumnPos = 0;
5167 if (getLexer().is(AsmToken::Integer)) {
5168 ColumnPos = getTok().getIntVal();
5169 if (ColumnPos < 0)
5170 return TokError("column position less than zero in '.cv_loc' directive");
5171 Lex();
5172 }
5173
5174 bool PrologueEnd = false;
5175 uint64_t IsStmt = 0;
5176
5177 auto parseOp = [&]() -> bool {
5178 StringRef Name;
5179 SMLoc Loc = getTok().getLoc();
5180 if (parseIdentifier(Name))
5181 return TokError("unexpected token in '.cv_loc' directive");
5182 if (Name == "prologue_end")
5183 PrologueEnd = true;
5184 else if (Name == "is_stmt") {
5185 Loc = getTok().getLoc();
5186 const MCExpr *Value;
5187 if (parseExpression(Value))
5188 return true;
5189 // The expression must be the constant 0 or 1.
5190 IsStmt = ~0ULL;
5191 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value))
5192 IsStmt = MCE->getValue();
5193
5194 if (IsStmt > 1)
5195 return Error(Loc, "is_stmt value not 0 or 1");
5196 } else {
5197 return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
5198 }
5199 return false;
5200 };
5201
5202 if (parseMany(parseOp, false /*hasComma*/))
5203 return true;
5204
5205 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber,
5206 ColumnPos, PrologueEnd, IsStmt, StringRef(),
5207 DirectiveLoc);
5208 return false;
5209 }
5210
5211 /// parseDirectiveCVLinetable
5212 /// ::= .cv_linetable FunctionId, FnStart, FnEnd
parseDirectiveCVLinetable()5213 bool MasmParser::parseDirectiveCVLinetable() {
5214 int64_t FunctionId;
5215 StringRef FnStartName, FnEndName;
5216 SMLoc Loc = getTok().getLoc();
5217 if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
5218 parseToken(AsmToken::Comma,
5219 "unexpected token in '.cv_linetable' directive") ||
5220 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5221 "expected identifier in directive") ||
5222 parseToken(AsmToken::Comma,
5223 "unexpected token in '.cv_linetable' directive") ||
5224 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5225 "expected identifier in directive"))
5226 return true;
5227
5228 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5229 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5230
5231 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym);
5232 return false;
5233 }
5234
5235 /// parseDirectiveCVInlineLinetable
5236 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
parseDirectiveCVInlineLinetable()5237 bool MasmParser::parseDirectiveCVInlineLinetable() {
5238 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
5239 StringRef FnStartName, FnEndName;
5240 SMLoc Loc = getTok().getLoc();
5241 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
5242 parseTokenLoc(Loc) ||
5243 parseIntToken(
5244 SourceFileId,
5245 "expected SourceField in '.cv_inline_linetable' directive") ||
5246 check(SourceFileId <= 0, Loc,
5247 "File id less than zero in '.cv_inline_linetable' directive") ||
5248 parseTokenLoc(Loc) ||
5249 parseIntToken(
5250 SourceLineNum,
5251 "expected SourceLineNum in '.cv_inline_linetable' directive") ||
5252 check(SourceLineNum < 0, Loc,
5253 "Line number less than zero in '.cv_inline_linetable' directive") ||
5254 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
5255 "expected identifier in directive") ||
5256 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
5257 "expected identifier in directive"))
5258 return true;
5259
5260 if (parseEOL())
5261 return true;
5262
5263 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
5264 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
5265 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
5266 SourceLineNum, FnStartSym,
5267 FnEndSym);
5268 return false;
5269 }
5270
initializeCVDefRangeTypeMap()5271 void MasmParser::initializeCVDefRangeTypeMap() {
5272 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER;
5273 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL;
5274 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER;
5275 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL;
5276 }
5277
5278 /// parseDirectiveCVDefRange
5279 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes*
parseDirectiveCVDefRange()5280 bool MasmParser::parseDirectiveCVDefRange() {
5281 SMLoc Loc;
5282 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges;
5283 while (getLexer().is(AsmToken::Identifier)) {
5284 Loc = getLexer().getLoc();
5285 StringRef GapStartName;
5286 if (parseIdentifier(GapStartName))
5287 return Error(Loc, "expected identifier in directive");
5288 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName);
5289
5290 Loc = getLexer().getLoc();
5291 StringRef GapEndName;
5292 if (parseIdentifier(GapEndName))
5293 return Error(Loc, "expected identifier in directive");
5294 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName);
5295
5296 Ranges.push_back({GapStartSym, GapEndSym});
5297 }
5298
5299 StringRef CVDefRangeTypeStr;
5300 if (parseToken(
5301 AsmToken::Comma,
5302 "expected comma before def_range type in .cv_def_range directive") ||
5303 parseIdentifier(CVDefRangeTypeStr))
5304 return Error(Loc, "expected def_range type in directive");
5305
5306 StringMap<CVDefRangeType>::const_iterator CVTypeIt =
5307 CVDefRangeTypeMap.find(CVDefRangeTypeStr);
5308 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end())
5309 ? CVDR_DEFRANGE
5310 : CVTypeIt->getValue();
5311 switch (CVDRType) {
5312 case CVDR_DEFRANGE_REGISTER: {
5313 int64_t DRRegister;
5314 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5315 ".cv_def_range directive") ||
5316 parseAbsoluteExpression(DRRegister))
5317 return Error(Loc, "expected register number");
5318
5319 codeview::DefRangeRegisterHeader DRHdr;
5320 DRHdr.Register = DRRegister;
5321 DRHdr.MayHaveNoName = 0;
5322 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5323 break;
5324 }
5325 case CVDR_DEFRANGE_FRAMEPOINTER_REL: {
5326 int64_t DROffset;
5327 if (parseToken(AsmToken::Comma,
5328 "expected comma before offset in .cv_def_range directive") ||
5329 parseAbsoluteExpression(DROffset))
5330 return Error(Loc, "expected offset value");
5331
5332 codeview::DefRangeFramePointerRelHeader DRHdr;
5333 DRHdr.Offset = DROffset;
5334 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5335 break;
5336 }
5337 case CVDR_DEFRANGE_SUBFIELD_REGISTER: {
5338 int64_t DRRegister;
5339 int64_t DROffsetInParent;
5340 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5341 ".cv_def_range directive") ||
5342 parseAbsoluteExpression(DRRegister))
5343 return Error(Loc, "expected register number");
5344 if (parseToken(AsmToken::Comma,
5345 "expected comma before offset in .cv_def_range directive") ||
5346 parseAbsoluteExpression(DROffsetInParent))
5347 return Error(Loc, "expected offset value");
5348
5349 codeview::DefRangeSubfieldRegisterHeader DRHdr;
5350 DRHdr.Register = DRRegister;
5351 DRHdr.MayHaveNoName = 0;
5352 DRHdr.OffsetInParent = DROffsetInParent;
5353 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5354 break;
5355 }
5356 case CVDR_DEFRANGE_REGISTER_REL: {
5357 int64_t DRRegister;
5358 int64_t DRFlags;
5359 int64_t DRBasePointerOffset;
5360 if (parseToken(AsmToken::Comma, "expected comma before register number in "
5361 ".cv_def_range directive") ||
5362 parseAbsoluteExpression(DRRegister))
5363 return Error(Loc, "expected register value");
5364 if (parseToken(
5365 AsmToken::Comma,
5366 "expected comma before flag value in .cv_def_range directive") ||
5367 parseAbsoluteExpression(DRFlags))
5368 return Error(Loc, "expected flag value");
5369 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset "
5370 "in .cv_def_range directive") ||
5371 parseAbsoluteExpression(DRBasePointerOffset))
5372 return Error(Loc, "expected base pointer offset value");
5373
5374 codeview::DefRangeRegisterRelHeader DRHdr;
5375 DRHdr.Register = DRRegister;
5376 DRHdr.Flags = DRFlags;
5377 DRHdr.BasePointerOffset = DRBasePointerOffset;
5378 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr);
5379 break;
5380 }
5381 default:
5382 return Error(Loc, "unexpected def_range type in .cv_def_range directive");
5383 }
5384 return true;
5385 }
5386
5387 /// parseDirectiveCVString
5388 /// ::= .cv_stringtable "string"
parseDirectiveCVString()5389 bool MasmParser::parseDirectiveCVString() {
5390 std::string Data;
5391 if (checkForValidSection() || parseEscapedString(Data))
5392 return addErrorSuffix(" in '.cv_string' directive");
5393
5394 // Put the string in the table and emit the offset.
5395 std::pair<StringRef, unsigned> Insertion =
5396 getCVContext().addToStringTable(Data);
5397 getStreamer().emitIntValue(Insertion.second, 4);
5398 return false;
5399 }
5400
/// parseDirectiveCVStringTable
///  ::= .cv_stringtable
///
/// Parses no operands; forwards directly to the streamer and always returns
/// false (no error).
bool MasmParser::parseDirectiveCVStringTable() {
  getStreamer().emitCVStringTableDirective();
  return false;
}
5407
/// parseDirectiveCVFileChecksums
///  ::= .cv_filechecksums
///
/// Parses no operands; forwards directly to the streamer and always returns
/// false (no error).
bool MasmParser::parseDirectiveCVFileChecksums() {
  getStreamer().emitCVFileChecksumsDirective();
  return false;
}
5414
5415 /// parseDirectiveCVFileChecksumOffset
5416 /// ::= .cv_filechecksumoffset fileno
parseDirectiveCVFileChecksumOffset()5417 bool MasmParser::parseDirectiveCVFileChecksumOffset() {
5418 int64_t FileNo;
5419 if (parseIntToken(FileNo, "expected identifier in directive"))
5420 return true;
5421 if (parseEOL())
5422 return true;
5423 getStreamer().emitCVFileChecksumOffsetDirective(FileNo);
5424 return false;
5425 }
5426
5427 /// parseDirectiveCVFPOData
5428 /// ::= .cv_fpo_data procsym
parseDirectiveCVFPOData()5429 bool MasmParser::parseDirectiveCVFPOData() {
5430 SMLoc DirLoc = getLexer().getLoc();
5431 StringRef ProcName;
5432 if (parseIdentifier(ProcName))
5433 return TokError("expected symbol name");
5434 if (parseEOL("unexpected tokens"))
5435 return addErrorSuffix(" in '.cv_fpo_data' directive");
5436 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
5437 getStreamer().emitCVFPOData(ProcSym, DirLoc);
5438 return false;
5439 }
5440
5441 /// parseDirectiveCFISections
5442 /// ::= .cfi_sections section [, section]
parseDirectiveCFISections()5443 bool MasmParser::parseDirectiveCFISections() {
5444 StringRef Name;
5445 bool EH = false;
5446 bool Debug = false;
5447
5448 if (parseIdentifier(Name))
5449 return TokError("Expected an identifier");
5450
5451 if (Name == ".eh_frame")
5452 EH = true;
5453 else if (Name == ".debug_frame")
5454 Debug = true;
5455
5456 if (getLexer().is(AsmToken::Comma)) {
5457 Lex();
5458
5459 if (parseIdentifier(Name))
5460 return TokError("Expected an identifier");
5461
5462 if (Name == ".eh_frame")
5463 EH = true;
5464 else if (Name == ".debug_frame")
5465 Debug = true;
5466 }
5467
5468 getStreamer().emitCFISections(EH, Debug);
5469 return false;
5470 }
5471
5472 /// parseDirectiveCFIStartProc
5473 /// ::= .cfi_startproc [simple]
parseDirectiveCFIStartProc()5474 bool MasmParser::parseDirectiveCFIStartProc() {
5475 StringRef Simple;
5476 if (!parseOptionalToken(AsmToken::EndOfStatement)) {
5477 if (check(parseIdentifier(Simple) || Simple != "simple",
5478 "unexpected token") ||
5479 parseEOL())
5480 return addErrorSuffix(" in '.cfi_startproc' directive");
5481 }
5482
5483 // TODO(kristina): Deal with a corner case of incorrect diagnostic context
5484 // being produced if this directive is emitted as part of preprocessor macro
5485 // expansion which can *ONLY* happen if Clang's cc1as is the API consumer.
5486 // Tools like llvm-mc on the other hand are not affected by it, and report
5487 // correct context information.
5488 getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc());
5489 return false;
5490 }
5491
/// parseDirectiveCFIEndProc
///  ::= .cfi_endproc
///
/// Parses no operands; forwards directly to the streamer and always returns
/// false (no error).
bool MasmParser::parseDirectiveCFIEndProc() {
  getStreamer().emitCFIEndProc();
  return false;
}
5498
5499 /// parse register name or number.
parseRegisterOrRegisterNumber(int64_t & Register,SMLoc DirectiveLoc)5500 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register,
5501 SMLoc DirectiveLoc) {
5502 MCRegister RegNo;
5503
5504 if (getLexer().isNot(AsmToken::Integer)) {
5505 if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc))
5506 return true;
5507 Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true);
5508 } else
5509 return parseAbsoluteExpression(Register);
5510
5511 return false;
5512 }
5513
5514 /// parseDirectiveCFIDefCfa
5515 /// ::= .cfi_def_cfa register, offset
parseDirectiveCFIDefCfa(SMLoc DirectiveLoc)5516 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
5517 int64_t Register = 0, Offset = 0;
5518 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5519 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5520 parseAbsoluteExpression(Offset))
5521 return true;
5522
5523 getStreamer().emitCFIDefCfa(Register, Offset);
5524 return false;
5525 }
5526
5527 /// parseDirectiveCFIDefCfaOffset
5528 /// ::= .cfi_def_cfa_offset offset
parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc)5529 bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
5530 int64_t Offset = 0;
5531 if (parseAbsoluteExpression(Offset))
5532 return true;
5533
5534 getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
5535 return false;
5536 }
5537
5538 /// parseDirectiveCFIRegister
5539 /// ::= .cfi_register register, register
parseDirectiveCFIRegister(SMLoc DirectiveLoc)5540 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
5541 int64_t Register1 = 0, Register2 = 0;
5542 if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) ||
5543 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5544 parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
5545 return true;
5546
5547 getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
5548 return false;
5549 }
5550
/// parseDirectiveCFIWindowSave
///  ::= .cfi_window_save
///
/// Parses no operands; forwards the directive location to the streamer and
/// always returns false (no error).
bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
  getStreamer().emitCFIWindowSave(DirectiveLoc);
  return false;
}
5557
5558 /// parseDirectiveCFIAdjustCfaOffset
5559 /// ::= .cfi_adjust_cfa_offset adjustment
parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc)5560 bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
5561 int64_t Adjustment = 0;
5562 if (parseAbsoluteExpression(Adjustment))
5563 return true;
5564
5565 getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
5566 return false;
5567 }
5568
5569 /// parseDirectiveCFIDefCfaRegister
5570 /// ::= .cfi_def_cfa_register register
parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc)5571 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
5572 int64_t Register = 0;
5573 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5574 return true;
5575
5576 getStreamer().emitCFIDefCfaRegister(Register);
5577 return false;
5578 }
5579
5580 /// parseDirectiveCFIOffset
5581 /// ::= .cfi_offset register, offset
parseDirectiveCFIOffset(SMLoc DirectiveLoc)5582 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
5583 int64_t Register = 0;
5584 int64_t Offset = 0;
5585
5586 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5587 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5588 parseAbsoluteExpression(Offset))
5589 return true;
5590
5591 getStreamer().emitCFIOffset(Register, Offset);
5592 return false;
5593 }
5594
5595 /// parseDirectiveCFIRelOffset
5596 /// ::= .cfi_rel_offset register, offset
parseDirectiveCFIRelOffset(SMLoc DirectiveLoc)5597 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
5598 int64_t Register = 0, Offset = 0;
5599
5600 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) ||
5601 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5602 parseAbsoluteExpression(Offset))
5603 return true;
5604
5605 getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
5606 return false;
5607 }
5608
isValidEncoding(int64_t Encoding)5609 static bool isValidEncoding(int64_t Encoding) {
5610 if (Encoding & ~0xff)
5611 return false;
5612
5613 if (Encoding == dwarf::DW_EH_PE_omit)
5614 return true;
5615
5616 const unsigned Format = Encoding & 0xf;
5617 if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 &&
5618 Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 &&
5619 Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 &&
5620 Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed)
5621 return false;
5622
5623 const unsigned Application = Encoding & 0x70;
5624 if (Application != dwarf::DW_EH_PE_absptr &&
5625 Application != dwarf::DW_EH_PE_pcrel)
5626 return false;
5627
5628 return true;
5629 }
5630
5631 /// parseDirectiveCFIPersonalityOrLsda
5632 /// IsPersonality true for cfi_personality, false for cfi_lsda
5633 /// ::= .cfi_personality encoding, [symbol_name]
5634 /// ::= .cfi_lsda encoding, [symbol_name]
parseDirectiveCFIPersonalityOrLsda(bool IsPersonality)5635 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
5636 int64_t Encoding = 0;
5637 if (parseAbsoluteExpression(Encoding))
5638 return true;
5639 if (Encoding == dwarf::DW_EH_PE_omit)
5640 return false;
5641
5642 StringRef Name;
5643 if (check(!isValidEncoding(Encoding), "unsupported encoding.") ||
5644 parseToken(AsmToken::Comma, "unexpected token in directive") ||
5645 check(parseIdentifier(Name), "expected identifier in directive"))
5646 return true;
5647
5648 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5649
5650 if (IsPersonality)
5651 getStreamer().emitCFIPersonality(Sym, Encoding);
5652 else
5653 getStreamer().emitCFILsda(Sym, Encoding);
5654 return false;
5655 }
5656
/// parseDirectiveCFIRememberState
///  ::= .cfi_remember_state
///
/// Parses no operands; forwards the directive location to the streamer and
/// always returns false (no error).
bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
  getStreamer().emitCFIRememberState(DirectiveLoc);
  return false;
}
5663
/// parseDirectiveCFIRestoreState
///  ::= .cfi_restore_state
///
/// Parses no operands; forwards the directive location to the streamer and
/// always returns false (no error).
bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
  getStreamer().emitCFIRestoreState(DirectiveLoc);
  return false;
}
5670
5671 /// parseDirectiveCFISameValue
5672 /// ::= .cfi_same_value register
parseDirectiveCFISameValue(SMLoc DirectiveLoc)5673 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
5674 int64_t Register = 0;
5675
5676 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5677 return true;
5678
5679 getStreamer().emitCFISameValue(Register, DirectiveLoc);
5680 return false;
5681 }
5682
5683 /// parseDirectiveCFIRestore
5684 /// ::= .cfi_restore register
parseDirectiveCFIRestore(SMLoc DirectiveLoc)5685 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
5686 int64_t Register = 0;
5687 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5688 return true;
5689
5690 getStreamer().emitCFIRestore(Register);
5691 return false;
5692 }
5693
5694 /// parseDirectiveCFIEscape
5695 /// ::= .cfi_escape expression[,...]
parseDirectiveCFIEscape(SMLoc DirectiveLoc)5696 bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
5697 std::string Values;
5698 int64_t CurrValue;
5699 if (parseAbsoluteExpression(CurrValue))
5700 return true;
5701
5702 Values.push_back((uint8_t)CurrValue);
5703
5704 while (getLexer().is(AsmToken::Comma)) {
5705 Lex();
5706
5707 if (parseAbsoluteExpression(CurrValue))
5708 return true;
5709
5710 Values.push_back((uint8_t)CurrValue);
5711 }
5712
5713 getStreamer().emitCFIEscape(Values, DirectiveLoc);
5714 return false;
5715 }
5716
5717 /// parseDirectiveCFIReturnColumn
5718 /// ::= .cfi_return_column register
parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc)5719 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
5720 int64_t Register = 0;
5721 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5722 return true;
5723 getStreamer().emitCFIReturnColumn(Register);
5724 return false;
5725 }
5726
/// parseDirectiveCFISignalFrame
///  ::= .cfi_signal_frame
///
/// Requires the statement to end immediately (no operands), then forwards to
/// the streamer. Returns true if the end-of-line check fails.
bool MasmParser::parseDirectiveCFISignalFrame() {
  if (parseEOL())
    return true;

  getStreamer().emitCFISignalFrame();
  return false;
}
5736
5737 /// parseDirectiveCFIUndefined
5738 /// ::= .cfi_undefined register
parseDirectiveCFIUndefined(SMLoc DirectiveLoc)5739 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
5740 int64_t Register = 0;
5741
5742 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
5743 return true;
5744
5745 getStreamer().emitCFIUndefined(Register);
5746 return false;
5747 }
5748
5749 /// parseDirectiveMacro
5750 /// ::= name macro [parameters]
5751 /// ["LOCAL" identifiers]
5752 /// parameters ::= parameter [, parameter]*
5753 /// parameter ::= name ":" qualifier
5754 /// qualifier ::= "req" | "vararg" | "=" macro_argument
parseDirectiveMacro(StringRef Name,SMLoc NameLoc)5755 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) {
5756 MCAsmMacroParameters Parameters;
5757 while (getLexer().isNot(AsmToken::EndOfStatement)) {
5758 if (!Parameters.empty() && Parameters.back().Vararg)
5759 return Error(Lexer.getLoc(),
5760 "Vararg parameter '" + Parameters.back().Name +
5761 "' should be last in the list of parameters");
5762
5763 MCAsmMacroParameter Parameter;
5764 if (parseIdentifier(Parameter.Name))
5765 return TokError("expected identifier in 'macro' directive");
5766
5767 // Emit an error if two (or more) named parameters share the same name.
5768 for (const MCAsmMacroParameter& CurrParam : Parameters)
5769 if (CurrParam.Name.equals_insensitive(Parameter.Name))
5770 return TokError("macro '" + Name + "' has multiple parameters"
5771 " named '" + Parameter.Name + "'");
5772
5773 if (Lexer.is(AsmToken::Colon)) {
5774 Lex(); // consume ':'
5775
5776 if (parseOptionalToken(AsmToken::Equal)) {
5777 // Default value
5778 SMLoc ParamLoc;
5779
5780 ParamLoc = Lexer.getLoc();
5781 if (parseMacroArgument(nullptr, Parameter.Value))
5782 return true;
5783 } else {
5784 SMLoc QualLoc;
5785 StringRef Qualifier;
5786
5787 QualLoc = Lexer.getLoc();
5788 if (parseIdentifier(Qualifier))
5789 return Error(QualLoc, "missing parameter qualifier for "
5790 "'" +
5791 Parameter.Name + "' in macro '" + Name +
5792 "'");
5793
5794 if (Qualifier.equals_insensitive("req"))
5795 Parameter.Required = true;
5796 else if (Qualifier.equals_insensitive("vararg"))
5797 Parameter.Vararg = true;
5798 else
5799 return Error(QualLoc,
5800 Qualifier + " is not a valid parameter qualifier for '" +
5801 Parameter.Name + "' in macro '" + Name + "'");
5802 }
5803 }
5804
5805 Parameters.push_back(std::move(Parameter));
5806
5807 if (getLexer().is(AsmToken::Comma))
5808 Lex();
5809 }
5810
5811 // Eat just the end of statement.
5812 Lexer.Lex();
5813
5814 std::vector<std::string> Locals;
5815 if (getTok().is(AsmToken::Identifier) &&
5816 getTok().getIdentifier().equals_insensitive("local")) {
5817 Lex(); // Eat the LOCAL directive.
5818
5819 StringRef ID;
5820 while (true) {
5821 if (parseIdentifier(ID))
5822 return true;
5823 Locals.push_back(ID.lower());
5824
5825 // If we see a comma, continue (and allow line continuation).
5826 if (!parseOptionalToken(AsmToken::Comma))
5827 break;
5828 parseOptionalToken(AsmToken::EndOfStatement);
5829 }
5830 }
5831
5832 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors.
5833 AsmToken EndToken, StartToken = getTok();
5834 unsigned MacroDepth = 0;
5835 bool IsMacroFunction = false;
5836 // Lex the macro definition.
5837 while (true) {
5838 // Ignore Lexing errors in macros.
5839 while (Lexer.is(AsmToken::Error)) {
5840 Lexer.Lex();
5841 }
5842
5843 // Check whether we have reached the end of the file.
5844 if (getLexer().is(AsmToken::Eof))
5845 return Error(NameLoc, "no matching 'endm' in definition");
5846
5847 // Otherwise, check whether we have reached the 'endm'... and determine if
5848 // this is a macro function.
5849 if (getLexer().is(AsmToken::Identifier)) {
5850 if (getTok().getIdentifier().equals_insensitive("endm")) {
5851 if (MacroDepth == 0) { // Outermost macro.
5852 EndToken = getTok();
5853 Lexer.Lex();
5854 if (getLexer().isNot(AsmToken::EndOfStatement))
5855 return TokError("unexpected token in '" + EndToken.getIdentifier() +
5856 "' directive");
5857 break;
5858 } else {
5859 // Otherwise we just found the end of an inner macro.
5860 --MacroDepth;
5861 }
5862 } else if (getTok().getIdentifier().equals_insensitive("exitm")) {
5863 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) {
5864 IsMacroFunction = true;
5865 }
5866 } else if (isMacroLikeDirective()) {
5867 // We allow nested macros. Those aren't instantiated until the
5868 // outermost macro is expanded so just ignore them for now.
5869 ++MacroDepth;
5870 }
5871 }
5872
5873 // Otherwise, scan til the end of the statement.
5874 eatToEndOfStatement();
5875 }
5876
5877 if (getContext().lookupMacro(Name.lower())) {
5878 return Error(NameLoc, "macro '" + Name + "' is already defined");
5879 }
5880
5881 const char *BodyStart = StartToken.getLoc().getPointer();
5882 const char *BodyEnd = EndToken.getLoc().getPointer();
5883 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
5884 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals),
5885 IsMacroFunction);
5886 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n";
5887 Macro.dump());
5888 getContext().defineMacro(Name.lower(), std::move(Macro));
5889 return false;
5890 }
5891
5892 /// parseDirectiveExitMacro
5893 /// ::= "exitm" [textitem]
parseDirectiveExitMacro(SMLoc DirectiveLoc,StringRef Directive,std::string & Value)5894 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc,
5895 StringRef Directive,
5896 std::string &Value) {
5897 SMLoc EndLoc = getTok().getLoc();
5898 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value))
5899 return Error(EndLoc,
5900 "unable to parse text item in '" + Directive + "' directive");
5901 eatToEndOfStatement();
5902
5903 if (!isInsideMacroInstantiation())
5904 return TokError("unexpected '" + Directive + "' in file, "
5905 "no current macro definition");
5906
5907 // Exit all conditionals that are active in the current macro.
5908 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) {
5909 TheCondState = TheCondStack.back();
5910 TheCondStack.pop_back();
5911 }
5912
5913 handleMacroExit();
5914 return false;
5915 }
5916
5917 /// parseDirectiveEndMacro
5918 /// ::= endm
parseDirectiveEndMacro(StringRef Directive)5919 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) {
5920 if (getLexer().isNot(AsmToken::EndOfStatement))
5921 return TokError("unexpected token in '" + Directive + "' directive");
5922
5923 // If we are inside a macro instantiation, terminate the current
5924 // instantiation.
5925 if (isInsideMacroInstantiation()) {
5926 handleMacroExit();
5927 return false;
5928 }
5929
5930 // Otherwise, this .endmacro is a stray entry in the file; well formed
5931 // .endmacro directives are handled during the macro definition parsing.
5932 return TokError("unexpected '" + Directive + "' in file, "
5933 "no current macro definition");
5934 }
5935
5936 /// parseDirectivePurgeMacro
5937 /// ::= purge identifier ( , identifier )*
parseDirectivePurgeMacro(SMLoc DirectiveLoc)5938 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
5939 StringRef Name;
5940 while (true) {
5941 SMLoc NameLoc;
5942 if (parseTokenLoc(NameLoc) ||
5943 check(parseIdentifier(Name), NameLoc,
5944 "expected identifier in 'purge' directive"))
5945 return true;
5946
5947 DEBUG_WITH_TYPE("asm-macros", dbgs()
5948 << "Un-defining macro: " << Name << "\n");
5949 if (!getContext().lookupMacro(Name.lower()))
5950 return Error(NameLoc, "macro '" + Name + "' is not defined");
5951 getContext().undefineMacro(Name.lower());
5952
5953 if (!parseOptionalToken(AsmToken::Comma))
5954 break;
5955 parseOptionalToken(AsmToken::EndOfStatement);
5956 }
5957
5958 return false;
5959 }
5960
parseDirectiveExtern()5961 bool MasmParser::parseDirectiveExtern() {
5962 // .extern is the default - but we still need to take any provided type info.
5963 auto parseOp = [&]() -> bool {
5964 StringRef Name;
5965 SMLoc NameLoc = getTok().getLoc();
5966 if (parseIdentifier(Name))
5967 return Error(NameLoc, "expected name");
5968 if (parseToken(AsmToken::Colon))
5969 return true;
5970
5971 StringRef TypeName;
5972 SMLoc TypeLoc = getTok().getLoc();
5973 if (parseIdentifier(TypeName))
5974 return Error(TypeLoc, "expected type");
5975 if (!TypeName.equals_insensitive("proc")) {
5976 AsmTypeInfo Type;
5977 if (lookUpType(TypeName, Type))
5978 return Error(TypeLoc, "unrecognized type");
5979 KnownType[Name.lower()] = Type;
5980 }
5981
5982 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
5983 Sym->setExternal(true);
5984 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern);
5985
5986 return false;
5987 };
5988
5989 if (parseMany(parseOp))
5990 return addErrorSuffix(" in directive 'extern'");
5991 return false;
5992 }
5993
5994 /// parseDirectiveSymbolAttribute
5995 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
parseDirectiveSymbolAttribute(MCSymbolAttr Attr)5996 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
5997 auto parseOp = [&]() -> bool {
5998 StringRef Name;
5999 SMLoc Loc = getTok().getLoc();
6000 if (parseIdentifier(Name))
6001 return Error(Loc, "expected identifier");
6002 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6003
6004 // Assembler local symbols don't make any sense here. Complain loudly.
6005 if (Sym->isTemporary())
6006 return Error(Loc, "non-local symbol required");
6007
6008 if (!getStreamer().emitSymbolAttribute(Sym, Attr))
6009 return Error(Loc, "unable to emit symbol attribute");
6010 return false;
6011 };
6012
6013 if (parseMany(parseOp))
6014 return addErrorSuffix(" in directive");
6015 return false;
6016 }
6017
6018 /// parseDirectiveComm
6019 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
parseDirectiveComm(bool IsLocal)6020 bool MasmParser::parseDirectiveComm(bool IsLocal) {
6021 if (checkForValidSection())
6022 return true;
6023
6024 SMLoc IDLoc = getLexer().getLoc();
6025 StringRef Name;
6026 if (parseIdentifier(Name))
6027 return TokError("expected identifier in directive");
6028
6029 // Handle the identifier as the key symbol.
6030 MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
6031
6032 if (getLexer().isNot(AsmToken::Comma))
6033 return TokError("unexpected token in directive");
6034 Lex();
6035
6036 int64_t Size;
6037 SMLoc SizeLoc = getLexer().getLoc();
6038 if (parseAbsoluteExpression(Size))
6039 return true;
6040
6041 int64_t Pow2Alignment = 0;
6042 SMLoc Pow2AlignmentLoc;
6043 if (getLexer().is(AsmToken::Comma)) {
6044 Lex();
6045 Pow2AlignmentLoc = getLexer().getLoc();
6046 if (parseAbsoluteExpression(Pow2Alignment))
6047 return true;
6048
6049 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType();
6050 if (IsLocal && LCOMM == LCOMM::NoAlignment)
6051 return Error(Pow2AlignmentLoc, "alignment not supported on this target");
6052
6053 // If this target takes alignments in bytes (not log) validate and convert.
6054 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) ||
6055 (IsLocal && LCOMM == LCOMM::ByteAlignment)) {
6056 if (!isPowerOf2_64(Pow2Alignment))
6057 return Error(Pow2AlignmentLoc, "alignment must be a power of 2");
6058 Pow2Alignment = Log2_64(Pow2Alignment);
6059 }
6060 }
6061
6062 if (parseEOL())
6063 return true;
6064
6065 // NOTE: a size of zero for a .comm should create a undefined symbol
6066 // but a size of .lcomm creates a bss symbol of size zero.
6067 if (Size < 0)
6068 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
6069 "be less than zero");
6070
6071 // NOTE: The alignment in the directive is a power of 2 value, the assembler
6072 // may internally end up wanting an alignment in bytes.
6073 // FIXME: Diagnose overflow.
6074 if (Pow2Alignment < 0)
6075 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
6076 "alignment, can't be less than zero");
6077
6078 Sym->redefineIfPossible();
6079 if (!Sym->isUndefined())
6080 return Error(IDLoc, "invalid symbol redefinition");
6081
6082 // Create the Symbol as a common or local common with Size and Pow2Alignment.
6083 if (IsLocal) {
6084 getStreamer().emitLocalCommonSymbol(Sym, Size,
6085 Align(1ULL << Pow2Alignment));
6086 return false;
6087 }
6088
6089 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment));
6090 return false;
6091 }
6092
6093 /// parseDirectiveComment
6094 /// ::= comment delimiter [[text]]
6095 /// [[text]]
6096 /// [[text]] delimiter [[text]]
parseDirectiveComment(SMLoc DirectiveLoc)6097 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
6098 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
6099 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
6100 assert(DelimiterEnd != std::string::npos);
6101 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
6102 if (Delimiter.empty())
6103 return Error(DirectiveLoc, "no delimiter in 'comment' directive");
6104 do {
6105 if (getTok().is(AsmToken::Eof))
6106 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive");
6107 Lex(); // eat end of statement
6108 } while (
6109 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter));
6110 return parseEOL();
6111 }
6112
6113 /// parseDirectiveInclude
6114 /// ::= include <filename>
6115 /// | include filename
parseDirectiveInclude()6116 bool MasmParser::parseDirectiveInclude() {
6117 // Allow the strings to have escaped octal character sequence.
6118 std::string Filename;
6119 SMLoc IncludeLoc = getTok().getLoc();
6120
6121 if (parseAngleBracketString(Filename))
6122 Filename = parseStringTo(AsmToken::EndOfStatement);
6123 if (check(Filename.empty(), "missing filename in 'include' directive") ||
6124 check(getTok().isNot(AsmToken::EndOfStatement),
6125 "unexpected token in 'include' directive") ||
6126 // Attempt to switch the lexer to the included file before consuming the
6127 // end of statement to avoid losing it when we switch.
6128 check(enterIncludeFile(Filename), IncludeLoc,
6129 "Could not find include file '" + Filename + "'"))
6130 return true;
6131
6132 return false;
6133 }
6134
6135 /// parseDirectiveIf
6136 /// ::= .if{,eq,ge,gt,le,lt,ne} expression
parseDirectiveIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6137 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
6138 TheCondStack.push_back(TheCondState);
6139 TheCondState.TheCond = AsmCond::IfCond;
6140 if (TheCondState.Ignore) {
6141 eatToEndOfStatement();
6142 } else {
6143 int64_t ExprValue;
6144 if (parseAbsoluteExpression(ExprValue) || parseEOL())
6145 return true;
6146
6147 switch (DirKind) {
6148 default:
6149 llvm_unreachable("unsupported directive");
6150 case DK_IF:
6151 break;
6152 case DK_IFE:
6153 ExprValue = ExprValue == 0;
6154 break;
6155 }
6156
6157 TheCondState.CondMet = ExprValue;
6158 TheCondState.Ignore = !TheCondState.CondMet;
6159 }
6160
6161 return false;
6162 }
6163
6164 /// parseDirectiveIfb
6165 /// ::= .ifb textitem
parseDirectiveIfb(SMLoc DirectiveLoc,bool ExpectBlank)6166 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6167 TheCondStack.push_back(TheCondState);
6168 TheCondState.TheCond = AsmCond::IfCond;
6169
6170 if (TheCondState.Ignore) {
6171 eatToEndOfStatement();
6172 } else {
6173 std::string Str;
6174 if (parseTextItem(Str))
6175 return TokError("expected text item parameter for 'ifb' directive");
6176
6177 if (parseEOL())
6178 return true;
6179
6180 TheCondState.CondMet = ExpectBlank == Str.empty();
6181 TheCondState.Ignore = !TheCondState.CondMet;
6182 }
6183
6184 return false;
6185 }
6186
6187 /// parseDirectiveIfidn
6188 /// ::= ifidn textitem, textitem
parseDirectiveIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6189 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6190 bool CaseInsensitive) {
6191 std::string String1, String2;
6192
6193 if (parseTextItem(String1)) {
6194 if (ExpectEqual)
6195 return TokError("expected text item parameter for 'ifidn' directive");
6196 return TokError("expected text item parameter for 'ifdif' directive");
6197 }
6198
6199 if (Lexer.isNot(AsmToken::Comma)) {
6200 if (ExpectEqual)
6201 return TokError(
6202 "expected comma after first string for 'ifidn' directive");
6203 return TokError("expected comma after first string for 'ifdif' directive");
6204 }
6205 Lex();
6206
6207 if (parseTextItem(String2)) {
6208 if (ExpectEqual)
6209 return TokError("expected text item parameter for 'ifidn' directive");
6210 return TokError("expected text item parameter for 'ifdif' directive");
6211 }
6212
6213 TheCondStack.push_back(TheCondState);
6214 TheCondState.TheCond = AsmCond::IfCond;
6215 if (CaseInsensitive)
6216 TheCondState.CondMet =
6217 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6218 else
6219 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6220 TheCondState.Ignore = !TheCondState.CondMet;
6221
6222 return false;
6223 }
6224
6225 /// parseDirectiveIfdef
6226 /// ::= ifdef symbol
6227 /// | ifdef variable
parseDirectiveIfdef(SMLoc DirectiveLoc,bool expect_defined)6228 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
6229 TheCondStack.push_back(TheCondState);
6230 TheCondState.TheCond = AsmCond::IfCond;
6231
6232 if (TheCondState.Ignore) {
6233 eatToEndOfStatement();
6234 } else {
6235 bool is_defined = false;
6236 MCRegister Reg;
6237 SMLoc StartLoc, EndLoc;
6238 is_defined =
6239 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6240 if (!is_defined) {
6241 StringRef Name;
6242 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") ||
6243 parseEOL())
6244 return true;
6245
6246 if (BuiltinSymbolMap.contains(Name.lower())) {
6247 is_defined = true;
6248 } else if (Variables.contains(Name.lower())) {
6249 is_defined = true;
6250 } else {
6251 MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
6252 is_defined = (Sym && !Sym->isUndefined(false));
6253 }
6254 }
6255
6256 TheCondState.CondMet = (is_defined == expect_defined);
6257 TheCondState.Ignore = !TheCondState.CondMet;
6258 }
6259
6260 return false;
6261 }
6262
6263 /// parseDirectiveElseIf
6264 /// ::= elseif expression
parseDirectiveElseIf(SMLoc DirectiveLoc,DirectiveKind DirKind)6265 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc,
6266 DirectiveKind DirKind) {
6267 if (TheCondState.TheCond != AsmCond::IfCond &&
6268 TheCondState.TheCond != AsmCond::ElseIfCond)
6269 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
6270 " .if or an .elseif");
6271 TheCondState.TheCond = AsmCond::ElseIfCond;
6272
6273 bool LastIgnoreState = false;
6274 if (!TheCondStack.empty())
6275 LastIgnoreState = TheCondStack.back().Ignore;
6276 if (LastIgnoreState || TheCondState.CondMet) {
6277 TheCondState.Ignore = true;
6278 eatToEndOfStatement();
6279 } else {
6280 int64_t ExprValue;
6281 if (parseAbsoluteExpression(ExprValue))
6282 return true;
6283
6284 if (parseEOL())
6285 return true;
6286
6287 switch (DirKind) {
6288 default:
6289 llvm_unreachable("unsupported directive");
6290 case DK_ELSEIF:
6291 break;
6292 case DK_ELSEIFE:
6293 ExprValue = ExprValue == 0;
6294 break;
6295 }
6296
6297 TheCondState.CondMet = ExprValue;
6298 TheCondState.Ignore = !TheCondState.CondMet;
6299 }
6300
6301 return false;
6302 }
6303
6304 /// parseDirectiveElseIfb
6305 /// ::= elseifb textitem
parseDirectiveElseIfb(SMLoc DirectiveLoc,bool ExpectBlank)6306 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6307 if (TheCondState.TheCond != AsmCond::IfCond &&
6308 TheCondState.TheCond != AsmCond::ElseIfCond)
6309 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6310 " if or an elseif");
6311 TheCondState.TheCond = AsmCond::ElseIfCond;
6312
6313 bool LastIgnoreState = false;
6314 if (!TheCondStack.empty())
6315 LastIgnoreState = TheCondStack.back().Ignore;
6316 if (LastIgnoreState || TheCondState.CondMet) {
6317 TheCondState.Ignore = true;
6318 eatToEndOfStatement();
6319 } else {
6320 std::string Str;
6321 if (parseTextItem(Str)) {
6322 if (ExpectBlank)
6323 return TokError("expected text item parameter for 'elseifb' directive");
6324 return TokError("expected text item parameter for 'elseifnb' directive");
6325 }
6326
6327 if (parseEOL())
6328 return true;
6329
6330 TheCondState.CondMet = ExpectBlank == Str.empty();
6331 TheCondState.Ignore = !TheCondState.CondMet;
6332 }
6333
6334 return false;
6335 }
6336
6337 /// parseDirectiveElseIfdef
6338 /// ::= elseifdef symbol
6339 /// | elseifdef variable
parseDirectiveElseIfdef(SMLoc DirectiveLoc,bool expect_defined)6340 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
6341 bool expect_defined) {
6342 if (TheCondState.TheCond != AsmCond::IfCond &&
6343 TheCondState.TheCond != AsmCond::ElseIfCond)
6344 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6345 " if or an elseif");
6346 TheCondState.TheCond = AsmCond::ElseIfCond;
6347
6348 bool LastIgnoreState = false;
6349 if (!TheCondStack.empty())
6350 LastIgnoreState = TheCondStack.back().Ignore;
6351 if (LastIgnoreState || TheCondState.CondMet) {
6352 TheCondState.Ignore = true;
6353 eatToEndOfStatement();
6354 } else {
6355 bool is_defined = false;
6356 MCRegister Reg;
6357 SMLoc StartLoc, EndLoc;
6358 is_defined =
6359 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6360 if (!is_defined) {
6361 StringRef Name;
6362 if (check(parseIdentifier(Name),
6363 "expected identifier after 'elseifdef'") ||
6364 parseEOL())
6365 return true;
6366
6367 if (BuiltinSymbolMap.contains(Name.lower())) {
6368 is_defined = true;
6369 } else if (Variables.contains(Name.lower())) {
6370 is_defined = true;
6371 } else {
6372 MCSymbol *Sym = getContext().lookupSymbol(Name);
6373 is_defined = (Sym && !Sym->isUndefined(false));
6374 }
6375 }
6376
6377 TheCondState.CondMet = (is_defined == expect_defined);
6378 TheCondState.Ignore = !TheCondState.CondMet;
6379 }
6380
6381 return false;
6382 }
6383
6384 /// parseDirectiveElseIfidn
6385 /// ::= elseifidn textitem, textitem
parseDirectiveElseIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6386 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6387 bool CaseInsensitive) {
6388 if (TheCondState.TheCond != AsmCond::IfCond &&
6389 TheCondState.TheCond != AsmCond::ElseIfCond)
6390 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an"
6391 " if or an elseif");
6392 TheCondState.TheCond = AsmCond::ElseIfCond;
6393
6394 bool LastIgnoreState = false;
6395 if (!TheCondStack.empty())
6396 LastIgnoreState = TheCondStack.back().Ignore;
6397 if (LastIgnoreState || TheCondState.CondMet) {
6398 TheCondState.Ignore = true;
6399 eatToEndOfStatement();
6400 } else {
6401 std::string String1, String2;
6402
6403 if (parseTextItem(String1)) {
6404 if (ExpectEqual)
6405 return TokError(
6406 "expected text item parameter for 'elseifidn' directive");
6407 return TokError("expected text item parameter for 'elseifdif' directive");
6408 }
6409
6410 if (Lexer.isNot(AsmToken::Comma)) {
6411 if (ExpectEqual)
6412 return TokError(
6413 "expected comma after first string for 'elseifidn' directive");
6414 return TokError(
6415 "expected comma after first string for 'elseifdif' directive");
6416 }
6417 Lex();
6418
6419 if (parseTextItem(String2)) {
6420 if (ExpectEqual)
6421 return TokError(
6422 "expected text item parameter for 'elseifidn' directive");
6423 return TokError("expected text item parameter for 'elseifdif' directive");
6424 }
6425
6426 if (CaseInsensitive)
6427 TheCondState.CondMet =
6428 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6429 else
6430 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6431 TheCondState.Ignore = !TheCondState.CondMet;
6432 }
6433
6434 return false;
6435 }
6436
6437 /// parseDirectiveElse
6438 /// ::= else
parseDirectiveElse(SMLoc DirectiveLoc)6439 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
6440 if (parseEOL())
6441 return true;
6442
6443 if (TheCondState.TheCond != AsmCond::IfCond &&
6444 TheCondState.TheCond != AsmCond::ElseIfCond)
6445 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if"
6446 " or an elseif");
6447 TheCondState.TheCond = AsmCond::ElseCond;
6448 bool LastIgnoreState = false;
6449 if (!TheCondStack.empty())
6450 LastIgnoreState = TheCondStack.back().Ignore;
6451 if (LastIgnoreState || TheCondState.CondMet)
6452 TheCondState.Ignore = true;
6453 else
6454 TheCondState.Ignore = false;
6455
6456 return false;
6457 }
6458
6459 /// parseDirectiveEnd
6460 /// ::= end
parseDirectiveEnd(SMLoc DirectiveLoc)6461 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
6462 if (parseEOL())
6463 return true;
6464
6465 while (Lexer.isNot(AsmToken::Eof))
6466 Lexer.Lex();
6467
6468 return false;
6469 }
6470
6471 /// parseDirectiveError
6472 /// ::= .err [message]
parseDirectiveError(SMLoc DirectiveLoc)6473 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) {
6474 if (!TheCondStack.empty()) {
6475 if (TheCondStack.back().Ignore) {
6476 eatToEndOfStatement();
6477 return false;
6478 }
6479 }
6480
6481 std::string Message = ".err directive invoked in source file";
6482 if (Lexer.isNot(AsmToken::EndOfStatement))
6483 Message = parseStringTo(AsmToken::EndOfStatement);
6484 Lex();
6485
6486 return Error(DirectiveLoc, Message);
6487 }
6488
6489 /// parseDirectiveErrorIfb
6490 /// ::= .errb textitem[, message]
parseDirectiveErrorIfb(SMLoc DirectiveLoc,bool ExpectBlank)6491 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) {
6492 if (!TheCondStack.empty()) {
6493 if (TheCondStack.back().Ignore) {
6494 eatToEndOfStatement();
6495 return false;
6496 }
6497 }
6498
6499 std::string Text;
6500 if (parseTextItem(Text))
6501 return Error(getTok().getLoc(), "missing text item in '.errb' directive");
6502
6503 std::string Message = ".errb directive invoked in source file";
6504 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6505 if (parseToken(AsmToken::Comma))
6506 return addErrorSuffix(" in '.errb' directive");
6507 Message = parseStringTo(AsmToken::EndOfStatement);
6508 }
6509 Lex();
6510
6511 if (Text.empty() == ExpectBlank)
6512 return Error(DirectiveLoc, Message);
6513 return false;
6514 }
6515
6516 /// parseDirectiveErrorIfdef
6517 /// ::= .errdef name[, message]
parseDirectiveErrorIfdef(SMLoc DirectiveLoc,bool ExpectDefined)6518 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
6519 bool ExpectDefined) {
6520 if (!TheCondStack.empty()) {
6521 if (TheCondStack.back().Ignore) {
6522 eatToEndOfStatement();
6523 return false;
6524 }
6525 }
6526
6527 bool IsDefined = false;
6528 MCRegister Reg;
6529 SMLoc StartLoc, EndLoc;
6530 IsDefined =
6531 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess();
6532 if (!IsDefined) {
6533 StringRef Name;
6534 if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
6535 return true;
6536
6537 if (BuiltinSymbolMap.contains(Name.lower())) {
6538 IsDefined = true;
6539 } else if (Variables.contains(Name.lower())) {
6540 IsDefined = true;
6541 } else {
6542 MCSymbol *Sym = getContext().lookupSymbol(Name);
6543 IsDefined = (Sym && !Sym->isUndefined(false));
6544 }
6545 }
6546
6547 std::string Message = ".errdef directive invoked in source file";
6548 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6549 if (parseToken(AsmToken::Comma))
6550 return addErrorSuffix(" in '.errdef' directive");
6551 Message = parseStringTo(AsmToken::EndOfStatement);
6552 }
6553 Lex();
6554
6555 if (IsDefined == ExpectDefined)
6556 return Error(DirectiveLoc, Message);
6557 return false;
6558 }
6559
6560 /// parseDirectiveErrorIfidn
6561 /// ::= .erridn textitem, textitem[, message]
parseDirectiveErrorIfidn(SMLoc DirectiveLoc,bool ExpectEqual,bool CaseInsensitive)6562 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
6563 bool CaseInsensitive) {
6564 if (!TheCondStack.empty()) {
6565 if (TheCondStack.back().Ignore) {
6566 eatToEndOfStatement();
6567 return false;
6568 }
6569 }
6570
6571 std::string String1, String2;
6572
6573 if (parseTextItem(String1)) {
6574 if (ExpectEqual)
6575 return TokError("expected string parameter for '.erridn' directive");
6576 return TokError("expected string parameter for '.errdif' directive");
6577 }
6578
6579 if (Lexer.isNot(AsmToken::Comma)) {
6580 if (ExpectEqual)
6581 return TokError(
6582 "expected comma after first string for '.erridn' directive");
6583 return TokError(
6584 "expected comma after first string for '.errdif' directive");
6585 }
6586 Lex();
6587
6588 if (parseTextItem(String2)) {
6589 if (ExpectEqual)
6590 return TokError("expected string parameter for '.erridn' directive");
6591 return TokError("expected string parameter for '.errdif' directive");
6592 }
6593
6594 std::string Message;
6595 if (ExpectEqual)
6596 Message = ".erridn directive invoked in source file";
6597 else
6598 Message = ".errdif directive invoked in source file";
6599 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6600 if (parseToken(AsmToken::Comma))
6601 return addErrorSuffix(" in '.erridn' directive");
6602 Message = parseStringTo(AsmToken::EndOfStatement);
6603 }
6604 Lex();
6605
6606 if (CaseInsensitive)
6607 TheCondState.CondMet =
6608 ExpectEqual == (StringRef(String1).equals_insensitive(String2));
6609 else
6610 TheCondState.CondMet = ExpectEqual == (String1 == String2);
6611 TheCondState.Ignore = !TheCondState.CondMet;
6612
6613 if ((CaseInsensitive &&
6614 ExpectEqual == StringRef(String1).equals_insensitive(String2)) ||
6615 (ExpectEqual == (String1 == String2)))
6616 return Error(DirectiveLoc, Message);
6617 return false;
6618 }
6619
6620 /// parseDirectiveErrorIfe
6621 /// ::= .erre expression[, message]
parseDirectiveErrorIfe(SMLoc DirectiveLoc,bool ExpectZero)6622 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) {
6623 if (!TheCondStack.empty()) {
6624 if (TheCondStack.back().Ignore) {
6625 eatToEndOfStatement();
6626 return false;
6627 }
6628 }
6629
6630 int64_t ExprValue;
6631 if (parseAbsoluteExpression(ExprValue))
6632 return addErrorSuffix(" in '.erre' directive");
6633
6634 std::string Message = ".erre directive invoked in source file";
6635 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6636 if (parseToken(AsmToken::Comma))
6637 return addErrorSuffix(" in '.erre' directive");
6638 Message = parseStringTo(AsmToken::EndOfStatement);
6639 }
6640 Lex();
6641
6642 if ((ExprValue == 0) == ExpectZero)
6643 return Error(DirectiveLoc, Message);
6644 return false;
6645 }
6646
6647 /// parseDirectiveEndIf
6648 /// ::= .endif
parseDirectiveEndIf(SMLoc DirectiveLoc)6649 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
6650 if (parseEOL())
6651 return true;
6652
6653 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
6654 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
6655 "an .if or .else");
6656 if (!TheCondStack.empty()) {
6657 TheCondState = TheCondStack.back();
6658 TheCondStack.pop_back();
6659 }
6660
6661 return false;
6662 }
6663
initializeDirectiveKindMap()6664 void MasmParser::initializeDirectiveKindMap() {
6665 DirectiveKindMap["="] = DK_ASSIGN;
6666 DirectiveKindMap["equ"] = DK_EQU;
6667 DirectiveKindMap["textequ"] = DK_TEXTEQU;
6668 // DirectiveKindMap[".ascii"] = DK_ASCII;
6669 // DirectiveKindMap[".asciz"] = DK_ASCIZ;
6670 // DirectiveKindMap[".string"] = DK_STRING;
6671 DirectiveKindMap["byte"] = DK_BYTE;
6672 DirectiveKindMap["sbyte"] = DK_SBYTE;
6673 DirectiveKindMap["word"] = DK_WORD;
6674 DirectiveKindMap["sword"] = DK_SWORD;
6675 DirectiveKindMap["dword"] = DK_DWORD;
6676 DirectiveKindMap["sdword"] = DK_SDWORD;
6677 DirectiveKindMap["fword"] = DK_FWORD;
6678 DirectiveKindMap["qword"] = DK_QWORD;
6679 DirectiveKindMap["sqword"] = DK_SQWORD;
6680 DirectiveKindMap["real4"] = DK_REAL4;
6681 DirectiveKindMap["real8"] = DK_REAL8;
6682 DirectiveKindMap["real10"] = DK_REAL10;
6683 DirectiveKindMap["align"] = DK_ALIGN;
6684 DirectiveKindMap["even"] = DK_EVEN;
6685 DirectiveKindMap["org"] = DK_ORG;
6686 DirectiveKindMap["extern"] = DK_EXTERN;
6687 DirectiveKindMap["extrn"] = DK_EXTERN;
6688 DirectiveKindMap["public"] = DK_PUBLIC;
6689 // DirectiveKindMap[".comm"] = DK_COMM;
6690 DirectiveKindMap["comment"] = DK_COMMENT;
6691 DirectiveKindMap["include"] = DK_INCLUDE;
6692 DirectiveKindMap["repeat"] = DK_REPEAT;
6693 DirectiveKindMap["rept"] = DK_REPEAT;
6694 DirectiveKindMap["while"] = DK_WHILE;
6695 DirectiveKindMap["for"] = DK_FOR;
6696 DirectiveKindMap["irp"] = DK_FOR;
6697 DirectiveKindMap["forc"] = DK_FORC;
6698 DirectiveKindMap["irpc"] = DK_FORC;
6699 DirectiveKindMap["if"] = DK_IF;
6700 DirectiveKindMap["ife"] = DK_IFE;
6701 DirectiveKindMap["ifb"] = DK_IFB;
6702 DirectiveKindMap["ifnb"] = DK_IFNB;
6703 DirectiveKindMap["ifdef"] = DK_IFDEF;
6704 DirectiveKindMap["ifndef"] = DK_IFNDEF;
6705 DirectiveKindMap["ifdif"] = DK_IFDIF;
6706 DirectiveKindMap["ifdifi"] = DK_IFDIFI;
6707 DirectiveKindMap["ifidn"] = DK_IFIDN;
6708 DirectiveKindMap["ifidni"] = DK_IFIDNI;
6709 DirectiveKindMap["elseif"] = DK_ELSEIF;
6710 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
6711 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
6712 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
6713 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
6714 DirectiveKindMap["else"] = DK_ELSE;
6715 DirectiveKindMap["end"] = DK_END;
6716 DirectiveKindMap["endif"] = DK_ENDIF;
6717 // DirectiveKindMap[".file"] = DK_FILE;
6718 // DirectiveKindMap[".line"] = DK_LINE;
6719 // DirectiveKindMap[".loc"] = DK_LOC;
6720 // DirectiveKindMap[".stabs"] = DK_STABS;
6721 // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
6722 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
6723 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
6724 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
6725 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
6726 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
6727 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
6728 // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
6729 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
6730 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
6731 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
6732 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
6733 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
6734 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
6735 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
6736 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
6737 // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
6738 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
6739 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
6740 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
6741 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
6742 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
6743 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
6744 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
6745 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
6746 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
6747 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
6748 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
6749 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
6750 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
6751 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
6752 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
6753 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
6754 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
6755 DirectiveKindMap["macro"] = DK_MACRO;
6756 DirectiveKindMap["exitm"] = DK_EXITM;
6757 DirectiveKindMap["endm"] = DK_ENDM;
6758 DirectiveKindMap["purge"] = DK_PURGE;
6759 DirectiveKindMap[".err"] = DK_ERR;
6760 DirectiveKindMap[".errb"] = DK_ERRB;
6761 DirectiveKindMap[".errnb"] = DK_ERRNB;
6762 DirectiveKindMap[".errdef"] = DK_ERRDEF;
6763 DirectiveKindMap[".errndef"] = DK_ERRNDEF;
6764 DirectiveKindMap[".errdif"] = DK_ERRDIF;
6765 DirectiveKindMap[".errdifi"] = DK_ERRDIFI;
6766 DirectiveKindMap[".erridn"] = DK_ERRIDN;
6767 DirectiveKindMap[".erridni"] = DK_ERRIDNI;
6768 DirectiveKindMap[".erre"] = DK_ERRE;
6769 DirectiveKindMap[".errnz"] = DK_ERRNZ;
6770 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME;
6771 DirectiveKindMap[".pushreg"] = DK_PUSHREG;
6772 DirectiveKindMap[".savereg"] = DK_SAVEREG;
6773 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128;
6774 DirectiveKindMap[".setframe"] = DK_SETFRAME;
6775 DirectiveKindMap[".radix"] = DK_RADIX;
6776 DirectiveKindMap["db"] = DK_DB;
6777 DirectiveKindMap["dd"] = DK_DD;
6778 DirectiveKindMap["df"] = DK_DF;
6779 DirectiveKindMap["dq"] = DK_DQ;
6780 DirectiveKindMap["dw"] = DK_DW;
6781 DirectiveKindMap["echo"] = DK_ECHO;
6782 DirectiveKindMap["struc"] = DK_STRUCT;
6783 DirectiveKindMap["struct"] = DK_STRUCT;
6784 DirectiveKindMap["union"] = DK_UNION;
6785 DirectiveKindMap["ends"] = DK_ENDS;
6786 }
6787
isMacroLikeDirective()6788 bool MasmParser::isMacroLikeDirective() {
6789 if (getLexer().is(AsmToken::Identifier)) {
6790 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier())
6791 .CasesLower("repeat", "rept", true)
6792 .CaseLower("while", true)
6793 .CasesLower("for", "irp", true)
6794 .CasesLower("forc", "irpc", true)
6795 .Default(false);
6796 if (IsMacroLike)
6797 return true;
6798 }
6799 if (peekTok().is(AsmToken::Identifier) &&
6800 peekTok().getIdentifier().equals_insensitive("macro"))
6801 return true;
6802
6803 return false;
6804 }
6805
parseMacroLikeBody(SMLoc DirectiveLoc)6806 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
6807 AsmToken EndToken, StartToken = getTok();
6808
6809 unsigned NestLevel = 0;
6810 while (true) {
6811 // Check whether we have reached the end of the file.
6812 if (getLexer().is(AsmToken::Eof)) {
6813 printError(DirectiveLoc, "no matching 'endm' in definition");
6814 return nullptr;
6815 }
6816
6817 if (isMacroLikeDirective())
6818 ++NestLevel;
6819
6820 // Otherwise, check whether we have reached the endm.
6821 if (Lexer.is(AsmToken::Identifier) &&
6822 getTok().getIdentifier().equals_insensitive("endm")) {
6823 if (NestLevel == 0) {
6824 EndToken = getTok();
6825 Lex();
6826 if (Lexer.isNot(AsmToken::EndOfStatement)) {
6827 printError(getTok().getLoc(), "unexpected token in 'endm' directive");
6828 return nullptr;
6829 }
6830 break;
6831 }
6832 --NestLevel;
6833 }
6834
6835 // Otherwise, scan till the end of the statement.
6836 eatToEndOfStatement();
6837 }
6838
6839 const char *BodyStart = StartToken.getLoc().getPointer();
6840 const char *BodyEnd = EndToken.getLoc().getPointer();
6841 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
6842
6843 // We Are Anonymous.
6844 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters());
6845 return &MacroLikeBodies.back();
6846 }
6847
expandStatement(SMLoc Loc)6848 bool MasmParser::expandStatement(SMLoc Loc) {
6849 std::string Body = parseStringTo(AsmToken::EndOfStatement);
6850 SMLoc EndLoc = getTok().getLoc();
6851
6852 MCAsmMacroParameters Parameters;
6853 MCAsmMacroArguments Arguments;
6854
6855 StringMap<std::string> BuiltinValues;
6856 for (const auto &S : BuiltinSymbolMap) {
6857 const BuiltinSymbol &Sym = S.getValue();
6858 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) {
6859 BuiltinValues[S.getKey().lower()] = std::move(*Text);
6860 }
6861 }
6862 for (const auto &B : BuiltinValues) {
6863 MCAsmMacroParameter P;
6864 MCAsmMacroArgument A;
6865 P.Name = B.getKey();
6866 P.Required = true;
6867 A.push_back(AsmToken(AsmToken::String, B.getValue()));
6868
6869 Parameters.push_back(std::move(P));
6870 Arguments.push_back(std::move(A));
6871 }
6872
6873 for (const auto &V : Variables) {
6874 const Variable &Var = V.getValue();
6875 if (Var.IsText) {
6876 MCAsmMacroParameter P;
6877 MCAsmMacroArgument A;
6878 P.Name = Var.Name;
6879 P.Required = true;
6880 A.push_back(AsmToken(AsmToken::String, Var.TextValue));
6881
6882 Parameters.push_back(std::move(P));
6883 Arguments.push_back(std::move(A));
6884 }
6885 }
6886 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters);
6887 MCAsmMacro M = MacroLikeBodies.back();
6888
6889 // Expand the statement in a new buffer.
6890 SmallString<80> Buf;
6891 raw_svector_ostream OS(Buf);
6892 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc))
6893 return true;
6894 std::unique_ptr<MemoryBuffer> Expansion =
6895 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>");
6896
6897 // Jump to the expanded statement and prime the lexer.
6898 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc);
6899 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6900 EndStatementAtEOFStack.push_back(false);
6901 Lex();
6902 return false;
6903 }
6904
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,raw_svector_ostream & OS)6905 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6906 raw_svector_ostream &OS) {
6907 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS);
6908 }
instantiateMacroLikeBody(MCAsmMacro * M,SMLoc DirectiveLoc,SMLoc ExitLoc,raw_svector_ostream & OS)6909 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
6910 SMLoc ExitLoc,
6911 raw_svector_ostream &OS) {
6912 OS << "endm\n";
6913
6914 std::unique_ptr<MemoryBuffer> Instantiation =
6915 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
6916
6917 // Create the macro instantiation object and add to the current macro
6918 // instantiation stack.
6919 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer,
6920 ExitLoc, TheCondStack.size()};
6921 ActiveMacros.push_back(MI);
6922
6923 // Jump to the macro instantiation and prime the lexer.
6924 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc());
6925 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
6926 EndStatementAtEOFStack.push_back(true);
6927 Lex();
6928 }
6929
6930 /// parseDirectiveRepeat
6931 /// ::= ("repeat" | "rept") count
6932 /// body
6933 /// endm
parseDirectiveRepeat(SMLoc DirectiveLoc,StringRef Dir)6934 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) {
6935 const MCExpr *CountExpr;
6936 SMLoc CountLoc = getTok().getLoc();
6937 if (parseExpression(CountExpr))
6938 return true;
6939
6940 int64_t Count;
6941 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) {
6942 return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
6943 }
6944
6945 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL())
6946 return true;
6947
6948 // Lex the repeat definition.
6949 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6950 if (!M)
6951 return true;
6952
6953 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6954 // to hold the macro body with substitutions.
6955 SmallString<256> Buf;
6956 raw_svector_ostream OS(Buf);
6957 while (Count--) {
6958 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6959 getTok().getLoc()))
6960 return true;
6961 }
6962 instantiateMacroLikeBody(M, DirectiveLoc, OS);
6963
6964 return false;
6965 }
6966
6967 /// parseDirectiveWhile
6968 /// ::= "while" expression
6969 /// body
6970 /// endm
parseDirectiveWhile(SMLoc DirectiveLoc)6971 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) {
6972 const MCExpr *CondExpr;
6973 SMLoc CondLoc = getTok().getLoc();
6974 if (parseExpression(CondExpr))
6975 return true;
6976
6977 // Lex the repeat definition.
6978 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
6979 if (!M)
6980 return true;
6981
6982 // Macro instantiation is lexical, unfortunately. We construct a new buffer
6983 // to hold the macro body with substitutions.
6984 SmallString<256> Buf;
6985 raw_svector_ostream OS(Buf);
6986 int64_t Condition;
6987 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr()))
6988 return Error(CondLoc, "expected absolute expression in 'while' directive");
6989 if (Condition) {
6990 // Instantiate the macro, then resume at this directive to recheck the
6991 // condition.
6992 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals,
6993 getTok().getLoc()))
6994 return true;
6995 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS);
6996 }
6997
6998 return false;
6999 }
7000
7001 /// parseDirectiveFor
7002 /// ::= ("for" | "irp") symbol [":" qualifier], <values>
7003 /// body
7004 /// endm
parseDirectiveFor(SMLoc DirectiveLoc,StringRef Dir)7005 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) {
7006 MCAsmMacroParameter Parameter;
7007 MCAsmMacroArguments A;
7008 if (check(parseIdentifier(Parameter.Name),
7009 "expected identifier in '" + Dir + "' directive"))
7010 return true;
7011
7012 // Parse optional qualifier (default value, or "req")
7013 if (parseOptionalToken(AsmToken::Colon)) {
7014 if (parseOptionalToken(AsmToken::Equal)) {
7015 // Default value
7016 SMLoc ParamLoc;
7017
7018 ParamLoc = Lexer.getLoc();
7019 if (parseMacroArgument(nullptr, Parameter.Value))
7020 return true;
7021 } else {
7022 SMLoc QualLoc;
7023 StringRef Qualifier;
7024
7025 QualLoc = Lexer.getLoc();
7026 if (parseIdentifier(Qualifier))
7027 return Error(QualLoc, "missing parameter qualifier for "
7028 "'" +
7029 Parameter.Name + "' in '" + Dir +
7030 "' directive");
7031
7032 if (Qualifier.equals_insensitive("req"))
7033 Parameter.Required = true;
7034 else
7035 return Error(QualLoc,
7036 Qualifier + " is not a valid parameter qualifier for '" +
7037 Parameter.Name + "' in '" + Dir + "' directive");
7038 }
7039 }
7040
7041 if (parseToken(AsmToken::Comma,
7042 "expected comma in '" + Dir + "' directive") ||
7043 parseToken(AsmToken::Less,
7044 "values in '" + Dir +
7045 "' directive must be enclosed in angle brackets"))
7046 return true;
7047
7048 while (true) {
7049 A.emplace_back();
7050 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater))
7051 return addErrorSuffix(" in arguments for '" + Dir + "' directive");
7052
7053 // If we see a comma, continue, and allow line continuation.
7054 if (!parseOptionalToken(AsmToken::Comma))
7055 break;
7056 parseOptionalToken(AsmToken::EndOfStatement);
7057 }
7058
7059 if (parseToken(AsmToken::Greater,
7060 "values in '" + Dir +
7061 "' directive must be enclosed in angle brackets") ||
7062 parseEOL())
7063 return true;
7064
7065 // Lex the for definition.
7066 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7067 if (!M)
7068 return true;
7069
7070 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7071 // to hold the macro body with substitutions.
7072 SmallString<256> Buf;
7073 raw_svector_ostream OS(Buf);
7074
7075 for (const MCAsmMacroArgument &Arg : A) {
7076 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7077 return true;
7078 }
7079
7080 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7081
7082 return false;
7083 }
7084
7085 /// parseDirectiveForc
7086 /// ::= ("forc" | "irpc") symbol, <string>
7087 /// body
7088 /// endm
parseDirectiveForc(SMLoc DirectiveLoc,StringRef Directive)7089 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) {
7090 MCAsmMacroParameter Parameter;
7091
7092 std::string Argument;
7093 if (check(parseIdentifier(Parameter.Name),
7094 "expected identifier in '" + Directive + "' directive") ||
7095 parseToken(AsmToken::Comma,
7096 "expected comma in '" + Directive + "' directive"))
7097 return true;
7098 if (parseAngleBracketString(Argument)) {
7099 // Match ml64.exe; treat all characters to end of statement as a string,
7100 // ignoring comment markers, then discard anything following a space (using
7101 // the C locale).
7102 Argument = parseStringTo(AsmToken::EndOfStatement);
7103 if (getTok().is(AsmToken::EndOfStatement))
7104 Argument += getTok().getString();
7105 size_t End = 0;
7106 for (; End < Argument.size(); ++End) {
7107 if (isSpace(Argument[End]))
7108 break;
7109 }
7110 Argument.resize(End);
7111 }
7112 if (parseEOL())
7113 return true;
7114
7115 // Lex the irpc definition.
7116 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc);
7117 if (!M)
7118 return true;
7119
7120 // Macro instantiation is lexical, unfortunately. We construct a new buffer
7121 // to hold the macro body with substitutions.
7122 SmallString<256> Buf;
7123 raw_svector_ostream OS(Buf);
7124
7125 StringRef Values(Argument);
7126 for (std::size_t I = 0, End = Values.size(); I != End; ++I) {
7127 MCAsmMacroArgument Arg;
7128 Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1));
7129
7130 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc()))
7131 return true;
7132 }
7133
7134 instantiateMacroLikeBody(M, DirectiveLoc, OS);
7135
7136 return false;
7137 }
7138
parseDirectiveMSEmit(SMLoc IDLoc,ParseStatementInfo & Info,size_t Len)7139 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info,
7140 size_t Len) {
7141 const MCExpr *Value;
7142 SMLoc ExprLoc = getLexer().getLoc();
7143 if (parseExpression(Value))
7144 return true;
7145 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7146 if (!MCE)
7147 return Error(ExprLoc, "unexpected expression in _emit");
7148 uint64_t IntValue = MCE->getValue();
7149 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue))
7150 return Error(ExprLoc, "literal value out of range for directive");
7151
7152 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len);
7153 return false;
7154 }
7155
parseDirectiveMSAlign(SMLoc IDLoc,ParseStatementInfo & Info)7156 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) {
7157 const MCExpr *Value;
7158 SMLoc ExprLoc = getLexer().getLoc();
7159 if (parseExpression(Value))
7160 return true;
7161 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
7162 if (!MCE)
7163 return Error(ExprLoc, "unexpected expression in align");
7164 uint64_t IntValue = MCE->getValue();
7165 if (!isPowerOf2_64(IntValue))
7166 return Error(ExprLoc, "literal value not a power of two greater then zero");
7167
7168 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue));
7169 return false;
7170 }
7171
parseDirectiveRadix(SMLoc DirectiveLoc)7172 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) {
7173 const SMLoc Loc = getLexer().getLoc();
7174 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement);
7175 StringRef RadixString = StringRef(RadixStringRaw).trim();
7176 unsigned Radix;
7177 if (RadixString.getAsInteger(10, Radix)) {
7178 return Error(Loc,
7179 "radix must be a decimal number in the range 2 to 16; was " +
7180 RadixString);
7181 }
7182 if (Radix < 2 || Radix > 16)
7183 return Error(Loc, "radix must be in the range 2 to 16; was " +
7184 std::to_string(Radix));
7185 getLexer().setMasmDefaultRadix(Radix);
7186 return false;
7187 }
7188
7189 /// parseDirectiveEcho
7190 /// ::= "echo" message
parseDirectiveEcho(SMLoc DirectiveLoc)7191 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) {
7192 std::string Message = parseStringTo(AsmToken::EndOfStatement);
7193 llvm::outs() << Message;
7194 if (!StringRef(Message).ends_with("\n"))
7195 llvm::outs() << '\n';
7196 return false;
7197 }
7198
7199 // We are comparing pointers, but the pointers are relative to a single string.
7200 // Thus, this should always be deterministic.
rewritesSort(const AsmRewrite * AsmRewriteA,const AsmRewrite * AsmRewriteB)7201 static int rewritesSort(const AsmRewrite *AsmRewriteA,
7202 const AsmRewrite *AsmRewriteB) {
7203 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer())
7204 return -1;
7205 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
7206 return 1;
7207
7208 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
7209 // rewrite to the same location. Make sure the SizeDirective rewrite is
7210 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
7211 // ensures the sort algorithm is stable.
7212 if (AsmRewritePrecedence[AsmRewriteA->Kind] >
7213 AsmRewritePrecedence[AsmRewriteB->Kind])
7214 return -1;
7215
7216 if (AsmRewritePrecedence[AsmRewriteA->Kind] <
7217 AsmRewritePrecedence[AsmRewriteB->Kind])
7218 return 1;
7219 llvm_unreachable("Unstable rewrite sort.");
7220 }
7221
defineMacro(StringRef Name,StringRef Value)7222 bool MasmParser::defineMacro(StringRef Name, StringRef Value) {
7223 Variable &Var = Variables[Name.lower()];
7224 if (Var.Name.empty()) {
7225 Var.Name = Name;
7226 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) {
7227 return Error(SMLoc(), "invalid variable redefinition");
7228 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION &&
7229 Warning(SMLoc(), "redefining '" + Name +
7230 "', already defined on the command line")) {
7231 return true;
7232 }
7233 Var.Redefinable = Variable::WARN_ON_REDEFINITION;
7234 Var.IsText = true;
7235 Var.TextValue = Value.str();
7236 return false;
7237 }
7238
lookUpField(StringRef Name,AsmFieldInfo & Info) const7239 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const {
7240 const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
7241 const StringRef Base = BaseMember.first, Member = BaseMember.second;
7242 return lookUpField(Base, Member, Info);
7243 }
7244
lookUpField(StringRef Base,StringRef Member,AsmFieldInfo & Info) const7245 bool MasmParser::lookUpField(StringRef Base, StringRef Member,
7246 AsmFieldInfo &Info) const {
7247 if (Base.empty())
7248 return true;
7249
7250 AsmFieldInfo BaseInfo;
7251 if (Base.contains('.') && !lookUpField(Base, BaseInfo))
7252 Base = BaseInfo.Type.Name;
7253
7254 auto StructIt = Structs.find(Base.lower());
7255 auto TypeIt = KnownType.find(Base.lower());
7256 if (TypeIt != KnownType.end()) {
7257 StructIt = Structs.find(TypeIt->second.Name.lower());
7258 }
7259 if (StructIt != Structs.end())
7260 return lookUpField(StructIt->second, Member, Info);
7261
7262 return true;
7263 }
7264
lookUpField(const StructInfo & Structure,StringRef Member,AsmFieldInfo & Info) const7265 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
7266 AsmFieldInfo &Info) const {
7267 if (Member.empty()) {
7268 Info.Type.Name = Structure.Name;
7269 Info.Type.Size = Structure.Size;
7270 Info.Type.ElementSize = Structure.Size;
7271 Info.Type.Length = 1;
7272 return false;
7273 }
7274
7275 std::pair<StringRef, StringRef> Split = Member.split('.');
7276 const StringRef FieldName = Split.first, FieldMember = Split.second;
7277
7278 auto StructIt = Structs.find(FieldName.lower());
7279 if (StructIt != Structs.end())
7280 return lookUpField(StructIt->second, FieldMember, Info);
7281
7282 auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
7283 if (FieldIt == Structure.FieldsByName.end())
7284 return true;
7285
7286 const FieldInfo &Field = Structure.Fields[FieldIt->second];
7287 if (FieldMember.empty()) {
7288 Info.Offset += Field.Offset;
7289 Info.Type.Size = Field.SizeOf;
7290 Info.Type.ElementSize = Field.Type;
7291 Info.Type.Length = Field.LengthOf;
7292 if (Field.Contents.FT == FT_STRUCT)
7293 Info.Type.Name = Field.Contents.StructInfo.Structure.Name;
7294 else
7295 Info.Type.Name = "";
7296 return false;
7297 }
7298
7299 if (Field.Contents.FT != FT_STRUCT)
7300 return true;
7301 const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
7302
7303 if (lookUpField(StructInfo.Structure, FieldMember, Info))
7304 return true;
7305
7306 Info.Offset += Field.Offset;
7307 return false;
7308 }
7309
lookUpType(StringRef Name,AsmTypeInfo & Info) const7310 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const {
7311 unsigned Size = StringSwitch<unsigned>(Name)
7312 .CasesLower("byte", "db", "sbyte", 1)
7313 .CasesLower("word", "dw", "sword", 2)
7314 .CasesLower("dword", "dd", "sdword", 4)
7315 .CasesLower("fword", "df", 6)
7316 .CasesLower("qword", "dq", "sqword", 8)
7317 .CaseLower("real4", 4)
7318 .CaseLower("real8", 8)
7319 .CaseLower("real10", 10)
7320 .Default(0);
7321 if (Size) {
7322 Info.Name = Name;
7323 Info.ElementSize = Size;
7324 Info.Length = 1;
7325 Info.Size = Size;
7326 return false;
7327 }
7328
7329 auto StructIt = Structs.find(Name.lower());
7330 if (StructIt != Structs.end()) {
7331 const StructInfo &Structure = StructIt->second;
7332 Info.Name = Name;
7333 Info.ElementSize = Structure.Size;
7334 Info.Length = 1;
7335 Info.Size = Structure.Size;
7336 return false;
7337 }
7338
7339 return true;
7340 }
7341
parseMSInlineAsm(std::string & AsmString,unsigned & NumOutputs,unsigned & NumInputs,SmallVectorImpl<std::pair<void *,bool>> & OpDecls,SmallVectorImpl<std::string> & Constraints,SmallVectorImpl<std::string> & Clobbers,const MCInstrInfo * MII,const MCInstPrinter * IP,MCAsmParserSemaCallback & SI)7342 bool MasmParser::parseMSInlineAsm(
7343 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
7344 SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
7345 SmallVectorImpl<std::string> &Constraints,
7346 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
7347 const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
7348 SmallVector<void *, 4> InputDecls;
7349 SmallVector<void *, 4> OutputDecls;
7350 SmallVector<bool, 4> InputDeclsAddressOf;
7351 SmallVector<bool, 4> OutputDeclsAddressOf;
7352 SmallVector<std::string, 4> InputConstraints;
7353 SmallVector<std::string, 4> OutputConstraints;
7354 SmallVector<unsigned, 4> ClobberRegs;
7355
7356 SmallVector<AsmRewrite, 4> AsmStrRewrites;
7357
7358 // Prime the lexer.
7359 Lex();
7360
7361 // While we have input, parse each statement.
7362 unsigned InputIdx = 0;
7363 unsigned OutputIdx = 0;
7364 while (getLexer().isNot(AsmToken::Eof)) {
7365 // Parse curly braces marking block start/end.
7366 if (parseCurlyBlockScope(AsmStrRewrites))
7367 continue;
7368
7369 ParseStatementInfo Info(&AsmStrRewrites);
7370 bool StatementErr = parseStatement(Info, &SI);
7371
7372 if (StatementErr || Info.ParseError) {
7373 // Emit pending errors if any exist.
7374 printPendingErrors();
7375 return true;
7376 }
7377
7378 // No pending error should exist here.
7379 assert(!hasPendingError() && "unexpected error from parseStatement");
7380
7381 if (Info.Opcode == ~0U)
7382 continue;
7383
7384 const MCInstrDesc &Desc = MII->get(Info.Opcode);
7385
7386 // Build the list of clobbers, outputs and inputs.
7387 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
7388 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
7389
7390 // Register operand.
7391 if (Operand.isReg() && !Operand.needAddressOf() &&
7392 !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) {
7393 unsigned NumDefs = Desc.getNumDefs();
7394 // Clobber.
7395 if (NumDefs && Operand.getMCOperandNum() < NumDefs)
7396 ClobberRegs.push_back(Operand.getReg());
7397 continue;
7398 }
7399
7400 // Expr/Input or Output.
7401 StringRef SymName = Operand.getSymName();
7402 if (SymName.empty())
7403 continue;
7404
7405 void *OpDecl = Operand.getOpDecl();
7406 if (!OpDecl)
7407 continue;
7408
7409 StringRef Constraint = Operand.getConstraint();
7410 if (Operand.isImm()) {
7411 // Offset as immediate.
7412 if (Operand.isOffsetOfLocal())
7413 Constraint = "r";
7414 else
7415 Constraint = "i";
7416 }
7417
7418 bool isOutput = (i == 1) && Desc.mayStore();
7419 SMLoc Start = SMLoc::getFromPointer(SymName.data());
7420 if (isOutput) {
7421 ++InputIdx;
7422 OutputDecls.push_back(OpDecl);
7423 OutputDeclsAddressOf.push_back(Operand.needAddressOf());
7424 OutputConstraints.push_back(("=" + Constraint).str());
7425 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size());
7426 } else {
7427 InputDecls.push_back(OpDecl);
7428 InputDeclsAddressOf.push_back(Operand.needAddressOf());
7429 InputConstraints.push_back(Constraint.str());
7430 if (Desc.operands()[i - 1].isBranchTarget())
7431 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size());
7432 else
7433 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size());
7434 }
7435 }
7436
7437 // Consider implicit defs to be clobbers. Think of cpuid and push.
7438 llvm::append_range(ClobberRegs, Desc.implicit_defs());
7439 }
7440
7441 // Set the number of Outputs and Inputs.
7442 NumOutputs = OutputDecls.size();
7443 NumInputs = InputDecls.size();
7444
7445 // Set the unique clobbers.
7446 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end());
7447 ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end());
7448 Clobbers.assign(ClobberRegs.size(), std::string());
7449 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) {
7450 raw_string_ostream OS(Clobbers[I]);
7451 IP->printRegName(OS, ClobberRegs[I]);
7452 }
7453
7454 // Merge the various outputs and inputs. Output are expected first.
7455 if (NumOutputs || NumInputs) {
7456 unsigned NumExprs = NumOutputs + NumInputs;
7457 OpDecls.resize(NumExprs);
7458 Constraints.resize(NumExprs);
7459 for (unsigned i = 0; i < NumOutputs; ++i) {
7460 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]);
7461 Constraints[i] = OutputConstraints[i];
7462 }
7463 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
7464 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]);
7465 Constraints[j] = InputConstraints[i];
7466 }
7467 }
7468
7469 // Build the IR assembly string.
7470 std::string AsmStringIR;
7471 raw_string_ostream OS(AsmStringIR);
7472 StringRef ASMString =
7473 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
7474 const char *AsmStart = ASMString.begin();
7475 const char *AsmEnd = ASMString.end();
7476 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
7477 for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
7478 const AsmRewrite &AR = *I;
7479 // Check if this has already been covered by another rewrite...
7480 if (AR.Done)
7481 continue;
7482 AsmRewriteKind Kind = AR.Kind;
7483
7484 const char *Loc = AR.Loc.getPointer();
7485 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
7486
7487 // Emit everything up to the immediate/expression.
7488 if (unsigned Len = Loc - AsmStart)
7489 OS << StringRef(AsmStart, Len);
7490
7491 // Skip the original expression.
7492 if (Kind == AOK_Skip) {
7493 AsmStart = Loc + AR.Len;
7494 continue;
7495 }
7496
7497 unsigned AdditionalSkip = 0;
7498 // Rewrite expressions in $N notation.
7499 switch (Kind) {
7500 default:
7501 break;
7502 case AOK_IntelExpr:
7503 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression");
7504 if (AR.IntelExp.NeedBracs)
7505 OS << "[";
7506 if (AR.IntelExp.hasBaseReg())
7507 OS << AR.IntelExp.BaseReg;
7508 if (AR.IntelExp.hasIndexReg())
7509 OS << (AR.IntelExp.hasBaseReg() ? " + " : "")
7510 << AR.IntelExp.IndexReg;
7511 if (AR.IntelExp.Scale > 1)
7512 OS << " * $$" << AR.IntelExp.Scale;
7513 if (AR.IntelExp.hasOffset()) {
7514 if (AR.IntelExp.hasRegs())
7515 OS << " + ";
7516 // Fuse this rewrite with a rewrite of the offset name, if present.
7517 StringRef OffsetName = AR.IntelExp.OffsetName;
7518 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data());
7519 size_t OffsetLen = OffsetName.size();
7520 auto rewrite_it = std::find_if(
7521 I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) {
7522 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen &&
7523 (FusingAR.Kind == AOK_Input ||
7524 FusingAR.Kind == AOK_CallInput);
7525 });
7526 if (rewrite_it == AsmStrRewrites.end()) {
7527 OS << "offset " << OffsetName;
7528 } else if (rewrite_it->Kind == AOK_CallInput) {
7529 OS << "${" << InputIdx++ << ":P}";
7530 rewrite_it->Done = true;
7531 } else {
7532 OS << '$' << InputIdx++;
7533 rewrite_it->Done = true;
7534 }
7535 }
7536 if (AR.IntelExp.Imm || AR.IntelExp.emitImm())
7537 OS << (AR.IntelExp.emitImm() ? "$$" : " + $$") << AR.IntelExp.Imm;
7538 if (AR.IntelExp.NeedBracs)
7539 OS << "]";
7540 break;
7541 case AOK_Label:
7542 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label;
7543 break;
7544 case AOK_Input:
7545 OS << '$' << InputIdx++;
7546 break;
7547 case AOK_CallInput:
7548 OS << "${" << InputIdx++ << ":P}";
7549 break;
7550 case AOK_Output:
7551 OS << '$' << OutputIdx++;
7552 break;
7553 case AOK_SizeDirective:
7554 switch (AR.Val) {
7555 default: break;
7556 case 8: OS << "byte ptr "; break;
7557 case 16: OS << "word ptr "; break;
7558 case 32: OS << "dword ptr "; break;
7559 case 64: OS << "qword ptr "; break;
7560 case 80: OS << "xword ptr "; break;
7561 case 128: OS << "xmmword ptr "; break;
7562 case 256: OS << "ymmword ptr "; break;
7563 }
7564 break;
7565 case AOK_Emit:
7566 OS << ".byte";
7567 break;
7568 case AOK_Align: {
7569 // MS alignment directives are measured in bytes. If the native assembler
7570 // measures alignment in bytes, we can pass it straight through.
7571 OS << ".align";
7572 if (getContext().getAsmInfo()->getAlignmentIsInBytes())
7573 break;
7574
7575 // Alignment is in log2 form, so print that instead and skip the original
7576 // immediate.
7577 unsigned Val = AR.Val;
7578 OS << ' ' << Val;
7579 assert(Val < 10 && "Expected alignment less then 2^10.");
7580 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4;
7581 break;
7582 }
7583 case AOK_EVEN:
7584 OS << ".even";
7585 break;
7586 case AOK_EndOfStatement:
7587 OS << "\n\t";
7588 break;
7589 }
7590
7591 // Skip the original expression.
7592 AsmStart = Loc + AR.Len + AdditionalSkip;
7593 }
7594
7595 // Emit the remainder of the asm string.
7596 if (AsmStart != AsmEnd)
7597 OS << StringRef(AsmStart, AsmEnd - AsmStart);
7598
7599 AsmString = OS.str();
7600 return false;
7601 }
7602
initializeBuiltinSymbolMap()7603 void MasmParser::initializeBuiltinSymbolMap() {
7604 // Numeric built-ins (supported in all versions)
7605 BuiltinSymbolMap["@version"] = BI_VERSION;
7606 BuiltinSymbolMap["@line"] = BI_LINE;
7607
7608 // Text built-ins (supported in all versions)
7609 BuiltinSymbolMap["@date"] = BI_DATE;
7610 BuiltinSymbolMap["@time"] = BI_TIME;
7611 BuiltinSymbolMap["@filecur"] = BI_FILECUR;
7612 BuiltinSymbolMap["@filename"] = BI_FILENAME;
7613 BuiltinSymbolMap["@curseg"] = BI_CURSEG;
7614
7615 // Some built-ins exist only for MASM32 (32-bit x86)
7616 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() ==
7617 Triple::x86) {
7618 // Numeric built-ins
7619 // BuiltinSymbolMap["@cpu"] = BI_CPU;
7620 // BuiltinSymbolMap["@interface"] = BI_INTERFACE;
7621 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE;
7622 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE;
7623 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE;
7624 // BuiltinSymbolMap["@model"] = BI_MODEL;
7625
7626 // Text built-ins
7627 // BuiltinSymbolMap["@code"] = BI_CODE;
7628 // BuiltinSymbolMap["@data"] = BI_DATA;
7629 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA;
7630 // BuiltinSymbolMap["@stack"] = BI_STACK;
7631 }
7632 }
7633
evaluateBuiltinValue(BuiltinSymbol Symbol,SMLoc StartLoc)7634 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol,
7635 SMLoc StartLoc) {
7636 switch (Symbol) {
7637 default:
7638 return nullptr;
7639 case BI_VERSION:
7640 // Match a recent version of ML.EXE.
7641 return MCConstantExpr::create(1427, getContext());
7642 case BI_LINE: {
7643 int64_t Line;
7644 if (ActiveMacros.empty())
7645 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer);
7646 else
7647 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc,
7648 ActiveMacros.front()->ExitBuffer);
7649 return MCConstantExpr::create(Line, getContext());
7650 }
7651 }
7652 llvm_unreachable("unhandled built-in symbol");
7653 }
7654
7655 std::optional<std::string>
evaluateBuiltinTextMacro(BuiltinSymbol Symbol,SMLoc StartLoc)7656 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) {
7657 switch (Symbol) {
7658 default:
7659 return {};
7660 case BI_DATE: {
7661 // Current local date, formatted MM/DD/YY
7662 char TmpBuffer[sizeof("mm/dd/yy")];
7663 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM);
7664 return std::string(TmpBuffer, Len);
7665 }
7666 case BI_TIME: {
7667 // Current local time, formatted HH:MM:SS (24-hour clock)
7668 char TmpBuffer[sizeof("hh:mm:ss")];
7669 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM);
7670 return std::string(TmpBuffer, Len);
7671 }
7672 case BI_FILECUR:
7673 return SrcMgr
7674 .getMemoryBuffer(
7675 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer)
7676 ->getBufferIdentifier()
7677 .str();
7678 case BI_FILENAME:
7679 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())
7680 ->getBufferIdentifier())
7681 .upper();
7682 case BI_CURSEG:
7683 return getStreamer().getCurrentSectionOnly()->getName().str();
7684 }
7685 llvm_unreachable("unhandled built-in symbol");
7686 }
7687
7688 /// Create an MCAsmParser instance.
createMCMasmParser(SourceMgr & SM,MCContext & C,MCStreamer & Out,const MCAsmInfo & MAI,struct tm TM,unsigned CB)7689 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C,
7690 MCStreamer &Out, const MCAsmInfo &MAI,
7691 struct tm TM, unsigned CB) {
7692 return new MasmParser(SM, C, Out, MAI, TM, CB);
7693 }
7694