1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class implements the parser for assembly files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ADT/APFloat.h" 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/ADT/StringSwitch.h" 24 #include "llvm/ADT/Twine.h" 25 #include "llvm/BinaryFormat/Dwarf.h" 26 #include "llvm/DebugInfo/CodeView/SymbolRecord.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/MC/MCCodeView.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCDirectives.h" 31 #include "llvm/MC/MCDwarf.h" 32 #include "llvm/MC/MCExpr.h" 33 #include "llvm/MC/MCInstPrinter.h" 34 #include "llvm/MC/MCInstrDesc.h" 35 #include "llvm/MC/MCInstrInfo.h" 36 #include "llvm/MC/MCParser/AsmCond.h" 37 #include "llvm/MC/MCParser/AsmLexer.h" 38 #include "llvm/MC/MCParser/MCAsmLexer.h" 39 #include "llvm/MC/MCParser/MCAsmParser.h" 40 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 41 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 42 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 43 #include "llvm/MC/MCRegisterInfo.h" 44 #include "llvm/MC/MCSection.h" 45 #include "llvm/MC/MCStreamer.h" 46 #include "llvm/MC/MCSubtargetInfo.h" 47 #include "llvm/MC/MCSymbol.h" 48 #include "llvm/MC/MCTargetOptions.h" 49 #include "llvm/Support/Casting.h" 50 #include "llvm/Support/CommandLine.h" 51 
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <deque>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

namespace {

/// Helper types for tracking macro definitions: the token sequence of a single
/// macro argument, and the list of all arguments of one macro.
using MCAsmMacroArgument = std::vector<AsmToken>;
using MCAsmMacroArguments = std::vector<MCAsmMacroArgument>;

/// Bookkeeping for one active macro instantiation.
struct MacroInstantiation {
  /// Where the macro was instantiated.
  SMLoc InstantiationLoc;

  /// Buffer in which parsing resumes once the instantiation completes.
  unsigned ExitBuffer;

  /// Location at which parsing resumes once the instantiation completes.
  SMLoc ExitLoc;

  /// Depth of TheCondStack when the instantiation started.
  size_t CondStackDepth;
};

/// Per-statement parse state.
struct ParseStatementInfo {
  /// Operands parsed from the most recent statement.
  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;

  /// Opcode of the most recently parsed instruction; ~0U until one is set.
  unsigned Opcode = ~0U;

  /// Set when parsing the inline assembly failed.
  bool ParseError = false;

  /// Value associated with a macro exit, if any.
  std::optional<std::string> ExitValue;

  /// Rewrite list supplied at construction; not owned by this object.
  SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;

  // A rewrite list must always be supplied, so there is no default
  // constructor.
  ParseStatementInfo() = delete;
  ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
      : AsmRewrites(rewrites) {}
};

/// Kind of initializer a struct field carries.
enum FieldType {
  FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr.
  FT_REAL,     // Initializer: real number, stored as an APInt.
  FT_STRUCT    // Initializer: struct initializer, stored recursively.
};

struct FieldInfo;

/// Description of a STRUCT or UNION: its layout counters plus its fields.
struct StructInfo {
  StringRef Name;
  bool IsUnion = false;
  bool Initializable = true;
  unsigned Alignment = 0;
  unsigned AlignmentSize = 0;
  unsigned NextOffset = 0;
  unsigned Size = 0;
  std::vector<FieldInfo> Fields;
  StringMap<size_t> FieldsByName;

  /// Appends a new field of kind \p FT and returns a reference to it.
  FieldInfo &addField(StringRef FieldName, FieldType FT,
                      unsigned FieldAlignmentSize);

  StructInfo() = default;
  StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue);
};

// FIXME: This should probably use a class hierarchy, raw pointers between the
// objects, and dynamic type resolution instead of a union. On the other hand,
// ownership then becomes much more complicated; the obvious thing would be to
// use BumpPtrAllocator, but the lack of a destructor makes that messy.
147 148 struct StructInitializer; 149 struct IntFieldInfo { 150 SmallVector<const MCExpr *, 1> Values; 151 152 IntFieldInfo() = default; 153 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; } 154 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; } 155 }; 156 struct RealFieldInfo { 157 SmallVector<APInt, 1> AsIntValues; 158 159 RealFieldInfo() = default; 160 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; } 161 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; } 162 }; 163 struct StructFieldInfo { 164 std::vector<StructInitializer> Initializers; 165 StructInfo Structure; 166 167 StructFieldInfo() = default; 168 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S); 169 }; 170 171 class FieldInitializer { 172 public: 173 FieldType FT; 174 union { 175 IntFieldInfo IntInfo; 176 RealFieldInfo RealInfo; 177 StructFieldInfo StructInfo; 178 }; 179 180 ~FieldInitializer(); 181 FieldInitializer(FieldType FT); 182 183 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values); 184 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues); 185 FieldInitializer(std::vector<StructInitializer> &&Initializers, 186 struct StructInfo Structure); 187 188 FieldInitializer(const FieldInitializer &Initializer); 189 FieldInitializer(FieldInitializer &&Initializer); 190 191 FieldInitializer &operator=(const FieldInitializer &Initializer); 192 FieldInitializer &operator=(FieldInitializer &&Initializer); 193 }; 194 195 struct StructInitializer { 196 std::vector<FieldInitializer> FieldInitializers; 197 }; 198 199 struct FieldInfo { 200 // Offset of the field within the containing STRUCT. 201 unsigned Offset = 0; 202 203 // Total size of the field (= LengthOf * Type). 204 unsigned SizeOf = 0; 205 206 // Number of elements in the field (1 if scalar, >1 if an array). 207 unsigned LengthOf = 0; 208 209 // Size of a single entry in this field, in bytes ("type" in MASM standards). 
210 unsigned Type = 0; 211 212 FieldInitializer Contents; 213 214 FieldInfo(FieldType FT) : Contents(FT) {} 215 }; 216 217 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V, 218 StructInfo S) { 219 Initializers = std::move(V); 220 Structure = S; 221 } 222 223 StructInfo::StructInfo(StringRef StructName, bool Union, 224 unsigned AlignmentValue) 225 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {} 226 227 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, 228 unsigned FieldAlignmentSize) { 229 if (!FieldName.empty()) 230 FieldsByName[FieldName.lower()] = Fields.size(); 231 Fields.emplace_back(FT); 232 FieldInfo &Field = Fields.back(); 233 Field.Offset = 234 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize)); 235 if (!IsUnion) { 236 NextOffset = std::max(NextOffset, Field.Offset); 237 } 238 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize); 239 return Field; 240 } 241 242 FieldInitializer::~FieldInitializer() { 243 switch (FT) { 244 case FT_INTEGRAL: 245 IntInfo.~IntFieldInfo(); 246 break; 247 case FT_REAL: 248 RealInfo.~RealFieldInfo(); 249 break; 250 case FT_STRUCT: 251 StructInfo.~StructFieldInfo(); 252 break; 253 } 254 } 255 256 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) { 257 switch (FT) { 258 case FT_INTEGRAL: 259 new (&IntInfo) IntFieldInfo(); 260 break; 261 case FT_REAL: 262 new (&RealInfo) RealFieldInfo(); 263 break; 264 case FT_STRUCT: 265 new (&StructInfo) StructFieldInfo(); 266 break; 267 } 268 } 269 270 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) 271 : FT(FT_INTEGRAL) { 272 new (&IntInfo) IntFieldInfo(Values); 273 } 274 275 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) 276 : FT(FT_REAL) { 277 new (&RealInfo) RealFieldInfo(AsIntValues); 278 } 279 280 FieldInitializer::FieldInitializer( 281 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure) 282 : FT(FT_STRUCT) { 283 new (&StructInfo) 
StructFieldInfo(std::move(Initializers), Structure); 284 } 285 286 FieldInitializer::FieldInitializer(const FieldInitializer &Initializer) 287 : FT(Initializer.FT) { 288 switch (FT) { 289 case FT_INTEGRAL: 290 new (&IntInfo) IntFieldInfo(Initializer.IntInfo); 291 break; 292 case FT_REAL: 293 new (&RealInfo) RealFieldInfo(Initializer.RealInfo); 294 break; 295 case FT_STRUCT: 296 new (&StructInfo) StructFieldInfo(Initializer.StructInfo); 297 break; 298 } 299 } 300 301 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer) 302 : FT(Initializer.FT) { 303 switch (FT) { 304 case FT_INTEGRAL: 305 new (&IntInfo) IntFieldInfo(Initializer.IntInfo); 306 break; 307 case FT_REAL: 308 new (&RealInfo) RealFieldInfo(Initializer.RealInfo); 309 break; 310 case FT_STRUCT: 311 new (&StructInfo) StructFieldInfo(Initializer.StructInfo); 312 break; 313 } 314 } 315 316 FieldInitializer & 317 FieldInitializer::operator=(const FieldInitializer &Initializer) { 318 if (FT != Initializer.FT) { 319 switch (FT) { 320 case FT_INTEGRAL: 321 IntInfo.~IntFieldInfo(); 322 break; 323 case FT_REAL: 324 RealInfo.~RealFieldInfo(); 325 break; 326 case FT_STRUCT: 327 StructInfo.~StructFieldInfo(); 328 break; 329 } 330 } 331 FT = Initializer.FT; 332 switch (FT) { 333 case FT_INTEGRAL: 334 IntInfo = Initializer.IntInfo; 335 break; 336 case FT_REAL: 337 RealInfo = Initializer.RealInfo; 338 break; 339 case FT_STRUCT: 340 StructInfo = Initializer.StructInfo; 341 break; 342 } 343 return *this; 344 } 345 346 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) { 347 if (FT != Initializer.FT) { 348 switch (FT) { 349 case FT_INTEGRAL: 350 IntInfo.~IntFieldInfo(); 351 break; 352 case FT_REAL: 353 RealInfo.~RealFieldInfo(); 354 break; 355 case FT_STRUCT: 356 StructInfo.~StructFieldInfo(); 357 break; 358 } 359 } 360 FT = Initializer.FT; 361 switch (FT) { 362 case FT_INTEGRAL: 363 IntInfo = Initializer.IntInfo; 364 break; 365 case FT_REAL: 366 RealInfo = Initializer.RealInfo; 367 
break; 368 case FT_STRUCT: 369 StructInfo = Initializer.StructInfo; 370 break; 371 } 372 return *this; 373 } 374 375 /// The concrete assembly parser instance. 376 // Note that this is a full MCAsmParser, not an MCAsmParserExtension! 377 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc. 378 class MasmParser : public MCAsmParser { 379 private: 380 AsmLexer Lexer; 381 MCContext &Ctx; 382 MCStreamer &Out; 383 const MCAsmInfo &MAI; 384 SourceMgr &SrcMgr; 385 SourceMgr::DiagHandlerTy SavedDiagHandler; 386 void *SavedDiagContext; 387 std::unique_ptr<MCAsmParserExtension> PlatformParser; 388 389 /// This is the current buffer index we're lexing from as managed by the 390 /// SourceMgr object. 391 unsigned CurBuffer; 392 393 /// time of assembly 394 struct tm TM; 395 396 BitVector EndStatementAtEOFStack; 397 398 AsmCond TheCondState; 399 std::vector<AsmCond> TheCondStack; 400 401 /// maps directive names to handler methods in parser 402 /// extensions. Extensions register themselves in this map by calling 403 /// addDirectiveHandler. 404 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap; 405 406 /// maps assembly-time variable names to variables. 407 struct Variable { 408 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE }; 409 410 StringRef Name; 411 RedefinableKind Redefinable = REDEFINABLE; 412 bool IsText = false; 413 std::string TextValue; 414 }; 415 StringMap<Variable> Variables; 416 417 /// Stack of active struct definitions. 418 SmallVector<StructInfo, 1> StructInProgress; 419 420 /// Maps struct tags to struct definitions. 421 StringMap<StructInfo> Structs; 422 423 /// Maps data location names to types. 424 StringMap<AsmTypeInfo> KnownType; 425 426 /// Stack of active macro instantiations. 427 std::vector<MacroInstantiation*> ActiveMacros; 428 429 /// List of bodies of anonymous macros. 430 std::deque<MCAsmMacro> MacroLikeBodies; 431 432 /// Keeps track of how many .macro's have been instantiated. 
433 unsigned NumOfMacroInstantiations; 434 435 /// The values from the last parsed cpp hash file line comment if any. 436 struct CppHashInfoTy { 437 StringRef Filename; 438 int64_t LineNumber; 439 SMLoc Loc; 440 unsigned Buf; 441 CppHashInfoTy() : LineNumber(0), Buf(0) {} 442 }; 443 CppHashInfoTy CppHashInfo; 444 445 /// The filename from the first cpp hash file line comment, if any. 446 StringRef FirstCppHashFilename; 447 448 /// List of forward directional labels for diagnosis at the end. 449 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels; 450 451 /// AssemblerDialect. ~OU means unset value and use value provided by MAI. 452 /// Defaults to 1U, meaning Intel. 453 unsigned AssemblerDialect = 1U; 454 455 /// is Darwin compatibility enabled? 456 bool IsDarwin = false; 457 458 /// Are we parsing ms-style inline assembly? 459 bool ParsingMSInlineAsm = false; 460 461 /// Did we already inform the user about inconsistent MD5 usage? 462 bool ReportedInconsistentMD5 = false; 463 464 // Current <...> expression depth. 465 unsigned AngleBracketDepth = 0U; 466 467 // Number of locals defined. 
468 uint16_t LocalCounter = 0; 469 470 public: 471 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, 472 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0); 473 MasmParser(const MasmParser &) = delete; 474 MasmParser &operator=(const MasmParser &) = delete; 475 ~MasmParser() override; 476 477 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; 478 479 void addDirectiveHandler(StringRef Directive, 480 ExtensionDirectiveHandler Handler) override { 481 ExtensionDirectiveMap[Directive] = Handler; 482 if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) { 483 DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE; 484 } 485 } 486 487 void addAliasForDirective(StringRef Directive, StringRef Alias) override { 488 DirectiveKindMap[Directive] = DirectiveKindMap[Alias]; 489 } 490 491 /// @name MCAsmParser Interface 492 /// { 493 494 SourceMgr &getSourceManager() override { return SrcMgr; } 495 MCAsmLexer &getLexer() override { return Lexer; } 496 MCContext &getContext() override { return Ctx; } 497 MCStreamer &getStreamer() override { return Out; } 498 499 CodeViewContext &getCVContext() { return Ctx.getCVContext(); } 500 501 unsigned getAssemblerDialect() override { 502 if (AssemblerDialect == ~0U) 503 return MAI.getAssemblerDialect(); 504 else 505 return AssemblerDialect; 506 } 507 void setAssemblerDialect(unsigned i) override { 508 AssemblerDialect = i; 509 } 510 511 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override; 512 bool Warning(SMLoc L, const Twine &Msg, 513 SMRange Range = std::nullopt) override; 514 bool printError(SMLoc L, const Twine &Msg, 515 SMRange Range = std::nullopt) override; 516 517 enum ExpandKind { ExpandMacros, DoNotExpandMacros }; 518 const AsmToken &Lex(ExpandKind ExpandNextToken); 519 const AsmToken &Lex() override { return Lex(ExpandMacros); } 520 521 void setParsingMSInlineAsm(bool V) override { 522 ParsingMSInlineAsm = V; 523 // When parsing MS inline asm, we must lex 0b1101 and 
0ABCH as binary and 524 // hex integer literals. 525 Lexer.setLexMasmIntegers(V); 526 } 527 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; } 528 529 bool isParsingMasm() const override { return true; } 530 531 bool defineMacro(StringRef Name, StringRef Value) override; 532 533 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override; 534 bool lookUpField(StringRef Base, StringRef Member, 535 AsmFieldInfo &Info) const override; 536 537 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override; 538 539 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs, 540 unsigned &NumInputs, 541 SmallVectorImpl<std::pair<void *, bool>> &OpDecls, 542 SmallVectorImpl<std::string> &Constraints, 543 SmallVectorImpl<std::string> &Clobbers, 544 const MCInstrInfo *MII, const MCInstPrinter *IP, 545 MCAsmParserSemaCallback &SI) override; 546 547 bool parseExpression(const MCExpr *&Res); 548 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override; 549 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, 550 AsmTypeInfo *TypeInfo) override; 551 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override; 552 bool parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res, 553 SMLoc &EndLoc) override; 554 bool parseAbsoluteExpression(int64_t &Res) override; 555 556 /// Parse a floating point expression using the float \p Semantics 557 /// and set \p Res to the value. 558 bool parseRealValue(const fltSemantics &Semantics, APInt &Res); 559 560 /// Parse an identifier or string (as a quoted identifier) 561 /// and set \p Res to the identifier contents. 
562 enum IdentifierPositionKind { StandardPosition, StartOfStatement }; 563 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position); 564 bool parseIdentifier(StringRef &Res) override { 565 return parseIdentifier(Res, StandardPosition); 566 } 567 void eatToEndOfStatement() override; 568 569 bool checkForValidSection() override; 570 571 /// } 572 573 private: 574 bool expandMacros(); 575 const AsmToken peekTok(bool ShouldSkipSpace = true); 576 577 bool parseStatement(ParseStatementInfo &Info, 578 MCAsmParserSemaCallback *SI); 579 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites); 580 bool parseCppHashLineFilenameComment(SMLoc L); 581 582 bool expandMacro(raw_svector_ostream &OS, StringRef Body, 583 ArrayRef<MCAsmMacroParameter> Parameters, 584 ArrayRef<MCAsmMacroArgument> A, 585 const std::vector<std::string> &Locals, SMLoc L); 586 587 /// Are we inside a macro instantiation? 588 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();} 589 590 /// Handle entry to macro instantiation. 591 /// 592 /// \param M The macro. 593 /// \param NameLoc Instantiation location. 594 bool handleMacroEntry( 595 const MCAsmMacro *M, SMLoc NameLoc, 596 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement); 597 598 /// Handle invocation of macro function. 599 /// 600 /// \param M The macro. 601 /// \param NameLoc Invocation location. 602 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc); 603 604 /// Handle exit from macro instantiation. 605 void handleMacroExit(); 606 607 /// Extract AsmTokens for a macro argument. 608 bool 609 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA, 610 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement); 611 612 /// Parse all macro arguments for a given macro. 
613 bool 614 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A, 615 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement); 616 617 void printMacroInstantiations(); 618 619 bool expandStatement(SMLoc Loc); 620 621 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, 622 SMRange Range = std::nullopt) const { 623 ArrayRef<SMRange> Ranges(Range); 624 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); 625 } 626 static void DiagHandler(const SMDiagnostic &Diag, void *Context); 627 628 bool lookUpField(const StructInfo &Structure, StringRef Member, 629 AsmFieldInfo &Info) const; 630 631 /// Should we emit DWARF describing this assembler source? (Returns false if 632 /// the source has .file directives, which means we don't want to generate 633 /// info describing the assembler source itself.) 634 bool enabledGenDwarfForAssembly(); 635 636 /// Enter the specified file. This returns true on failure. 637 bool enterIncludeFile(const std::string &Filename); 638 639 /// Reset the current lexer position to that given by \p Loc. The 640 /// current token is not set; clients should ensure Lex() is called 641 /// subsequently. 642 /// 643 /// \param InBuffer If not 0, should be the known buffer id that contains the 644 /// location. 645 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0, 646 bool EndStatementAtEOF = true); 647 648 /// Parse up to a token of kind \p EndTok and return the contents from the 649 /// current token up to (but not including) this token; the current token on 650 /// exit will be either this kind or EOF. Reads through instantiated macro 651 /// functions and text macros. 652 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok); 653 std::string parseStringTo(AsmToken::TokenKind EndTok); 654 655 /// Parse up to the end of statement and return the contents from the current 656 /// token until the end of the statement; the current token on exit will be 657 /// either the EndOfStatement or EOF. 
658 StringRef parseStringToEndOfStatement() override; 659 660 bool parseTextItem(std::string &Data); 661 662 unsigned getBinOpPrecedence(AsmToken::TokenKind K, 663 MCBinaryExpr::Opcode &Kind); 664 665 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); 666 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); 667 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); 668 669 bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); 670 671 bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName); 672 bool parseCVFileId(int64_t &FileId, StringRef DirectiveName); 673 674 // Generic (target and platform independent) directive parsing. 675 enum DirectiveKind { 676 DK_NO_DIRECTIVE, // Placeholder 677 DK_HANDLER_DIRECTIVE, 678 DK_ASSIGN, 679 DK_EQU, 680 DK_TEXTEQU, 681 DK_ASCII, 682 DK_ASCIZ, 683 DK_STRING, 684 DK_BYTE, 685 DK_SBYTE, 686 DK_WORD, 687 DK_SWORD, 688 DK_DWORD, 689 DK_SDWORD, 690 DK_FWORD, 691 DK_QWORD, 692 DK_SQWORD, 693 DK_DB, 694 DK_DD, 695 DK_DF, 696 DK_DQ, 697 DK_DW, 698 DK_REAL4, 699 DK_REAL8, 700 DK_REAL10, 701 DK_ALIGN, 702 DK_EVEN, 703 DK_ORG, 704 DK_ENDR, 705 DK_EXTERN, 706 DK_PUBLIC, 707 DK_COMM, 708 DK_COMMENT, 709 DK_INCLUDE, 710 DK_REPEAT, 711 DK_WHILE, 712 DK_FOR, 713 DK_FORC, 714 DK_IF, 715 DK_IFE, 716 DK_IFB, 717 DK_IFNB, 718 DK_IFDEF, 719 DK_IFNDEF, 720 DK_IFDIF, 721 DK_IFDIFI, 722 DK_IFIDN, 723 DK_IFIDNI, 724 DK_ELSEIF, 725 DK_ELSEIFE, 726 DK_ELSEIFB, 727 DK_ELSEIFNB, 728 DK_ELSEIFDEF, 729 DK_ELSEIFNDEF, 730 DK_ELSEIFDIF, 731 DK_ELSEIFDIFI, 732 DK_ELSEIFIDN, 733 DK_ELSEIFIDNI, 734 DK_ELSE, 735 DK_ENDIF, 736 DK_FILE, 737 DK_LINE, 738 DK_LOC, 739 DK_STABS, 740 DK_CV_FILE, 741 DK_CV_FUNC_ID, 742 DK_CV_INLINE_SITE_ID, 743 DK_CV_LOC, 744 DK_CV_LINETABLE, 745 DK_CV_INLINE_LINETABLE, 746 DK_CV_DEF_RANGE, 747 DK_CV_STRINGTABLE, 748 DK_CV_STRING, 749 DK_CV_FILECHECKSUMS, 750 DK_CV_FILECHECKSUM_OFFSET, 751 DK_CV_FPO_DATA, 752 DK_CFI_SECTIONS, 753 DK_CFI_STARTPROC, 754 DK_CFI_ENDPROC, 755 
DK_CFI_DEF_CFA, 756 DK_CFI_DEF_CFA_OFFSET, 757 DK_CFI_ADJUST_CFA_OFFSET, 758 DK_CFI_DEF_CFA_REGISTER, 759 DK_CFI_OFFSET, 760 DK_CFI_REL_OFFSET, 761 DK_CFI_PERSONALITY, 762 DK_CFI_LSDA, 763 DK_CFI_REMEMBER_STATE, 764 DK_CFI_RESTORE_STATE, 765 DK_CFI_SAME_VALUE, 766 DK_CFI_RESTORE, 767 DK_CFI_ESCAPE, 768 DK_CFI_RETURN_COLUMN, 769 DK_CFI_SIGNAL_FRAME, 770 DK_CFI_UNDEFINED, 771 DK_CFI_REGISTER, 772 DK_CFI_WINDOW_SAVE, 773 DK_CFI_B_KEY_FRAME, 774 DK_MACRO, 775 DK_EXITM, 776 DK_ENDM, 777 DK_PURGE, 778 DK_ERR, 779 DK_ERRB, 780 DK_ERRNB, 781 DK_ERRDEF, 782 DK_ERRNDEF, 783 DK_ERRDIF, 784 DK_ERRDIFI, 785 DK_ERRIDN, 786 DK_ERRIDNI, 787 DK_ERRE, 788 DK_ERRNZ, 789 DK_ECHO, 790 DK_STRUCT, 791 DK_UNION, 792 DK_ENDS, 793 DK_END, 794 DK_PUSHFRAME, 795 DK_PUSHREG, 796 DK_SAVEREG, 797 DK_SAVEXMM128, 798 DK_SETFRAME, 799 DK_RADIX, 800 }; 801 802 /// Maps directive name --> DirectiveKind enum, for directives parsed by this 803 /// class. 804 StringMap<DirectiveKind> DirectiveKindMap; 805 806 bool isMacroLikeDirective(); 807 808 // Codeview def_range type parsing. 809 enum CVDefRangeType { 810 CVDR_DEFRANGE = 0, // Placeholder 811 CVDR_DEFRANGE_REGISTER, 812 CVDR_DEFRANGE_FRAMEPOINTER_REL, 813 CVDR_DEFRANGE_SUBFIELD_REGISTER, 814 CVDR_DEFRANGE_REGISTER_REL 815 }; 816 817 /// Maps Codeview def_range types --> CVDefRangeType enum, for Codeview 818 /// def_range types parsed by this class. 819 StringMap<CVDefRangeType> CVDefRangeTypeMap; 820 821 // Generic (target and platform independent) directive parsing. 822 enum BuiltinSymbol { 823 BI_NO_SYMBOL, // Placeholder 824 BI_DATE, 825 BI_TIME, 826 BI_VERSION, 827 BI_FILECUR, 828 BI_FILENAME, 829 BI_LINE, 830 BI_CURSEG, 831 BI_CPU, 832 BI_INTERFACE, 833 BI_CODE, 834 BI_DATA, 835 BI_FARDATA, 836 BI_WORDSIZE, 837 BI_CODESIZE, 838 BI_DATASIZE, 839 BI_MODEL, 840 BI_STACK, 841 }; 842 843 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this 844 /// class. 
845 StringMap<BuiltinSymbol> BuiltinSymbolMap; 846 847 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc); 848 849 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol, 850 SMLoc StartLoc); 851 852 // ".ascii", ".asciz", ".string" 853 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); 854 855 // "byte", "word", ... 856 bool emitIntValue(const MCExpr *Value, unsigned Size); 857 bool parseScalarInitializer(unsigned Size, 858 SmallVectorImpl<const MCExpr *> &Values, 859 unsigned StringPadLength = 0); 860 bool parseScalarInstList( 861 unsigned Size, SmallVectorImpl<const MCExpr *> &Values, 862 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 863 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr); 864 bool addIntegralField(StringRef Name, unsigned Size); 865 bool parseDirectiveValue(StringRef IDVal, unsigned Size); 866 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size, 867 StringRef Name, SMLoc NameLoc); 868 869 // "real4", "real8", "real10" 870 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr); 871 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); 872 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, 873 size_t Size); 874 bool parseRealInstList( 875 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values, 876 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 877 bool parseDirectiveNamedRealValue(StringRef TypeName, 878 const fltSemantics &Semantics, 879 unsigned Size, StringRef Name, 880 SMLoc NameLoc); 881 882 bool parseOptionalAngleBracketOpen(); 883 bool parseAngleBracketClose(const Twine &Msg = "expected '>'"); 884 885 bool parseFieldInitializer(const FieldInfo &Field, 886 FieldInitializer &Initializer); 887 bool parseFieldInitializer(const FieldInfo &Field, 888 const IntFieldInfo &Contents, 889 FieldInitializer &Initializer); 890 bool parseFieldInitializer(const 
FieldInfo &Field, 891 const RealFieldInfo &Contents, 892 FieldInitializer &Initializer); 893 bool parseFieldInitializer(const FieldInfo &Field, 894 const StructFieldInfo &Contents, 895 FieldInitializer &Initializer); 896 897 bool parseStructInitializer(const StructInfo &Structure, 898 StructInitializer &Initializer); 899 bool parseStructInstList( 900 const StructInfo &Structure, std::vector<StructInitializer> &Initializers, 901 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 902 903 bool emitFieldValue(const FieldInfo &Field); 904 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents); 905 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents); 906 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents); 907 908 bool emitFieldInitializer(const FieldInfo &Field, 909 const FieldInitializer &Initializer); 910 bool emitFieldInitializer(const FieldInfo &Field, 911 const IntFieldInfo &Contents, 912 const IntFieldInfo &Initializer); 913 bool emitFieldInitializer(const FieldInfo &Field, 914 const RealFieldInfo &Contents, 915 const RealFieldInfo &Initializer); 916 bool emitFieldInitializer(const FieldInfo &Field, 917 const StructFieldInfo &Contents, 918 const StructFieldInfo &Initializer); 919 920 bool emitStructInitializer(const StructInfo &Structure, 921 const StructInitializer &Initializer); 922 923 // User-defined types (structs, unions): 924 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr); 925 bool addStructField(StringRef Name, const StructInfo &Structure); 926 bool parseDirectiveStructValue(const StructInfo &Structure, 927 StringRef Directive, SMLoc DirLoc); 928 bool parseDirectiveNamedStructValue(const StructInfo &Structure, 929 StringRef Directive, SMLoc DirLoc, 930 StringRef Name); 931 932 // "=", "equ", "textequ" 933 bool parseDirectiveEquate(StringRef IDVal, StringRef Name, 934 DirectiveKind DirKind, SMLoc NameLoc); 935 936 bool parseDirectiveOrg(); // "org" 

  bool emitAlignTo(int64_t Alignment);
  bool parseDirectiveAlign();  // "align"
  bool parseDirectiveEven();   // "even"

  // ".file", ".line", ".loc", ".stabs"
  bool parseDirectiveFile(SMLoc DirectiveLoc);
  bool parseDirectiveLine();
  bool parseDirectiveLoc();
  bool parseDirectiveStabs();

  // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
  // ".cv_inline_linetable", ".cv_def_range", ".cv_string"
  bool parseDirectiveCVFile();
  bool parseDirectiveCVFuncId();
  bool parseDirectiveCVInlineSiteId();
  bool parseDirectiveCVLoc();
  bool parseDirectiveCVLinetable();
  bool parseDirectiveCVInlineLinetable();
  bool parseDirectiveCVDefRange();
  bool parseDirectiveCVString();
  bool parseDirectiveCVStringTable();
  bool parseDirectiveCVFileChecksums();
  bool parseDirectiveCVFileChecksumOffset();
  bool parseDirectiveCVFPOData();

  // .cfi directives
  bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
  bool parseDirectiveCFIWindowSave();
  bool parseDirectiveCFISections();
  bool parseDirectiveCFIStartProc();
  bool parseDirectiveCFIEndProc();
  bool parseDirectiveCFIDefCfaOffset();
  bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
  bool parseDirectiveCFIAdjustCfaOffset();
  bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
  bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
  bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
  bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
  bool parseDirectiveCFIRememberState();
  bool parseDirectiveCFIRestoreState();
  bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
  bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
  bool parseDirectiveCFIEscape();
  bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
  bool parseDirectiveCFISignalFrame();
  bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);

  // macro directives
  bool parseDirectivePurgeMacro(SMLoc DirectiveLoc);
  bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive,
                               std::string &Value);
  bool parseDirectiveEndMacro(StringRef Directive);
  bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);

  // structure directives
  bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
                            StringRef Name, SMLoc NameLoc);
  bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
  bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
  bool parseDirectiveNestedEnds();

  bool parseDirectiveExtern();

  /// Parse a directive like ".globl" which accepts a single symbol (which
  /// should be a label or an external).
  bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);

  bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"

  bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"

  bool parseDirectiveInclude(); // "include"

  // "if" or "ife"
  bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
  // "ifb" or "ifnb", depending on ExpectBlank.
  bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
  // CaseInsensitive.
  bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                           bool CaseInsensitive);
  // "ifdef" or "ifndef", depending on expect_defined
  bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
  // "elseif" or "elseife"
  bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
  // "elseifb" or "elseifnb", depending on ExpectBlank.
  bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".elseifdef" or ".elseifndef", depending on expect_defined
  bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
  // "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
  // ExpectEqual and CaseInsensitive.
  bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                               bool CaseInsensitive);
  bool parseDirectiveElse(SMLoc DirectiveLoc);   // "else"
  bool parseDirectiveEndIf(SMLoc DirectiveLoc);  // "endif"

  // String parsing hooks overridden from the base parser interface.
  bool parseEscapedString(std::string &Data) override;
  bool parseAngleBracketString(std::string &Data) override;

  // Macro-like directives
  MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
                                raw_svector_ostream &OS);
  void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
                                SMLoc ExitLoc, raw_svector_ostream &OS);
  bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
  bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
  bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
  bool parseDirectiveWhile(SMLoc DirectiveLoc);

  // "_emit" or "__emit"
  bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
                            size_t Len);

  // "align"
  bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);

  // "end"
  bool parseDirectiveEnd(SMLoc DirectiveLoc);

  // ".err"
  bool parseDirectiveError(SMLoc DirectiveLoc);
  // ".errb" or ".errnb", depending on ExpectBlank.
  bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
  // ".errdef" or ".errndef", depending on ExpectDefined.
  bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
  // ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
  // and CaseInsensitive.
bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                                bool CaseInsensitive);
  // ".erre" or ".errnz", depending on ExpectZero.
  bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);

  // ".radix"
  bool parseDirectiveRadix(SMLoc DirectiveLoc);

  // "echo"
  bool parseDirectiveEcho(SMLoc DirectiveLoc);

  // One-time table setup; all three are invoked from the constructor.
  void initializeDirectiveKindMap();
  void initializeCVDefRangeTypeMap();
  void initializeBuiltinSymbolMap();
};

} // end anonymous namespace

namespace llvm {

// Both of these are only declared here; their definitions live elsewhere.
extern cl::opt<unsigned> AsmMacroMaxNestingDepth;

extern MCAsmParserExtension *createCOFFMasmParser();

} // end namespace llvm

enum { DEFAULT_ADDRSPACE = 0 };

/// Construct a parser over the buffers managed by \p SM.
///
/// \param CB  Buffer to start lexing in; when zero, SM.getMainFileID() is used.
/// \param TM  Stored in the TM member.
///
/// Installs DiagHandler (with this parser as context) on \p SM after saving
/// the previously installed handler and context; the destructor restores them.
/// Calls report_fatal_error for any object file type other than COFF.
MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
                       const MCAsmInfo &MAI, struct tm TM, unsigned CB)
    : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
      CurBuffer(CB ? CB : SM.getMainFileID()), TM(TM) {
  HadError = false;
  // Save the old handler.
  SavedDiagHandler = SrcMgr.getDiagHandler();
  SavedDiagContext = SrcMgr.getDiagContext();
  // Set our own handler which calls the saved handler.
  SrcMgr.setDiagHandler(DiagHandler, this);
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
  // Entry for the initial buffer; each later buffer switch pushes its own.
  EndStatementAtEOFStack.push_back(true);

  // Initialize the platform / file format parser.
  switch (Ctx.getObjectFileType()) {
  case MCContext::IsCOFF:
    PlatformParser.reset(createCOFFMasmParser());
    break;
  default:
    report_fatal_error("llvm-ml currently supports only COFF output.");
    break;
  }

  // The directive map is populated before the platform parser is initialized.
  initializeDirectiveKindMap();
  PlatformParser->Initialize(*this);
  initializeCVDefRangeTypeMap();
  initializeBuiltinSymbolMap();

  NumOfMacroInstantiations = 0;
}

/// Restores the diagnostic handler and context that were installed on the
/// source manager before this parser was constructed.
MasmParser::~MasmParser() {
  assert((HadError || ActiveMacros.empty()) &&
         "Unexpected active macro instantiation!");

  // Restore the saved diagnostics handler and context for use during
  // finalization.
  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
}

/// Emit one "while in macro instantiation" note per entry of ActiveMacros,
/// walking from the last entry to the first.
void MasmParser::printMacroInstantiations() {
  // Print the active macro instantiation stack.
  for (std::vector<MacroInstantiation *>::const_reverse_iterator
           it = ActiveMacros.rbegin(),
           ie = ActiveMacros.rend();
       it != ie; ++it)
    printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
                 "while in macro instantiation");
}

/// Print a note at \p L, preceded by any pending errors and followed by the
/// macro instantiation notes.
void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
  printPendingErrors();
  printMessage(L, SourceMgr::DK_Note, Msg, Range);
  printMacroInstantiations();
}

/// Print a warning at \p L.
///
/// Returns false without printing when the target option MCNoWarn is set.
/// When MCFatalWarnings is set the message is forwarded to Error() and that
/// result is returned. Otherwise the warning and the macro instantiation notes
/// are printed and false is returned.
bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
  if (getTargetParser().getTargetOptions().MCNoWarn)
    return false;
  if (getTargetParser().getTargetOptions().MCFatalWarnings)
    return Error(L, Msg, Range);
  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
  printMacroInstantiations();
  return false;
}

/// Record that an error occurred, print it at \p L together with the macro
/// instantiation notes, and return true.
bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
  HadError = true;
  printMessage(L, SourceMgr::DK_Error, Msg, Range);
  printMacroInstantiations();
  return true;
}

/// Switch lexing to the include file \p Filename.
///
/// Returns true when SrcMgr.AddIncludeFile yields buffer id 0; otherwise makes
/// the new buffer current, pushes `true` onto EndStatementAtEOFStack and
/// returns false.
bool MasmParser::enterIncludeFile(const std::string &Filename) {
  std::string IncludedFile;
  unsigned NewBuf =
      SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
  if (!NewBuf)
    return true;

  CurBuffer = NewBuf;
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
  EndStatementAtEOFStack.push_back(true);
  return false;
}

/// Point the lexer at \p Loc.
///
/// \param InBuffer  Buffer id to use; when zero, the buffer is looked up with
///                  SrcMgr.FindBufferContainingLoc(Loc).
/// \param EndStatementAtEOF  Forwarded unchanged to Lexer.setBuffer.
void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
                           bool EndStatementAtEOF) {
  CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
                  Loc.getPointer(), EndStatementAtEOF);
}

/// Try to expand the identifier that is the current token.
///
/// Three cases are handled, keyed on the lower-cased identifier:
///  * a macro with IsFunction set that is followed by '(' is passed to
///    handleMacroInvocation();
///  * an entry in BuiltinSymbolMap is evaluated with
///    evaluateBuiltinTextMacro();
///  * an entry in Variables with IsText set contributes its TextValue.
/// For the last two, the text is copied into a new "<instantiation>" source
/// buffer that becomes the current buffer, and its first token is lexed.
///
/// Returns true when nothing was expanded, false otherwise. The macro function
/// case returns false even if handleMacroInvocation() failed.
bool MasmParser::expandMacros() {
  const AsmToken &Tok = getTok();
  const std::string IDLower = Tok.getIdentifier().lower();

  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
    // This is a macro function invocation; expand it in place.
    const SMLoc MacroLoc = Tok.getLoc();
    const StringRef MacroId = Tok.getIdentifier();
    Lexer.Lex();
    if (handleMacroInvocation(M, MacroLoc)) {
      // On failure, make an Error token (carrying the macro name) the current
      // token.
      Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
      Lexer.Lex();
    }
    return false;
  }

  std::optional<std::string> ExpandedValue;
  auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
  if (BuiltinIt != BuiltinSymbolMap.end()) {
    ExpandedValue =
        evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
  } else {
    auto VarIt = Variables.find(IDLower);
    if (VarIt != Variables.end() && VarIt->getValue().IsText) {
      ExpandedValue = VarIt->getValue().TextValue;
    }
  }

  if (!ExpandedValue)
    return true;
  std::unique_ptr<MemoryBuffer> Instantiation =
      MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");

  // Jump to the macro instantiation and prime the lexer.
  CurBuffer =
      SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
                  /*EndStatementAtEOF=*/false);
  EndStatementAtEOFStack.push_back(false);
  Lexer.Lex();
  return false;
}

/// Advance to the next token and return it.
///
/// In order: reports a pending lexer error token, forwards a line comment
/// carried by an end-of-statement token to the streamer (when comments are
/// preserved), lexes, expands identifiers while \p ExpandNextToken is
/// ExpandMacros, forwards Comment tokens, skips backslash + end-of-statement
/// pairs, and finally handles end of buffer by resuming at the parent include
/// location when there is one.
const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
  if (Lexer.getTok().is(AsmToken::Error))
    Error(Lexer.getErrLoc(), Lexer.getErr());

  // if it's a end of statement with a comment in it
  if (getTok().is(AsmToken::EndOfStatement)) {
    // if this is a line comment output it.
    if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
        getTok().getString().front() != '\r' && MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(getTok().getString()));
  }

  const AsmToken *tok = &Lexer.Lex();
  // Sampled once, before any expansion takes place.
  bool StartOfStatement = Lexer.isAtStartOfStatement();

  // NOTE(review): `tok` is not reassigned inside this loop, so re-testing it
  // after expandMacros() presumably relies on Lexer.Lex() returning a
  // reference to storage that tracks the lexer's current token -- confirm
  // against AsmLexer.
  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
    if (StartOfStatement) {
      AsmToken NextTok;
      MutableArrayRef<AsmToken> Buf(NextTok);
      size_t ReadCount = Lexer.peekTokens(Buf);
      if (ReadCount && NextTok.is(AsmToken::Identifier) &&
          (NextTok.getString().equals_insensitive("equ") ||
           NextTok.getString().equals_insensitive("textequ"))) {
        // This looks like an EQU or TEXTEQU directive; don't expand the
        // identifier, allowing for redefinitions.
        break;
      }
    }
    // expandMacros() returns true when there was nothing to expand.
    if (expandMacros())
      break;
  }

  // Parse comments here to be deferred until end of next statement.
  while (tok->is(AsmToken::Comment)) {
    if (MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(tok->getString()));
    tok = &Lexer.Lex();
  }

  // Recognize and bypass line continuations.
  while (tok->is(AsmToken::BackSlash) &&
         peekTok().is(AsmToken::EndOfStatement)) {
    // Eat both the backslash and the end of statement.
    Lexer.Lex();
    tok = &Lexer.Lex();
  }

  if (tok->is(AsmToken::Eof)) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      // The recursive call uses the default argument for ExpandNextToken.
      return Lex();
    }
    // Outermost buffer: its entry must have been the only one left.
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  return *tok;
}

/// Return a copy of the token that follows the current one, without
/// consuming it.
///
/// When the lexer cannot supply a token and the current buffer has a parent
/// include location, lexing is moved to that location (this changes CurBuffer
/// and EndStatementAtEOFStack) and the peek is retried there.
const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
  AsmToken Tok;

  MutableArrayRef<AsmToken> Buf(Tok);
  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);

  if (ReadCount == 0) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      return peekTok(ShouldSkipSpace);
    }
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  // NOTE(review): if control falls out of the block above, ReadCount is still
  // 0 and this assertion fires in asserts-enabled builds -- confirm whether
  // that path is reachable.
  assert(ReadCount == 1);
  return Tok;
}

/// Returns whether DWARF generation for assembly source is enabled in the
/// context. When it is and no DWARF file number has been set yet, emits a
/// file directive for the line table's root file and records the resulting
/// number in the context.
bool MasmParser::enabledGenDwarfForAssembly() {
  // Check whether the user specified -g.
  if (!getContext().getGenDwarfForAssembly())
    return false;
  // If we haven't encountered any .file directives (which would imply that
  // the assembler source was produced with debug info already) then emit one
  // describing the assembler source file itself.
  if (getContext().getGenDwarfFileNumber() == 0) {
    // Use the first #line directive for this, if any. It's preprocessed, so
    // there is no checksum, and of course no source directive.
    if (!FirstCppHashFilename.empty())
      getContext().setMCLineTableRootFile(
          /*CUID=*/0, getContext().getCompilationDir(), FirstCppHashFilename,
          /*Cksum=*/std::nullopt, /*Source=*/std::nullopt);
    const MCDwarfFile &RootFile =
        getContext().getMCDwarfLineTable(/*CUID=*/0).getRootFile();
    getContext().setGenDwarfFileNumber(getStreamer().emitDwarfFileDirective(
        /*CUID=*/0, getContext().getCompilationDir(), RootFile.Name,
        RootFile.Checksum, RootFile.Source));
  }
  return true;
}

/// Parse the whole input, statement by statement.
///
/// \param NoInitialTextSection  When false, Out.initSections() is called
///                              before parsing starts.
/// \param NoFinalize  When true, the undefined-symbol checks and
///                    Out.finish() are skipped.
/// \returns true if this parser or the MCContext recorded an error.
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
  // Create the initial section, if requested.
  if (!NoInitialTextSection)
    Out.initSections(false, getTargetParser().getSTI());

  // Prime the lexer.
  Lex();

  HadError = false;
  // Snapshot of the conditional state, compared after parsing to detect
  // unbalanced conditionals.
  AsmCond StartingCondState = TheCondState;
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // If we are generating dwarf for assembly source files save the initial text
  // section.  (Don't use enabledGenDwarfForAssembly() here, as we aren't
  // emitting any actual debug info yet and haven't had a chance to parse any
  // embedded .file directives.)
  if (getContext().getGenDwarfForAssembly()) {
    MCSection *Sec = getStreamer().getCurrentSectionOnly();
    if (!Sec->getBeginSymbol()) {
      MCSymbol *SectionStartSym = getContext().createTempSymbol();
      getStreamer().emitLabel(SectionStartSym);
      Sec->setBeginSymbol(SectionStartSym);
    }
    bool InsertResult = getContext().addGenDwarfSection(Sec);
    assert(InsertResult && ".text section should not have debug info yet");
    (void)InsertResult;
  }

  getTargetParser().onBeginOfFile();

  // While we have input, parse each statement.
  while (Lexer.isNot(AsmToken::Eof) ||
         SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
    // Skip through the EOF at the end of an inclusion.
    if (Lexer.is(AsmToken::Eof))
      Lex();

    ParseStatementInfo Info(&AsmStrRewrites);
    bool Parsed = parseStatement(Info, nullptr);

    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
    // for printing ErrMsg via Lex() only if no (presumably better) parser error
    // exists.
    if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
      Lex();
    }

    // parseStatement returned true so may need to emit an error.
    printPendingErrors();

    // Skipping to the next line if needed.
    if (Parsed && !getLexer().isAtStartOfStatement())
      eatToEndOfStatement();
  }

  getTargetParser().onEndOfFile();
  printPendingErrors();

  // All errors should have been emitted.
  assert(!hasPendingError() && "unexpected error from parseStatement");

  getTargetParser().flushPendingInstructions(getStreamer());

  if (TheCondState.TheCond != StartingCondState.TheCond ||
      TheCondState.Ignore != StartingCondState.Ignore)
    printError(getTok().getLoc(), "unmatched .ifs or .elses");
  // Check to see there are no empty DwarfFile slots.
  const auto &LineTables = getContext().getMCDwarfLineTables();
  if (!LineTables.empty()) {
    unsigned Index = 0;
    // Only the first line table is inspected; slot 0 is allowed to be empty.
    for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
      if (File.Name.empty() && Index != 0)
        printError(getTok().getLoc(), "unassigned file number: " +
                                          Twine(Index) +
                                          " for .file directives");
      ++Index;
    }
  }

  // Check to see that all assembler local symbols were actually defined.
  // Targets that don't do subsections via symbols may not want this, though,
  // so conservatively exclude them. Only do this if we're finalizing, though,
  // as otherwise we won't necessarily have seen everything yet.
  if (!NoFinalize) {
    if (MAI.hasSubsectionsViaSymbols()) {
      for (const auto &TableEntry : getContext().getSymbols()) {
        MCSymbol *Sym = TableEntry.getValue();
        // Variable symbols may not be marked as defined, so check those
        // explicitly. If we know it's a variable, we have a definition for
        // the purposes of this check.
        if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
          // FIXME: We would really like to refer back to where the symbol was
          // first referenced for a source location. We need to add something
          // to track that. Currently, we just point to the end of the file.
          printError(getTok().getLoc(), "assembler local symbol '" +
                                            Sym->getName() + "' not defined");
      }
    }

    // Temporary symbols like the ones for directional jumps don't go in the
    // symbol table. They also need to be diagnosed in all (final) cases.
    for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
      if (std::get<2>(LocSym)->isUndefined()) {
        // Reset the state of any "# line file" directives we've seen to the
        // context as it was at the diagnostic site.
        CppHashInfo = std::get<1>(LocSym);
        printError(std::get<0>(LocSym), "directional label undefined");
      }
    }
  }

  // Finalize the output stream if there are no errors and if the client wants
  // us to.
  if (!HadError && !NoFinalize)
    Out.finish(Lexer.getLoc());

  return HadError || getContext().hadError();
}

/// Ensure the streamer has a current section.
///
/// When not parsing MS inline asm and there is no current section, the
/// sections are initialized and an error is reported (the result of Error()
/// is returned). Otherwise returns false.
bool MasmParser::checkForValidSection() {
  if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
    Out.initSections(false, getTargetParser().getSTI());
    return Error(getTok().getLoc(),
                 "expected section directive before assembly directive");
  }
  return false;
}

/// Throw away the rest of the line for testing purposes.
1467 void MasmParser::eatToEndOfStatement() { 1468 while (Lexer.isNot(AsmToken::EndOfStatement)) { 1469 if (Lexer.is(AsmToken::Eof)) { 1470 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); 1471 if (ParentIncludeLoc == SMLoc()) { 1472 break; 1473 } 1474 1475 EndStatementAtEOFStack.pop_back(); 1476 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); 1477 } 1478 1479 Lexer.Lex(); 1480 } 1481 1482 // Eat EOL. 1483 if (Lexer.is(AsmToken::EndOfStatement)) 1484 Lexer.Lex(); 1485 } 1486 1487 SmallVector<StringRef, 1> 1488 MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) { 1489 SmallVector<StringRef, 1> Refs; 1490 const char *Start = getTok().getLoc().getPointer(); 1491 while (Lexer.isNot(EndTok)) { 1492 if (Lexer.is(AsmToken::Eof)) { 1493 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); 1494 if (ParentIncludeLoc == SMLoc()) { 1495 break; 1496 } 1497 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start); 1498 1499 EndStatementAtEOFStack.pop_back(); 1500 jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back()); 1501 Lexer.Lex(); 1502 Start = getTok().getLoc().getPointer(); 1503 } else { 1504 Lexer.Lex(); 1505 } 1506 } 1507 Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start); 1508 return Refs; 1509 } 1510 1511 std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) { 1512 SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok); 1513 std::string Str; 1514 for (StringRef S : Refs) { 1515 Str.append(S.str()); 1516 } 1517 return Str; 1518 } 1519 1520 StringRef MasmParser::parseStringToEndOfStatement() { 1521 const char *Start = getTok().getLoc().getPointer(); 1522 1523 while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) 1524 Lexer.Lex(); 1525 1526 const char *End = getTok().getLoc().getPointer(); 1527 return StringRef(Start, End - Start); 1528 } 1529 1530 /// Parse a paren expression and return it. 
/// NOTE: This assumes the leading '(' has already been consumed.
///
/// parenexpr ::= expr)
///
/// Returns true if parseExpression() fails; otherwise returns the result of
/// parseRParen(). \p EndLoc is set to the end of the token that follows the
/// expression, before that token is checked.
bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  if (parseExpression(Res))
    return true;
  EndLoc = Lexer.getTok().getEndLoc();
  return parseRParen();
}

/// Parse a bracket expression and return it.
/// NOTE: This assumes the leading '[' has already been consumed.
///
/// bracketexpr ::= expr]
///
/// Returns true if the expression fails to parse or the closing ']' is
/// missing, false otherwise.
bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  if (parseExpression(Res))
    return true;
  EndLoc = getTok().getEndLoc();
  if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
    return true;
  return false;
}

/// Parse a primary expression and return it.
///  primaryexpr ::= (parenexpr
///  primaryexpr ::= symbol
///  primaryexpr ::= number
///  primaryexpr ::= '.'
///  primaryexpr ::= ~,+,-,'not' primaryexpr
///  primaryexpr ::= string
///          (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// \param Res       Receives the parsed expression on success.
/// \param EndLoc    Set on most, but not all, successful paths (see the notes
///                  in the body).
/// \param TypeInfo  Optional. When non-null and the expression is a symbol
///                  reference that reaches the end of that case, receives the
///                  type found by the field lookup, or failing that the
///                  KnownType entry for the identifier.
/// \returns true on error, false on success.
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
                                  AsmTypeInfo *TypeInfo) {
  SMLoc FirstTokenLoc = getLexer().getLoc();
  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
  switch (FirstTokenKind) {
  default:
    return TokError("unknown token in expression");
  // If we have an error assume that we've already handled it.
  case AsmToken::Error:
    return true;
  case AsmToken::Exclaim:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Dollar:
  case AsmToken::At:
  case AsmToken::Identifier: {
    StringRef Identifier;
    if (parseIdentifier(Identifier)) {
      // We may have failed but $ may be a valid token.
      if (getTok().is(AsmToken::Dollar)) {
        if (Lexer.getMAI().getDollarIsPC()) {
          Lex();
          // This is a '$' reference, which references the current PC.  Emit a
          // temporary label to the streamer and refer to it.
          MCSymbol *Sym = Ctx.createTempSymbol();
          Out.emitLabel(Sym);
          Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None,
                                        getContext());
          EndLoc = FirstTokenLoc;
          return false;
        }
        return Error(FirstTokenLoc, "invalid token in expression");
      }
      // Any other parseIdentifier() failure falls through with whatever
      // Identifier holds; an empty name is rejected further down.
    }
    // Parse named bitwise negation.
    if (Identifier.equals_insensitive("not")) {
      if (parsePrimaryExpr(Res, EndLoc, nullptr))
        return true;
      Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
      return false;
    }
    // Parse directional local label references.
    if (Identifier.equals_insensitive("@b") ||
        Identifier.equals_insensitive("@f")) {
      bool Before = Identifier.equals_insensitive("@b");
      MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
      if (Before && Sym->isUndefined())
        return Error(FirstTokenLoc, "Expected @@ label before @B reference");
      Res = MCSymbolRefExpr::create(Sym, getContext());
      // NOTE(review): EndLoc is not assigned on this path.
      return false;
    }
    // Parse symbol variant.
    std::pair<StringRef, StringRef> Split;
    if (!MAI.useParensForSymbolVariant()) {
      // NOTE(review): FirstTokenKind is Dollar, At or Identifier inside this
      // case, so the String comparison below is never true here.
      if (FirstTokenKind == AsmToken::String) {
        if (Lexer.is(AsmToken::At)) {
          Lex(); // eat @
          SMLoc AtLoc = getLexer().getLoc();
          StringRef VName;
          if (parseIdentifier(VName))
            return Error(AtLoc, "expected symbol variant after '@'");

          Split = std::make_pair(Identifier, VName);
        }
      } else {
        Split = Identifier.split('@');
      }
    } else if (Lexer.is(AsmToken::LParen)) {
      Lex(); // eat '('.
      StringRef VName;
      // NOTE(review): the result of parseIdentifier() is ignored here; a
      // failure leaves VName empty, which skips the variant lookup below.
      parseIdentifier(VName);
      // eat ')'.
      if (parseToken(AsmToken::RParen,
                     "unexpected token in variant, expected ')'"))
        return true;
      Split = std::make_pair(Identifier, VName);
    }

    EndLoc = SMLoc::getFromPointer(Identifier.end());

    // This is a symbol reference.
    StringRef SymbolName = Identifier;
    if (SymbolName.empty())
      return Error(getLexer().getLoc(), "expected a symbol reference");

    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;

    // Look up the symbol variant if used.
    if (!Split.second.empty()) {
      Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
      if (Variant != MCSymbolRefExpr::VK_Invalid) {
        SymbolName = Split.first;
      } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
        // Not a known variant, but '@' is allowed in names: keep the whole
        // identifier as the symbol name.
        Variant = MCSymbolRefExpr::VK_None;
      } else {
        return Error(SMLoc::getFromPointer(Split.second.begin()),
                     "invalid variant '" + Split.second + "'");
      }
    }

    // Find the field offset if used.
    AsmFieldInfo Info;
    Split = SymbolName.split('.');
    if (Split.second.empty()) {
      // No '.' in the name: nothing to look up.
    } else {
      SymbolName = Split.first;
      if (lookUpField(SymbolName, Split.second, Info)) {
        // First lookup reported failure: retry, treating the first component
        // after the '.' as the base and the remainder as the member. The
        // result of the retry is not checked.
        std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
        StringRef Base = BaseMember.first, Member = BaseMember.second;
        lookUpField(Base, Member, Info);
      } else if (Structs.count(SymbolName.lower())) {
        // This is actually a reference to a field offset.
        Res = MCConstantExpr::create(Info.Offset, getContext());
        return false;
      }
    }

    MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
    if (!Sym) {
      // If this is a built-in numeric value, treat it as a constant.
      auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
      const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
                                       ? BI_NO_SYMBOL
                                       : BuiltinIt->getValue();
      if (Symbol != BI_NO_SYMBOL) {
        const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
        if (Value) {
          Res = Value;
          return false;
        }
      }

      // Variables use case-insensitive symbol names; if this is a variable, we
      // find the symbol using its canonical name.
      auto VarIt = Variables.find(SymbolName.lower());
      if (VarIt != Variables.end())
        SymbolName = VarIt->second.Name;
      Sym = getContext().getOrCreateSymbol(SymbolName);
    }

    // If this is an absolute variable reference, substitute it now to preserve
    // semantics in the face of reassignment.
    if (Sym->isVariable()) {
      auto V = Sym->getVariableValue(/*SetUsed=*/false);
      bool DoInline = isa<MCConstantExpr>(V) && !Variant;
      // A target expression makes its own decision, overriding the default
      // computed above (and so may request inlining even with a variant).
      if (auto TV = dyn_cast<MCTargetExpr>(V))
        DoInline = TV->inlineAssignedExpr();
      if (DoInline) {
        if (Variant)
          return Error(EndLoc, "unexpected modifier on variable reference");
        Res = Sym->getVariableValue(/*SetUsed=*/false);
        return false;
      }
    }

    // Otherwise create a symbol ref.
    const MCExpr *SymRef =
        MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
    if (Info.Offset) {
      // Field reference with a non-zero offset: symbol + offset.
      Res = MCBinaryExpr::create(
          MCBinaryExpr::Add, SymRef,
          MCConstantExpr::create(Info.Offset, getContext()), getContext());
    } else {
      Res = SymRef;
    }
    if (TypeInfo) {
      if (Info.Type.Name.empty()) {
        // The field lookup produced no type; fall back to the type recorded
        // for the full (lower-cased) identifier, if any.
        auto TypeIt = KnownType.find(Identifier.lower());
        if (TypeIt != KnownType.end()) {
          Info.Type = TypeIt->second;
        }
      }

      *TypeInfo = Info.Type;
    }
    return false;
  }
  case AsmToken::BigNum:
    return TokError("literal value out of range for directive");
  case AsmToken::Integer: {
    int64_t IntVal = getTok().getIntVal();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::String: {
    // MASM strings (used as constants) are interpreted as big-endian base-256.
    SMLoc ValueLoc = getTok().getLoc();
    std::string Value;
    if (parseEscapedString(Value))
      return true;
    // At most 8 bytes fit in the 64-bit accumulator below.
    if (Value.size() > 8)
      return Error(ValueLoc, "literal value out of range");
    uint64_t IntValue = 0;
    for (const unsigned char CharVal : Value)
      IntValue = (IntValue << 8) | CharVal;
    Res = MCConstantExpr::create(IntValue, getContext());
    // NOTE(review): EndLoc is not assigned on this path.
    return false;
  }
  case AsmToken::Real: {
    // The literal is read as an IEEE double and its bit pattern becomes the
    // integer constant.
    APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::Dot: {
    // This is a '.' reference, which references the current PC.  Emit a
    // temporary label to the streamer and refer to it.
    MCSymbol *Sym = Ctx.createTempSymbol();
    Out.emitLabel(Sym);
    Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat the '.'.
    return false;
  }
  case AsmToken::LParen:
    Lex(); // Eat the '('.
    return parseParenExpr(Res, EndLoc);
  case AsmToken::LBrac:
    if (!PlatformParser->HasBracketExpressions())
      return TokError("brackets expression not supported on this target");
    Lex(); // Eat the '['.
    return parseBracketExpr(Res, EndLoc);
  case AsmToken::Minus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Plus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Tilde:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
    return false;
  // MIPS unary expression operators. The lexer won't generate these tokens if
  // MCAsmInfo::HasMipsExpressions is false for the target.
  case AsmToken::PercentCall16:
  case AsmToken::PercentCall_Hi:
  case AsmToken::PercentCall_Lo:
  case AsmToken::PercentDtprel_Hi:
  case AsmToken::PercentDtprel_Lo:
  case AsmToken::PercentGot:
  case AsmToken::PercentGot_Disp:
  case AsmToken::PercentGot_Hi:
  case AsmToken::PercentGot_Lo:
  case AsmToken::PercentGot_Ofst:
  case AsmToken::PercentGot_Page:
  case AsmToken::PercentGottprel:
  case AsmToken::PercentGp_Rel:
  case AsmToken::PercentHi:
  case AsmToken::PercentHigher:
  case AsmToken::PercentHighest:
  case AsmToken::PercentLo:
  case AsmToken::PercentNeg:
  case AsmToken::PercentPcrel_Hi:
  case AsmToken::PercentPcrel_Lo:
  case AsmToken::PercentTlsgd:
  case AsmToken::PercentTlsldm:
  case AsmToken::PercentTprel_Hi:
  case AsmToken::PercentTprel_Lo:
    Lex(); // Eat the operator.
    if (Lexer.isNot(AsmToken::LParen))
      return TokError("expected '(' after operator");
    Lex(); // Eat the '('.
    if (parseExpression(Res, EndLoc))
      return true;
    if (parseRParen())
      return true;
    Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
    // A null result from the target is reported as failure.
    return !Res;
  }
}

/// Convenience overload of parseExpression() for callers that do not need the
/// end location.
bool MasmParser::parseExpression(const MCExpr *&Res) {
  SMLoc EndLoc;
  return parseExpression(Res, EndLoc);
}

/// This function checks whether the text starting at StrLoc is an
/// angle-bracket <string> or part of an arithmetic expression.
/// A string that begins with the character '<' must end with the character
/// '>' on the same line; otherwise the text is treated as arithmetic.
/// If the function returns true, EndLoc is set to the location immediately
/// after the closing '>' character.
1861 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) { 1862 assert((StrLoc.getPointer() != nullptr) && 1863 "Argument to the function cannot be a NULL value"); 1864 const char *CharPtr = StrLoc.getPointer(); 1865 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') && 1866 (*CharPtr != '\0')) { 1867 if (*CharPtr == '!') 1868 CharPtr++; 1869 CharPtr++; 1870 } 1871 if (*CharPtr == '>') { 1872 EndLoc = StrLoc.getFromPointer(CharPtr + 1); 1873 return true; 1874 } 1875 return false; 1876 } 1877 1878 /// creating a string without the escape characters '!'. 1879 static std::string angleBracketString(StringRef BracketContents) { 1880 std::string Res; 1881 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) { 1882 if (BracketContents[Pos] == '!') 1883 Pos++; 1884 Res += BracketContents[Pos]; 1885 } 1886 return Res; 1887 } 1888 1889 /// Parse an expression and return it. 1890 /// 1891 /// expr ::= expr &&,|| expr -> lowest. 1892 /// expr ::= expr |,^,&,! expr 1893 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr 1894 /// expr ::= expr <<,>> expr 1895 /// expr ::= expr +,- expr 1896 /// expr ::= expr *,/,% expr -> highest. 1897 /// expr ::= primaryexpr 1898 /// 1899 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { 1900 // Parse the expression. 1901 Res = nullptr; 1902 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) || 1903 parseBinOpRHS(1, Res, EndLoc)) 1904 return true; 1905 1906 // Try to constant fold it up front, if possible. Do not exploit 1907 // assembler here. 
1908 int64_t Value; 1909 if (Res->evaluateAsAbsolute(Value)) 1910 Res = MCConstantExpr::create(Value, getContext()); 1911 1912 return false; 1913 } 1914 1915 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { 1916 Res = nullptr; 1917 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc); 1918 } 1919 1920 bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res, 1921 SMLoc &EndLoc) { 1922 if (parseParenExpr(Res, EndLoc)) 1923 return true; 1924 1925 for (; ParenDepth > 0; --ParenDepth) { 1926 if (parseBinOpRHS(1, Res, EndLoc)) 1927 return true; 1928 1929 // We don't Lex() the last RParen. 1930 // This is the same behavior as parseParenExpression(). 1931 if (ParenDepth - 1 > 0) { 1932 EndLoc = getTok().getEndLoc(); 1933 if (parseRParen()) 1934 return true; 1935 } 1936 } 1937 return false; 1938 } 1939 1940 bool MasmParser::parseAbsoluteExpression(int64_t &Res) { 1941 const MCExpr *Expr; 1942 1943 SMLoc StartLoc = Lexer.getLoc(); 1944 if (parseExpression(Expr)) 1945 return true; 1946 1947 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr())) 1948 return Error(StartLoc, "expected absolute expression"); 1949 1950 return false; 1951 } 1952 1953 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K, 1954 MCBinaryExpr::Opcode &Kind, 1955 bool ShouldUseLogicalShr, 1956 bool EndExpressionAtGreater) { 1957 switch (K) { 1958 default: 1959 return 0; // not a binop. 
1960 1961 // Lowest Precedence: &&, || 1962 case AsmToken::AmpAmp: 1963 Kind = MCBinaryExpr::LAnd; 1964 return 2; 1965 case AsmToken::PipePipe: 1966 Kind = MCBinaryExpr::LOr; 1967 return 1; 1968 1969 // Low Precedence: ==, !=, <>, <, <=, >, >= 1970 case AsmToken::EqualEqual: 1971 Kind = MCBinaryExpr::EQ; 1972 return 3; 1973 case AsmToken::ExclaimEqual: 1974 case AsmToken::LessGreater: 1975 Kind = MCBinaryExpr::NE; 1976 return 3; 1977 case AsmToken::Less: 1978 Kind = MCBinaryExpr::LT; 1979 return 3; 1980 case AsmToken::LessEqual: 1981 Kind = MCBinaryExpr::LTE; 1982 return 3; 1983 case AsmToken::Greater: 1984 if (EndExpressionAtGreater) 1985 return 0; 1986 Kind = MCBinaryExpr::GT; 1987 return 3; 1988 case AsmToken::GreaterEqual: 1989 Kind = MCBinaryExpr::GTE; 1990 return 3; 1991 1992 // Low Intermediate Precedence: +, - 1993 case AsmToken::Plus: 1994 Kind = MCBinaryExpr::Add; 1995 return 4; 1996 case AsmToken::Minus: 1997 Kind = MCBinaryExpr::Sub; 1998 return 4; 1999 2000 // High Intermediate Precedence: |, &, ^ 2001 case AsmToken::Pipe: 2002 Kind = MCBinaryExpr::Or; 2003 return 5; 2004 case AsmToken::Caret: 2005 Kind = MCBinaryExpr::Xor; 2006 return 5; 2007 case AsmToken::Amp: 2008 Kind = MCBinaryExpr::And; 2009 return 5; 2010 2011 // Highest Precedence: *, /, %, <<, >> 2012 case AsmToken::Star: 2013 Kind = MCBinaryExpr::Mul; 2014 return 6; 2015 case AsmToken::Slash: 2016 Kind = MCBinaryExpr::Div; 2017 return 6; 2018 case AsmToken::Percent: 2019 Kind = MCBinaryExpr::Mod; 2020 return 6; 2021 case AsmToken::LessLess: 2022 Kind = MCBinaryExpr::Shl; 2023 return 6; 2024 case AsmToken::GreaterGreater: 2025 if (EndExpressionAtGreater) 2026 return 0; 2027 Kind = ShouldUseLogicalShr ? 
MCBinaryExpr::LShr : MCBinaryExpr::AShr; 2028 return 6; 2029 } 2030 } 2031 2032 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K, 2033 MCBinaryExpr::Opcode &Kind) { 2034 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr(); 2035 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr, 2036 AngleBracketDepth > 0); 2037 } 2038 2039 /// Parse all binary operators with precedence >= 'Precedence'. 2040 /// Res contains the LHS of the expression on input. 2041 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, 2042 SMLoc &EndLoc) { 2043 SMLoc StartLoc = Lexer.getLoc(); 2044 while (true) { 2045 AsmToken::TokenKind TokKind = Lexer.getKind(); 2046 if (Lexer.getKind() == AsmToken::Identifier) { 2047 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString()) 2048 .CaseLower("and", AsmToken::Amp) 2049 .CaseLower("not", AsmToken::Exclaim) 2050 .CaseLower("or", AsmToken::Pipe) 2051 .CaseLower("xor", AsmToken::Caret) 2052 .CaseLower("shl", AsmToken::LessLess) 2053 .CaseLower("shr", AsmToken::GreaterGreater) 2054 .CaseLower("eq", AsmToken::EqualEqual) 2055 .CaseLower("ne", AsmToken::ExclaimEqual) 2056 .CaseLower("lt", AsmToken::Less) 2057 .CaseLower("le", AsmToken::LessEqual) 2058 .CaseLower("gt", AsmToken::Greater) 2059 .CaseLower("ge", AsmToken::GreaterEqual) 2060 .Default(TokKind); 2061 } 2062 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; 2063 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); 2064 2065 // If the next token is lower precedence than we are allowed to eat, return 2066 // successfully with what we ate already. 2067 if (TokPrec < Precedence) 2068 return false; 2069 2070 Lex(); 2071 2072 // Eat the next primary expression. 2073 const MCExpr *RHS; 2074 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc)) 2075 return true; 2076 2077 // If BinOp binds less tightly with RHS than the operator after RHS, let 2078 // the pending operator take RHS as its LHS. 
2079 MCBinaryExpr::Opcode Dummy; 2080 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); 2081 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc)) 2082 return true; 2083 2084 // Merge LHS and RHS according to operator. 2085 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc); 2086 } 2087 } 2088 2089 /// ParseStatement: 2090 /// ::= % statement 2091 /// ::= EndOfStatement 2092 /// ::= Label* Directive ...Operands... EndOfStatement 2093 /// ::= Label* Identifier OperandList* EndOfStatement 2094 bool MasmParser::parseStatement(ParseStatementInfo &Info, 2095 MCAsmParserSemaCallback *SI) { 2096 assert(!hasPendingError() && "parseStatement started with pending error"); 2097 // Eat initial spaces and comments. 2098 while (Lexer.is(AsmToken::Space)) 2099 Lex(); 2100 if (Lexer.is(AsmToken::EndOfStatement)) { 2101 // If this is a line comment we can drop it safely. 2102 if (getTok().getString().empty() || getTok().getString().front() == '\r' || 2103 getTok().getString().front() == '\n') 2104 Out.addBlankLine(); 2105 Lex(); 2106 return false; 2107 } 2108 2109 // If preceded by an expansion operator, first expand all text macros and 2110 // macro functions. 2111 if (getTok().is(AsmToken::Percent)) { 2112 SMLoc ExpansionLoc = getTok().getLoc(); 2113 if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc)) 2114 return true; 2115 } 2116 2117 // Statements always start with an identifier, unless we're dealing with a 2118 // processor directive (.386, .686, etc.) that lexes as a real. 2119 AsmToken ID = getTok(); 2120 SMLoc IDLoc = ID.getLoc(); 2121 StringRef IDVal; 2122 if (Lexer.is(AsmToken::HashDirective)) 2123 return parseCppHashLineFilenameComment(IDLoc); 2124 if (Lexer.is(AsmToken::Dot)) { 2125 // Treat '.' as a valid identifier in this context. 2126 Lex(); 2127 IDVal = "."; 2128 } else if (Lexer.is(AsmToken::LCurly)) { 2129 // Treat '{' as a valid identifier in this context. 
2130 Lex(); 2131 IDVal = "{"; 2132 2133 } else if (Lexer.is(AsmToken::RCurly)) { 2134 // Treat '}' as a valid identifier in this context. 2135 Lex(); 2136 IDVal = "}"; 2137 } else if (Lexer.is(AsmToken::Star) && 2138 getTargetParser().starIsStartOfStatement()) { 2139 // Accept '*' as a valid start of statement. 2140 Lex(); 2141 IDVal = "*"; 2142 } else if (Lexer.is(AsmToken::Real)) { 2143 // Treat ".<number>" as a valid identifier in this context. 2144 IDVal = getTok().getString(); 2145 Lex(); // always eat a token 2146 if (!IDVal.startswith(".")) 2147 return Error(IDLoc, "unexpected token at start of statement"); 2148 } else if (parseIdentifier(IDVal, StartOfStatement)) { 2149 if (!TheCondState.Ignore) { 2150 Lex(); // always eat a token 2151 return Error(IDLoc, "unexpected token at start of statement"); 2152 } 2153 IDVal = ""; 2154 } 2155 2156 // Handle conditional assembly here before checking for skipping. We 2157 // have to do this so that .endif isn't skipped in a ".if 0" block for 2158 // example. 2159 StringMap<DirectiveKind>::const_iterator DirKindIt = 2160 DirectiveKindMap.find(IDVal.lower()); 2161 DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) 2162 ? 
DK_NO_DIRECTIVE 2163 : DirKindIt->getValue(); 2164 switch (DirKind) { 2165 default: 2166 break; 2167 case DK_IF: 2168 case DK_IFE: 2169 return parseDirectiveIf(IDLoc, DirKind); 2170 case DK_IFB: 2171 return parseDirectiveIfb(IDLoc, true); 2172 case DK_IFNB: 2173 return parseDirectiveIfb(IDLoc, false); 2174 case DK_IFDEF: 2175 return parseDirectiveIfdef(IDLoc, true); 2176 case DK_IFNDEF: 2177 return parseDirectiveIfdef(IDLoc, false); 2178 case DK_IFDIF: 2179 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false, 2180 /*CaseInsensitive=*/false); 2181 case DK_IFDIFI: 2182 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false, 2183 /*CaseInsensitive=*/true); 2184 case DK_IFIDN: 2185 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true, 2186 /*CaseInsensitive=*/false); 2187 case DK_IFIDNI: 2188 return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true, 2189 /*CaseInsensitive=*/true); 2190 case DK_ELSEIF: 2191 case DK_ELSEIFE: 2192 return parseDirectiveElseIf(IDLoc, DirKind); 2193 case DK_ELSEIFB: 2194 return parseDirectiveElseIfb(IDLoc, true); 2195 case DK_ELSEIFNB: 2196 return parseDirectiveElseIfb(IDLoc, false); 2197 case DK_ELSEIFDEF: 2198 return parseDirectiveElseIfdef(IDLoc, true); 2199 case DK_ELSEIFNDEF: 2200 return parseDirectiveElseIfdef(IDLoc, false); 2201 case DK_ELSEIFDIF: 2202 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false, 2203 /*CaseInsensitive=*/false); 2204 case DK_ELSEIFDIFI: 2205 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false, 2206 /*CaseInsensitive=*/true); 2207 case DK_ELSEIFIDN: 2208 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true, 2209 /*CaseInsensitive=*/false); 2210 case DK_ELSEIFIDNI: 2211 return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true, 2212 /*CaseInsensitive=*/true); 2213 case DK_ELSE: 2214 return parseDirectiveElse(IDLoc); 2215 case DK_ENDIF: 2216 return parseDirectiveEndIf(IDLoc); 2217 } 2218 2219 // Ignore the statement if in the middle of inactive conditional 2220 // (e.g. ".if 0"). 
2221 if (TheCondState.Ignore) { 2222 eatToEndOfStatement(); 2223 return false; 2224 } 2225 2226 // FIXME: Recurse on local labels? 2227 2228 // Check for a label. 2229 // ::= identifier ':' 2230 // ::= number ':' 2231 if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) { 2232 if (checkForValidSection()) 2233 return true; 2234 2235 // identifier ':' -> Label. 2236 Lex(); 2237 2238 // Diagnose attempt to use '.' as a label. 2239 if (IDVal == ".") 2240 return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label"); 2241 2242 // Diagnose attempt to use a variable as a label. 2243 // 2244 // FIXME: Diagnostics. Note the location of the definition as a label. 2245 // FIXME: This doesn't diagnose assignment to a symbol which has been 2246 // implicitly marked as external. 2247 MCSymbol *Sym; 2248 if (ParsingMSInlineAsm && SI) { 2249 StringRef RewrittenLabel = 2250 SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); 2251 assert(!RewrittenLabel.empty() && 2252 "We should have an internal name here."); 2253 Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), 2254 RewrittenLabel); 2255 IDVal = RewrittenLabel; 2256 } 2257 // Handle directional local labels 2258 if (IDVal == "@@") { 2259 Sym = Ctx.createDirectionalLocalSymbol(0); 2260 } else { 2261 Sym = getContext().getOrCreateSymbol(IDVal); 2262 } 2263 2264 // End of Labels should be treated as end of line for lexing 2265 // purposes but that information is not available to the Lexer who 2266 // does not understand Labels. This may cause us to see a Hash 2267 // here instead of a preprocessor line comment. 2268 if (getTok().is(AsmToken::Hash)) { 2269 std::string CommentStr = parseStringTo(AsmToken::EndOfStatement); 2270 Lexer.Lex(); 2271 Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr)); 2272 } 2273 2274 // Consume any end of statement token, if present, to avoid spurious 2275 // addBlankLine calls(). 
2276 if (getTok().is(AsmToken::EndOfStatement)) { 2277 Lex(); 2278 } 2279 2280 getTargetParser().doBeforeLabelEmit(Sym, IDLoc); 2281 2282 // Emit the label. 2283 if (!getTargetParser().isParsingMSInlineAsm()) 2284 Out.emitLabel(Sym, IDLoc); 2285 2286 // If we are generating dwarf for assembly source files then gather the 2287 // info to make a dwarf label entry for this label if needed. 2288 if (enabledGenDwarfForAssembly()) 2289 MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(), 2290 IDLoc); 2291 2292 getTargetParser().onLabelParsed(Sym); 2293 2294 return false; 2295 } 2296 2297 // If macros are enabled, check to see if this is a macro instantiation. 2298 if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) { 2299 return handleMacroEntry(M, IDLoc); 2300 } 2301 2302 // Otherwise, we have a normal instruction or directive. 2303 2304 if (DirKind != DK_NO_DIRECTIVE) { 2305 // There are several entities interested in parsing directives: 2306 // 2307 // 1. Asm parser extensions. For example, platform-specific parsers 2308 // (like the ELF parser) register themselves as extensions. 2309 // 2. The target-specific assembly parser. Some directives are target 2310 // specific or may potentially behave differently on certain targets. 2311 // 3. The generic directive parser implemented by this class. These are 2312 // all the directives that behave in a target and platform independent 2313 // manner, or at least have a default behavior that's shared between 2314 // all targets and platforms. 2315 2316 getTargetParser().flushPendingInstructions(getStreamer()); 2317 2318 // Special-case handling of structure-end directives at higher priority, 2319 // since ENDS is overloaded as a segment-end directive. 
2320 if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 && 2321 getTok().is(AsmToken::EndOfStatement)) { 2322 return parseDirectiveNestedEnds(); 2323 } 2324 2325 // First, check the extension directive map to see if any extension has 2326 // registered itself to parse this directive. 2327 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler = 2328 ExtensionDirectiveMap.lookup(IDVal.lower()); 2329 if (Handler.first) 2330 return (*Handler.second)(Handler.first, IDVal, IDLoc); 2331 2332 // Next, let the target-specific assembly parser try. 2333 SMLoc StartTokLoc = getTok().getLoc(); 2334 bool TPDirectiveReturn = 2335 ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID); 2336 2337 if (hasPendingError()) 2338 return true; 2339 // Currently the return value should be true if we are 2340 // uninterested but as this is at odds with the standard parsing 2341 // convention (return true = error) we have instances of a parsed 2342 // directive that fails returning true as an error. Catch these 2343 // cases as best as possible errors here. 2344 if (TPDirectiveReturn && StartTokLoc != getTok().getLoc()) 2345 return true; 2346 // Return if we did some parsing or believe we succeeded. 2347 if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc()) 2348 return false; 2349 2350 // Finally, if no one else is interested in this directive, it must be 2351 // generic and familiar to this class. 
2352 switch (DirKind) { 2353 default: 2354 break; 2355 case DK_ASCII: 2356 return parseDirectiveAscii(IDVal, false); 2357 case DK_ASCIZ: 2358 case DK_STRING: 2359 return parseDirectiveAscii(IDVal, true); 2360 case DK_BYTE: 2361 case DK_SBYTE: 2362 case DK_DB: 2363 return parseDirectiveValue(IDVal, 1); 2364 case DK_WORD: 2365 case DK_SWORD: 2366 case DK_DW: 2367 return parseDirectiveValue(IDVal, 2); 2368 case DK_DWORD: 2369 case DK_SDWORD: 2370 case DK_DD: 2371 return parseDirectiveValue(IDVal, 4); 2372 case DK_FWORD: 2373 case DK_DF: 2374 return parseDirectiveValue(IDVal, 6); 2375 case DK_QWORD: 2376 case DK_SQWORD: 2377 case DK_DQ: 2378 return parseDirectiveValue(IDVal, 8); 2379 case DK_REAL4: 2380 return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); 2381 case DK_REAL8: 2382 return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); 2383 case DK_REAL10: 2384 return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10); 2385 case DK_STRUCT: 2386 case DK_UNION: 2387 return parseDirectiveNestedStruct(IDVal, DirKind); 2388 case DK_ENDS: 2389 return parseDirectiveNestedEnds(); 2390 case DK_ALIGN: 2391 return parseDirectiveAlign(); 2392 case DK_EVEN: 2393 return parseDirectiveEven(); 2394 case DK_ORG: 2395 return parseDirectiveOrg(); 2396 case DK_EXTERN: 2397 return parseDirectiveExtern(); 2398 case DK_PUBLIC: 2399 return parseDirectiveSymbolAttribute(MCSA_Global); 2400 case DK_COMM: 2401 return parseDirectiveComm(/*IsLocal=*/false); 2402 case DK_COMMENT: 2403 return parseDirectiveComment(IDLoc); 2404 case DK_INCLUDE: 2405 return parseDirectiveInclude(); 2406 case DK_REPEAT: 2407 return parseDirectiveRepeat(IDLoc, IDVal); 2408 case DK_WHILE: 2409 return parseDirectiveWhile(IDLoc); 2410 case DK_FOR: 2411 return parseDirectiveFor(IDLoc, IDVal); 2412 case DK_FORC: 2413 return parseDirectiveForc(IDLoc, IDVal); 2414 case DK_FILE: 2415 return parseDirectiveFile(IDLoc); 2416 case DK_LINE: 2417 return parseDirectiveLine(); 2418 case DK_LOC: 2419 
return parseDirectiveLoc(); 2420 case DK_STABS: 2421 return parseDirectiveStabs(); 2422 case DK_CV_FILE: 2423 return parseDirectiveCVFile(); 2424 case DK_CV_FUNC_ID: 2425 return parseDirectiveCVFuncId(); 2426 case DK_CV_INLINE_SITE_ID: 2427 return parseDirectiveCVInlineSiteId(); 2428 case DK_CV_LOC: 2429 return parseDirectiveCVLoc(); 2430 case DK_CV_LINETABLE: 2431 return parseDirectiveCVLinetable(); 2432 case DK_CV_INLINE_LINETABLE: 2433 return parseDirectiveCVInlineLinetable(); 2434 case DK_CV_DEF_RANGE: 2435 return parseDirectiveCVDefRange(); 2436 case DK_CV_STRING: 2437 return parseDirectiveCVString(); 2438 case DK_CV_STRINGTABLE: 2439 return parseDirectiveCVStringTable(); 2440 case DK_CV_FILECHECKSUMS: 2441 return parseDirectiveCVFileChecksums(); 2442 case DK_CV_FILECHECKSUM_OFFSET: 2443 return parseDirectiveCVFileChecksumOffset(); 2444 case DK_CV_FPO_DATA: 2445 return parseDirectiveCVFPOData(); 2446 case DK_CFI_SECTIONS: 2447 return parseDirectiveCFISections(); 2448 case DK_CFI_STARTPROC: 2449 return parseDirectiveCFIStartProc(); 2450 case DK_CFI_ENDPROC: 2451 return parseDirectiveCFIEndProc(); 2452 case DK_CFI_DEF_CFA: 2453 return parseDirectiveCFIDefCfa(IDLoc); 2454 case DK_CFI_DEF_CFA_OFFSET: 2455 return parseDirectiveCFIDefCfaOffset(); 2456 case DK_CFI_ADJUST_CFA_OFFSET: 2457 return parseDirectiveCFIAdjustCfaOffset(); 2458 case DK_CFI_DEF_CFA_REGISTER: 2459 return parseDirectiveCFIDefCfaRegister(IDLoc); 2460 case DK_CFI_OFFSET: 2461 return parseDirectiveCFIOffset(IDLoc); 2462 case DK_CFI_REL_OFFSET: 2463 return parseDirectiveCFIRelOffset(IDLoc); 2464 case DK_CFI_PERSONALITY: 2465 return parseDirectiveCFIPersonalityOrLsda(true); 2466 case DK_CFI_LSDA: 2467 return parseDirectiveCFIPersonalityOrLsda(false); 2468 case DK_CFI_REMEMBER_STATE: 2469 return parseDirectiveCFIRememberState(); 2470 case DK_CFI_RESTORE_STATE: 2471 return parseDirectiveCFIRestoreState(); 2472 case DK_CFI_SAME_VALUE: 2473 return parseDirectiveCFISameValue(IDLoc); 2474 case 
DK_CFI_RESTORE: 2475 return parseDirectiveCFIRestore(IDLoc); 2476 case DK_CFI_ESCAPE: 2477 return parseDirectiveCFIEscape(); 2478 case DK_CFI_RETURN_COLUMN: 2479 return parseDirectiveCFIReturnColumn(IDLoc); 2480 case DK_CFI_SIGNAL_FRAME: 2481 return parseDirectiveCFISignalFrame(); 2482 case DK_CFI_UNDEFINED: 2483 return parseDirectiveCFIUndefined(IDLoc); 2484 case DK_CFI_REGISTER: 2485 return parseDirectiveCFIRegister(IDLoc); 2486 case DK_CFI_WINDOW_SAVE: 2487 return parseDirectiveCFIWindowSave(); 2488 case DK_EXITM: 2489 Info.ExitValue = ""; 2490 return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue); 2491 case DK_ENDM: 2492 Info.ExitValue = ""; 2493 return parseDirectiveEndMacro(IDVal); 2494 case DK_PURGE: 2495 return parseDirectivePurgeMacro(IDLoc); 2496 case DK_END: 2497 return parseDirectiveEnd(IDLoc); 2498 case DK_ERR: 2499 return parseDirectiveError(IDLoc); 2500 case DK_ERRB: 2501 return parseDirectiveErrorIfb(IDLoc, true); 2502 case DK_ERRNB: 2503 return parseDirectiveErrorIfb(IDLoc, false); 2504 case DK_ERRDEF: 2505 return parseDirectiveErrorIfdef(IDLoc, true); 2506 case DK_ERRNDEF: 2507 return parseDirectiveErrorIfdef(IDLoc, false); 2508 case DK_ERRDIF: 2509 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false, 2510 /*CaseInsensitive=*/false); 2511 case DK_ERRDIFI: 2512 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false, 2513 /*CaseInsensitive=*/true); 2514 case DK_ERRIDN: 2515 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true, 2516 /*CaseInsensitive=*/false); 2517 case DK_ERRIDNI: 2518 return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true, 2519 /*CaseInsensitive=*/true); 2520 case DK_ERRE: 2521 return parseDirectiveErrorIfe(IDLoc, true); 2522 case DK_ERRNZ: 2523 return parseDirectiveErrorIfe(IDLoc, false); 2524 case DK_RADIX: 2525 return parseDirectiveRadix(IDLoc); 2526 case DK_ECHO: 2527 return parseDirectiveEcho(IDLoc); 2528 } 2529 2530 return Error(IDLoc, "unknown directive"); 2531 } 2532 2533 // We also check 
if this is allocating memory with user-defined type. 2534 auto IDIt = Structs.find(IDVal.lower()); 2535 if (IDIt != Structs.end()) 2536 return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal, 2537 IDLoc); 2538 2539 // Non-conditional Microsoft directives sometimes follow their first argument. 2540 const AsmToken nextTok = getTok(); 2541 const StringRef nextVal = nextTok.getString(); 2542 const SMLoc nextLoc = nextTok.getLoc(); 2543 2544 const AsmToken afterNextTok = peekTok(); 2545 2546 // There are several entities interested in parsing infix directives: 2547 // 2548 // 1. Asm parser extensions. For example, platform-specific parsers 2549 // (like the ELF parser) register themselves as extensions. 2550 // 2. The generic directive parser implemented by this class. These are 2551 // all the directives that behave in a target and platform independent 2552 // manner, or at least have a default behavior that's shared between 2553 // all targets and platforms. 2554 2555 getTargetParser().flushPendingInstructions(getStreamer()); 2556 2557 // Special-case handling of structure-end directives at higher priority, since 2558 // ENDS is overloaded as a segment-end directive. 2559 if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) { 2560 Lex(); 2561 return parseDirectiveEnds(IDVal, IDLoc); 2562 } 2563 2564 // First, check the extension directive map to see if any extension has 2565 // registered itself to parse this directive. 2566 std::pair<MCAsmParserExtension *, DirectiveHandler> Handler = 2567 ExtensionDirectiveMap.lookup(nextVal.lower()); 2568 if (Handler.first) { 2569 Lex(); 2570 Lexer.UnLex(ID); 2571 return (*Handler.second)(Handler.first, nextVal, nextLoc); 2572 } 2573 2574 // If no one else is interested in this directive, it must be 2575 // generic and familiar to this class. 2576 DirKindIt = DirectiveKindMap.find(nextVal.lower()); 2577 DirKind = (DirKindIt == DirectiveKindMap.end()) 2578 ? 
DK_NO_DIRECTIVE 2579 : DirKindIt->getValue(); 2580 switch (DirKind) { 2581 default: 2582 break; 2583 case DK_ASSIGN: 2584 case DK_EQU: 2585 case DK_TEXTEQU: 2586 Lex(); 2587 return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc); 2588 case DK_BYTE: 2589 if (afterNextTok.is(AsmToken::Identifier) && 2590 afterNextTok.getString().equals_insensitive("ptr")) { 2591 // Size directive; part of an instruction. 2592 break; 2593 } 2594 [[fallthrough]]; 2595 case DK_SBYTE: 2596 case DK_DB: 2597 Lex(); 2598 return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc); 2599 case DK_WORD: 2600 if (afterNextTok.is(AsmToken::Identifier) && 2601 afterNextTok.getString().equals_insensitive("ptr")) { 2602 // Size directive; part of an instruction. 2603 break; 2604 } 2605 [[fallthrough]]; 2606 case DK_SWORD: 2607 case DK_DW: 2608 Lex(); 2609 return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc); 2610 case DK_DWORD: 2611 if (afterNextTok.is(AsmToken::Identifier) && 2612 afterNextTok.getString().equals_insensitive("ptr")) { 2613 // Size directive; part of an instruction. 2614 break; 2615 } 2616 [[fallthrough]]; 2617 case DK_SDWORD: 2618 case DK_DD: 2619 Lex(); 2620 return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc); 2621 case DK_FWORD: 2622 if (afterNextTok.is(AsmToken::Identifier) && 2623 afterNextTok.getString().equals_insensitive("ptr")) { 2624 // Size directive; part of an instruction. 2625 break; 2626 } 2627 [[fallthrough]]; 2628 case DK_DF: 2629 Lex(); 2630 return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc); 2631 case DK_QWORD: 2632 if (afterNextTok.is(AsmToken::Identifier) && 2633 afterNextTok.getString().equals_insensitive("ptr")) { 2634 // Size directive; part of an instruction. 
2635 break; 2636 } 2637 [[fallthrough]]; 2638 case DK_SQWORD: 2639 case DK_DQ: 2640 Lex(); 2641 return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); 2642 case DK_REAL4: 2643 Lex(); 2644 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4, 2645 IDVal, IDLoc); 2646 case DK_REAL8: 2647 Lex(); 2648 return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8, 2649 IDVal, IDLoc); 2650 case DK_REAL10: 2651 Lex(); 2652 return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(), 2653 10, IDVal, IDLoc); 2654 case DK_STRUCT: 2655 case DK_UNION: 2656 Lex(); 2657 return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc); 2658 case DK_ENDS: 2659 Lex(); 2660 return parseDirectiveEnds(IDVal, IDLoc); 2661 case DK_MACRO: 2662 Lex(); 2663 return parseDirectiveMacro(IDVal, IDLoc); 2664 } 2665 2666 // Finally, we check if this is allocating a variable with user-defined type. 2667 auto NextIt = Structs.find(nextVal.lower()); 2668 if (NextIt != Structs.end()) { 2669 Lex(); 2670 return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(), 2671 nextVal, nextLoc, IDVal); 2672 } 2673 2674 // __asm _emit or __asm __emit 2675 if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" || 2676 IDVal == "_EMIT" || IDVal == "__EMIT")) 2677 return parseDirectiveMSEmit(IDLoc, Info, IDVal.size()); 2678 2679 // __asm align 2680 if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) 2681 return parseDirectiveMSAlign(IDLoc, Info); 2682 2683 if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN")) 2684 Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4); 2685 if (checkForValidSection()) 2686 return true; 2687 2688 // Canonicalize the opcode to lower case. 
2689 std::string OpcodeStr = IDVal.lower(); 2690 ParseInstructionInfo IInfo(Info.AsmRewrites); 2691 bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID, 2692 Info.ParsedOperands); 2693 Info.ParseError = ParseHadError; 2694 2695 // Dump the parsed representation, if requested. 2696 if (getShowParsedOperands()) { 2697 SmallString<256> Str; 2698 raw_svector_ostream OS(Str); 2699 OS << "parsed instruction: ["; 2700 for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) { 2701 if (i != 0) 2702 OS << ", "; 2703 Info.ParsedOperands[i]->print(OS); 2704 } 2705 OS << "]"; 2706 2707 printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); 2708 } 2709 2710 // Fail even if ParseInstruction erroneously returns false. 2711 if (hasPendingError() || ParseHadError) 2712 return true; 2713 2714 // If we are generating dwarf for the current section then generate a .loc 2715 // directive for the instruction. 2716 if (!ParseHadError && enabledGenDwarfForAssembly() && 2717 getContext().getGenDwarfSectionSyms().count( 2718 getStreamer().getCurrentSectionOnly())) { 2719 unsigned Line; 2720 if (ActiveMacros.empty()) 2721 Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); 2722 else 2723 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc, 2724 ActiveMacros.front()->ExitBuffer); 2725 2726 // If we previously parsed a cpp hash file line comment then make sure the 2727 // current Dwarf File is for the CppHashFilename if not then emit the 2728 // Dwarf File table for it and adjust the line number for the .loc. 
2729 if (!CppHashInfo.Filename.empty()) { 2730 unsigned FileNumber = getStreamer().emitDwarfFileDirective( 2731 0, StringRef(), CppHashInfo.Filename); 2732 getContext().setGenDwarfFileNumber(FileNumber); 2733 2734 unsigned CppHashLocLineNo = 2735 SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf); 2736 Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo); 2737 } 2738 2739 getStreamer().emitDwarfLocDirective( 2740 getContext().getGenDwarfFileNumber(), Line, 0, 2741 DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0, 2742 StringRef()); 2743 } 2744 2745 // If parsing succeeded, match the instruction. 2746 if (!ParseHadError) { 2747 uint64_t ErrorInfo; 2748 if (getTargetParser().MatchAndEmitInstruction( 2749 IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, 2750 getTargetParser().isParsingMSInlineAsm())) 2751 return true; 2752 } 2753 return false; 2754 } 2755 2756 // Parse and erase curly braces marking block start/end. 2757 bool MasmParser::parseCurlyBlockScope( 2758 SmallVectorImpl<AsmRewrite> &AsmStrRewrites) { 2759 // Identify curly brace marking block start/end. 2760 if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly)) 2761 return false; 2762 2763 SMLoc StartLoc = Lexer.getLoc(); 2764 Lex(); // Eat the brace. 2765 if (Lexer.is(AsmToken::EndOfStatement)) 2766 Lex(); // Eat EndOfStatement following the brace. 2767 2768 // Erase the block start/end brace from the output asm string. 2769 AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() - 2770 StartLoc.getPointer()); 2771 return true; 2772 } 2773 2774 /// parseCppHashLineFilenameComment as this: 2775 /// ::= # number "filename" 2776 bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) { 2777 Lex(); // Eat the hash token. 2778 // Lexer only ever emits HashDirective if it fully formed if it's 2779 // done the checking already so this is an internal error. 
2780 assert(getTok().is(AsmToken::Integer) && 2781 "Lexing Cpp line comment: Expected Integer"); 2782 int64_t LineNumber = getTok().getIntVal(); 2783 Lex(); 2784 assert(getTok().is(AsmToken::String) && 2785 "Lexing Cpp line comment: Expected String"); 2786 StringRef Filename = getTok().getString(); 2787 Lex(); 2788 2789 // Get rid of the enclosing quotes. 2790 Filename = Filename.substr(1, Filename.size() - 2); 2791 2792 // Save the SMLoc, Filename and LineNumber for later use by diagnostics 2793 // and possibly DWARF file info. 2794 CppHashInfo.Loc = L; 2795 CppHashInfo.Filename = Filename; 2796 CppHashInfo.LineNumber = LineNumber; 2797 CppHashInfo.Buf = CurBuffer; 2798 if (FirstCppHashFilename.empty()) 2799 FirstCppHashFilename = Filename; 2800 return false; 2801 } 2802 2803 /// will use the last parsed cpp hash line filename comment 2804 /// for the Filename and LineNo if any in the diagnostic. 2805 void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { 2806 const MasmParser *Parser = static_cast<const MasmParser *>(Context); 2807 raw_ostream &OS = errs(); 2808 2809 const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); 2810 SMLoc DiagLoc = Diag.getLoc(); 2811 unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); 2812 unsigned CppHashBuf = 2813 Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc); 2814 2815 // Like SourceMgr::printMessage() we need to print the include stack if any 2816 // before printing the message. 
2817 unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); 2818 if (!Parser->SavedDiagHandler && DiagCurBuffer && 2819 DiagCurBuffer != DiagSrcMgr.getMainFileID()) { 2820 SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer); 2821 DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS); 2822 } 2823 2824 // If we have not parsed a cpp hash line filename comment or the source 2825 // manager changed or buffer changed (like in a nested include) then just 2826 // print the normal diagnostic using its Filename and LineNo. 2827 if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr || 2828 DiagBuf != CppHashBuf) { 2829 if (Parser->SavedDiagHandler) 2830 Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext); 2831 else 2832 Diag.print(nullptr, OS); 2833 return; 2834 } 2835 2836 // Use the CppHashFilename and calculate a line number based on the 2837 // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc 2838 // for the diagnostic. 2839 const std::string &Filename = std::string(Parser->CppHashInfo.Filename); 2840 2841 int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf); 2842 int CppHashLocLineNo = 2843 Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf); 2844 int LineNo = 2845 Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo); 2846 2847 SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo, 2848 Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(), 2849 Diag.getLineContents(), Diag.getRanges()); 2850 2851 if (Parser->SavedDiagHandler) 2852 Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext); 2853 else 2854 NewDiag.print(nullptr, OS); 2855 } 2856 2857 // This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does 2858 // not accept '.'. 
2859 static bool isMacroParameterChar(char C) { 2860 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?'; 2861 } 2862 2863 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, 2864 ArrayRef<MCAsmMacroParameter> Parameters, 2865 ArrayRef<MCAsmMacroArgument> A, 2866 const std::vector<std::string> &Locals, SMLoc L) { 2867 unsigned NParameters = Parameters.size(); 2868 if (NParameters != A.size()) 2869 return Error(L, "Wrong number of arguments"); 2870 StringMap<std::string> LocalSymbols; 2871 std::string Name; 2872 Name.reserve(6); 2873 for (StringRef Local : Locals) { 2874 raw_string_ostream LocalName(Name); 2875 LocalName << "??" 2876 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true); 2877 LocalSymbols.insert({Local, LocalName.str()}); 2878 Name.clear(); 2879 } 2880 2881 std::optional<char> CurrentQuote; 2882 while (!Body.empty()) { 2883 // Scan for the next substitution. 2884 std::size_t End = Body.size(), Pos = 0; 2885 std::size_t IdentifierPos = End; 2886 for (; Pos != End; ++Pos) { 2887 // Find the next possible macro parameter, including preceding a '&' 2888 // inside quotes. 2889 if (Body[Pos] == '&') 2890 break; 2891 if (isMacroParameterChar(Body[Pos])) { 2892 if (!CurrentQuote) 2893 break; 2894 if (IdentifierPos == End) 2895 IdentifierPos = Pos; 2896 } else { 2897 IdentifierPos = End; 2898 } 2899 2900 // Track quotation status 2901 if (!CurrentQuote) { 2902 if (Body[Pos] == '\'' || Body[Pos] == '"') 2903 CurrentQuote = Body[Pos]; 2904 } else if (Body[Pos] == CurrentQuote) { 2905 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) { 2906 // Escaped quote, and quotes aren't identifier chars; skip 2907 ++Pos; 2908 continue; 2909 } else { 2910 CurrentQuote.reset(); 2911 } 2912 } 2913 } 2914 if (IdentifierPos != End) { 2915 // We've recognized an identifier before an apostrophe inside quotes; 2916 // check once to see if we can expand it. 
2917 Pos = IdentifierPos; 2918 IdentifierPos = End; 2919 } 2920 2921 // Add the prefix. 2922 OS << Body.slice(0, Pos); 2923 2924 // Check if we reached the end. 2925 if (Pos == End) 2926 break; 2927 2928 unsigned I = Pos; 2929 bool InitialAmpersand = (Body[I] == '&'); 2930 if (InitialAmpersand) { 2931 ++I; 2932 ++Pos; 2933 } 2934 while (I < End && isMacroParameterChar(Body[I])) 2935 ++I; 2936 2937 const char *Begin = Body.data() + Pos; 2938 StringRef Argument(Begin, I - Pos); 2939 const std::string ArgumentLower = Argument.lower(); 2940 unsigned Index = 0; 2941 2942 for (; Index < NParameters; ++Index) 2943 if (Parameters[Index].Name.equals_insensitive(ArgumentLower)) 2944 break; 2945 2946 if (Index == NParameters) { 2947 if (InitialAmpersand) 2948 OS << '&'; 2949 auto it = LocalSymbols.find(ArgumentLower); 2950 if (it != LocalSymbols.end()) 2951 OS << it->second; 2952 else 2953 OS << Argument; 2954 Pos = I; 2955 } else { 2956 for (const AsmToken &Token : A[Index]) { 2957 // In MASM, you can write '%expr'. 2958 // The prefix '%' evaluates the expression 'expr' 2959 // and uses the result as a string (e.g. replace %(1+2) with the 2960 // string "3"). 2961 // Here, we identify the integer token which is the result of the 2962 // absolute expression evaluation and replace it with its string 2963 // representation. 2964 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer)) 2965 // Emit an integer value to the buffer. 2966 OS << Token.getIntVal(); 2967 else 2968 OS << Token.getString(); 2969 } 2970 2971 Pos += Argument.size(); 2972 if (Pos < End && Body[Pos] == '&') { 2973 ++Pos; 2974 } 2975 } 2976 // Update the scan point. 
2977 Body = Body.substr(Pos); 2978 } 2979 2980 return false; 2981 } 2982 2983 static bool isOperator(AsmToken::TokenKind kind) { 2984 switch (kind) { 2985 default: 2986 return false; 2987 case AsmToken::Plus: 2988 case AsmToken::Minus: 2989 case AsmToken::Tilde: 2990 case AsmToken::Slash: 2991 case AsmToken::Star: 2992 case AsmToken::Dot: 2993 case AsmToken::Equal: 2994 case AsmToken::EqualEqual: 2995 case AsmToken::Pipe: 2996 case AsmToken::PipePipe: 2997 case AsmToken::Caret: 2998 case AsmToken::Amp: 2999 case AsmToken::AmpAmp: 3000 case AsmToken::Exclaim: 3001 case AsmToken::ExclaimEqual: 3002 case AsmToken::Less: 3003 case AsmToken::LessEqual: 3004 case AsmToken::LessLess: 3005 case AsmToken::LessGreater: 3006 case AsmToken::Greater: 3007 case AsmToken::GreaterEqual: 3008 case AsmToken::GreaterGreater: 3009 return true; 3010 } 3011 } 3012 3013 namespace { 3014 3015 class AsmLexerSkipSpaceRAII { 3016 public: 3017 AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) { 3018 Lexer.setSkipSpace(SkipSpace); 3019 } 3020 3021 ~AsmLexerSkipSpaceRAII() { 3022 Lexer.setSkipSpace(true); 3023 } 3024 3025 private: 3026 AsmLexer &Lexer; 3027 }; 3028 3029 } // end anonymous namespace 3030 3031 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP, 3032 MCAsmMacroArgument &MA, 3033 AsmToken::TokenKind EndTok) { 3034 if (MP && MP->Vararg) { 3035 if (Lexer.isNot(EndTok)) { 3036 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok); 3037 for (StringRef S : Str) { 3038 MA.emplace_back(AsmToken::String, S); 3039 } 3040 } 3041 return false; 3042 } 3043 3044 SMLoc StrLoc = Lexer.getLoc(), EndLoc; 3045 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) { 3046 const char *StrChar = StrLoc.getPointer() + 1; 3047 const char *EndChar = EndLoc.getPointer() - 1; 3048 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); 3049 /// Eat from '<' to '>'. 
3050 Lex(); 3051 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar)); 3052 return false; 3053 } 3054 3055 unsigned ParenLevel = 0; 3056 3057 // Darwin doesn't use spaces to delmit arguments. 3058 AsmLexerSkipSpaceRAII ScopedSkipSpace(Lexer, IsDarwin); 3059 3060 bool SpaceEaten; 3061 3062 while (true) { 3063 SpaceEaten = false; 3064 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) 3065 return TokError("unexpected token"); 3066 3067 if (ParenLevel == 0) { 3068 if (Lexer.is(AsmToken::Comma)) 3069 break; 3070 3071 if (Lexer.is(AsmToken::Space)) { 3072 SpaceEaten = true; 3073 Lex(); // Eat spaces. 3074 } 3075 3076 // Spaces can delimit parameters, but could also be part an expression. 3077 // If the token after a space is an operator, add the token and the next 3078 // one into this argument 3079 if (!IsDarwin) { 3080 if (isOperator(Lexer.getKind()) && Lexer.isNot(EndTok)) { 3081 MA.push_back(getTok()); 3082 Lex(); 3083 3084 // Whitespace after an operator can be ignored. 3085 if (Lexer.is(AsmToken::Space)) 3086 Lex(); 3087 3088 continue; 3089 } 3090 } 3091 if (SpaceEaten) 3092 break; 3093 } 3094 3095 // handleMacroEntry relies on not advancing the lexer here 3096 // to be able to fill in the remaining default parameter values 3097 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0)) 3098 break; 3099 3100 // Adjust the current parentheses level. 3101 if (Lexer.is(AsmToken::LParen)) 3102 ++ParenLevel; 3103 else if (Lexer.is(AsmToken::RParen) && ParenLevel) 3104 --ParenLevel; 3105 3106 // Append the token to the current argument list. 
3107 MA.push_back(getTok()); 3108 Lex(); 3109 } 3110 3111 if (ParenLevel != 0) 3112 return TokError("unbalanced parentheses in argument"); 3113 3114 if (MA.empty() && MP) { 3115 if (MP->Required) { 3116 return TokError("missing value for required parameter '" + MP->Name + 3117 "'"); 3118 } else { 3119 MA = MP->Value; 3120 } 3121 } 3122 return false; 3123 } 3124 3125 // Parse the macro instantiation arguments. 3126 bool MasmParser::parseMacroArguments(const MCAsmMacro *M, 3127 MCAsmMacroArguments &A, 3128 AsmToken::TokenKind EndTok) { 3129 const unsigned NParameters = M ? M->Parameters.size() : 0; 3130 bool NamedParametersFound = false; 3131 SmallVector<SMLoc, 4> FALocs; 3132 3133 A.resize(NParameters); 3134 FALocs.resize(NParameters); 3135 3136 // Parse two kinds of macro invocations: 3137 // - macros defined without any parameters accept an arbitrary number of them 3138 // - macros defined with parameters accept at most that many of them 3139 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; 3140 ++Parameter) { 3141 SMLoc IDLoc = Lexer.getLoc(); 3142 MCAsmMacroParameter FA; 3143 3144 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) { 3145 if (parseIdentifier(FA.Name)) 3146 return Error(IDLoc, "invalid argument identifier for formal argument"); 3147 3148 if (Lexer.isNot(AsmToken::Equal)) 3149 return TokError("expected '=' after formal parameter identifier"); 3150 3151 Lex(); 3152 3153 NamedParametersFound = true; 3154 } 3155 3156 if (NamedParametersFound && FA.Name.empty()) 3157 return Error(IDLoc, "cannot mix positional and keyword arguments"); 3158 3159 unsigned PI = Parameter; 3160 if (!FA.Name.empty()) { 3161 assert(M && "expected macro to be defined"); 3162 unsigned FAI = 0; 3163 for (FAI = 0; FAI < NParameters; ++FAI) 3164 if (M->Parameters[FAI].Name == FA.Name) 3165 break; 3166 3167 if (FAI >= NParameters) { 3168 return Error(IDLoc, "parameter named '" + FA.Name + 3169 "' does not exist for macro '" + M->Name + "'"); 
3170 } 3171 PI = FAI; 3172 } 3173 const MCAsmMacroParameter *MP = nullptr; 3174 if (M && PI < NParameters) 3175 MP = &M->Parameters[PI]; 3176 3177 SMLoc StrLoc = Lexer.getLoc(); 3178 SMLoc EndLoc; 3179 if (Lexer.is(AsmToken::Percent)) { 3180 const MCExpr *AbsoluteExp; 3181 int64_t Value; 3182 /// Eat '%'. 3183 Lex(); 3184 if (parseExpression(AbsoluteExp, EndLoc)) 3185 return false; 3186 if (!AbsoluteExp->evaluateAsAbsolute(Value, 3187 getStreamer().getAssemblerPtr())) 3188 return Error(StrLoc, "expected absolute expression"); 3189 const char *StrChar = StrLoc.getPointer(); 3190 const char *EndChar = EndLoc.getPointer(); 3191 AsmToken newToken(AsmToken::Integer, 3192 StringRef(StrChar, EndChar - StrChar), Value); 3193 FA.Value.push_back(newToken); 3194 } else if (parseMacroArgument(MP, FA.Value, EndTok)) { 3195 if (M) 3196 return addErrorSuffix(" in '" + M->Name + "' macro"); 3197 else 3198 return true; 3199 } 3200 3201 if (!FA.Value.empty()) { 3202 if (A.size() <= PI) 3203 A.resize(PI + 1); 3204 A[PI] = FA.Value; 3205 3206 if (FALocs.size() <= PI) 3207 FALocs.resize(PI + 1); 3208 3209 FALocs[PI] = Lexer.getLoc(); 3210 } 3211 3212 // At the end of the statement, fill in remaining arguments that have 3213 // default values. If there aren't any, then the next argument is 3214 // required but missing 3215 if (Lexer.is(EndTok)) { 3216 bool Failure = false; 3217 for (unsigned FAI = 0; FAI < NParameters; ++FAI) { 3218 if (A[FAI].empty()) { 3219 if (M->Parameters[FAI].Required) { 3220 Error(FALocs[FAI].isValid() ? 
FALocs[FAI] : Lexer.getLoc(), 3221 "missing value for required parameter " 3222 "'" + 3223 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'"); 3224 Failure = true; 3225 } 3226 3227 if (!M->Parameters[FAI].Value.empty()) 3228 A[FAI] = M->Parameters[FAI].Value; 3229 } 3230 } 3231 return Failure; 3232 } 3233 3234 if (Lexer.is(AsmToken::Comma)) 3235 Lex(); 3236 } 3237 3238 return TokError("too many positional arguments"); 3239 } 3240 3241 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc, 3242 AsmToken::TokenKind ArgumentEndTok) { 3243 // Arbitrarily limit macro nesting depth (default matches 'as'). We can 3244 // eliminate this, although we should protect against infinite loops. 3245 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth; 3246 if (ActiveMacros.size() == MaxNestingDepth) { 3247 std::ostringstream MaxNestingDepthError; 3248 MaxNestingDepthError << "macros cannot be nested more than " 3249 << MaxNestingDepth << " levels deep." 3250 << " Use -asm-macro-max-nesting-depth to increase " 3251 "this limit."; 3252 return TokError(MaxNestingDepthError.str()); 3253 } 3254 3255 MCAsmMacroArguments A; 3256 if (parseMacroArguments(M, A, ArgumentEndTok)) 3257 return true; 3258 3259 // Macro instantiation is lexical, unfortunately. We construct a new buffer 3260 // to hold the macro body with substitutions. 3261 SmallString<256> Buf; 3262 StringRef Body = M->Body; 3263 raw_svector_ostream OS(Buf); 3264 3265 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc())) 3266 return true; 3267 3268 // We include the endm in the buffer as our cue to exit the macro 3269 // instantiation. 3270 OS << "endm\n"; 3271 3272 std::unique_ptr<MemoryBuffer> Instantiation = 3273 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); 3274 3275 // Create the macro instantiation object and add to the current macro 3276 // instantiation stack. 
3277 MacroInstantiation *MI = new MacroInstantiation{ 3278 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()}; 3279 ActiveMacros.push_back(MI); 3280 3281 ++NumOfMacroInstantiations; 3282 3283 // Jump to the macro instantiation and prime the lexer. 3284 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); 3285 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 3286 EndStatementAtEOFStack.push_back(true); 3287 Lex(); 3288 3289 return false; 3290 } 3291 3292 void MasmParser::handleMacroExit() { 3293 // Jump to the token we should return to, and consume it. 3294 EndStatementAtEOFStack.pop_back(); 3295 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer, 3296 EndStatementAtEOFStack.back()); 3297 Lex(); 3298 3299 // Pop the instantiation entry. 3300 delete ActiveMacros.back(); 3301 ActiveMacros.pop_back(); 3302 } 3303 3304 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) { 3305 if (!M->IsFunction) 3306 return Error(NameLoc, "cannot invoke macro procedure as function"); 3307 3308 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name + 3309 "' requires arguments in parentheses") || 3310 handleMacroEntry(M, NameLoc, AsmToken::RParen)) 3311 return true; 3312 3313 // Parse all statements in the macro, retrieving the exit value when it ends. 3314 std::string ExitValue; 3315 SmallVector<AsmRewrite, 4> AsmStrRewrites; 3316 while (Lexer.isNot(AsmToken::Eof)) { 3317 ParseStatementInfo Info(&AsmStrRewrites); 3318 bool Parsed = parseStatement(Info, nullptr); 3319 3320 if (!Parsed && Info.ExitValue) { 3321 ExitValue = std::move(*Info.ExitValue); 3322 break; 3323 } 3324 3325 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error 3326 // for printing ErrMsg via Lex() only if no (presumably better) parser error 3327 // exists. 
3328 if (Parsed && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) { 3329 Lex(); 3330 } 3331 3332 // parseStatement returned true so may need to emit an error. 3333 printPendingErrors(); 3334 3335 // Skipping to the next line if needed. 3336 if (Parsed && !getLexer().isAtStartOfStatement()) 3337 eatToEndOfStatement(); 3338 } 3339 3340 // Consume the right-parenthesis on the other side of the arguments. 3341 if (parseRParen()) 3342 return true; 3343 3344 // Exit values may require lexing, unfortunately. We construct a new buffer to 3345 // hold the exit value. 3346 std::unique_ptr<MemoryBuffer> MacroValue = 3347 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>"); 3348 3349 // Jump from this location to the instantiated exit value, and prime the 3350 // lexer. 3351 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc()); 3352 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, 3353 /*EndStatementAtEOF=*/false); 3354 EndStatementAtEOFStack.push_back(false); 3355 Lex(); 3356 3357 return false; 3358 } 3359 3360 /// parseIdentifier: 3361 /// ::= identifier 3362 /// ::= string 3363 bool MasmParser::parseIdentifier(StringRef &Res, 3364 IdentifierPositionKind Position) { 3365 // The assembler has relaxed rules for accepting identifiers, in particular we 3366 // allow things like '.globl $foo' and '.def @feat.00', which would normally 3367 // be separate tokens. At this level, we have already lexed so we cannot 3368 // (currently) handle this as a context dependent token, instead we detect 3369 // adjacent tokens and return the combined identifier. 3370 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) { 3371 SMLoc PrefixLoc = getLexer().getLoc(); 3372 3373 // Consume the prefix character, and check for a following identifier. 
3374 3375 AsmToken nextTok = peekTok(false); 3376 3377 if (nextTok.isNot(AsmToken::Identifier)) 3378 return true; 3379 3380 // We have a '$' or '@' followed by an identifier, make sure they are adjacent. 3381 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer()) 3382 return true; 3383 3384 // eat $ or @ 3385 Lexer.Lex(); // Lexer's Lex guarantees consecutive token. 3386 // Construct the joined identifier and consume the token. 3387 Res = 3388 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); 3389 Lex(); // Parser Lex to maintain invariants. 3390 return false; 3391 } 3392 3393 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String)) 3394 return true; 3395 3396 Res = getTok().getIdentifier(); 3397 3398 // Consume the identifier token - but if parsing certain directives, avoid 3399 // lexical expansion of the next token. 3400 ExpandKind ExpandNextToken = ExpandMacros; 3401 if (Position == StartOfStatement && 3402 StringSwitch<bool>(Res) 3403 .CaseLower("echo", true) 3404 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true) 3405 .Default(false)) { 3406 ExpandNextToken = DoNotExpandMacros; 3407 } 3408 Lex(ExpandNextToken); 3409 3410 return false; 3411 } 3412 3413 /// parseDirectiveEquate: 3414 /// ::= name "=" expression 3415 /// | name "equ" expression (not redefinable) 3416 /// | name "equ" text-list 3417 /// | name "textequ" text-list (redefinability unspecified) 3418 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, 3419 DirectiveKind DirKind, SMLoc NameLoc) { 3420 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower()); 3421 if (BuiltinIt != BuiltinSymbolMap.end()) 3422 return Error(NameLoc, "cannot redefine a built-in symbol"); 3423 3424 Variable &Var = Variables[Name.lower()]; 3425 if (Var.Name.empty()) { 3426 Var.Name = Name; 3427 } 3428 3429 SMLoc StartLoc = Lexer.getLoc(); 3430 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) { 3431 // "equ" and "textequ" both allow text expressions. 
3432 std::string Value; 3433 std::string TextItem; 3434 if (!parseTextItem(TextItem)) { 3435 Value += TextItem; 3436 3437 // Accept a text-list, not just one text-item. 3438 auto parseItem = [&]() -> bool { 3439 if (parseTextItem(TextItem)) 3440 return TokError("expected text item"); 3441 Value += TextItem; 3442 return false; 3443 }; 3444 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem)) 3445 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3446 3447 if (!Var.IsText || Var.TextValue != Value) { 3448 switch (Var.Redefinable) { 3449 case Variable::NOT_REDEFINABLE: 3450 return Error(getTok().getLoc(), "invalid variable redefinition"); 3451 case Variable::WARN_ON_REDEFINITION: 3452 if (Warning(NameLoc, "redefining '" + Name + 3453 "', already defined on the command line")) { 3454 return true; 3455 } 3456 break; 3457 default: 3458 break; 3459 } 3460 } 3461 Var.IsText = true; 3462 Var.TextValue = Value; 3463 Var.Redefinable = Variable::REDEFINABLE; 3464 3465 return false; 3466 } 3467 } 3468 if (DirKind == DK_TEXTEQU) 3469 return TokError("expected <text> in '" + Twine(IDVal) + "' directive"); 3470 3471 // Parse as expression assignment. 3472 const MCExpr *Expr; 3473 SMLoc EndLoc; 3474 if (parseExpression(Expr, EndLoc)) 3475 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3476 StringRef ExprAsString = StringRef( 3477 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer()); 3478 3479 int64_t Value; 3480 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) { 3481 if (DirKind == DK_ASSIGN) 3482 return Error( 3483 StartLoc, 3484 "expected absolute expression; not all symbols have known values", 3485 {StartLoc, EndLoc}); 3486 3487 // Not an absolute expression; define as a text replacement. 
3488 if (!Var.IsText || Var.TextValue != ExprAsString) { 3489 switch (Var.Redefinable) { 3490 case Variable::NOT_REDEFINABLE: 3491 return Error(getTok().getLoc(), "invalid variable redefinition"); 3492 case Variable::WARN_ON_REDEFINITION: 3493 if (Warning(NameLoc, "redefining '" + Name + 3494 "', already defined on the command line")) { 3495 return true; 3496 } 3497 break; 3498 default: 3499 break; 3500 } 3501 } 3502 3503 Var.IsText = true; 3504 Var.TextValue = ExprAsString.str(); 3505 Var.Redefinable = Variable::REDEFINABLE; 3506 3507 return false; 3508 } 3509 3510 MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); 3511 3512 const MCConstantExpr *PrevValue = 3513 Sym->isVariable() ? dyn_cast_or_null<MCConstantExpr>( 3514 Sym->getVariableValue(/*SetUsed=*/false)) 3515 : nullptr; 3516 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) { 3517 switch (Var.Redefinable) { 3518 case Variable::NOT_REDEFINABLE: 3519 return Error(getTok().getLoc(), "invalid variable redefinition"); 3520 case Variable::WARN_ON_REDEFINITION: 3521 if (Warning(NameLoc, "redefining '" + Name + 3522 "', already defined on the command line")) { 3523 return true; 3524 } 3525 break; 3526 default: 3527 break; 3528 } 3529 } 3530 3531 Var.IsText = false; 3532 Var.TextValue.clear(); 3533 Var.Redefinable = (DirKind == DK_ASSIGN) ? 
Variable::REDEFINABLE 3534 : Variable::NOT_REDEFINABLE; 3535 3536 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE); 3537 Sym->setVariableValue(Expr); 3538 Sym->setExternal(false); 3539 3540 return false; 3541 } 3542 3543 bool MasmParser::parseEscapedString(std::string &Data) { 3544 if (check(getTok().isNot(AsmToken::String), "expected string")) 3545 return true; 3546 3547 Data = ""; 3548 char Quote = getTok().getString().front(); 3549 StringRef Str = getTok().getStringContents(); 3550 Data.reserve(Str.size()); 3551 for (size_t i = 0, e = Str.size(); i != e; ++i) { 3552 Data.push_back(Str[i]); 3553 if (Str[i] == Quote) { 3554 // MASM treats doubled delimiting quotes as an escaped delimiting quote. 3555 // If we're escaping the string's trailing delimiter, we're definitely 3556 // missing a quotation mark. 3557 if (i + 1 == Str.size()) 3558 return Error(getTok().getLoc(), "missing quotation mark in string"); 3559 if (Str[i + 1] == Quote) 3560 ++i; 3561 } 3562 } 3563 3564 Lex(); 3565 return false; 3566 } 3567 3568 bool MasmParser::parseAngleBracketString(std::string &Data) { 3569 SMLoc EndLoc, StartLoc = getTok().getLoc(); 3570 if (isAngleBracketString(StartLoc, EndLoc)) { 3571 const char *StartChar = StartLoc.getPointer() + 1; 3572 const char *EndChar = EndLoc.getPointer() - 1; 3573 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); 3574 // Eat from '<' to '>'. 
3575 Lex(); 3576 3577 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar)); 3578 return false; 3579 } 3580 return true; 3581 } 3582 3583 /// textItem ::= textLiteral | textMacroID | % constExpr 3584 bool MasmParser::parseTextItem(std::string &Data) { 3585 switch (getTok().getKind()) { 3586 default: 3587 return true; 3588 case AsmToken::Percent: { 3589 int64_t Res; 3590 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res)) 3591 return true; 3592 Data = std::to_string(Res); 3593 return false; 3594 } 3595 case AsmToken::Less: 3596 case AsmToken::LessEqual: 3597 case AsmToken::LessLess: 3598 case AsmToken::LessGreater: 3599 return parseAngleBracketString(Data); 3600 case AsmToken::Identifier: { 3601 // This must be a text macro; we need to expand it accordingly. 3602 StringRef ID; 3603 SMLoc StartLoc = getTok().getLoc(); 3604 if (parseIdentifier(ID)) 3605 return true; 3606 Data = ID.str(); 3607 3608 bool Expanded = false; 3609 while (true) { 3610 // Try to resolve as a built-in text macro 3611 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower()); 3612 if (BuiltinIt != BuiltinSymbolMap.end()) { 3613 std::optional<std::string> BuiltinText = 3614 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc); 3615 if (!BuiltinText) { 3616 // Not a text macro; break without substituting 3617 break; 3618 } 3619 Data = std::move(*BuiltinText); 3620 ID = StringRef(Data); 3621 Expanded = true; 3622 continue; 3623 } 3624 3625 // Try to resolve as a variable text macro 3626 auto VarIt = Variables.find(ID.lower()); 3627 if (VarIt != Variables.end()) { 3628 const Variable &Var = VarIt->getValue(); 3629 if (!Var.IsText) { 3630 // Not a text macro; break without substituting 3631 break; 3632 } 3633 Data = Var.TextValue; 3634 ID = StringRef(Data); 3635 Expanded = true; 3636 continue; 3637 } 3638 3639 break; 3640 } 3641 3642 if (!Expanded) { 3643 // Not a text macro; not usable in TextItem context. 
Since we haven't used 3644 // the token, put it back for better error recovery. 3645 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); 3646 return true; 3647 } 3648 return false; 3649 } 3650 } 3651 llvm_unreachable("unhandled token kind"); 3652 } 3653 3654 /// parseDirectiveAscii: 3655 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ] 3656 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { 3657 auto parseOp = [&]() -> bool { 3658 std::string Data; 3659 if (checkForValidSection() || parseEscapedString(Data)) 3660 return true; 3661 getStreamer().emitBytes(Data); 3662 if (ZeroTerminated) 3663 getStreamer().emitBytes(StringRef("\0", 1)); 3664 return false; 3665 }; 3666 3667 if (parseMany(parseOp)) 3668 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3669 return false; 3670 } 3671 3672 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) { 3673 // Special case constant expressions to match code generator. 3674 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { 3675 assert(Size <= 8 && "Invalid size"); 3676 int64_t IntValue = MCE->getValue(); 3677 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) 3678 return Error(MCE->getLoc(), "out of range literal value"); 3679 getStreamer().emitIntValue(IntValue, Size); 3680 } else { 3681 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value); 3682 if (MSE && MSE->getSymbol().getName() == "?") { 3683 // ? initializer; treat as 0. 3684 getStreamer().emitIntValue(0, Size); 3685 } else { 3686 getStreamer().emitValue(Value, Size, Value->getLoc()); 3687 } 3688 } 3689 return false; 3690 } 3691 3692 bool MasmParser::parseScalarInitializer(unsigned Size, 3693 SmallVectorImpl<const MCExpr *> &Values, 3694 unsigned StringPadLength) { 3695 if (Size == 1 && getTok().is(AsmToken::String)) { 3696 std::string Value; 3697 if (parseEscapedString(Value)) 3698 return true; 3699 // Treat each character as an initializer. 
3700 for (const unsigned char CharVal : Value) 3701 Values.push_back(MCConstantExpr::create(CharVal, getContext())); 3702 3703 // Pad the string with spaces to the specified length. 3704 for (size_t i = Value.size(); i < StringPadLength; ++i) 3705 Values.push_back(MCConstantExpr::create(' ', getContext())); 3706 } else { 3707 const MCExpr *Value; 3708 if (parseExpression(Value)) 3709 return true; 3710 if (getTok().is(AsmToken::Identifier) && 3711 getTok().getString().equals_insensitive("dup")) { 3712 Lex(); // Eat 'dup'. 3713 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 3714 if (!MCE) 3715 return Error(Value->getLoc(), 3716 "cannot repeat value a non-constant number of times"); 3717 const int64_t Repetitions = MCE->getValue(); 3718 if (Repetitions < 0) 3719 return Error(Value->getLoc(), 3720 "cannot repeat value a negative number of times"); 3721 3722 SmallVector<const MCExpr *, 1> DuplicatedValues; 3723 if (parseToken(AsmToken::LParen, 3724 "parentheses required for 'dup' contents") || 3725 parseScalarInstList(Size, DuplicatedValues) || parseRParen()) 3726 return true; 3727 3728 for (int i = 0; i < Repetitions; ++i) 3729 Values.append(DuplicatedValues.begin(), DuplicatedValues.end()); 3730 } else { 3731 Values.push_back(Value); 3732 } 3733 } 3734 return false; 3735 } 3736 3737 bool MasmParser::parseScalarInstList(unsigned Size, 3738 SmallVectorImpl<const MCExpr *> &Values, 3739 const AsmToken::TokenKind EndToken) { 3740 while (getTok().isNot(EndToken) && 3741 (EndToken != AsmToken::Greater || 3742 getTok().isNot(AsmToken::GreaterGreater))) { 3743 parseScalarInitializer(Size, Values); 3744 3745 // If we see a comma, continue, and allow line continuation. 
3746 if (!parseOptionalToken(AsmToken::Comma)) 3747 break; 3748 parseOptionalToken(AsmToken::EndOfStatement); 3749 } 3750 return false; 3751 } 3752 3753 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) { 3754 SmallVector<const MCExpr *, 1> Values; 3755 if (checkForValidSection() || parseScalarInstList(Size, Values)) 3756 return true; 3757 3758 for (const auto *Value : Values) { 3759 emitIntValue(Value, Size); 3760 } 3761 if (Count) 3762 *Count = Values.size(); 3763 return false; 3764 } 3765 3766 // Add a field to the current structure. 3767 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { 3768 StructInfo &Struct = StructInProgress.back(); 3769 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); 3770 IntFieldInfo &IntInfo = Field.Contents.IntInfo; 3771 3772 Field.Type = Size; 3773 3774 if (parseScalarInstList(Size, IntInfo.Values)) 3775 return true; 3776 3777 Field.SizeOf = Field.Type * IntInfo.Values.size(); 3778 Field.LengthOf = IntInfo.Values.size(); 3779 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 3780 if (!Struct.IsUnion) { 3781 Struct.NextOffset = FieldEnd; 3782 } 3783 Struct.Size = std::max(Struct.Size, FieldEnd); 3784 return false; 3785 } 3786 3787 /// parseDirectiveValue 3788 /// ::= (byte | word | ... ) [ expression (, expression)* ] 3789 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) { 3790 if (StructInProgress.empty()) { 3791 // Initialize data value. 3792 if (emitIntegralValues(Size)) 3793 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3794 } else if (addIntegralField("", Size)) { 3795 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3796 } 3797 3798 return false; 3799 } 3800 3801 /// parseDirectiveNamedValue 3802 /// ::= name (byte | word | ... 
) [ expression (, expression)* ] 3803 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size, 3804 StringRef Name, SMLoc NameLoc) { 3805 if (StructInProgress.empty()) { 3806 // Initialize named data value. 3807 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 3808 getStreamer().emitLabel(Sym); 3809 unsigned Count; 3810 if (emitIntegralValues(Size, &Count)) 3811 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); 3812 3813 AsmTypeInfo Type; 3814 Type.Name = TypeName; 3815 Type.Size = Size * Count; 3816 Type.ElementSize = Size; 3817 Type.Length = Count; 3818 KnownType[Name.lower()] = Type; 3819 } else if (addIntegralField(Name, Size)) { 3820 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); 3821 } 3822 3823 return false; 3824 } 3825 3826 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) { 3827 if (Asm.getTok().isNot(AsmToken::Integer) && 3828 Asm.getTok().isNot(AsmToken::BigNum)) 3829 return Asm.TokError("unknown token in expression"); 3830 SMLoc ExprLoc = Asm.getTok().getLoc(); 3831 APInt IntValue = Asm.getTok().getAPIntVal(); 3832 Asm.Lex(); 3833 if (!IntValue.isIntN(128)) 3834 return Asm.Error(ExprLoc, "out of range literal value"); 3835 if (!IntValue.isIntN(64)) { 3836 hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue(); 3837 lo = IntValue.getLoBits(64).getZExtValue(); 3838 } else { 3839 hi = 0; 3840 lo = IntValue.getZExtValue(); 3841 } 3842 return false; 3843 } 3844 3845 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { 3846 // We don't truly support arithmetic on floating point expressions, so we 3847 // have to manually parse unary prefixes. 
3848 bool IsNeg = false; 3849 SMLoc SignLoc; 3850 if (getLexer().is(AsmToken::Minus)) { 3851 SignLoc = getLexer().getLoc(); 3852 Lexer.Lex(); 3853 IsNeg = true; 3854 } else if (getLexer().is(AsmToken::Plus)) { 3855 SignLoc = getLexer().getLoc(); 3856 Lexer.Lex(); 3857 } 3858 3859 if (Lexer.is(AsmToken::Error)) 3860 return TokError(Lexer.getErr()); 3861 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) && 3862 Lexer.isNot(AsmToken::Identifier)) 3863 return TokError("unexpected token in directive"); 3864 3865 // Convert to an APFloat. 3866 APFloat Value(Semantics); 3867 StringRef IDVal = getTok().getString(); 3868 if (getLexer().is(AsmToken::Identifier)) { 3869 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf")) 3870 Value = APFloat::getInf(Semantics); 3871 else if (IDVal.equals_insensitive("nan")) 3872 Value = APFloat::getNaN(Semantics, false, ~0); 3873 else if (IDVal.equals_insensitive("?")) 3874 Value = APFloat::getZero(Semantics); 3875 else 3876 return TokError("invalid floating point literal"); 3877 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) { 3878 // MASM hexadecimal floating-point literal; no APFloat conversion needed. 3879 // To match ML64.exe, ignore the initial sign. 3880 unsigned SizeInBits = Value.getSizeInBits(Semantics); 3881 if (SizeInBits != (IDVal.size() << 2)) 3882 return TokError("invalid floating point literal"); 3883 3884 // Consume the numeric token. 3885 Lex(); 3886 3887 Res = APInt(SizeInBits, IDVal, 16); 3888 if (SignLoc.isValid()) 3889 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign"); 3890 return false; 3891 } else if (errorToBool( 3892 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) 3893 .takeError())) { 3894 return TokError("invalid floating point literal"); 3895 } 3896 if (IsNeg) 3897 Value.changeSign(); 3898 3899 // Consume the numeric token. 
3900 Lex(); 3901 3902 Res = Value.bitcastToAPInt(); 3903 3904 return false; 3905 } 3906 3907 bool MasmParser::parseRealInstList(const fltSemantics &Semantics, 3908 SmallVectorImpl<APInt> &ValuesAsInt, 3909 const AsmToken::TokenKind EndToken) { 3910 while (getTok().isNot(EndToken) || 3911 (EndToken == AsmToken::Greater && 3912 getTok().isNot(AsmToken::GreaterGreater))) { 3913 const AsmToken NextTok = peekTok(); 3914 if (NextTok.is(AsmToken::Identifier) && 3915 NextTok.getString().equals_insensitive("dup")) { 3916 const MCExpr *Value; 3917 if (parseExpression(Value) || parseToken(AsmToken::Identifier)) 3918 return true; 3919 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 3920 if (!MCE) 3921 return Error(Value->getLoc(), 3922 "cannot repeat value a non-constant number of times"); 3923 const int64_t Repetitions = MCE->getValue(); 3924 if (Repetitions < 0) 3925 return Error(Value->getLoc(), 3926 "cannot repeat value a negative number of times"); 3927 3928 SmallVector<APInt, 1> DuplicatedValues; 3929 if (parseToken(AsmToken::LParen, 3930 "parentheses required for 'dup' contents") || 3931 parseRealInstList(Semantics, DuplicatedValues) || parseRParen()) 3932 return true; 3933 3934 for (int i = 0; i < Repetitions; ++i) 3935 ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end()); 3936 } else { 3937 APInt AsInt; 3938 if (parseRealValue(Semantics, AsInt)) 3939 return true; 3940 ValuesAsInt.push_back(AsInt); 3941 } 3942 3943 // Continue if we see a comma. (Also, allow line continuation.) 3944 if (!parseOptionalToken(AsmToken::Comma)) 3945 break; 3946 parseOptionalToken(AsmToken::EndOfStatement); 3947 } 3948 3949 return false; 3950 } 3951 3952 // Initialize real data values. 
3953 bool MasmParser::emitRealValues(const fltSemantics &Semantics, 3954 unsigned *Count) { 3955 if (checkForValidSection()) 3956 return true; 3957 3958 SmallVector<APInt, 1> ValuesAsInt; 3959 if (parseRealInstList(Semantics, ValuesAsInt)) 3960 return true; 3961 3962 for (const APInt &AsInt : ValuesAsInt) { 3963 getStreamer().emitIntValue(AsInt); 3964 } 3965 if (Count) 3966 *Count = ValuesAsInt.size(); 3967 return false; 3968 } 3969 3970 // Add a real field to the current struct. 3971 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, 3972 size_t Size) { 3973 StructInfo &Struct = StructInProgress.back(); 3974 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); 3975 RealFieldInfo &RealInfo = Field.Contents.RealInfo; 3976 3977 Field.SizeOf = 0; 3978 3979 if (parseRealInstList(Semantics, RealInfo.AsIntValues)) 3980 return true; 3981 3982 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; 3983 Field.LengthOf = RealInfo.AsIntValues.size(); 3984 Field.SizeOf = Field.Type * Field.LengthOf; 3985 3986 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 3987 if (!Struct.IsUnion) { 3988 Struct.NextOffset = FieldEnd; 3989 } 3990 Struct.Size = std::max(Struct.Size, FieldEnd); 3991 return false; 3992 } 3993 3994 /// parseDirectiveRealValue 3995 /// ::= (real4 | real8 | real10) [ expression (, expression)* ] 3996 bool MasmParser::parseDirectiveRealValue(StringRef IDVal, 3997 const fltSemantics &Semantics, 3998 size_t Size) { 3999 if (StructInProgress.empty()) { 4000 // Initialize data value. 
4001 if (emitRealValues(Semantics)) 4002 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 4003 } else if (addRealField("", Semantics, Size)) { 4004 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 4005 } 4006 return false; 4007 } 4008 4009 /// parseDirectiveNamedRealValue 4010 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ] 4011 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName, 4012 const fltSemantics &Semantics, 4013 unsigned Size, StringRef Name, 4014 SMLoc NameLoc) { 4015 if (StructInProgress.empty()) { 4016 // Initialize named data value. 4017 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 4018 getStreamer().emitLabel(Sym); 4019 unsigned Count; 4020 if (emitRealValues(Semantics, &Count)) 4021 return addErrorSuffix(" in '" + TypeName + "' directive"); 4022 4023 AsmTypeInfo Type; 4024 Type.Name = TypeName; 4025 Type.Size = Size * Count; 4026 Type.ElementSize = Size; 4027 Type.Length = Count; 4028 KnownType[Name.lower()] = Type; 4029 } else if (addRealField(Name, Semantics, Size)) { 4030 return addErrorSuffix(" in '" + TypeName + "' directive"); 4031 } 4032 return false; 4033 } 4034 4035 bool MasmParser::parseOptionalAngleBracketOpen() { 4036 const AsmToken Tok = getTok(); 4037 if (parseOptionalToken(AsmToken::LessLess)) { 4038 AngleBracketDepth++; 4039 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1))); 4040 return true; 4041 } else if (parseOptionalToken(AsmToken::LessGreater)) { 4042 AngleBracketDepth++; 4043 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); 4044 return true; 4045 } else if (parseOptionalToken(AsmToken::Less)) { 4046 AngleBracketDepth++; 4047 return true; 4048 } 4049 4050 return false; 4051 } 4052 4053 bool MasmParser::parseAngleBracketClose(const Twine &Msg) { 4054 const AsmToken Tok = getTok(); 4055 if (parseOptionalToken(AsmToken::GreaterGreater)) { 4056 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); 4057 } else if 
(parseToken(AsmToken::Greater, Msg)) { 4058 return true; 4059 } 4060 AngleBracketDepth--; 4061 return false; 4062 } 4063 4064 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 4065 const IntFieldInfo &Contents, 4066 FieldInitializer &Initializer) { 4067 SMLoc Loc = getTok().getLoc(); 4068 4069 SmallVector<const MCExpr *, 1> Values; 4070 if (parseOptionalToken(AsmToken::LCurly)) { 4071 if (Field.LengthOf == 1 && Field.Type > 1) 4072 return Error(Loc, "Cannot initialize scalar field with array value"); 4073 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) || 4074 parseToken(AsmToken::RCurly)) 4075 return true; 4076 } else if (parseOptionalAngleBracketOpen()) { 4077 if (Field.LengthOf == 1 && Field.Type > 1) 4078 return Error(Loc, "Cannot initialize scalar field with array value"); 4079 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) || 4080 parseAngleBracketClose()) 4081 return true; 4082 } else if (Field.LengthOf > 1 && Field.Type > 1) { 4083 return Error(Loc, "Cannot initialize array field with scalar value"); 4084 } else if (parseScalarInitializer(Field.Type, Values, 4085 /*StringPadLength=*/Field.LengthOf)) { 4086 return true; 4087 } 4088 4089 if (Values.size() > Field.LengthOf) { 4090 return Error(Loc, "Initializer too long for field; expected at most " + 4091 std::to_string(Field.LengthOf) + " elements, got " + 4092 std::to_string(Values.size())); 4093 } 4094 // Default-initialize all remaining values. 
4095 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end()); 4096 4097 Initializer = FieldInitializer(std::move(Values)); 4098 return false; 4099 } 4100 4101 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 4102 const RealFieldInfo &Contents, 4103 FieldInitializer &Initializer) { 4104 const fltSemantics *Semantics; 4105 switch (Field.Type) { 4106 case 4: 4107 Semantics = &APFloat::IEEEsingle(); 4108 break; 4109 case 8: 4110 Semantics = &APFloat::IEEEdouble(); 4111 break; 4112 case 10: 4113 Semantics = &APFloat::x87DoubleExtended(); 4114 break; 4115 default: 4116 llvm_unreachable("unknown real field type"); 4117 } 4118 4119 SMLoc Loc = getTok().getLoc(); 4120 4121 SmallVector<APInt, 1> AsIntValues; 4122 if (parseOptionalToken(AsmToken::LCurly)) { 4123 if (Field.LengthOf == 1) 4124 return Error(Loc, "Cannot initialize scalar field with array value"); 4125 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) || 4126 parseToken(AsmToken::RCurly)) 4127 return true; 4128 } else if (parseOptionalAngleBracketOpen()) { 4129 if (Field.LengthOf == 1) 4130 return Error(Loc, "Cannot initialize scalar field with array value"); 4131 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) || 4132 parseAngleBracketClose()) 4133 return true; 4134 } else if (Field.LengthOf > 1) { 4135 return Error(Loc, "Cannot initialize array field with scalar value"); 4136 } else { 4137 AsIntValues.emplace_back(); 4138 if (parseRealValue(*Semantics, AsIntValues.back())) 4139 return true; 4140 } 4141 4142 if (AsIntValues.size() > Field.LengthOf) { 4143 return Error(Loc, "Initializer too long for field; expected at most " + 4144 std::to_string(Field.LengthOf) + " elements, got " + 4145 std::to_string(AsIntValues.size())); 4146 } 4147 // Default-initialize all remaining values. 
4148 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(), 4149 Contents.AsIntValues.end()); 4150 4151 Initializer = FieldInitializer(std::move(AsIntValues)); 4152 return false; 4153 } 4154 4155 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 4156 const StructFieldInfo &Contents, 4157 FieldInitializer &Initializer) { 4158 SMLoc Loc = getTok().getLoc(); 4159 4160 std::vector<StructInitializer> Initializers; 4161 if (Field.LengthOf > 1) { 4162 if (parseOptionalToken(AsmToken::LCurly)) { 4163 if (parseStructInstList(Contents.Structure, Initializers, 4164 AsmToken::RCurly) || 4165 parseToken(AsmToken::RCurly)) 4166 return true; 4167 } else if (parseOptionalAngleBracketOpen()) { 4168 if (parseStructInstList(Contents.Structure, Initializers, 4169 AsmToken::Greater) || 4170 parseAngleBracketClose()) 4171 return true; 4172 } else { 4173 return Error(Loc, "Cannot initialize array field with scalar value"); 4174 } 4175 } else { 4176 Initializers.emplace_back(); 4177 if (parseStructInitializer(Contents.Structure, Initializers.back())) 4178 return true; 4179 } 4180 4181 if (Initializers.size() > Field.LengthOf) { 4182 return Error(Loc, "Initializer too long for field; expected at most " + 4183 std::to_string(Field.LengthOf) + " elements, got " + 4184 std::to_string(Initializers.size())); 4185 } 4186 // Default-initialize all remaining values. 
4187 Initializers.insert(Initializers.end(), 4188 Contents.Initializers.begin() + Initializers.size(), 4189 Contents.Initializers.end()); 4190 4191 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure); 4192 return false; 4193 } 4194 4195 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 4196 FieldInitializer &Initializer) { 4197 switch (Field.Contents.FT) { 4198 case FT_INTEGRAL: 4199 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer); 4200 case FT_REAL: 4201 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer); 4202 case FT_STRUCT: 4203 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer); 4204 } 4205 llvm_unreachable("Unhandled FieldType enum"); 4206 } 4207 4208 bool MasmParser::parseStructInitializer(const StructInfo &Structure, 4209 StructInitializer &Initializer) { 4210 const AsmToken FirstToken = getTok(); 4211 4212 std::optional<AsmToken::TokenKind> EndToken; 4213 if (parseOptionalToken(AsmToken::LCurly)) { 4214 EndToken = AsmToken::RCurly; 4215 } else if (parseOptionalAngleBracketOpen()) { 4216 EndToken = AsmToken::Greater; 4217 AngleBracketDepth++; 4218 } else if (FirstToken.is(AsmToken::Identifier) && 4219 FirstToken.getString() == "?") { 4220 // ? initializer; leave EndToken uninitialized to treat as empty. 4221 if (parseToken(AsmToken::Identifier)) 4222 return true; 4223 } else { 4224 return Error(FirstToken.getLoc(), "Expected struct initializer"); 4225 } 4226 4227 auto &FieldInitializers = Initializer.FieldInitializers; 4228 size_t FieldIndex = 0; 4229 if (EndToken) { 4230 // Initialize all fields with given initializers. 4231 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) { 4232 const FieldInfo &Field = Structure.Fields[FieldIndex++]; 4233 if (parseOptionalToken(AsmToken::Comma)) { 4234 // Empty initializer; use the default and continue. (Also, allow line 4235 // continuation.) 
4236 FieldInitializers.push_back(Field.Contents); 4237 parseOptionalToken(AsmToken::EndOfStatement); 4238 continue; 4239 } 4240 FieldInitializers.emplace_back(Field.Contents.FT); 4241 if (parseFieldInitializer(Field, FieldInitializers.back())) 4242 return true; 4243 4244 // Continue if we see a comma. (Also, allow line continuation.) 4245 SMLoc CommaLoc = getTok().getLoc(); 4246 if (!parseOptionalToken(AsmToken::Comma)) 4247 break; 4248 if (FieldIndex == Structure.Fields.size()) 4249 return Error(CommaLoc, "'" + Structure.Name + 4250 "' initializer initializes too many fields"); 4251 parseOptionalToken(AsmToken::EndOfStatement); 4252 } 4253 } 4254 // Default-initialize all remaining fields. 4255 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex)) 4256 FieldInitializers.push_back(Field.Contents); 4257 4258 if (EndToken) { 4259 if (*EndToken == AsmToken::Greater) 4260 return parseAngleBracketClose(); 4261 4262 return parseToken(*EndToken); 4263 } 4264 4265 return false; 4266 } 4267 4268 bool MasmParser::parseStructInstList( 4269 const StructInfo &Structure, std::vector<StructInitializer> &Initializers, 4270 const AsmToken::TokenKind EndToken) { 4271 while (getTok().isNot(EndToken) || 4272 (EndToken == AsmToken::Greater && 4273 getTok().isNot(AsmToken::GreaterGreater))) { 4274 const AsmToken NextTok = peekTok(); 4275 if (NextTok.is(AsmToken::Identifier) && 4276 NextTok.getString().equals_insensitive("dup")) { 4277 const MCExpr *Value; 4278 if (parseExpression(Value) || parseToken(AsmToken::Identifier)) 4279 return true; 4280 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 4281 if (!MCE) 4282 return Error(Value->getLoc(), 4283 "cannot repeat value a non-constant number of times"); 4284 const int64_t Repetitions = MCE->getValue(); 4285 if (Repetitions < 0) 4286 return Error(Value->getLoc(), 4287 "cannot repeat value a negative number of times"); 4288 4289 std::vector<StructInitializer> DuplicatedValues; 4290 if 
(parseToken(AsmToken::LParen, 4291 "parentheses required for 'dup' contents") || 4292 parseStructInstList(Structure, DuplicatedValues) || parseRParen()) 4293 return true; 4294 4295 for (int i = 0; i < Repetitions; ++i) 4296 llvm::append_range(Initializers, DuplicatedValues); 4297 } else { 4298 Initializers.emplace_back(); 4299 if (parseStructInitializer(Structure, Initializers.back())) 4300 return true; 4301 } 4302 4303 // Continue if we see a comma. (Also, allow line continuation.) 4304 if (!parseOptionalToken(AsmToken::Comma)) 4305 break; 4306 parseOptionalToken(AsmToken::EndOfStatement); 4307 } 4308 4309 return false; 4310 } 4311 4312 bool MasmParser::emitFieldValue(const FieldInfo &Field, 4313 const IntFieldInfo &Contents) { 4314 // Default-initialize all values. 4315 for (const MCExpr *Value : Contents.Values) { 4316 if (emitIntValue(Value, Field.Type)) 4317 return true; 4318 } 4319 return false; 4320 } 4321 4322 bool MasmParser::emitFieldValue(const FieldInfo &Field, 4323 const RealFieldInfo &Contents) { 4324 for (const APInt &AsInt : Contents.AsIntValues) { 4325 getStreamer().emitIntValue(AsInt.getLimitedValue(), 4326 AsInt.getBitWidth() / 8); 4327 } 4328 return false; 4329 } 4330 4331 bool MasmParser::emitFieldValue(const FieldInfo &Field, 4332 const StructFieldInfo &Contents) { 4333 for (const auto &Initializer : Contents.Initializers) { 4334 size_t Index = 0, Offset = 0; 4335 for (const auto &SubField : Contents.Structure.Fields) { 4336 getStreamer().emitZeros(SubField.Offset - Offset); 4337 Offset = SubField.Offset + SubField.SizeOf; 4338 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]); 4339 } 4340 } 4341 return false; 4342 } 4343 4344 bool MasmParser::emitFieldValue(const FieldInfo &Field) { 4345 switch (Field.Contents.FT) { 4346 case FT_INTEGRAL: 4347 return emitFieldValue(Field, Field.Contents.IntInfo); 4348 case FT_REAL: 4349 return emitFieldValue(Field, Field.Contents.RealInfo); 4350 case FT_STRUCT: 4351 return 
emitFieldValue(Field, Field.Contents.StructInfo); 4352 } 4353 llvm_unreachable("Unhandled FieldType enum"); 4354 } 4355 4356 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 4357 const IntFieldInfo &Contents, 4358 const IntFieldInfo &Initializer) { 4359 for (const auto &Value : Initializer.Values) { 4360 if (emitIntValue(Value, Field.Type)) 4361 return true; 4362 } 4363 // Default-initialize all remaining values. 4364 for (const auto &Value : 4365 llvm::drop_begin(Contents.Values, Initializer.Values.size())) { 4366 if (emitIntValue(Value, Field.Type)) 4367 return true; 4368 } 4369 return false; 4370 } 4371 4372 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 4373 const RealFieldInfo &Contents, 4374 const RealFieldInfo &Initializer) { 4375 for (const auto &AsInt : Initializer.AsIntValues) { 4376 getStreamer().emitIntValue(AsInt.getLimitedValue(), 4377 AsInt.getBitWidth() / 8); 4378 } 4379 // Default-initialize all remaining values. 4380 for (const auto &AsInt : 4381 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) { 4382 getStreamer().emitIntValue(AsInt.getLimitedValue(), 4383 AsInt.getBitWidth() / 8); 4384 } 4385 return false; 4386 } 4387 4388 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 4389 const StructFieldInfo &Contents, 4390 const StructFieldInfo &Initializer) { 4391 for (const auto &Init : Initializer.Initializers) { 4392 if (emitStructInitializer(Contents.Structure, Init)) 4393 return true; 4394 } 4395 // Default-initialize all remaining values. 
4396 for (const auto &Init : llvm::drop_begin(Contents.Initializers, 4397 Initializer.Initializers.size())) { 4398 if (emitStructInitializer(Contents.Structure, Init)) 4399 return true; 4400 } 4401 return false; 4402 } 4403 4404 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 4405 const FieldInitializer &Initializer) { 4406 switch (Field.Contents.FT) { 4407 case FT_INTEGRAL: 4408 return emitFieldInitializer(Field, Field.Contents.IntInfo, 4409 Initializer.IntInfo); 4410 case FT_REAL: 4411 return emitFieldInitializer(Field, Field.Contents.RealInfo, 4412 Initializer.RealInfo); 4413 case FT_STRUCT: 4414 return emitFieldInitializer(Field, Field.Contents.StructInfo, 4415 Initializer.StructInfo); 4416 } 4417 llvm_unreachable("Unhandled FieldType enum"); 4418 } 4419 4420 bool MasmParser::emitStructInitializer(const StructInfo &Structure, 4421 const StructInitializer &Initializer) { 4422 if (!Structure.Initializable) 4423 return Error(getLexer().getLoc(), 4424 "cannot initialize a value of type '" + Structure.Name + 4425 "'; 'org' was used in the type's declaration"); 4426 size_t Index = 0, Offset = 0; 4427 for (const auto &Init : Initializer.FieldInitializers) { 4428 const auto &Field = Structure.Fields[Index++]; 4429 getStreamer().emitZeros(Field.Offset - Offset); 4430 Offset = Field.Offset + Field.SizeOf; 4431 if (emitFieldInitializer(Field, Init)) 4432 return true; 4433 } 4434 // Default-initialize all remaining fields. 4435 for (const auto &Field : llvm::drop_begin( 4436 Structure.Fields, Initializer.FieldInitializers.size())) { 4437 getStreamer().emitZeros(Field.Offset - Offset); 4438 Offset = Field.Offset + Field.SizeOf; 4439 if (emitFieldValue(Field)) 4440 return true; 4441 } 4442 // Add final padding. 4443 if (Offset != Structure.Size) 4444 getStreamer().emitZeros(Structure.Size - Offset); 4445 return false; 4446 } 4447 4448 // Set data values from initializers. 
4449 bool MasmParser::emitStructValues(const StructInfo &Structure, 4450 unsigned *Count) { 4451 std::vector<StructInitializer> Initializers; 4452 if (parseStructInstList(Structure, Initializers)) 4453 return true; 4454 4455 for (const auto &Initializer : Initializers) { 4456 if (emitStructInitializer(Structure, Initializer)) 4457 return true; 4458 } 4459 4460 if (Count) 4461 *Count = Initializers.size(); 4462 return false; 4463 } 4464 4465 // Declare a field in the current struct. 4466 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { 4467 StructInfo &OwningStruct = StructInProgress.back(); 4468 FieldInfo &Field = 4469 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize); 4470 StructFieldInfo &StructInfo = Field.Contents.StructInfo; 4471 4472 StructInfo.Structure = Structure; 4473 Field.Type = Structure.Size; 4474 4475 if (parseStructInstList(Structure, StructInfo.Initializers)) 4476 return true; 4477 4478 Field.LengthOf = StructInfo.Initializers.size(); 4479 Field.SizeOf = Field.Type * Field.LengthOf; 4480 4481 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 4482 if (!OwningStruct.IsUnion) { 4483 OwningStruct.NextOffset = FieldEnd; 4484 } 4485 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd); 4486 4487 return false; 4488 } 4489 4490 /// parseDirectiveStructValue 4491 /// ::= struct-id (<struct-initializer> | {struct-initializer}) 4492 /// [, (<struct-initializer> | {struct-initializer})]* 4493 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure, 4494 StringRef Directive, SMLoc DirLoc) { 4495 if (StructInProgress.empty()) { 4496 if (emitStructValues(Structure)) 4497 return true; 4498 } else if (addStructField("", Structure)) { 4499 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4500 } 4501 4502 return false; 4503 } 4504 4505 /// parseDirectiveNamedValue 4506 /// ::= name (byte | word | ... 
) [ expression (, expression)* ] 4507 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure, 4508 StringRef Directive, 4509 SMLoc DirLoc, StringRef Name) { 4510 if (StructInProgress.empty()) { 4511 // Initialize named data value. 4512 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 4513 getStreamer().emitLabel(Sym); 4514 unsigned Count; 4515 if (emitStructValues(Structure, &Count)) 4516 return true; 4517 AsmTypeInfo Type; 4518 Type.Name = Structure.Name; 4519 Type.Size = Structure.Size * Count; 4520 Type.ElementSize = Structure.Size; 4521 Type.Length = Count; 4522 KnownType[Name.lower()] = Type; 4523 } else if (addStructField(Name, Structure)) { 4524 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4525 } 4526 4527 return false; 4528 } 4529 4530 /// parseDirectiveStruct 4531 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE] 4532 /// (dataDir | generalDir | offsetDir | nestedStruct)+ 4533 /// <name> ENDS 4534 ////// dataDir = data declaration 4535 ////// offsetDir = EVEN, ORG, ALIGN 4536 bool MasmParser::parseDirectiveStruct(StringRef Directive, 4537 DirectiveKind DirKind, StringRef Name, 4538 SMLoc NameLoc) { 4539 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS 4540 // anyway, so all field accesses must be qualified. 
4541 AsmToken NextTok = getTok(); 4542 int64_t AlignmentValue = 1; 4543 if (NextTok.isNot(AsmToken::Comma) && 4544 NextTok.isNot(AsmToken::EndOfStatement) && 4545 parseAbsoluteExpression(AlignmentValue)) { 4546 return addErrorSuffix(" in alignment value for '" + Twine(Directive) + 4547 "' directive"); 4548 } 4549 if (!isPowerOf2_64(AlignmentValue)) { 4550 return Error(NextTok.getLoc(), "alignment must be a power of two; was " + 4551 std::to_string(AlignmentValue)); 4552 } 4553 4554 StringRef Qualifier; 4555 SMLoc QualifierLoc; 4556 if (parseOptionalToken(AsmToken::Comma)) { 4557 QualifierLoc = getTok().getLoc(); 4558 if (parseIdentifier(Qualifier)) 4559 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4560 if (!Qualifier.equals_insensitive("nonunique")) 4561 return Error(QualifierLoc, "Unrecognized qualifier for '" + 4562 Twine(Directive) + 4563 "' directive; expected none or NONUNIQUE"); 4564 } 4565 4566 if (parseToken(AsmToken::EndOfStatement)) 4567 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4568 4569 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue); 4570 return false; 4571 } 4572 4573 /// parseDirectiveNestedStruct 4574 /// ::= (STRUC | STRUCT | UNION) [name] 4575 /// (dataDir | generalDir | offsetDir | nestedStruct)+ 4576 /// ENDS 4577 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive, 4578 DirectiveKind DirKind) { 4579 if (StructInProgress.empty()) 4580 return TokError("missing name in top-level '" + Twine(Directive) + 4581 "' directive"); 4582 4583 StringRef Name; 4584 if (getTok().is(AsmToken::Identifier)) { 4585 Name = getTok().getIdentifier(); 4586 parseToken(AsmToken::Identifier); 4587 } 4588 if (parseToken(AsmToken::EndOfStatement)) 4589 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4590 4591 // Reserve space to ensure Alignment doesn't get invalidated when 4592 // StructInProgress grows. 
4593 StructInProgress.reserve(StructInProgress.size() + 1); 4594 StructInProgress.emplace_back(Name, DirKind == DK_UNION, 4595 StructInProgress.back().Alignment); 4596 return false; 4597 } 4598 4599 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { 4600 if (StructInProgress.empty()) 4601 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION"); 4602 if (StructInProgress.size() > 1) 4603 return Error(NameLoc, "unexpected name in nested ENDS directive"); 4604 if (StructInProgress.back().Name.compare_insensitive(Name)) 4605 return Error(NameLoc, "mismatched name in ENDS directive; expected '" + 4606 StructInProgress.back().Name + "'"); 4607 StructInfo Structure = StructInProgress.pop_back_val(); 4608 // Pad to make the structure's size divisible by the smaller of its alignment 4609 // and the size of its largest field. 4610 Structure.Size = llvm::alignTo( 4611 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize)); 4612 Structs[Name.lower()] = Structure; 4613 4614 if (parseToken(AsmToken::EndOfStatement)) 4615 return addErrorSuffix(" in ENDS directive"); 4616 4617 return false; 4618 } 4619 4620 bool MasmParser::parseDirectiveNestedEnds() { 4621 if (StructInProgress.empty()) 4622 return TokError("ENDS directive without matching STRUC/STRUCT/UNION"); 4623 if (StructInProgress.size() == 1) 4624 return TokError("missing name in top-level ENDS directive"); 4625 4626 if (parseToken(AsmToken::EndOfStatement)) 4627 return addErrorSuffix(" in nested ENDS directive"); 4628 4629 StructInfo Structure = StructInProgress.pop_back_val(); 4630 // Pad to make the structure's size divisible by its alignment. 4631 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment); 4632 4633 StructInfo &ParentStruct = StructInProgress.back(); 4634 if (Structure.Name.empty()) { 4635 // Anonymous substructures' fields are addressed as if they belong to the 4636 // parent structure - so we transfer them to the parent here. 
4637 const size_t OldFields = ParentStruct.Fields.size(); 4638 ParentStruct.Fields.insert( 4639 ParentStruct.Fields.end(), 4640 std::make_move_iterator(Structure.Fields.begin()), 4641 std::make_move_iterator(Structure.Fields.end())); 4642 for (const auto &FieldByName : Structure.FieldsByName) { 4643 ParentStruct.FieldsByName[FieldByName.getKey()] = 4644 FieldByName.getValue() + OldFields; 4645 } 4646 4647 unsigned FirstFieldOffset = 0; 4648 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) { 4649 FirstFieldOffset = llvm::alignTo( 4650 ParentStruct.NextOffset, 4651 std::min(ParentStruct.Alignment, Structure.AlignmentSize)); 4652 } 4653 4654 if (ParentStruct.IsUnion) { 4655 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); 4656 } else { 4657 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields)) 4658 Field.Offset += FirstFieldOffset; 4659 4660 const unsigned StructureEnd = FirstFieldOffset + Structure.Size; 4661 if (!ParentStruct.IsUnion) { 4662 ParentStruct.NextOffset = StructureEnd; 4663 } 4664 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); 4665 } 4666 } else { 4667 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT, 4668 Structure.AlignmentSize); 4669 StructFieldInfo &StructInfo = Field.Contents.StructInfo; 4670 Field.Type = Structure.Size; 4671 Field.LengthOf = 1; 4672 Field.SizeOf = Structure.Size; 4673 4674 const unsigned StructureEnd = Field.Offset + Field.SizeOf; 4675 if (!ParentStruct.IsUnion) { 4676 ParentStruct.NextOffset = StructureEnd; 4677 } 4678 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); 4679 4680 StructInfo.Structure = Structure; 4681 StructInfo.Initializers.emplace_back(); 4682 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers; 4683 for (const auto &SubField : Structure.Fields) { 4684 FieldInitializers.push_back(SubField.Contents); 4685 } 4686 } 4687 4688 return false; 4689 } 4690 4691 /// parseDirectiveOrg 4692 /// ::= org expression 4693 
bool MasmParser::parseDirectiveOrg() { 4694 const MCExpr *Offset; 4695 SMLoc OffsetLoc = Lexer.getLoc(); 4696 if (checkForValidSection() || parseExpression(Offset)) 4697 return true; 4698 if (parseToken(AsmToken::EndOfStatement)) 4699 return addErrorSuffix(" in 'org' directive"); 4700 4701 if (StructInProgress.empty()) { 4702 // Not in a struct; change the offset for the next instruction or data 4703 if (checkForValidSection()) 4704 return addErrorSuffix(" in 'org' directive"); 4705 4706 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc); 4707 } else { 4708 // Offset the next field of this struct 4709 StructInfo &Structure = StructInProgress.back(); 4710 int64_t OffsetRes; 4711 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr())) 4712 return Error(OffsetLoc, 4713 "expected absolute expression in 'org' directive"); 4714 if (OffsetRes < 0) 4715 return Error( 4716 OffsetLoc, 4717 "expected non-negative value in struct's 'org' directive; was " + 4718 std::to_string(OffsetRes)); 4719 Structure.NextOffset = static_cast<unsigned>(OffsetRes); 4720 4721 // ORG-affected structures cannot be initialized 4722 Structure.Initializable = false; 4723 } 4724 4725 return false; 4726 } 4727 4728 bool MasmParser::emitAlignTo(int64_t Alignment) { 4729 if (StructInProgress.empty()) { 4730 // Not in a struct; align the next instruction or data 4731 if (checkForValidSection()) 4732 return true; 4733 4734 // Check whether we should use optimal code alignment for this align 4735 // directive. 4736 const MCSection *Section = getStreamer().getCurrentSectionOnly(); 4737 assert(Section && "must have section to emit alignment"); 4738 if (Section->useCodeAlign()) { 4739 getStreamer().emitCodeAlignment(Align(Alignment), 4740 &getTargetParser().getSTI(), 4741 /*MaxBytesToEmit=*/0); 4742 } else { 4743 // FIXME: Target specific behavior about how the "extra" bytes are filled. 
4744 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0, 4745 /*ValueSize=*/1, 4746 /*MaxBytesToEmit=*/0); 4747 } 4748 } else { 4749 // Align the next field of this struct 4750 StructInfo &Structure = StructInProgress.back(); 4751 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment); 4752 } 4753 4754 return false; 4755 } 4756 4757 /// parseDirectiveAlign 4758 /// ::= align expression 4759 bool MasmParser::parseDirectiveAlign() { 4760 SMLoc AlignmentLoc = getLexer().getLoc(); 4761 int64_t Alignment; 4762 4763 // Ignore empty 'align' directives. 4764 if (getTok().is(AsmToken::EndOfStatement)) { 4765 return Warning(AlignmentLoc, 4766 "align directive with no operand is ignored") && 4767 parseToken(AsmToken::EndOfStatement); 4768 } 4769 if (parseAbsoluteExpression(Alignment) || 4770 parseToken(AsmToken::EndOfStatement)) 4771 return addErrorSuffix(" in align directive"); 4772 4773 // Always emit an alignment here even if we throw an error. 4774 bool ReturnVal = false; 4775 4776 // Reject alignments that aren't either a power of two or zero, for ML.exe 4777 // compatibility. Alignment of zero is silently rounded up to one. 4778 if (Alignment == 0) 4779 Alignment = 1; 4780 if (!isPowerOf2_64(Alignment)) 4781 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " + 4782 std::to_string(Alignment)); 4783 4784 if (emitAlignTo(Alignment)) 4785 ReturnVal |= addErrorSuffix(" in align directive"); 4786 4787 return ReturnVal; 4788 } 4789 4790 /// parseDirectiveEven 4791 /// ::= even 4792 bool MasmParser::parseDirectiveEven() { 4793 if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2)) 4794 return addErrorSuffix(" in even directive"); 4795 4796 return false; 4797 } 4798 4799 /// parseDirectiveFile 4800 /// ::= .file filename 4801 /// ::= .file number [directory] filename [md5 checksum] [source source-text] 4802 bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) { 4803 // FIXME: I'm not sure what this is. 
4804 int64_t FileNumber = -1; 4805 if (getLexer().is(AsmToken::Integer)) { 4806 FileNumber = getTok().getIntVal(); 4807 Lex(); 4808 4809 if (FileNumber < 0) 4810 return TokError("negative file number"); 4811 } 4812 4813 std::string Path; 4814 4815 // Usually the directory and filename together, otherwise just the directory. 4816 // Allow the strings to have escaped octal character sequence. 4817 if (check(getTok().isNot(AsmToken::String), 4818 "unexpected token in '.file' directive") || 4819 parseEscapedString(Path)) 4820 return true; 4821 4822 StringRef Directory; 4823 StringRef Filename; 4824 std::string FilenameData; 4825 if (getLexer().is(AsmToken::String)) { 4826 if (check(FileNumber == -1, 4827 "explicit path specified, but no file number") || 4828 parseEscapedString(FilenameData)) 4829 return true; 4830 Filename = FilenameData; 4831 Directory = Path; 4832 } else { 4833 Filename = Path; 4834 } 4835 4836 uint64_t MD5Hi, MD5Lo; 4837 bool HasMD5 = false; 4838 4839 std::optional<StringRef> Source; 4840 bool HasSource = false; 4841 std::string SourceString; 4842 4843 while (!parseOptionalToken(AsmToken::EndOfStatement)) { 4844 StringRef Keyword; 4845 if (check(getTok().isNot(AsmToken::Identifier), 4846 "unexpected token in '.file' directive") || 4847 parseIdentifier(Keyword)) 4848 return true; 4849 if (Keyword == "md5") { 4850 HasMD5 = true; 4851 if (check(FileNumber == -1, 4852 "MD5 checksum specified, but no file number") || 4853 parseHexOcta(*this, MD5Hi, MD5Lo)) 4854 return true; 4855 } else if (Keyword == "source") { 4856 HasSource = true; 4857 if (check(FileNumber == -1, 4858 "source specified, but no file number") || 4859 check(getTok().isNot(AsmToken::String), 4860 "unexpected token in '.file' directive") || 4861 parseEscapedString(SourceString)) 4862 return true; 4863 } else { 4864 return TokError("unexpected token in '.file' directive"); 4865 } 4866 } 4867 4868 if (FileNumber == -1) { 4869 // Ignore the directive if there is no number and the target 
doesn't support 4870 // numberless .file directives. This allows some portability of assembler 4871 // between different object file formats. 4872 if (getContext().getAsmInfo()->hasSingleParameterDotFile()) 4873 getStreamer().emitFileDirective(Filename); 4874 } else { 4875 // In case there is a -g option as well as debug info from directive .file, 4876 // we turn off the -g option, directly use the existing debug info instead. 4877 // Throw away any implicit file table for the assembler source. 4878 if (Ctx.getGenDwarfForAssembly()) { 4879 Ctx.getMCDwarfLineTable(0).resetFileTable(); 4880 Ctx.setGenDwarfForAssembly(false); 4881 } 4882 4883 std::optional<MD5::MD5Result> CKMem; 4884 if (HasMD5) { 4885 MD5::MD5Result Sum; 4886 for (unsigned i = 0; i != 8; ++i) { 4887 Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8)); 4888 Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8)); 4889 } 4890 CKMem = Sum; 4891 } 4892 if (HasSource) { 4893 char *SourceBuf = static_cast<char *>(Ctx.allocate(SourceString.size())); 4894 memcpy(SourceBuf, SourceString.data(), SourceString.size()); 4895 Source = StringRef(SourceBuf, SourceString.size()); 4896 } 4897 if (FileNumber == 0) { 4898 if (Ctx.getDwarfVersion() < 5) 4899 return Warning(DirectiveLoc, "file 0 not supported prior to DWARF-5"); 4900 getStreamer().emitDwarfFile0Directive(Directory, Filename, CKMem, Source); 4901 } else { 4902 Expected<unsigned> FileNumOrErr = getStreamer().tryEmitDwarfFileDirective( 4903 FileNumber, Directory, Filename, CKMem, Source); 4904 if (!FileNumOrErr) 4905 return Error(DirectiveLoc, toString(FileNumOrErr.takeError())); 4906 } 4907 // Alert the user if there are some .file directives with MD5 and some not. 4908 // But only do that once. 
4909 if (!ReportedInconsistentMD5 && !Ctx.isDwarfMD5UsageConsistent(0)) { 4910 ReportedInconsistentMD5 = true; 4911 return Warning(DirectiveLoc, "inconsistent use of MD5 checksums"); 4912 } 4913 } 4914 4915 return false; 4916 } 4917 4918 /// parseDirectiveLine 4919 /// ::= .line [number] 4920 bool MasmParser::parseDirectiveLine() { 4921 int64_t LineNumber; 4922 if (getLexer().is(AsmToken::Integer)) { 4923 if (parseIntToken(LineNumber, "unexpected token in '.line' directive")) 4924 return true; 4925 (void)LineNumber; 4926 // FIXME: Do something with the .line. 4927 } 4928 if (parseEOL()) 4929 return true; 4930 4931 return false; 4932 } 4933 4934 /// parseDirectiveLoc 4935 /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] 4936 /// [epilogue_begin] [is_stmt VALUE] [isa VALUE] 4937 /// The first number is a file number, must have been previously assigned with 4938 /// a .file directive, the second number is the line number and optionally the 4939 /// third number is a column position (zero if not specified). The remaining 4940 /// optional items are .loc sub-directives. 
4941 bool MasmParser::parseDirectiveLoc() { 4942 int64_t FileNumber = 0, LineNumber = 0; 4943 SMLoc Loc = getTok().getLoc(); 4944 if (parseIntToken(FileNumber, "unexpected token in '.loc' directive") || 4945 check(FileNumber < 1 && Ctx.getDwarfVersion() < 5, Loc, 4946 "file number less than one in '.loc' directive") || 4947 check(!getContext().isValidDwarfFileNumber(FileNumber), Loc, 4948 "unassigned file number in '.loc' directive")) 4949 return true; 4950 4951 // optional 4952 if (getLexer().is(AsmToken::Integer)) { 4953 LineNumber = getTok().getIntVal(); 4954 if (LineNumber < 0) 4955 return TokError("line number less than zero in '.loc' directive"); 4956 Lex(); 4957 } 4958 4959 int64_t ColumnPos = 0; 4960 if (getLexer().is(AsmToken::Integer)) { 4961 ColumnPos = getTok().getIntVal(); 4962 if (ColumnPos < 0) 4963 return TokError("column position less than zero in '.loc' directive"); 4964 Lex(); 4965 } 4966 4967 auto PrevFlags = getContext().getCurrentDwarfLoc().getFlags(); 4968 unsigned Flags = PrevFlags & DWARF2_FLAG_IS_STMT; 4969 unsigned Isa = 0; 4970 int64_t Discriminator = 0; 4971 4972 auto parseLocOp = [&]() -> bool { 4973 StringRef Name; 4974 SMLoc Loc = getTok().getLoc(); 4975 if (parseIdentifier(Name)) 4976 return TokError("unexpected token in '.loc' directive"); 4977 4978 if (Name == "basic_block") 4979 Flags |= DWARF2_FLAG_BASIC_BLOCK; 4980 else if (Name == "prologue_end") 4981 Flags |= DWARF2_FLAG_PROLOGUE_END; 4982 else if (Name == "epilogue_begin") 4983 Flags |= DWARF2_FLAG_EPILOGUE_BEGIN; 4984 else if (Name == "is_stmt") { 4985 Loc = getTok().getLoc(); 4986 const MCExpr *Value; 4987 if (parseExpression(Value)) 4988 return true; 4989 // The expression must be the constant 0 or 1. 
4990 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { 4991 int Value = MCE->getValue(); 4992 if (Value == 0) 4993 Flags &= ~DWARF2_FLAG_IS_STMT; 4994 else if (Value == 1) 4995 Flags |= DWARF2_FLAG_IS_STMT; 4996 else 4997 return Error(Loc, "is_stmt value not 0 or 1"); 4998 } else { 4999 return Error(Loc, "is_stmt value not the constant value of 0 or 1"); 5000 } 5001 } else if (Name == "isa") { 5002 Loc = getTok().getLoc(); 5003 const MCExpr *Value; 5004 if (parseExpression(Value)) 5005 return true; 5006 // The expression must be a constant greater or equal to 0. 5007 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { 5008 int Value = MCE->getValue(); 5009 if (Value < 0) 5010 return Error(Loc, "isa number less than zero"); 5011 Isa = Value; 5012 } else { 5013 return Error(Loc, "isa number not a constant value"); 5014 } 5015 } else if (Name == "discriminator") { 5016 if (parseAbsoluteExpression(Discriminator)) 5017 return true; 5018 } else { 5019 return Error(Loc, "unknown sub-directive in '.loc' directive"); 5020 } 5021 return false; 5022 }; 5023 5024 if (parseMany(parseLocOp, false /*hasComma*/)) 5025 return true; 5026 5027 getStreamer().emitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags, 5028 Isa, Discriminator, StringRef()); 5029 5030 return false; 5031 } 5032 5033 /// parseDirectiveStabs 5034 /// ::= .stabs string, number, number, number 5035 bool MasmParser::parseDirectiveStabs() { 5036 return TokError("unsupported directive '.stabs'"); 5037 } 5038 5039 /// parseDirectiveCVFile 5040 /// ::= .cv_file number filename [checksum] [checksumkind] 5041 bool MasmParser::parseDirectiveCVFile() { 5042 SMLoc FileNumberLoc = getTok().getLoc(); 5043 int64_t FileNumber; 5044 std::string Filename; 5045 std::string Checksum; 5046 int64_t ChecksumKind = 0; 5047 5048 if (parseIntToken(FileNumber, 5049 "expected file number in '.cv_file' directive") || 5050 check(FileNumber < 1, FileNumberLoc, "file number less than one") || 5051 
check(getTok().isNot(AsmToken::String), 5052 "unexpected token in '.cv_file' directive") || 5053 parseEscapedString(Filename)) 5054 return true; 5055 if (!parseOptionalToken(AsmToken::EndOfStatement)) { 5056 if (check(getTok().isNot(AsmToken::String), 5057 "unexpected token in '.cv_file' directive") || 5058 parseEscapedString(Checksum) || 5059 parseIntToken(ChecksumKind, 5060 "expected checksum kind in '.cv_file' directive") || 5061 parseEOL()) 5062 return true; 5063 } 5064 5065 Checksum = fromHex(Checksum); 5066 void *CKMem = Ctx.allocate(Checksum.size(), 1); 5067 memcpy(CKMem, Checksum.data(), Checksum.size()); 5068 ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem), 5069 Checksum.size()); 5070 5071 if (!getStreamer().emitCVFileDirective(FileNumber, Filename, ChecksumAsBytes, 5072 static_cast<uint8_t>(ChecksumKind))) 5073 return Error(FileNumberLoc, "file number already allocated"); 5074 5075 return false; 5076 } 5077 5078 bool MasmParser::parseCVFunctionId(int64_t &FunctionId, 5079 StringRef DirectiveName) { 5080 SMLoc Loc; 5081 return parseTokenLoc(Loc) || 5082 parseIntToken(FunctionId, "expected function id in '" + DirectiveName + 5083 "' directive") || 5084 check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc, 5085 "expected function id within range [0, UINT_MAX)"); 5086 } 5087 5088 bool MasmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) { 5089 SMLoc Loc; 5090 return parseTokenLoc(Loc) || 5091 parseIntToken(FileNumber, "expected integer in '" + DirectiveName + 5092 "' directive") || 5093 check(FileNumber < 1, Loc, "file number less than one in '" + 5094 DirectiveName + "' directive") || 5095 check(!getCVContext().isValidFileNumber(FileNumber), Loc, 5096 "unassigned file number in '" + DirectiveName + "' directive"); 5097 } 5098 5099 /// parseDirectiveCVFuncId 5100 /// ::= .cv_func_id FunctionId 5101 /// 5102 /// Introduces a function ID that can be used with .cv_loc. 
5103 bool MasmParser::parseDirectiveCVFuncId() { 5104 SMLoc FunctionIdLoc = getTok().getLoc(); 5105 int64_t FunctionId; 5106 5107 if (parseCVFunctionId(FunctionId, ".cv_func_id") || parseEOL()) 5108 return true; 5109 5110 if (!getStreamer().emitCVFuncIdDirective(FunctionId)) 5111 return Error(FunctionIdLoc, "function id already allocated"); 5112 5113 return false; 5114 } 5115 5116 /// parseDirectiveCVInlineSiteId 5117 /// ::= .cv_inline_site_id FunctionId 5118 /// "within" IAFunc 5119 /// "inlined_at" IAFile IALine [IACol] 5120 /// 5121 /// Introduces a function ID that can be used with .cv_loc. Includes "inlined 5122 /// at" source location information for use in the line table of the caller, 5123 /// whether the caller is a real function or another inlined call site. 5124 bool MasmParser::parseDirectiveCVInlineSiteId() { 5125 SMLoc FunctionIdLoc = getTok().getLoc(); 5126 int64_t FunctionId; 5127 int64_t IAFunc; 5128 int64_t IAFile; 5129 int64_t IALine; 5130 int64_t IACol = 0; 5131 5132 // FunctionId 5133 if (parseCVFunctionId(FunctionId, ".cv_inline_site_id")) 5134 return true; 5135 5136 // "within" 5137 if (check((getLexer().isNot(AsmToken::Identifier) || 5138 getTok().getIdentifier() != "within"), 5139 "expected 'within' identifier in '.cv_inline_site_id' directive")) 5140 return true; 5141 Lex(); 5142 5143 // IAFunc 5144 if (parseCVFunctionId(IAFunc, ".cv_inline_site_id")) 5145 return true; 5146 5147 // "inlined_at" 5148 if (check((getLexer().isNot(AsmToken::Identifier) || 5149 getTok().getIdentifier() != "inlined_at"), 5150 "expected 'inlined_at' identifier in '.cv_inline_site_id' " 5151 "directive") ) 5152 return true; 5153 Lex(); 5154 5155 // IAFile IALine 5156 if (parseCVFileId(IAFile, ".cv_inline_site_id") || 5157 parseIntToken(IALine, "expected line number after 'inlined_at'")) 5158 return true; 5159 5160 // [IACol] 5161 if (getLexer().is(AsmToken::Integer)) { 5162 IACol = getTok().getIntVal(); 5163 Lex(); 5164 } 5165 5166 if (parseEOL()) 5167 return 
true; 5168 5169 if (!getStreamer().emitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile, 5170 IALine, IACol, FunctionIdLoc)) 5171 return Error(FunctionIdLoc, "function id already allocated"); 5172 5173 return false; 5174 } 5175 5176 /// parseDirectiveCVLoc 5177 /// ::= .cv_loc FunctionId FileNumber [LineNumber] [ColumnPos] [prologue_end] 5178 /// [is_stmt VALUE] 5179 /// The first number is a file number, must have been previously assigned with 5180 /// a .file directive, the second number is the line number and optionally the 5181 /// third number is a column position (zero if not specified). The remaining 5182 /// optional items are .loc sub-directives. 5183 bool MasmParser::parseDirectiveCVLoc() { 5184 SMLoc DirectiveLoc = getTok().getLoc(); 5185 int64_t FunctionId, FileNumber; 5186 if (parseCVFunctionId(FunctionId, ".cv_loc") || 5187 parseCVFileId(FileNumber, ".cv_loc")) 5188 return true; 5189 5190 int64_t LineNumber = 0; 5191 if (getLexer().is(AsmToken::Integer)) { 5192 LineNumber = getTok().getIntVal(); 5193 if (LineNumber < 0) 5194 return TokError("line number less than zero in '.cv_loc' directive"); 5195 Lex(); 5196 } 5197 5198 int64_t ColumnPos = 0; 5199 if (getLexer().is(AsmToken::Integer)) { 5200 ColumnPos = getTok().getIntVal(); 5201 if (ColumnPos < 0) 5202 return TokError("column position less than zero in '.cv_loc' directive"); 5203 Lex(); 5204 } 5205 5206 bool PrologueEnd = false; 5207 uint64_t IsStmt = 0; 5208 5209 auto parseOp = [&]() -> bool { 5210 StringRef Name; 5211 SMLoc Loc = getTok().getLoc(); 5212 if (parseIdentifier(Name)) 5213 return TokError("unexpected token in '.cv_loc' directive"); 5214 if (Name == "prologue_end") 5215 PrologueEnd = true; 5216 else if (Name == "is_stmt") { 5217 Loc = getTok().getLoc(); 5218 const MCExpr *Value; 5219 if (parseExpression(Value)) 5220 return true; 5221 // The expression must be the constant 0 or 1. 
5222 IsStmt = ~0ULL; 5223 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) 5224 IsStmt = MCE->getValue(); 5225 5226 if (IsStmt > 1) 5227 return Error(Loc, "is_stmt value not 0 or 1"); 5228 } else { 5229 return Error(Loc, "unknown sub-directive in '.cv_loc' directive"); 5230 } 5231 return false; 5232 }; 5233 5234 if (parseMany(parseOp, false /*hasComma*/)) 5235 return true; 5236 5237 getStreamer().emitCVLocDirective(FunctionId, FileNumber, LineNumber, 5238 ColumnPos, PrologueEnd, IsStmt, StringRef(), 5239 DirectiveLoc); 5240 return false; 5241 } 5242 5243 /// parseDirectiveCVLinetable 5244 /// ::= .cv_linetable FunctionId, FnStart, FnEnd 5245 bool MasmParser::parseDirectiveCVLinetable() { 5246 int64_t FunctionId; 5247 StringRef FnStartName, FnEndName; 5248 SMLoc Loc = getTok().getLoc(); 5249 if (parseCVFunctionId(FunctionId, ".cv_linetable") || 5250 parseToken(AsmToken::Comma, 5251 "unexpected token in '.cv_linetable' directive") || 5252 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc, 5253 "expected identifier in directive") || 5254 parseToken(AsmToken::Comma, 5255 "unexpected token in '.cv_linetable' directive") || 5256 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc, 5257 "expected identifier in directive")) 5258 return true; 5259 5260 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); 5261 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); 5262 5263 getStreamer().emitCVLinetableDirective(FunctionId, FnStartSym, FnEndSym); 5264 return false; 5265 } 5266 5267 /// parseDirectiveCVInlineLinetable 5268 /// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd 5269 bool MasmParser::parseDirectiveCVInlineLinetable() { 5270 int64_t PrimaryFunctionId, SourceFileId, SourceLineNum; 5271 StringRef FnStartName, FnEndName; 5272 SMLoc Loc = getTok().getLoc(); 5273 if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") || 5274 parseTokenLoc(Loc) || 5275 parseIntToken( 5276 SourceFileId, 
5277 "expected SourceField in '.cv_inline_linetable' directive") || 5278 check(SourceFileId <= 0, Loc, 5279 "File id less than zero in '.cv_inline_linetable' directive") || 5280 parseTokenLoc(Loc) || 5281 parseIntToken( 5282 SourceLineNum, 5283 "expected SourceLineNum in '.cv_inline_linetable' directive") || 5284 check(SourceLineNum < 0, Loc, 5285 "Line number less than zero in '.cv_inline_linetable' directive") || 5286 parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc, 5287 "expected identifier in directive") || 5288 parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc, 5289 "expected identifier in directive")) 5290 return true; 5291 5292 if (parseEOL()) 5293 return true; 5294 5295 MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName); 5296 MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName); 5297 getStreamer().emitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId, 5298 SourceLineNum, FnStartSym, 5299 FnEndSym); 5300 return false; 5301 } 5302 5303 void MasmParser::initializeCVDefRangeTypeMap() { 5304 CVDefRangeTypeMap["reg"] = CVDR_DEFRANGE_REGISTER; 5305 CVDefRangeTypeMap["frame_ptr_rel"] = CVDR_DEFRANGE_FRAMEPOINTER_REL; 5306 CVDefRangeTypeMap["subfield_reg"] = CVDR_DEFRANGE_SUBFIELD_REGISTER; 5307 CVDefRangeTypeMap["reg_rel"] = CVDR_DEFRANGE_REGISTER_REL; 5308 } 5309 5310 /// parseDirectiveCVDefRange 5311 /// ::= .cv_def_range RangeStart RangeEnd (GapStart GapEnd)*, bytes* 5312 bool MasmParser::parseDirectiveCVDefRange() { 5313 SMLoc Loc; 5314 std::vector<std::pair<const MCSymbol *, const MCSymbol *>> Ranges; 5315 while (getLexer().is(AsmToken::Identifier)) { 5316 Loc = getLexer().getLoc(); 5317 StringRef GapStartName; 5318 if (parseIdentifier(GapStartName)) 5319 return Error(Loc, "expected identifier in directive"); 5320 MCSymbol *GapStartSym = getContext().getOrCreateSymbol(GapStartName); 5321 5322 Loc = getLexer().getLoc(); 5323 StringRef GapEndName; 5324 if (parseIdentifier(GapEndName)) 5325 return 
Error(Loc, "expected identifier in directive"); 5326 MCSymbol *GapEndSym = getContext().getOrCreateSymbol(GapEndName); 5327 5328 Ranges.push_back({GapStartSym, GapEndSym}); 5329 } 5330 5331 StringRef CVDefRangeTypeStr; 5332 if (parseToken( 5333 AsmToken::Comma, 5334 "expected comma before def_range type in .cv_def_range directive") || 5335 parseIdentifier(CVDefRangeTypeStr)) 5336 return Error(Loc, "expected def_range type in directive"); 5337 5338 StringMap<CVDefRangeType>::const_iterator CVTypeIt = 5339 CVDefRangeTypeMap.find(CVDefRangeTypeStr); 5340 CVDefRangeType CVDRType = (CVTypeIt == CVDefRangeTypeMap.end()) 5341 ? CVDR_DEFRANGE 5342 : CVTypeIt->getValue(); 5343 switch (CVDRType) { 5344 case CVDR_DEFRANGE_REGISTER: { 5345 int64_t DRRegister; 5346 if (parseToken(AsmToken::Comma, "expected comma before register number in " 5347 ".cv_def_range directive") || 5348 parseAbsoluteExpression(DRRegister)) 5349 return Error(Loc, "expected register number"); 5350 5351 codeview::DefRangeRegisterHeader DRHdr; 5352 DRHdr.Register = DRRegister; 5353 DRHdr.MayHaveNoName = 0; 5354 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr); 5355 break; 5356 } 5357 case CVDR_DEFRANGE_FRAMEPOINTER_REL: { 5358 int64_t DROffset; 5359 if (parseToken(AsmToken::Comma, 5360 "expected comma before offset in .cv_def_range directive") || 5361 parseAbsoluteExpression(DROffset)) 5362 return Error(Loc, "expected offset value"); 5363 5364 codeview::DefRangeFramePointerRelHeader DRHdr; 5365 DRHdr.Offset = DROffset; 5366 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr); 5367 break; 5368 } 5369 case CVDR_DEFRANGE_SUBFIELD_REGISTER: { 5370 int64_t DRRegister; 5371 int64_t DROffsetInParent; 5372 if (parseToken(AsmToken::Comma, "expected comma before register number in " 5373 ".cv_def_range directive") || 5374 parseAbsoluteExpression(DRRegister)) 5375 return Error(Loc, "expected register number"); 5376 if (parseToken(AsmToken::Comma, 5377 "expected comma before offset in .cv_def_range directive") || 
5378 parseAbsoluteExpression(DROffsetInParent)) 5379 return Error(Loc, "expected offset value"); 5380 5381 codeview::DefRangeSubfieldRegisterHeader DRHdr; 5382 DRHdr.Register = DRRegister; 5383 DRHdr.MayHaveNoName = 0; 5384 DRHdr.OffsetInParent = DROffsetInParent; 5385 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr); 5386 break; 5387 } 5388 case CVDR_DEFRANGE_REGISTER_REL: { 5389 int64_t DRRegister; 5390 int64_t DRFlags; 5391 int64_t DRBasePointerOffset; 5392 if (parseToken(AsmToken::Comma, "expected comma before register number in " 5393 ".cv_def_range directive") || 5394 parseAbsoluteExpression(DRRegister)) 5395 return Error(Loc, "expected register value"); 5396 if (parseToken( 5397 AsmToken::Comma, 5398 "expected comma before flag value in .cv_def_range directive") || 5399 parseAbsoluteExpression(DRFlags)) 5400 return Error(Loc, "expected flag value"); 5401 if (parseToken(AsmToken::Comma, "expected comma before base pointer offset " 5402 "in .cv_def_range directive") || 5403 parseAbsoluteExpression(DRBasePointerOffset)) 5404 return Error(Loc, "expected base pointer offset value"); 5405 5406 codeview::DefRangeRegisterRelHeader DRHdr; 5407 DRHdr.Register = DRRegister; 5408 DRHdr.Flags = DRFlags; 5409 DRHdr.BasePointerOffset = DRBasePointerOffset; 5410 getStreamer().emitCVDefRangeDirective(Ranges, DRHdr); 5411 break; 5412 } 5413 default: 5414 return Error(Loc, "unexpected def_range type in .cv_def_range directive"); 5415 } 5416 return true; 5417 } 5418 5419 /// parseDirectiveCVString 5420 /// ::= .cv_stringtable "string" 5421 bool MasmParser::parseDirectiveCVString() { 5422 std::string Data; 5423 if (checkForValidSection() || parseEscapedString(Data)) 5424 return addErrorSuffix(" in '.cv_string' directive"); 5425 5426 // Put the string in the table and emit the offset. 
5427 std::pair<StringRef, unsigned> Insertion = 5428 getCVContext().addToStringTable(Data); 5429 getStreamer().emitIntValue(Insertion.second, 4); 5430 return false; 5431 } 5432 5433 /// parseDirectiveCVStringTable 5434 /// ::= .cv_stringtable 5435 bool MasmParser::parseDirectiveCVStringTable() { 5436 getStreamer().emitCVStringTableDirective(); 5437 return false; 5438 } 5439 5440 /// parseDirectiveCVFileChecksums 5441 /// ::= .cv_filechecksums 5442 bool MasmParser::parseDirectiveCVFileChecksums() { 5443 getStreamer().emitCVFileChecksumsDirective(); 5444 return false; 5445 } 5446 5447 /// parseDirectiveCVFileChecksumOffset 5448 /// ::= .cv_filechecksumoffset fileno 5449 bool MasmParser::parseDirectiveCVFileChecksumOffset() { 5450 int64_t FileNo; 5451 if (parseIntToken(FileNo, "expected identifier in directive")) 5452 return true; 5453 if (parseEOL()) 5454 return true; 5455 getStreamer().emitCVFileChecksumOffsetDirective(FileNo); 5456 return false; 5457 } 5458 5459 /// parseDirectiveCVFPOData 5460 /// ::= .cv_fpo_data procsym 5461 bool MasmParser::parseDirectiveCVFPOData() { 5462 SMLoc DirLoc = getLexer().getLoc(); 5463 StringRef ProcName; 5464 if (parseIdentifier(ProcName)) 5465 return TokError("expected symbol name"); 5466 if (parseEOL("unexpected tokens")) 5467 return addErrorSuffix(" in '.cv_fpo_data' directive"); 5468 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); 5469 getStreamer().emitCVFPOData(ProcSym, DirLoc); 5470 return false; 5471 } 5472 5473 /// parseDirectiveCFISections 5474 /// ::= .cfi_sections section [, section] 5475 bool MasmParser::parseDirectiveCFISections() { 5476 StringRef Name; 5477 bool EH = false; 5478 bool Debug = false; 5479 5480 if (parseIdentifier(Name)) 5481 return TokError("Expected an identifier"); 5482 5483 if (Name == ".eh_frame") 5484 EH = true; 5485 else if (Name == ".debug_frame") 5486 Debug = true; 5487 5488 if (getLexer().is(AsmToken::Comma)) { 5489 Lex(); 5490 5491 if (parseIdentifier(Name)) 5492 return 
TokError("Expected an identifier"); 5493 5494 if (Name == ".eh_frame") 5495 EH = true; 5496 else if (Name == ".debug_frame") 5497 Debug = true; 5498 } 5499 5500 getStreamer().emitCFISections(EH, Debug); 5501 return false; 5502 } 5503 5504 /// parseDirectiveCFIStartProc 5505 /// ::= .cfi_startproc [simple] 5506 bool MasmParser::parseDirectiveCFIStartProc() { 5507 StringRef Simple; 5508 if (!parseOptionalToken(AsmToken::EndOfStatement)) { 5509 if (check(parseIdentifier(Simple) || Simple != "simple", 5510 "unexpected token") || 5511 parseToken(AsmToken::EndOfStatement)) 5512 return addErrorSuffix(" in '.cfi_startproc' directive"); 5513 } 5514 5515 // TODO(kristina): Deal with a corner case of incorrect diagnostic context 5516 // being produced if this directive is emitted as part of preprocessor macro 5517 // expansion which can *ONLY* happen if Clang's cc1as is the API consumer. 5518 // Tools like llvm-mc on the other hand are not affected by it, and report 5519 // correct context information. 5520 getStreamer().emitCFIStartProc(!Simple.empty(), Lexer.getLoc()); 5521 return false; 5522 } 5523 5524 /// parseDirectiveCFIEndProc 5525 /// ::= .cfi_endproc 5526 bool MasmParser::parseDirectiveCFIEndProc() { 5527 getStreamer().emitCFIEndProc(); 5528 return false; 5529 } 5530 5531 /// parse register name or number. 
5532 bool MasmParser::parseRegisterOrRegisterNumber(int64_t &Register, 5533 SMLoc DirectiveLoc) { 5534 MCRegister RegNo; 5535 5536 if (getLexer().isNot(AsmToken::Integer)) { 5537 if (getTargetParser().parseRegister(RegNo, DirectiveLoc, DirectiveLoc)) 5538 return true; 5539 Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true); 5540 } else 5541 return parseAbsoluteExpression(Register); 5542 5543 return false; 5544 } 5545 5546 /// parseDirectiveCFIDefCfa 5547 /// ::= .cfi_def_cfa register, offset 5548 bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { 5549 int64_t Register = 0, Offset = 0; 5550 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || 5551 parseToken(AsmToken::Comma, "unexpected token in directive") || 5552 parseAbsoluteExpression(Offset)) 5553 return true; 5554 5555 getStreamer().emitCFIDefCfa(Register, Offset); 5556 return false; 5557 } 5558 5559 /// parseDirectiveCFIDefCfaOffset 5560 /// ::= .cfi_def_cfa_offset offset 5561 bool MasmParser::parseDirectiveCFIDefCfaOffset() { 5562 int64_t Offset = 0; 5563 if (parseAbsoluteExpression(Offset)) 5564 return true; 5565 5566 getStreamer().emitCFIDefCfaOffset(Offset); 5567 return false; 5568 } 5569 5570 /// parseDirectiveCFIRegister 5571 /// ::= .cfi_register register, register 5572 bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) { 5573 int64_t Register1 = 0, Register2 = 0; 5574 if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc) || 5575 parseToken(AsmToken::Comma, "unexpected token in directive") || 5576 parseRegisterOrRegisterNumber(Register2, DirectiveLoc)) 5577 return true; 5578 5579 getStreamer().emitCFIRegister(Register1, Register2); 5580 return false; 5581 } 5582 5583 /// parseDirectiveCFIWindowSave 5584 /// ::= .cfi_window_save 5585 bool MasmParser::parseDirectiveCFIWindowSave() { 5586 getStreamer().emitCFIWindowSave(); 5587 return false; 5588 } 5589 5590 /// parseDirectiveCFIAdjustCfaOffset 5591 /// ::= .cfi_adjust_cfa_offset adjustment 5592 
bool MasmParser::parseDirectiveCFIAdjustCfaOffset() { 5593 int64_t Adjustment = 0; 5594 if (parseAbsoluteExpression(Adjustment)) 5595 return true; 5596 5597 getStreamer().emitCFIAdjustCfaOffset(Adjustment); 5598 return false; 5599 } 5600 5601 /// parseDirectiveCFIDefCfaRegister 5602 /// ::= .cfi_def_cfa_register register 5603 bool MasmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) { 5604 int64_t Register = 0; 5605 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) 5606 return true; 5607 5608 getStreamer().emitCFIDefCfaRegister(Register); 5609 return false; 5610 } 5611 5612 /// parseDirectiveCFIOffset 5613 /// ::= .cfi_offset register, offset 5614 bool MasmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) { 5615 int64_t Register = 0; 5616 int64_t Offset = 0; 5617 5618 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || 5619 parseToken(AsmToken::Comma, "unexpected token in directive") || 5620 parseAbsoluteExpression(Offset)) 5621 return true; 5622 5623 getStreamer().emitCFIOffset(Register, Offset); 5624 return false; 5625 } 5626 5627 /// parseDirectiveCFIRelOffset 5628 /// ::= .cfi_rel_offset register, offset 5629 bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) { 5630 int64_t Register = 0, Offset = 0; 5631 5632 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || 5633 parseToken(AsmToken::Comma, "unexpected token in directive") || 5634 parseAbsoluteExpression(Offset)) 5635 return true; 5636 5637 getStreamer().emitCFIRelOffset(Register, Offset); 5638 return false; 5639 } 5640 5641 static bool isValidEncoding(int64_t Encoding) { 5642 if (Encoding & ~0xff) 5643 return false; 5644 5645 if (Encoding == dwarf::DW_EH_PE_omit) 5646 return true; 5647 5648 const unsigned Format = Encoding & 0xf; 5649 if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 && 5650 Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 && 5651 Format != dwarf::DW_EH_PE_sdata2 && Format != 
dwarf::DW_EH_PE_sdata4 && 5652 Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed) 5653 return false; 5654 5655 const unsigned Application = Encoding & 0x70; 5656 if (Application != dwarf::DW_EH_PE_absptr && 5657 Application != dwarf::DW_EH_PE_pcrel) 5658 return false; 5659 5660 return true; 5661 } 5662 5663 /// parseDirectiveCFIPersonalityOrLsda 5664 /// IsPersonality true for cfi_personality, false for cfi_lsda 5665 /// ::= .cfi_personality encoding, [symbol_name] 5666 /// ::= .cfi_lsda encoding, [symbol_name] 5667 bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { 5668 int64_t Encoding = 0; 5669 if (parseAbsoluteExpression(Encoding)) 5670 return true; 5671 if (Encoding == dwarf::DW_EH_PE_omit) 5672 return false; 5673 5674 StringRef Name; 5675 if (check(!isValidEncoding(Encoding), "unsupported encoding.") || 5676 parseToken(AsmToken::Comma, "unexpected token in directive") || 5677 check(parseIdentifier(Name), "expected identifier in directive")) 5678 return true; 5679 5680 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 5681 5682 if (IsPersonality) 5683 getStreamer().emitCFIPersonality(Sym, Encoding); 5684 else 5685 getStreamer().emitCFILsda(Sym, Encoding); 5686 return false; 5687 } 5688 5689 /// parseDirectiveCFIRememberState 5690 /// ::= .cfi_remember_state 5691 bool MasmParser::parseDirectiveCFIRememberState() { 5692 getStreamer().emitCFIRememberState(); 5693 return false; 5694 } 5695 5696 /// parseDirectiveCFIRestoreState 5697 /// ::= .cfi_remember_state 5698 bool MasmParser::parseDirectiveCFIRestoreState() { 5699 getStreamer().emitCFIRestoreState(); 5700 return false; 5701 } 5702 5703 /// parseDirectiveCFISameValue 5704 /// ::= .cfi_same_value register 5705 bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) { 5706 int64_t Register = 0; 5707 5708 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) 5709 return true; 5710 5711 getStreamer().emitCFISameValue(Register); 5712 return false; 5713 } 
5714 5715 /// parseDirectiveCFIRestore 5716 /// ::= .cfi_restore register 5717 bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) { 5718 int64_t Register = 0; 5719 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) 5720 return true; 5721 5722 getStreamer().emitCFIRestore(Register); 5723 return false; 5724 } 5725 5726 /// parseDirectiveCFIEscape 5727 /// ::= .cfi_escape expression[,...] 5728 bool MasmParser::parseDirectiveCFIEscape() { 5729 std::string Values; 5730 int64_t CurrValue; 5731 if (parseAbsoluteExpression(CurrValue)) 5732 return true; 5733 5734 Values.push_back((uint8_t)CurrValue); 5735 5736 while (getLexer().is(AsmToken::Comma)) { 5737 Lex(); 5738 5739 if (parseAbsoluteExpression(CurrValue)) 5740 return true; 5741 5742 Values.push_back((uint8_t)CurrValue); 5743 } 5744 5745 getStreamer().emitCFIEscape(Values); 5746 return false; 5747 } 5748 5749 /// parseDirectiveCFIReturnColumn 5750 /// ::= .cfi_return_column register 5751 bool MasmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) { 5752 int64_t Register = 0; 5753 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) 5754 return true; 5755 getStreamer().emitCFIReturnColumn(Register); 5756 return false; 5757 } 5758 5759 /// parseDirectiveCFISignalFrame 5760 /// ::= .cfi_signal_frame 5761 bool MasmParser::parseDirectiveCFISignalFrame() { 5762 if (parseEOL()) 5763 return true; 5764 5765 getStreamer().emitCFISignalFrame(); 5766 return false; 5767 } 5768 5769 /// parseDirectiveCFIUndefined 5770 /// ::= .cfi_undefined register 5771 bool MasmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { 5772 int64_t Register = 0; 5773 5774 if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) 5775 return true; 5776 5777 getStreamer().emitCFIUndefined(Register); 5778 return false; 5779 } 5780 5781 /// parseDirectiveMacro 5782 /// ::= name macro [parameters] 5783 /// ["LOCAL" identifiers] 5784 /// parameters ::= parameter [, parameter]* 5785 /// parameter ::= name ":" qualifier 5786 
/// qualifier ::= "req" | "vararg" | "=" macro_argument 5787 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { 5788 MCAsmMacroParameters Parameters; 5789 while (getLexer().isNot(AsmToken::EndOfStatement)) { 5790 if (!Parameters.empty() && Parameters.back().Vararg) 5791 return Error(Lexer.getLoc(), 5792 "Vararg parameter '" + Parameters.back().Name + 5793 "' should be last in the list of parameters"); 5794 5795 MCAsmMacroParameter Parameter; 5796 if (parseIdentifier(Parameter.Name)) 5797 return TokError("expected identifier in 'macro' directive"); 5798 5799 // Emit an error if two (or more) named parameters share the same name. 5800 for (const MCAsmMacroParameter& CurrParam : Parameters) 5801 if (CurrParam.Name.equals_insensitive(Parameter.Name)) 5802 return TokError("macro '" + Name + "' has multiple parameters" 5803 " named '" + Parameter.Name + "'"); 5804 5805 if (Lexer.is(AsmToken::Colon)) { 5806 Lex(); // consume ':' 5807 5808 if (parseOptionalToken(AsmToken::Equal)) { 5809 // Default value 5810 SMLoc ParamLoc; 5811 5812 ParamLoc = Lexer.getLoc(); 5813 if (parseMacroArgument(nullptr, Parameter.Value)) 5814 return true; 5815 } else { 5816 SMLoc QualLoc; 5817 StringRef Qualifier; 5818 5819 QualLoc = Lexer.getLoc(); 5820 if (parseIdentifier(Qualifier)) 5821 return Error(QualLoc, "missing parameter qualifier for " 5822 "'" + 5823 Parameter.Name + "' in macro '" + Name + 5824 "'"); 5825 5826 if (Qualifier.equals_insensitive("req")) 5827 Parameter.Required = true; 5828 else if (Qualifier.equals_insensitive("vararg")) 5829 Parameter.Vararg = true; 5830 else 5831 return Error(QualLoc, 5832 Qualifier + " is not a valid parameter qualifier for '" + 5833 Parameter.Name + "' in macro '" + Name + "'"); 5834 } 5835 } 5836 5837 Parameters.push_back(std::move(Parameter)); 5838 5839 if (getLexer().is(AsmToken::Comma)) 5840 Lex(); 5841 } 5842 5843 // Eat just the end of statement. 
5844 Lexer.Lex(); 5845 5846 std::vector<std::string> Locals; 5847 if (getTok().is(AsmToken::Identifier) && 5848 getTok().getIdentifier().equals_insensitive("local")) { 5849 Lex(); // Eat the LOCAL directive. 5850 5851 StringRef ID; 5852 while (true) { 5853 if (parseIdentifier(ID)) 5854 return true; 5855 Locals.push_back(ID.lower()); 5856 5857 // If we see a comma, continue (and allow line continuation). 5858 if (!parseOptionalToken(AsmToken::Comma)) 5859 break; 5860 parseOptionalToken(AsmToken::EndOfStatement); 5861 } 5862 } 5863 5864 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors. 5865 AsmToken EndToken, StartToken = getTok(); 5866 unsigned MacroDepth = 0; 5867 bool IsMacroFunction = false; 5868 // Lex the macro definition. 5869 while (true) { 5870 // Ignore Lexing errors in macros. 5871 while (Lexer.is(AsmToken::Error)) { 5872 Lexer.Lex(); 5873 } 5874 5875 // Check whether we have reached the end of the file. 5876 if (getLexer().is(AsmToken::Eof)) 5877 return Error(NameLoc, "no matching 'endm' in definition"); 5878 5879 // Otherwise, check whether we have reached the 'endm'... and determine if 5880 // this is a macro function. 5881 if (getLexer().is(AsmToken::Identifier)) { 5882 if (getTok().getIdentifier().equals_insensitive("endm")) { 5883 if (MacroDepth == 0) { // Outermost macro. 5884 EndToken = getTok(); 5885 Lexer.Lex(); 5886 if (getLexer().isNot(AsmToken::EndOfStatement)) 5887 return TokError("unexpected token in '" + EndToken.getIdentifier() + 5888 "' directive"); 5889 break; 5890 } else { 5891 // Otherwise we just found the end of an inner macro. 5892 --MacroDepth; 5893 } 5894 } else if (getTok().getIdentifier().equals_insensitive("exitm")) { 5895 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) { 5896 IsMacroFunction = true; 5897 } 5898 } else if (isMacroLikeDirective()) { 5899 // We allow nested macros. Those aren't instantiated until the 5900 // outermost macro is expanded so just ignore them for now. 
5901 ++MacroDepth; 5902 } 5903 } 5904 5905 // Otherwise, scan til the end of the statement. 5906 eatToEndOfStatement(); 5907 } 5908 5909 if (getContext().lookupMacro(Name.lower())) { 5910 return Error(NameLoc, "macro '" + Name + "' is already defined"); 5911 } 5912 5913 const char *BodyStart = StartToken.getLoc().getPointer(); 5914 const char *BodyEnd = EndToken.getLoc().getPointer(); 5915 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); 5916 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals), 5917 IsMacroFunction); 5918 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n"; 5919 Macro.dump()); 5920 getContext().defineMacro(Name.lower(), std::move(Macro)); 5921 return false; 5922 } 5923 5924 /// parseDirectiveExitMacro 5925 /// ::= "exitm" [textitem] 5926 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc, 5927 StringRef Directive, 5928 std::string &Value) { 5929 SMLoc EndLoc = getTok().getLoc(); 5930 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value)) 5931 return Error(EndLoc, 5932 "unable to parse text item in '" + Directive + "' directive"); 5933 eatToEndOfStatement(); 5934 5935 if (!isInsideMacroInstantiation()) 5936 return TokError("unexpected '" + Directive + "' in file, " 5937 "no current macro definition"); 5938 5939 // Exit all conditionals that are active in the current macro. 5940 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) { 5941 TheCondState = TheCondStack.back(); 5942 TheCondStack.pop_back(); 5943 } 5944 5945 handleMacroExit(); 5946 return false; 5947 } 5948 5949 /// parseDirectiveEndMacro 5950 /// ::= endm 5951 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) { 5952 if (getLexer().isNot(AsmToken::EndOfStatement)) 5953 return TokError("unexpected token in '" + Directive + "' directive"); 5954 5955 // If we are inside a macro instantiation, terminate the current 5956 // instantiation. 
5957 if (isInsideMacroInstantiation()) { 5958 handleMacroExit(); 5959 return false; 5960 } 5961 5962 // Otherwise, this .endmacro is a stray entry in the file; well formed 5963 // .endmacro directives are handled during the macro definition parsing. 5964 return TokError("unexpected '" + Directive + "' in file, " 5965 "no current macro definition"); 5966 } 5967 5968 /// parseDirectivePurgeMacro 5969 /// ::= purge identifier ( , identifier )* 5970 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) { 5971 StringRef Name; 5972 while (true) { 5973 SMLoc NameLoc; 5974 if (parseTokenLoc(NameLoc) || 5975 check(parseIdentifier(Name), NameLoc, 5976 "expected identifier in 'purge' directive")) 5977 return true; 5978 5979 DEBUG_WITH_TYPE("asm-macros", dbgs() 5980 << "Un-defining macro: " << Name << "\n"); 5981 if (!getContext().lookupMacro(Name.lower())) 5982 return Error(NameLoc, "macro '" + Name + "' is not defined"); 5983 getContext().undefineMacro(Name.lower()); 5984 5985 if (!parseOptionalToken(AsmToken::Comma)) 5986 break; 5987 parseOptionalToken(AsmToken::EndOfStatement); 5988 } 5989 5990 return false; 5991 } 5992 5993 bool MasmParser::parseDirectiveExtern() { 5994 // .extern is the default - but we still need to take any provided type info. 
5995 auto parseOp = [&]() -> bool { 5996 StringRef Name; 5997 SMLoc NameLoc = getTok().getLoc(); 5998 if (parseIdentifier(Name)) 5999 return Error(NameLoc, "expected name"); 6000 if (parseToken(AsmToken::Colon)) 6001 return true; 6002 6003 StringRef TypeName; 6004 SMLoc TypeLoc = getTok().getLoc(); 6005 if (parseIdentifier(TypeName)) 6006 return Error(TypeLoc, "expected type"); 6007 if (!TypeName.equals_insensitive("proc")) { 6008 AsmTypeInfo Type; 6009 if (lookUpType(TypeName, Type)) 6010 return Error(TypeLoc, "unrecognized type"); 6011 KnownType[Name.lower()] = Type; 6012 } 6013 6014 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 6015 Sym->setExternal(true); 6016 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern); 6017 6018 return false; 6019 }; 6020 6021 if (parseMany(parseOp)) 6022 return addErrorSuffix(" in directive 'extern'"); 6023 return false; 6024 } 6025 6026 /// parseDirectiveSymbolAttribute 6027 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] 6028 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { 6029 auto parseOp = [&]() -> bool { 6030 StringRef Name; 6031 SMLoc Loc = getTok().getLoc(); 6032 if (parseIdentifier(Name)) 6033 return Error(Loc, "expected identifier"); 6034 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 6035 6036 // Assembler local symbols don't make any sense here. Complain loudly. 
6037 if (Sym->isTemporary()) 6038 return Error(Loc, "non-local symbol required"); 6039 6040 if (!getStreamer().emitSymbolAttribute(Sym, Attr)) 6041 return Error(Loc, "unable to emit symbol attribute"); 6042 return false; 6043 }; 6044 6045 if (parseMany(parseOp)) 6046 return addErrorSuffix(" in directive"); 6047 return false; 6048 } 6049 6050 /// parseDirectiveComm 6051 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] 6052 bool MasmParser::parseDirectiveComm(bool IsLocal) { 6053 if (checkForValidSection()) 6054 return true; 6055 6056 SMLoc IDLoc = getLexer().getLoc(); 6057 StringRef Name; 6058 if (parseIdentifier(Name)) 6059 return TokError("expected identifier in directive"); 6060 6061 // Handle the identifier as the key symbol. 6062 MCSymbol *Sym = getContext().getOrCreateSymbol(Name); 6063 6064 if (getLexer().isNot(AsmToken::Comma)) 6065 return TokError("unexpected token in directive"); 6066 Lex(); 6067 6068 int64_t Size; 6069 SMLoc SizeLoc = getLexer().getLoc(); 6070 if (parseAbsoluteExpression(Size)) 6071 return true; 6072 6073 int64_t Pow2Alignment = 0; 6074 SMLoc Pow2AlignmentLoc; 6075 if (getLexer().is(AsmToken::Comma)) { 6076 Lex(); 6077 Pow2AlignmentLoc = getLexer().getLoc(); 6078 if (parseAbsoluteExpression(Pow2Alignment)) 6079 return true; 6080 6081 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); 6082 if (IsLocal && LCOMM == LCOMM::NoAlignment) 6083 return Error(Pow2AlignmentLoc, "alignment not supported on this target"); 6084 6085 // If this target takes alignments in bytes (not log) validate and convert. 
6086 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) || 6087 (IsLocal && LCOMM == LCOMM::ByteAlignment)) { 6088 if (!isPowerOf2_64(Pow2Alignment)) 6089 return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); 6090 Pow2Alignment = Log2_64(Pow2Alignment); 6091 } 6092 } 6093 6094 if (parseEOL()) 6095 return true; 6096 6097 // NOTE: a size of zero for a .comm should create a undefined symbol 6098 // but a size of .lcomm creates a bss symbol of size zero. 6099 if (Size < 0) 6100 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " 6101 "be less than zero"); 6102 6103 // NOTE: The alignment in the directive is a power of 2 value, the assembler 6104 // may internally end up wanting an alignment in bytes. 6105 // FIXME: Diagnose overflow. 6106 if (Pow2Alignment < 0) 6107 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " 6108 "alignment, can't be less than zero"); 6109 6110 Sym->redefineIfPossible(); 6111 if (!Sym->isUndefined()) 6112 return Error(IDLoc, "invalid symbol redefinition"); 6113 6114 // Create the Symbol as a common or local common with Size and Pow2Alignment. 
6115 if (IsLocal) { 6116 getStreamer().emitLocalCommonSymbol(Sym, Size, 6117 Align(1ULL << Pow2Alignment)); 6118 return false; 6119 } 6120 6121 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment)); 6122 return false; 6123 } 6124 6125 /// parseDirectiveComment 6126 /// ::= comment delimiter [[text]] 6127 /// [[text]] 6128 /// [[text]] delimiter [[text]] 6129 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) { 6130 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement); 6131 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A "); 6132 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd); 6133 if (Delimiter.empty()) 6134 return Error(DirectiveLoc, "no delimiter in 'comment' directive"); 6135 do { 6136 if (getTok().is(AsmToken::Eof)) 6137 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive"); 6138 Lex(); // eat end of statement 6139 } while ( 6140 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter)); 6141 return parseEOL(); 6142 } 6143 6144 /// parseDirectiveInclude 6145 /// ::= include <filename> 6146 /// | include filename 6147 bool MasmParser::parseDirectiveInclude() { 6148 // Allow the strings to have escaped octal character sequence. 6149 std::string Filename; 6150 SMLoc IncludeLoc = getTok().getLoc(); 6151 6152 if (parseAngleBracketString(Filename)) 6153 Filename = parseStringTo(AsmToken::EndOfStatement); 6154 if (check(Filename.empty(), "missing filename in 'include' directive") || 6155 check(getTok().isNot(AsmToken::EndOfStatement), 6156 "unexpected token in 'include' directive") || 6157 // Attempt to switch the lexer to the included file before consuming the 6158 // end of statement to avoid losing it when we switch. 
6159 check(enterIncludeFile(Filename), IncludeLoc, 6160 "Could not find include file '" + Filename + "'")) 6161 return true; 6162 6163 return false; 6164 } 6165 6166 /// parseDirectiveIf 6167 /// ::= .if{,eq,ge,gt,le,lt,ne} expression 6168 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { 6169 TheCondStack.push_back(TheCondState); 6170 TheCondState.TheCond = AsmCond::IfCond; 6171 if (TheCondState.Ignore) { 6172 eatToEndOfStatement(); 6173 } else { 6174 int64_t ExprValue; 6175 if (parseAbsoluteExpression(ExprValue) || parseEOL()) 6176 return true; 6177 6178 switch (DirKind) { 6179 default: 6180 llvm_unreachable("unsupported directive"); 6181 case DK_IF: 6182 break; 6183 case DK_IFE: 6184 ExprValue = ExprValue == 0; 6185 break; 6186 } 6187 6188 TheCondState.CondMet = ExprValue; 6189 TheCondState.Ignore = !TheCondState.CondMet; 6190 } 6191 6192 return false; 6193 } 6194 6195 /// parseDirectiveIfb 6196 /// ::= .ifb textitem 6197 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 6198 TheCondStack.push_back(TheCondState); 6199 TheCondState.TheCond = AsmCond::IfCond; 6200 6201 if (TheCondState.Ignore) { 6202 eatToEndOfStatement(); 6203 } else { 6204 std::string Str; 6205 if (parseTextItem(Str)) 6206 return TokError("expected text item parameter for 'ifb' directive"); 6207 6208 if (parseEOL()) 6209 return true; 6210 6211 TheCondState.CondMet = ExpectBlank == Str.empty(); 6212 TheCondState.Ignore = !TheCondState.CondMet; 6213 } 6214 6215 return false; 6216 } 6217 6218 /// parseDirectiveIfidn 6219 /// ::= ifidn textitem, textitem 6220 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 6221 bool CaseInsensitive) { 6222 std::string String1, String2; 6223 6224 if (parseTextItem(String1)) { 6225 if (ExpectEqual) 6226 return TokError("expected text item parameter for 'ifidn' directive"); 6227 return TokError("expected text item parameter for 'ifdif' directive"); 6228 } 6229 6230 if 
(Lexer.isNot(AsmToken::Comma)) { 6231 if (ExpectEqual) 6232 return TokError( 6233 "expected comma after first string for 'ifidn' directive"); 6234 return TokError("expected comma after first string for 'ifdif' directive"); 6235 } 6236 Lex(); 6237 6238 if (parseTextItem(String2)) { 6239 if (ExpectEqual) 6240 return TokError("expected text item parameter for 'ifidn' directive"); 6241 return TokError("expected text item parameter for 'ifdif' directive"); 6242 } 6243 6244 TheCondStack.push_back(TheCondState); 6245 TheCondState.TheCond = AsmCond::IfCond; 6246 if (CaseInsensitive) 6247 TheCondState.CondMet = 6248 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 6249 else 6250 TheCondState.CondMet = ExpectEqual == (String1 == String2); 6251 TheCondState.Ignore = !TheCondState.CondMet; 6252 6253 return false; 6254 } 6255 6256 /// parseDirectiveIfdef 6257 /// ::= ifdef symbol 6258 /// | ifdef variable 6259 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { 6260 TheCondStack.push_back(TheCondState); 6261 TheCondState.TheCond = AsmCond::IfCond; 6262 6263 if (TheCondState.Ignore) { 6264 eatToEndOfStatement(); 6265 } else { 6266 bool is_defined = false; 6267 MCRegister Reg; 6268 SMLoc StartLoc, EndLoc; 6269 is_defined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) == 6270 MatchOperand_Success); 6271 if (!is_defined) { 6272 StringRef Name; 6273 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") || 6274 parseEOL()) 6275 return true; 6276 6277 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { 6278 is_defined = true; 6279 } else if (Variables.find(Name.lower()) != Variables.end()) { 6280 is_defined = true; 6281 } else { 6282 MCSymbol *Sym = getContext().lookupSymbol(Name.lower()); 6283 is_defined = (Sym && !Sym->isUndefined(false)); 6284 } 6285 } 6286 6287 TheCondState.CondMet = (is_defined == expect_defined); 6288 TheCondState.Ignore = !TheCondState.CondMet; 6289 } 6290 6291 return 
false; 6292 } 6293 6294 /// parseDirectiveElseIf 6295 /// ::= elseif expression 6296 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc, 6297 DirectiveKind DirKind) { 6298 if (TheCondState.TheCond != AsmCond::IfCond && 6299 TheCondState.TheCond != AsmCond::ElseIfCond) 6300 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an" 6301 " .if or an .elseif"); 6302 TheCondState.TheCond = AsmCond::ElseIfCond; 6303 6304 bool LastIgnoreState = false; 6305 if (!TheCondStack.empty()) 6306 LastIgnoreState = TheCondStack.back().Ignore; 6307 if (LastIgnoreState || TheCondState.CondMet) { 6308 TheCondState.Ignore = true; 6309 eatToEndOfStatement(); 6310 } else { 6311 int64_t ExprValue; 6312 if (parseAbsoluteExpression(ExprValue)) 6313 return true; 6314 6315 if (parseEOL()) 6316 return true; 6317 6318 switch (DirKind) { 6319 default: 6320 llvm_unreachable("unsupported directive"); 6321 case DK_ELSEIF: 6322 break; 6323 case DK_ELSEIFE: 6324 ExprValue = ExprValue == 0; 6325 break; 6326 } 6327 6328 TheCondState.CondMet = ExprValue; 6329 TheCondState.Ignore = !TheCondState.CondMet; 6330 } 6331 6332 return false; 6333 } 6334 6335 /// parseDirectiveElseIfb 6336 /// ::= elseifb textitem 6337 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 6338 if (TheCondState.TheCond != AsmCond::IfCond && 6339 TheCondState.TheCond != AsmCond::ElseIfCond) 6340 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 6341 " if or an elseif"); 6342 TheCondState.TheCond = AsmCond::ElseIfCond; 6343 6344 bool LastIgnoreState = false; 6345 if (!TheCondStack.empty()) 6346 LastIgnoreState = TheCondStack.back().Ignore; 6347 if (LastIgnoreState || TheCondState.CondMet) { 6348 TheCondState.Ignore = true; 6349 eatToEndOfStatement(); 6350 } else { 6351 std::string Str; 6352 if (parseTextItem(Str)) { 6353 if (ExpectBlank) 6354 return TokError("expected text item parameter for 'elseifb' directive"); 6355 return TokError("expected text item 
parameter for 'elseifnb' directive"); 6356 } 6357 6358 if (parseEOL()) 6359 return true; 6360 6361 TheCondState.CondMet = ExpectBlank == Str.empty(); 6362 TheCondState.Ignore = !TheCondState.CondMet; 6363 } 6364 6365 return false; 6366 } 6367 6368 /// parseDirectiveElseIfdef 6369 /// ::= elseifdef symbol 6370 /// | elseifdef variable 6371 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc, 6372 bool expect_defined) { 6373 if (TheCondState.TheCond != AsmCond::IfCond && 6374 TheCondState.TheCond != AsmCond::ElseIfCond) 6375 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 6376 " if or an elseif"); 6377 TheCondState.TheCond = AsmCond::ElseIfCond; 6378 6379 bool LastIgnoreState = false; 6380 if (!TheCondStack.empty()) 6381 LastIgnoreState = TheCondStack.back().Ignore; 6382 if (LastIgnoreState || TheCondState.CondMet) { 6383 TheCondState.Ignore = true; 6384 eatToEndOfStatement(); 6385 } else { 6386 bool is_defined = false; 6387 MCRegister Reg; 6388 SMLoc StartLoc, EndLoc; 6389 is_defined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) == 6390 MatchOperand_Success); 6391 if (!is_defined) { 6392 StringRef Name; 6393 if (check(parseIdentifier(Name), 6394 "expected identifier after 'elseifdef'") || 6395 parseEOL()) 6396 return true; 6397 6398 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { 6399 is_defined = true; 6400 } else if (Variables.find(Name.lower()) != Variables.end()) { 6401 is_defined = true; 6402 } else { 6403 MCSymbol *Sym = getContext().lookupSymbol(Name); 6404 is_defined = (Sym && !Sym->isUndefined(false)); 6405 } 6406 } 6407 6408 TheCondState.CondMet = (is_defined == expect_defined); 6409 TheCondState.Ignore = !TheCondState.CondMet; 6410 } 6411 6412 return false; 6413 } 6414 6415 /// parseDirectiveElseIfidn 6416 /// ::= elseifidn textitem, textitem 6417 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 6418 bool CaseInsensitive) { 6419 if (TheCondState.TheCond != 
AsmCond::IfCond && 6420 TheCondState.TheCond != AsmCond::ElseIfCond) 6421 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 6422 " if or an elseif"); 6423 TheCondState.TheCond = AsmCond::ElseIfCond; 6424 6425 bool LastIgnoreState = false; 6426 if (!TheCondStack.empty()) 6427 LastIgnoreState = TheCondStack.back().Ignore; 6428 if (LastIgnoreState || TheCondState.CondMet) { 6429 TheCondState.Ignore = true; 6430 eatToEndOfStatement(); 6431 } else { 6432 std::string String1, String2; 6433 6434 if (parseTextItem(String1)) { 6435 if (ExpectEqual) 6436 return TokError( 6437 "expected text item parameter for 'elseifidn' directive"); 6438 return TokError("expected text item parameter for 'elseifdif' directive"); 6439 } 6440 6441 if (Lexer.isNot(AsmToken::Comma)) { 6442 if (ExpectEqual) 6443 return TokError( 6444 "expected comma after first string for 'elseifidn' directive"); 6445 return TokError( 6446 "expected comma after first string for 'elseifdif' directive"); 6447 } 6448 Lex(); 6449 6450 if (parseTextItem(String2)) { 6451 if (ExpectEqual) 6452 return TokError( 6453 "expected text item parameter for 'elseifidn' directive"); 6454 return TokError("expected text item parameter for 'elseifdif' directive"); 6455 } 6456 6457 if (CaseInsensitive) 6458 TheCondState.CondMet = 6459 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 6460 else 6461 TheCondState.CondMet = ExpectEqual == (String1 == String2); 6462 TheCondState.Ignore = !TheCondState.CondMet; 6463 } 6464 6465 return false; 6466 } 6467 6468 /// parseDirectiveElse 6469 /// ::= else 6470 bool MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) { 6471 if (parseEOL()) 6472 return true; 6473 6474 if (TheCondState.TheCond != AsmCond::IfCond && 6475 TheCondState.TheCond != AsmCond::ElseIfCond) 6476 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if" 6477 " or an elseif"); 6478 TheCondState.TheCond = AsmCond::ElseCond; 6479 bool LastIgnoreState = false; 6480 if 
(!TheCondStack.empty()) 6481 LastIgnoreState = TheCondStack.back().Ignore; 6482 if (LastIgnoreState || TheCondState.CondMet) 6483 TheCondState.Ignore = true; 6484 else 6485 TheCondState.Ignore = false; 6486 6487 return false; 6488 } 6489 6490 /// parseDirectiveEnd 6491 /// ::= end 6492 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) { 6493 if (parseEOL()) 6494 return true; 6495 6496 while (Lexer.isNot(AsmToken::Eof)) 6497 Lexer.Lex(); 6498 6499 return false; 6500 } 6501 6502 /// parseDirectiveError 6503 /// ::= .err [message] 6504 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) { 6505 if (!TheCondStack.empty()) { 6506 if (TheCondStack.back().Ignore) { 6507 eatToEndOfStatement(); 6508 return false; 6509 } 6510 } 6511 6512 std::string Message = ".err directive invoked in source file"; 6513 if (Lexer.isNot(AsmToken::EndOfStatement)) 6514 Message = parseStringTo(AsmToken::EndOfStatement); 6515 Lex(); 6516 6517 return Error(DirectiveLoc, Message); 6518 } 6519 6520 /// parseDirectiveErrorIfb 6521 /// ::= .errb textitem[, message] 6522 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 6523 if (!TheCondStack.empty()) { 6524 if (TheCondStack.back().Ignore) { 6525 eatToEndOfStatement(); 6526 return false; 6527 } 6528 } 6529 6530 std::string Text; 6531 if (parseTextItem(Text)) 6532 return Error(getTok().getLoc(), "missing text item in '.errb' directive"); 6533 6534 std::string Message = ".errb directive invoked in source file"; 6535 if (Lexer.isNot(AsmToken::EndOfStatement)) { 6536 if (parseToken(AsmToken::Comma)) 6537 return addErrorSuffix(" in '.errb' directive"); 6538 Message = parseStringTo(AsmToken::EndOfStatement); 6539 } 6540 Lex(); 6541 6542 if (Text.empty() == ExpectBlank) 6543 return Error(DirectiveLoc, Message); 6544 return false; 6545 } 6546 6547 /// parseDirectiveErrorIfdef 6548 /// ::= .errdef name[, message] 6549 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc, 6550 bool ExpectDefined) { 6551 if 
(!TheCondStack.empty()) { 6552 if (TheCondStack.back().Ignore) { 6553 eatToEndOfStatement(); 6554 return false; 6555 } 6556 } 6557 6558 bool IsDefined = false; 6559 MCRegister Reg; 6560 SMLoc StartLoc, EndLoc; 6561 IsDefined = (getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc) == 6562 MatchOperand_Success); 6563 if (!IsDefined) { 6564 StringRef Name; 6565 if (check(parseIdentifier(Name), "expected identifier after '.errdef'")) 6566 return true; 6567 6568 if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) { 6569 IsDefined = true; 6570 } else if (Variables.find(Name.lower()) != Variables.end()) { 6571 IsDefined = true; 6572 } else { 6573 MCSymbol *Sym = getContext().lookupSymbol(Name); 6574 IsDefined = (Sym && !Sym->isUndefined(false)); 6575 } 6576 } 6577 6578 std::string Message = ".errdef directive invoked in source file"; 6579 if (Lexer.isNot(AsmToken::EndOfStatement)) { 6580 if (parseToken(AsmToken::Comma)) 6581 return addErrorSuffix(" in '.errdef' directive"); 6582 Message = parseStringTo(AsmToken::EndOfStatement); 6583 } 6584 Lex(); 6585 6586 if (IsDefined == ExpectDefined) 6587 return Error(DirectiveLoc, Message); 6588 return false; 6589 } 6590 6591 /// parseDirectiveErrorIfidn 6592 /// ::= .erridn textitem, textitem[, message] 6593 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 6594 bool CaseInsensitive) { 6595 if (!TheCondStack.empty()) { 6596 if (TheCondStack.back().Ignore) { 6597 eatToEndOfStatement(); 6598 return false; 6599 } 6600 } 6601 6602 std::string String1, String2; 6603 6604 if (parseTextItem(String1)) { 6605 if (ExpectEqual) 6606 return TokError("expected string parameter for '.erridn' directive"); 6607 return TokError("expected string parameter for '.errdif' directive"); 6608 } 6609 6610 if (Lexer.isNot(AsmToken::Comma)) { 6611 if (ExpectEqual) 6612 return TokError( 6613 "expected comma after first string for '.erridn' directive"); 6614 return TokError( 6615 "expected comma after first 
string for '.errdif' directive"); 6616 } 6617 Lex(); 6618 6619 if (parseTextItem(String2)) { 6620 if (ExpectEqual) 6621 return TokError("expected string parameter for '.erridn' directive"); 6622 return TokError("expected string parameter for '.errdif' directive"); 6623 } 6624 6625 std::string Message; 6626 if (ExpectEqual) 6627 Message = ".erridn directive invoked in source file"; 6628 else 6629 Message = ".errdif directive invoked in source file"; 6630 if (Lexer.isNot(AsmToken::EndOfStatement)) { 6631 if (parseToken(AsmToken::Comma)) 6632 return addErrorSuffix(" in '.erridn' directive"); 6633 Message = parseStringTo(AsmToken::EndOfStatement); 6634 } 6635 Lex(); 6636 6637 if (CaseInsensitive) 6638 TheCondState.CondMet = 6639 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 6640 else 6641 TheCondState.CondMet = ExpectEqual == (String1 == String2); 6642 TheCondState.Ignore = !TheCondState.CondMet; 6643 6644 if ((CaseInsensitive && 6645 ExpectEqual == StringRef(String1).equals_insensitive(String2)) || 6646 (ExpectEqual == (String1 == String2))) 6647 return Error(DirectiveLoc, Message); 6648 return false; 6649 } 6650 6651 /// parseDirectiveErrorIfe 6652 /// ::= .erre expression[, message] 6653 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) { 6654 if (!TheCondStack.empty()) { 6655 if (TheCondStack.back().Ignore) { 6656 eatToEndOfStatement(); 6657 return false; 6658 } 6659 } 6660 6661 int64_t ExprValue; 6662 if (parseAbsoluteExpression(ExprValue)) 6663 return addErrorSuffix(" in '.erre' directive"); 6664 6665 std::string Message = ".erre directive invoked in source file"; 6666 if (Lexer.isNot(AsmToken::EndOfStatement)) { 6667 if (parseToken(AsmToken::Comma)) 6668 return addErrorSuffix(" in '.erre' directive"); 6669 Message = parseStringTo(AsmToken::EndOfStatement); 6670 } 6671 Lex(); 6672 6673 if ((ExprValue == 0) == ExpectZero) 6674 return Error(DirectiveLoc, Message); 6675 return false; 6676 } 6677 6678 /// 
parseDirectiveEndIf 6679 /// ::= .endif 6680 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) { 6681 if (parseEOL()) 6682 return true; 6683 6684 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty()) 6685 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow " 6686 "an .if or .else"); 6687 if (!TheCondStack.empty()) { 6688 TheCondState = TheCondStack.back(); 6689 TheCondStack.pop_back(); 6690 } 6691 6692 return false; 6693 } 6694 6695 void MasmParser::initializeDirectiveKindMap() { 6696 DirectiveKindMap["="] = DK_ASSIGN; 6697 DirectiveKindMap["equ"] = DK_EQU; 6698 DirectiveKindMap["textequ"] = DK_TEXTEQU; 6699 // DirectiveKindMap[".ascii"] = DK_ASCII; 6700 // DirectiveKindMap[".asciz"] = DK_ASCIZ; 6701 // DirectiveKindMap[".string"] = DK_STRING; 6702 DirectiveKindMap["byte"] = DK_BYTE; 6703 DirectiveKindMap["sbyte"] = DK_SBYTE; 6704 DirectiveKindMap["word"] = DK_WORD; 6705 DirectiveKindMap["sword"] = DK_SWORD; 6706 DirectiveKindMap["dword"] = DK_DWORD; 6707 DirectiveKindMap["sdword"] = DK_SDWORD; 6708 DirectiveKindMap["fword"] = DK_FWORD; 6709 DirectiveKindMap["qword"] = DK_QWORD; 6710 DirectiveKindMap["sqword"] = DK_SQWORD; 6711 DirectiveKindMap["real4"] = DK_REAL4; 6712 DirectiveKindMap["real8"] = DK_REAL8; 6713 DirectiveKindMap["real10"] = DK_REAL10; 6714 DirectiveKindMap["align"] = DK_ALIGN; 6715 DirectiveKindMap["even"] = DK_EVEN; 6716 DirectiveKindMap["org"] = DK_ORG; 6717 DirectiveKindMap["extern"] = DK_EXTERN; 6718 DirectiveKindMap["extrn"] = DK_EXTERN; 6719 DirectiveKindMap["public"] = DK_PUBLIC; 6720 // DirectiveKindMap[".comm"] = DK_COMM; 6721 DirectiveKindMap["comment"] = DK_COMMENT; 6722 DirectiveKindMap["include"] = DK_INCLUDE; 6723 DirectiveKindMap["repeat"] = DK_REPEAT; 6724 DirectiveKindMap["rept"] = DK_REPEAT; 6725 DirectiveKindMap["while"] = DK_WHILE; 6726 DirectiveKindMap["for"] = DK_FOR; 6727 DirectiveKindMap["irp"] = DK_FOR; 6728 DirectiveKindMap["forc"] = DK_FORC; 6729 DirectiveKindMap["irpc"] = DK_FORC; 
6730 DirectiveKindMap["if"] = DK_IF; 6731 DirectiveKindMap["ife"] = DK_IFE; 6732 DirectiveKindMap["ifb"] = DK_IFB; 6733 DirectiveKindMap["ifnb"] = DK_IFNB; 6734 DirectiveKindMap["ifdef"] = DK_IFDEF; 6735 DirectiveKindMap["ifndef"] = DK_IFNDEF; 6736 DirectiveKindMap["ifdif"] = DK_IFDIF; 6737 DirectiveKindMap["ifdifi"] = DK_IFDIFI; 6738 DirectiveKindMap["ifidn"] = DK_IFIDN; 6739 DirectiveKindMap["ifidni"] = DK_IFIDNI; 6740 DirectiveKindMap["elseif"] = DK_ELSEIF; 6741 DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF; 6742 DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF; 6743 DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF; 6744 DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN; 6745 DirectiveKindMap["else"] = DK_ELSE; 6746 DirectiveKindMap["end"] = DK_END; 6747 DirectiveKindMap["endif"] = DK_ENDIF; 6748 // DirectiveKindMap[".file"] = DK_FILE; 6749 // DirectiveKindMap[".line"] = DK_LINE; 6750 // DirectiveKindMap[".loc"] = DK_LOC; 6751 // DirectiveKindMap[".stabs"] = DK_STABS; 6752 // DirectiveKindMap[".cv_file"] = DK_CV_FILE; 6753 // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID; 6754 // DirectiveKindMap[".cv_loc"] = DK_CV_LOC; 6755 // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE; 6756 // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE; 6757 // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID; 6758 // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE; 6759 // DirectiveKindMap[".cv_string"] = DK_CV_STRING; 6760 // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE; 6761 // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS; 6762 // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET; 6763 // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA; 6764 // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS; 6765 // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC; 6766 // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC; 6767 // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA; 6768 // 
DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET; 6769 // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET; 6770 // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER; 6771 // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET; 6772 // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET; 6773 // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY; 6774 // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA; 6775 // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE; 6776 // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE; 6777 // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE; 6778 // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE; 6779 // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE; 6780 // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN; 6781 // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME; 6782 // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED; 6783 // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER; 6784 // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE; 6785 // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME; 6786 DirectiveKindMap["macro"] = DK_MACRO; 6787 DirectiveKindMap["exitm"] = DK_EXITM; 6788 DirectiveKindMap["endm"] = DK_ENDM; 6789 DirectiveKindMap["purge"] = DK_PURGE; 6790 DirectiveKindMap[".err"] = DK_ERR; 6791 DirectiveKindMap[".errb"] = DK_ERRB; 6792 DirectiveKindMap[".errnb"] = DK_ERRNB; 6793 DirectiveKindMap[".errdef"] = DK_ERRDEF; 6794 DirectiveKindMap[".errndef"] = DK_ERRNDEF; 6795 DirectiveKindMap[".errdif"] = DK_ERRDIF; 6796 DirectiveKindMap[".errdifi"] = DK_ERRDIFI; 6797 DirectiveKindMap[".erridn"] = DK_ERRIDN; 6798 DirectiveKindMap[".erridni"] = DK_ERRIDNI; 6799 DirectiveKindMap[".erre"] = DK_ERRE; 6800 DirectiveKindMap[".errnz"] = DK_ERRNZ; 6801 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME; 6802 DirectiveKindMap[".pushreg"] = DK_PUSHREG; 6803 DirectiveKindMap[".savereg"] = DK_SAVEREG; 
6804 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128; 6805 DirectiveKindMap[".setframe"] = DK_SETFRAME; 6806 DirectiveKindMap[".radix"] = DK_RADIX; 6807 DirectiveKindMap["db"] = DK_DB; 6808 DirectiveKindMap["dd"] = DK_DD; 6809 DirectiveKindMap["df"] = DK_DF; 6810 DirectiveKindMap["dq"] = DK_DQ; 6811 DirectiveKindMap["dw"] = DK_DW; 6812 DirectiveKindMap["echo"] = DK_ECHO; 6813 DirectiveKindMap["struc"] = DK_STRUCT; 6814 DirectiveKindMap["struct"] = DK_STRUCT; 6815 DirectiveKindMap["union"] = DK_UNION; 6816 DirectiveKindMap["ends"] = DK_ENDS; 6817 } 6818 6819 bool MasmParser::isMacroLikeDirective() { 6820 if (getLexer().is(AsmToken::Identifier)) { 6821 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier()) 6822 .CasesLower("repeat", "rept", true) 6823 .CaseLower("while", true) 6824 .CasesLower("for", "irp", true) 6825 .CasesLower("forc", "irpc", true) 6826 .Default(false); 6827 if (IsMacroLike) 6828 return true; 6829 } 6830 if (peekTok().is(AsmToken::Identifier) && 6831 peekTok().getIdentifier().equals_insensitive("macro")) 6832 return true; 6833 6834 return false; 6835 } 6836 6837 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { 6838 AsmToken EndToken, StartToken = getTok(); 6839 6840 unsigned NestLevel = 0; 6841 while (true) { 6842 // Check whether we have reached the end of the file. 6843 if (getLexer().is(AsmToken::Eof)) { 6844 printError(DirectiveLoc, "no matching 'endm' in definition"); 6845 return nullptr; 6846 } 6847 6848 if (isMacroLikeDirective()) 6849 ++NestLevel; 6850 6851 // Otherwise, check whether we have reached the endm. 
6852 if (Lexer.is(AsmToken::Identifier) && 6853 getTok().getIdentifier().equals_insensitive("endm")) { 6854 if (NestLevel == 0) { 6855 EndToken = getTok(); 6856 Lex(); 6857 if (Lexer.isNot(AsmToken::EndOfStatement)) { 6858 printError(getTok().getLoc(), "unexpected token in 'endm' directive"); 6859 return nullptr; 6860 } 6861 break; 6862 } 6863 --NestLevel; 6864 } 6865 6866 // Otherwise, scan till the end of the statement. 6867 eatToEndOfStatement(); 6868 } 6869 6870 const char *BodyStart = StartToken.getLoc().getPointer(); 6871 const char *BodyEnd = EndToken.getLoc().getPointer(); 6872 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); 6873 6874 // We Are Anonymous. 6875 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters()); 6876 return &MacroLikeBodies.back(); 6877 } 6878 6879 bool MasmParser::expandStatement(SMLoc Loc) { 6880 std::string Body = parseStringTo(AsmToken::EndOfStatement); 6881 SMLoc EndLoc = getTok().getLoc(); 6882 6883 MCAsmMacroParameters Parameters; 6884 MCAsmMacroArguments Arguments; 6885 6886 StringMap<std::string> BuiltinValues; 6887 for (const auto &S : BuiltinSymbolMap) { 6888 const BuiltinSymbol &Sym = S.getValue(); 6889 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) { 6890 BuiltinValues[S.getKey().lower()] = std::move(*Text); 6891 } 6892 } 6893 for (const auto &B : BuiltinValues) { 6894 MCAsmMacroParameter P; 6895 MCAsmMacroArgument A; 6896 P.Name = B.getKey(); 6897 P.Required = true; 6898 A.push_back(AsmToken(AsmToken::String, B.getValue())); 6899 6900 Parameters.push_back(std::move(P)); 6901 Arguments.push_back(std::move(A)); 6902 } 6903 6904 for (const auto &V : Variables) { 6905 const Variable &Var = V.getValue(); 6906 if (Var.IsText) { 6907 MCAsmMacroParameter P; 6908 MCAsmMacroArgument A; 6909 P.Name = Var.Name; 6910 P.Required = true; 6911 A.push_back(AsmToken(AsmToken::String, Var.TextValue)); 6912 6913 Parameters.push_back(std::move(P)); 6914 Arguments.push_back(std::move(A)); 
6915 } 6916 } 6917 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters); 6918 MCAsmMacro M = MacroLikeBodies.back(); 6919 6920 // Expand the statement in a new buffer. 6921 SmallString<80> Buf; 6922 raw_svector_ostream OS(Buf); 6923 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc)) 6924 return true; 6925 std::unique_ptr<MemoryBuffer> Expansion = 6926 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>"); 6927 6928 // Jump to the expanded statement and prime the lexer. 6929 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc); 6930 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 6931 EndStatementAtEOFStack.push_back(false); 6932 Lex(); 6933 return false; 6934 } 6935 6936 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, 6937 raw_svector_ostream &OS) { 6938 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS); 6939 } 6940 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, 6941 SMLoc ExitLoc, 6942 raw_svector_ostream &OS) { 6943 OS << "endm\n"; 6944 6945 std::unique_ptr<MemoryBuffer> Instantiation = 6946 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); 6947 6948 // Create the macro instantiation object and add to the current macro 6949 // instantiation stack. 6950 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer, 6951 ExitLoc, TheCondStack.size()}; 6952 ActiveMacros.push_back(MI); 6953 6954 // Jump to the macro instantiation and prime the lexer. 
6955 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); 6956 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 6957 EndStatementAtEOFStack.push_back(true); 6958 Lex(); 6959 } 6960 6961 /// parseDirectiveRepeat 6962 /// ::= ("repeat" | "rept") count 6963 /// body 6964 /// endm 6965 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) { 6966 const MCExpr *CountExpr; 6967 SMLoc CountLoc = getTok().getLoc(); 6968 if (parseExpression(CountExpr)) 6969 return true; 6970 6971 int64_t Count; 6972 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) { 6973 return Error(CountLoc, "unexpected token in '" + Dir + "' directive"); 6974 } 6975 6976 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL()) 6977 return true; 6978 6979 // Lex the repeat definition. 6980 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 6981 if (!M) 6982 return true; 6983 6984 // Macro instantiation is lexical, unfortunately. We construct a new buffer 6985 // to hold the macro body with substitutions. 6986 SmallString<256> Buf; 6987 raw_svector_ostream OS(Buf); 6988 while (Count--) { 6989 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals, 6990 getTok().getLoc())) 6991 return true; 6992 } 6993 instantiateMacroLikeBody(M, DirectiveLoc, OS); 6994 6995 return false; 6996 } 6997 6998 /// parseDirectiveWhile 6999 /// ::= "while" expression 7000 /// body 7001 /// endm 7002 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) { 7003 const MCExpr *CondExpr; 7004 SMLoc CondLoc = getTok().getLoc(); 7005 if (parseExpression(CondExpr)) 7006 return true; 7007 7008 // Lex the repeat definition. 7009 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 7010 if (!M) 7011 return true; 7012 7013 // Macro instantiation is lexical, unfortunately. We construct a new buffer 7014 // to hold the macro body with substitutions. 
7015 SmallString<256> Buf; 7016 raw_svector_ostream OS(Buf); 7017 int64_t Condition; 7018 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr())) 7019 return Error(CondLoc, "expected absolute expression in 'while' directive"); 7020 if (Condition) { 7021 // Instantiate the macro, then resume at this directive to recheck the 7022 // condition. 7023 if (expandMacro(OS, M->Body, std::nullopt, std::nullopt, M->Locals, 7024 getTok().getLoc())) 7025 return true; 7026 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS); 7027 } 7028 7029 return false; 7030 } 7031 7032 /// parseDirectiveFor 7033 /// ::= ("for" | "irp") symbol [":" qualifier], <values> 7034 /// body 7035 /// endm 7036 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) { 7037 MCAsmMacroParameter Parameter; 7038 MCAsmMacroArguments A; 7039 if (check(parseIdentifier(Parameter.Name), 7040 "expected identifier in '" + Dir + "' directive")) 7041 return true; 7042 7043 // Parse optional qualifier (default value, or "req") 7044 if (parseOptionalToken(AsmToken::Colon)) { 7045 if (parseOptionalToken(AsmToken::Equal)) { 7046 // Default value 7047 SMLoc ParamLoc; 7048 7049 ParamLoc = Lexer.getLoc(); 7050 if (parseMacroArgument(nullptr, Parameter.Value)) 7051 return true; 7052 } else { 7053 SMLoc QualLoc; 7054 StringRef Qualifier; 7055 7056 QualLoc = Lexer.getLoc(); 7057 if (parseIdentifier(Qualifier)) 7058 return Error(QualLoc, "missing parameter qualifier for " 7059 "'" + 7060 Parameter.Name + "' in '" + Dir + 7061 "' directive"); 7062 7063 if (Qualifier.equals_insensitive("req")) 7064 Parameter.Required = true; 7065 else 7066 return Error(QualLoc, 7067 Qualifier + " is not a valid parameter qualifier for '" + 7068 Parameter.Name + "' in '" + Dir + "' directive"); 7069 } 7070 } 7071 7072 if (parseToken(AsmToken::Comma, 7073 "expected comma in '" + Dir + "' directive") || 7074 parseToken(AsmToken::Less, 7075 "values in '" + Dir + 7076 "' directive must be 
enclosed in angle brackets")) 7077 return true; 7078 7079 while (true) { 7080 A.emplace_back(); 7081 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater)) 7082 return addErrorSuffix(" in arguments for '" + Dir + "' directive"); 7083 7084 // If we see a comma, continue, and allow line continuation. 7085 if (!parseOptionalToken(AsmToken::Comma)) 7086 break; 7087 parseOptionalToken(AsmToken::EndOfStatement); 7088 } 7089 7090 if (parseToken(AsmToken::Greater, 7091 "values in '" + Dir + 7092 "' directive must be enclosed in angle brackets") || 7093 parseEOL()) 7094 return true; 7095 7096 // Lex the for definition. 7097 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 7098 if (!M) 7099 return true; 7100 7101 // Macro instantiation is lexical, unfortunately. We construct a new buffer 7102 // to hold the macro body with substitutions. 7103 SmallString<256> Buf; 7104 raw_svector_ostream OS(Buf); 7105 7106 for (const MCAsmMacroArgument &Arg : A) { 7107 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc())) 7108 return true; 7109 } 7110 7111 instantiateMacroLikeBody(M, DirectiveLoc, OS); 7112 7113 return false; 7114 } 7115 7116 /// parseDirectiveForc 7117 /// ::= ("forc" | "irpc") symbol, <string> 7118 /// body 7119 /// endm 7120 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) { 7121 MCAsmMacroParameter Parameter; 7122 7123 std::string Argument; 7124 if (check(parseIdentifier(Parameter.Name), 7125 "expected identifier in '" + Directive + "' directive") || 7126 parseToken(AsmToken::Comma, 7127 "expected comma in '" + Directive + "' directive")) 7128 return true; 7129 if (parseAngleBracketString(Argument)) { 7130 // Match ml64.exe; treat all characters to end of statement as a string, 7131 // ignoring comment markers, then discard anything following a space (using 7132 // the C locale). 
7133 Argument = parseStringTo(AsmToken::EndOfStatement); 7134 if (getTok().is(AsmToken::EndOfStatement)) 7135 Argument += getTok().getString(); 7136 size_t End = 0; 7137 for (; End < Argument.size(); ++End) { 7138 if (isSpace(Argument[End])) 7139 break; 7140 } 7141 Argument.resize(End); 7142 } 7143 if (parseEOL()) 7144 return true; 7145 7146 // Lex the irpc definition. 7147 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 7148 if (!M) 7149 return true; 7150 7151 // Macro instantiation is lexical, unfortunately. We construct a new buffer 7152 // to hold the macro body with substitutions. 7153 SmallString<256> Buf; 7154 raw_svector_ostream OS(Buf); 7155 7156 StringRef Values(Argument); 7157 for (std::size_t I = 0, End = Values.size(); I != End; ++I) { 7158 MCAsmMacroArgument Arg; 7159 Arg.emplace_back(AsmToken::Identifier, Values.slice(I, I + 1)); 7160 7161 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc())) 7162 return true; 7163 } 7164 7165 instantiateMacroLikeBody(M, DirectiveLoc, OS); 7166 7167 return false; 7168 } 7169 7170 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, 7171 size_t Len) { 7172 const MCExpr *Value; 7173 SMLoc ExprLoc = getLexer().getLoc(); 7174 if (parseExpression(Value)) 7175 return true; 7176 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 7177 if (!MCE) 7178 return Error(ExprLoc, "unexpected expression in _emit"); 7179 uint64_t IntValue = MCE->getValue(); 7180 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue)) 7181 return Error(ExprLoc, "literal value out of range for directive"); 7182 7183 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len); 7184 return false; 7185 } 7186 7187 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { 7188 const MCExpr *Value; 7189 SMLoc ExprLoc = getLexer().getLoc(); 7190 if (parseExpression(Value)) 7191 return true; 7192 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 7193 if (!MCE) 7194 return 
Error(ExprLoc, "unexpected expression in align"); 7195 uint64_t IntValue = MCE->getValue(); 7196 if (!isPowerOf2_64(IntValue)) 7197 return Error(ExprLoc, "literal value not a power of two greater then zero"); 7198 7199 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue)); 7200 return false; 7201 } 7202 7203 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) { 7204 const SMLoc Loc = getLexer().getLoc(); 7205 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement); 7206 StringRef RadixString = StringRef(RadixStringRaw).trim(); 7207 unsigned Radix; 7208 if (RadixString.getAsInteger(10, Radix)) { 7209 return Error(Loc, 7210 "radix must be a decimal number in the range 2 to 16; was " + 7211 RadixString); 7212 } 7213 if (Radix < 2 || Radix > 16) 7214 return Error(Loc, "radix must be in the range 2 to 16; was " + 7215 std::to_string(Radix)); 7216 getLexer().setMasmDefaultRadix(Radix); 7217 return false; 7218 } 7219 7220 /// parseDirectiveEcho 7221 /// ::= "echo" message 7222 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) { 7223 std::string Message = parseStringTo(AsmToken::EndOfStatement); 7224 llvm::outs() << Message; 7225 if (!StringRef(Message).endswith("\n")) 7226 llvm::outs() << '\n'; 7227 return false; 7228 } 7229 7230 // We are comparing pointers, but the pointers are relative to a single string. 7231 // Thus, this should always be deterministic. 7232 static int rewritesSort(const AsmRewrite *AsmRewriteA, 7233 const AsmRewrite *AsmRewriteB) { 7234 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer()) 7235 return -1; 7236 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) 7237 return 1; 7238 7239 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output 7240 // rewrite to the same location. Make sure the SizeDirective rewrite is 7241 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This 7242 // ensures the sort algorithm is stable. 
7243 if (AsmRewritePrecedence[AsmRewriteA->Kind] > 7244 AsmRewritePrecedence[AsmRewriteB->Kind]) 7245 return -1; 7246 7247 if (AsmRewritePrecedence[AsmRewriteA->Kind] < 7248 AsmRewritePrecedence[AsmRewriteB->Kind]) 7249 return 1; 7250 llvm_unreachable("Unstable rewrite sort."); 7251 } 7252 7253 bool MasmParser::defineMacro(StringRef Name, StringRef Value) { 7254 Variable &Var = Variables[Name.lower()]; 7255 if (Var.Name.empty()) { 7256 Var.Name = Name; 7257 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) { 7258 return Error(SMLoc(), "invalid variable redefinition"); 7259 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION && 7260 Warning(SMLoc(), "redefining '" + Name + 7261 "', already defined on the command line")) { 7262 return true; 7263 } 7264 Var.Redefinable = Variable::WARN_ON_REDEFINITION; 7265 Var.IsText = true; 7266 Var.TextValue = Value.str(); 7267 return false; 7268 } 7269 7270 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const { 7271 const std::pair<StringRef, StringRef> BaseMember = Name.split('.'); 7272 const StringRef Base = BaseMember.first, Member = BaseMember.second; 7273 return lookUpField(Base, Member, Info); 7274 } 7275 7276 bool MasmParser::lookUpField(StringRef Base, StringRef Member, 7277 AsmFieldInfo &Info) const { 7278 if (Base.empty()) 7279 return true; 7280 7281 AsmFieldInfo BaseInfo; 7282 if (Base.contains('.') && !lookUpField(Base, BaseInfo)) 7283 Base = BaseInfo.Type.Name; 7284 7285 auto StructIt = Structs.find(Base.lower()); 7286 auto TypeIt = KnownType.find(Base.lower()); 7287 if (TypeIt != KnownType.end()) { 7288 StructIt = Structs.find(TypeIt->second.Name.lower()); 7289 } 7290 if (StructIt != Structs.end()) 7291 return lookUpField(StructIt->second, Member, Info); 7292 7293 return true; 7294 } 7295 7296 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, 7297 AsmFieldInfo &Info) const { 7298 if (Member.empty()) { 7299 Info.Type.Name = Structure.Name; 7300 Info.Type.Size 
= Structure.Size; 7301 Info.Type.ElementSize = Structure.Size; 7302 Info.Type.Length = 1; 7303 return false; 7304 } 7305 7306 std::pair<StringRef, StringRef> Split = Member.split('.'); 7307 const StringRef FieldName = Split.first, FieldMember = Split.second; 7308 7309 auto StructIt = Structs.find(FieldName.lower()); 7310 if (StructIt != Structs.end()) 7311 return lookUpField(StructIt->second, FieldMember, Info); 7312 7313 auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); 7314 if (FieldIt == Structure.FieldsByName.end()) 7315 return true; 7316 7317 const FieldInfo &Field = Structure.Fields[FieldIt->second]; 7318 if (FieldMember.empty()) { 7319 Info.Offset += Field.Offset; 7320 Info.Type.Size = Field.SizeOf; 7321 Info.Type.ElementSize = Field.Type; 7322 Info.Type.Length = Field.LengthOf; 7323 if (Field.Contents.FT == FT_STRUCT) 7324 Info.Type.Name = Field.Contents.StructInfo.Structure.Name; 7325 else 7326 Info.Type.Name = ""; 7327 return false; 7328 } 7329 7330 if (Field.Contents.FT != FT_STRUCT) 7331 return true; 7332 const StructFieldInfo &StructInfo = Field.Contents.StructInfo; 7333 7334 if (lookUpField(StructInfo.Structure, FieldMember, Info)) 7335 return true; 7336 7337 Info.Offset += Field.Offset; 7338 return false; 7339 } 7340 7341 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const { 7342 unsigned Size = StringSwitch<unsigned>(Name) 7343 .CasesLower("byte", "db", "sbyte", 1) 7344 .CasesLower("word", "dw", "sword", 2) 7345 .CasesLower("dword", "dd", "sdword", 4) 7346 .CasesLower("fword", "df", 6) 7347 .CasesLower("qword", "dq", "sqword", 8) 7348 .CaseLower("real4", 4) 7349 .CaseLower("real8", 8) 7350 .CaseLower("real10", 10) 7351 .Default(0); 7352 if (Size) { 7353 Info.Name = Name; 7354 Info.ElementSize = Size; 7355 Info.Length = 1; 7356 Info.Size = Size; 7357 return false; 7358 } 7359 7360 auto StructIt = Structs.find(Name.lower()); 7361 if (StructIt != Structs.end()) { 7362 const StructInfo &Structure = StructIt->second; 7363 
Info.Name = Name; 7364 Info.ElementSize = Structure.Size; 7365 Info.Length = 1; 7366 Info.Size = Structure.Size; 7367 return false; 7368 } 7369 7370 return true; 7371 } 7372 7373 bool MasmParser::parseMSInlineAsm( 7374 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, 7375 SmallVectorImpl<std::pair<void *, bool>> &OpDecls, 7376 SmallVectorImpl<std::string> &Constraints, 7377 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, 7378 const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { 7379 SmallVector<void *, 4> InputDecls; 7380 SmallVector<void *, 4> OutputDecls; 7381 SmallVector<bool, 4> InputDeclsAddressOf; 7382 SmallVector<bool, 4> OutputDeclsAddressOf; 7383 SmallVector<std::string, 4> InputConstraints; 7384 SmallVector<std::string, 4> OutputConstraints; 7385 SmallVector<unsigned, 4> ClobberRegs; 7386 7387 SmallVector<AsmRewrite, 4> AsmStrRewrites; 7388 7389 // Prime the lexer. 7390 Lex(); 7391 7392 // While we have input, parse each statement. 7393 unsigned InputIdx = 0; 7394 unsigned OutputIdx = 0; 7395 while (getLexer().isNot(AsmToken::Eof)) { 7396 // Parse curly braces marking block start/end. 7397 if (parseCurlyBlockScope(AsmStrRewrites)) 7398 continue; 7399 7400 ParseStatementInfo Info(&AsmStrRewrites); 7401 bool StatementErr = parseStatement(Info, &SI); 7402 7403 if (StatementErr || Info.ParseError) { 7404 // Emit pending errors if any exist. 7405 printPendingErrors(); 7406 return true; 7407 } 7408 7409 // No pending error should exist here. 7410 assert(!hasPendingError() && "unexpected error from parseStatement"); 7411 7412 if (Info.Opcode == ~0U) 7413 continue; 7414 7415 const MCInstrDesc &Desc = MII->get(Info.Opcode); 7416 7417 // Build the list of clobbers, outputs and inputs. 7418 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) { 7419 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i]; 7420 7421 // Register operand. 
7422 if (Operand.isReg() && !Operand.needAddressOf() && 7423 !getTargetParser().OmitRegisterFromClobberLists(Operand.getReg())) { 7424 unsigned NumDefs = Desc.getNumDefs(); 7425 // Clobber. 7426 if (NumDefs && Operand.getMCOperandNum() < NumDefs) 7427 ClobberRegs.push_back(Operand.getReg()); 7428 continue; 7429 } 7430 7431 // Expr/Input or Output. 7432 StringRef SymName = Operand.getSymName(); 7433 if (SymName.empty()) 7434 continue; 7435 7436 void *OpDecl = Operand.getOpDecl(); 7437 if (!OpDecl) 7438 continue; 7439 7440 StringRef Constraint = Operand.getConstraint(); 7441 if (Operand.isImm()) { 7442 // Offset as immediate. 7443 if (Operand.isOffsetOfLocal()) 7444 Constraint = "r"; 7445 else 7446 Constraint = "i"; 7447 } 7448 7449 bool isOutput = (i == 1) && Desc.mayStore(); 7450 SMLoc Start = SMLoc::getFromPointer(SymName.data()); 7451 if (isOutput) { 7452 ++InputIdx; 7453 OutputDecls.push_back(OpDecl); 7454 OutputDeclsAddressOf.push_back(Operand.needAddressOf()); 7455 OutputConstraints.push_back(("=" + Constraint).str()); 7456 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size()); 7457 } else { 7458 InputDecls.push_back(OpDecl); 7459 InputDeclsAddressOf.push_back(Operand.needAddressOf()); 7460 InputConstraints.push_back(Constraint.str()); 7461 if (Desc.operands()[i - 1].isBranchTarget()) 7462 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size()); 7463 else 7464 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size()); 7465 } 7466 } 7467 7468 // Consider implicit defs to be clobbers. Think of cpuid and push. 7469 llvm::append_range(ClobberRegs, Desc.implicit_defs()); 7470 } 7471 7472 // Set the number of Outputs and Inputs. 7473 NumOutputs = OutputDecls.size(); 7474 NumInputs = InputDecls.size(); 7475 7476 // Set the unique clobbers. 
7477 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end()); 7478 ClobberRegs.erase(std::unique(ClobberRegs.begin(), ClobberRegs.end()), 7479 ClobberRegs.end()); 7480 Clobbers.assign(ClobberRegs.size(), std::string()); 7481 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) { 7482 raw_string_ostream OS(Clobbers[I]); 7483 IP->printRegName(OS, ClobberRegs[I]); 7484 } 7485 7486 // Merge the various outputs and inputs. Output are expected first. 7487 if (NumOutputs || NumInputs) { 7488 unsigned NumExprs = NumOutputs + NumInputs; 7489 OpDecls.resize(NumExprs); 7490 Constraints.resize(NumExprs); 7491 for (unsigned i = 0; i < NumOutputs; ++i) { 7492 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]); 7493 Constraints[i] = OutputConstraints[i]; 7494 } 7495 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) { 7496 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]); 7497 Constraints[j] = InputConstraints[i]; 7498 } 7499 } 7500 7501 // Build the IR assembly string. 7502 std::string AsmStringIR; 7503 raw_string_ostream OS(AsmStringIR); 7504 StringRef ASMString = 7505 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer(); 7506 const char *AsmStart = ASMString.begin(); 7507 const char *AsmEnd = ASMString.end(); 7508 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort); 7509 for (auto it = AsmStrRewrites.begin(); it != AsmStrRewrites.end(); ++it) { 7510 const AsmRewrite &AR = *it; 7511 // Check if this has already been covered by another rewrite... 7512 if (AR.Done) 7513 continue; 7514 AsmRewriteKind Kind = AR.Kind; 7515 7516 const char *Loc = AR.Loc.getPointer(); 7517 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); 7518 7519 // Emit everything up to the immediate/expression. 7520 if (unsigned Len = Loc - AsmStart) 7521 OS << StringRef(AsmStart, Len); 7522 7523 // Skip the original expression. 
7524 if (Kind == AOK_Skip) { 7525 AsmStart = Loc + AR.Len; 7526 continue; 7527 } 7528 7529 unsigned AdditionalSkip = 0; 7530 // Rewrite expressions in $N notation. 7531 switch (Kind) { 7532 default: 7533 break; 7534 case AOK_IntelExpr: 7535 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression"); 7536 if (AR.IntelExp.NeedBracs) 7537 OS << "["; 7538 if (AR.IntelExp.hasBaseReg()) 7539 OS << AR.IntelExp.BaseReg; 7540 if (AR.IntelExp.hasIndexReg()) 7541 OS << (AR.IntelExp.hasBaseReg() ? " + " : "") 7542 << AR.IntelExp.IndexReg; 7543 if (AR.IntelExp.Scale > 1) 7544 OS << " * $$" << AR.IntelExp.Scale; 7545 if (AR.IntelExp.hasOffset()) { 7546 if (AR.IntelExp.hasRegs()) 7547 OS << " + "; 7548 // Fuse this rewrite with a rewrite of the offset name, if present. 7549 StringRef OffsetName = AR.IntelExp.OffsetName; 7550 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data()); 7551 size_t OffsetLen = OffsetName.size(); 7552 auto rewrite_it = std::find_if( 7553 it, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) { 7554 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen && 7555 (FusingAR.Kind == AOK_Input || 7556 FusingAR.Kind == AOK_CallInput); 7557 }); 7558 if (rewrite_it == AsmStrRewrites.end()) { 7559 OS << "offset " << OffsetName; 7560 } else if (rewrite_it->Kind == AOK_CallInput) { 7561 OS << "${" << InputIdx++ << ":P}"; 7562 rewrite_it->Done = true; 7563 } else { 7564 OS << '$' << InputIdx++; 7565 rewrite_it->Done = true; 7566 } 7567 } 7568 if (AR.IntelExp.Imm || AR.IntelExp.emitImm()) 7569 OS << (AR.IntelExp.emitImm() ? 
"$$" : " + $$") << AR.IntelExp.Imm; 7570 if (AR.IntelExp.NeedBracs) 7571 OS << "]"; 7572 break; 7573 case AOK_Label: 7574 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label; 7575 break; 7576 case AOK_Input: 7577 OS << '$' << InputIdx++; 7578 break; 7579 case AOK_CallInput: 7580 OS << "${" << InputIdx++ << ":P}"; 7581 break; 7582 case AOK_Output: 7583 OS << '$' << OutputIdx++; 7584 break; 7585 case AOK_SizeDirective: 7586 switch (AR.Val) { 7587 default: break; 7588 case 8: OS << "byte ptr "; break; 7589 case 16: OS << "word ptr "; break; 7590 case 32: OS << "dword ptr "; break; 7591 case 64: OS << "qword ptr "; break; 7592 case 80: OS << "xword ptr "; break; 7593 case 128: OS << "xmmword ptr "; break; 7594 case 256: OS << "ymmword ptr "; break; 7595 } 7596 break; 7597 case AOK_Emit: 7598 OS << ".byte"; 7599 break; 7600 case AOK_Align: { 7601 // MS alignment directives are measured in bytes. If the native assembler 7602 // measures alignment in bytes, we can pass it straight through. 7603 OS << ".align"; 7604 if (getContext().getAsmInfo()->getAlignmentIsInBytes()) 7605 break; 7606 7607 // Alignment is in log2 form, so print that instead and skip the original 7608 // immediate. 7609 unsigned Val = AR.Val; 7610 OS << ' ' << Val; 7611 assert(Val < 10 && "Expected alignment less then 2^10."); 7612 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4; 7613 break; 7614 } 7615 case AOK_EVEN: 7616 OS << ".even"; 7617 break; 7618 case AOK_EndOfStatement: 7619 OS << "\n\t"; 7620 break; 7621 } 7622 7623 // Skip the original expression. 7624 AsmStart = Loc + AR.Len + AdditionalSkip; 7625 } 7626 7627 // Emit the remainder of the asm string. 
7628 if (AsmStart != AsmEnd) 7629 OS << StringRef(AsmStart, AsmEnd - AsmStart); 7630 7631 AsmString = OS.str(); 7632 return false; 7633 } 7634 7635 void MasmParser::initializeBuiltinSymbolMap() { 7636 // Numeric built-ins (supported in all versions) 7637 BuiltinSymbolMap["@version"] = BI_VERSION; 7638 BuiltinSymbolMap["@line"] = BI_LINE; 7639 7640 // Text built-ins (supported in all versions) 7641 BuiltinSymbolMap["@date"] = BI_DATE; 7642 BuiltinSymbolMap["@time"] = BI_TIME; 7643 BuiltinSymbolMap["@filecur"] = BI_FILECUR; 7644 BuiltinSymbolMap["@filename"] = BI_FILENAME; 7645 BuiltinSymbolMap["@curseg"] = BI_CURSEG; 7646 7647 // Some built-ins exist only for MASM32 (32-bit x86) 7648 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() == 7649 Triple::x86) { 7650 // Numeric built-ins 7651 // BuiltinSymbolMap["@cpu"] = BI_CPU; 7652 // BuiltinSymbolMap["@interface"] = BI_INTERFACE; 7653 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE; 7654 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE; 7655 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE; 7656 // BuiltinSymbolMap["@model"] = BI_MODEL; 7657 7658 // Text built-ins 7659 // BuiltinSymbolMap["@code"] = BI_CODE; 7660 // BuiltinSymbolMap["@data"] = BI_DATA; 7661 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA; 7662 // BuiltinSymbolMap["@stack"] = BI_STACK; 7663 } 7664 } 7665 7666 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol, 7667 SMLoc StartLoc) { 7668 switch (Symbol) { 7669 default: 7670 return nullptr; 7671 case BI_VERSION: 7672 // Match a recent version of ML.EXE. 
7673 return MCConstantExpr::create(1427, getContext()); 7674 case BI_LINE: { 7675 int64_t Line; 7676 if (ActiveMacros.empty()) 7677 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer); 7678 else 7679 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc, 7680 ActiveMacros.front()->ExitBuffer); 7681 return MCConstantExpr::create(Line, getContext()); 7682 } 7683 } 7684 llvm_unreachable("unhandled built-in symbol"); 7685 } 7686 7687 std::optional<std::string> 7688 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) { 7689 switch (Symbol) { 7690 default: 7691 return {}; 7692 case BI_DATE: { 7693 // Current local date, formatted MM/DD/YY 7694 char TmpBuffer[sizeof("mm/dd/yy")]; 7695 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM); 7696 return std::string(TmpBuffer, Len); 7697 } 7698 case BI_TIME: { 7699 // Current local time, formatted HH:MM:SS (24-hour clock) 7700 char TmpBuffer[sizeof("hh:mm:ss")]; 7701 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM); 7702 return std::string(TmpBuffer, Len); 7703 } 7704 case BI_FILECUR: 7705 return SrcMgr 7706 .getMemoryBuffer( 7707 ActiveMacros.empty() ? CurBuffer : ActiveMacros.front()->ExitBuffer) 7708 ->getBufferIdentifier() 7709 .str(); 7710 case BI_FILENAME: 7711 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()) 7712 ->getBufferIdentifier()) 7713 .upper(); 7714 case BI_CURSEG: 7715 return getStreamer().getCurrentSectionOnly()->getName().str(); 7716 } 7717 llvm_unreachable("unhandled built-in symbol"); 7718 } 7719 7720 /// Create an MCAsmParser instance. 7721 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C, 7722 MCStreamer &Out, const MCAsmInfo &MAI, 7723 struct tm TM, unsigned CB) { 7724 return new MasmParser(SM, C, Out, MAI, TM, CB); 7725 } 7726