1 //===- AsmParser.cpp - Parser for Assembly Files --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class implements the parser for assembly files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ADT/APFloat.h" 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringExtras.h" 21 #include "llvm/ADT/StringMap.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/ADT/StringSwitch.h" 24 #include "llvm/ADT/Twine.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCCodeView.h" 27 #include "llvm/MC/MCContext.h" 28 #include "llvm/MC/MCDirectives.h" 29 #include "llvm/MC/MCExpr.h" 30 #include "llvm/MC/MCInstPrinter.h" 31 #include "llvm/MC/MCInstrDesc.h" 32 #include "llvm/MC/MCInstrInfo.h" 33 #include "llvm/MC/MCParser/AsmCond.h" 34 #include "llvm/MC/MCParser/AsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCSection.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/MC/MCTargetOptions.h" 44 #include "llvm/Support/Casting.h" 45 #include "llvm/Support/CommandLine.h" 46 #include "llvm/Support/ErrorHandling.h" 47 #include "llvm/Support/Format.h" 48 #include "llvm/Support/MD5.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/MemoryBuffer.h" 51 #include 
"llvm/Support/Path.h" 52 #include "llvm/Support/SMLoc.h" 53 #include "llvm/Support/SourceMgr.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <climits> 58 #include <cstddef> 59 #include <cstdint> 60 #include <ctime> 61 #include <deque> 62 #include <memory> 63 #include <optional> 64 #include <sstream> 65 #include <string> 66 #include <tuple> 67 #include <utility> 68 #include <vector> 69 70 using namespace llvm; 71 72 namespace { 73 74 /// Helper types for tracking macro definitions. 75 typedef std::vector<AsmToken> MCAsmMacroArgument; 76 typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments; 77 78 /// Helper class for storing information about an active macro instantiation. 79 struct MacroInstantiation { 80 /// The location of the instantiation. 81 SMLoc InstantiationLoc; 82 83 /// The buffer where parsing should resume upon instantiation completion. 84 unsigned ExitBuffer; 85 86 /// The location where parsing should resume upon instantiation completion. 87 SMLoc ExitLoc; 88 89 /// The depth of TheCondStack at the start of the instantiation. 90 size_t CondStackDepth; 91 }; 92 93 struct ParseStatementInfo { 94 /// The parsed operands from the last parsed statement. 95 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands; 96 97 /// The opcode from the last parsed instruction. 98 unsigned Opcode = ~0U; 99 100 /// Was there an error parsing the inline assembly? 101 bool ParseError = false; 102 103 /// The value associated with a macro exit. 104 std::optional<std::string> ExitValue; 105 106 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 107 108 ParseStatementInfo() = delete; 109 ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites) 110 : AsmRewrites(rewrites) {} 111 }; 112 113 enum FieldType { 114 FT_INTEGRAL, // Initializer: integer expression, stored as an MCExpr. 115 FT_REAL, // Initializer: real number, stored as an APInt. 
116 FT_STRUCT // Initializer: struct initializer, stored recursively. 117 }; 118 119 struct FieldInfo; 120 struct StructInfo { 121 StringRef Name; 122 bool IsUnion = false; 123 bool Initializable = true; 124 unsigned Alignment = 0; 125 unsigned AlignmentSize = 0; 126 unsigned NextOffset = 0; 127 unsigned Size = 0; 128 std::vector<FieldInfo> Fields; 129 StringMap<size_t> FieldsByName; 130 131 FieldInfo &addField(StringRef FieldName, FieldType FT, 132 unsigned FieldAlignmentSize); 133 134 StructInfo() = default; 135 StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue); 136 }; 137 138 // FIXME: This should probably use a class hierarchy, raw pointers between the 139 // objects, and dynamic type resolution instead of a union. On the other hand, 140 // ownership then becomes much more complicated; the obvious thing would be to 141 // use BumpPtrAllocator, but the lack of a destructor makes that messy. 142 143 struct StructInitializer; 144 struct IntFieldInfo { 145 SmallVector<const MCExpr *, 1> Values; 146 147 IntFieldInfo() = default; 148 IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; } 149 IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); } 150 }; 151 struct RealFieldInfo { 152 SmallVector<APInt, 1> AsIntValues; 153 154 RealFieldInfo() = default; 155 RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; } 156 RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); } 157 }; 158 struct StructFieldInfo { 159 std::vector<StructInitializer> Initializers; 160 StructInfo Structure; 161 162 StructFieldInfo() = default; 163 StructFieldInfo(std::vector<StructInitializer> V, StructInfo S); 164 }; 165 166 class FieldInitializer { 167 public: 168 FieldType FT; 169 union { 170 IntFieldInfo IntInfo; 171 RealFieldInfo RealInfo; 172 StructFieldInfo StructInfo; 173 }; 174 175 ~FieldInitializer(); 176 FieldInitializer(FieldType FT); 177 178 FieldInitializer(SmallVector<const MCExpr *, 1> &&Values); 
179 FieldInitializer(SmallVector<APInt, 1> &&AsIntValues); 180 FieldInitializer(std::vector<StructInitializer> &&Initializers, 181 struct StructInfo Structure); 182 183 FieldInitializer(const FieldInitializer &Initializer); 184 FieldInitializer(FieldInitializer &&Initializer); 185 186 FieldInitializer &operator=(const FieldInitializer &Initializer); 187 FieldInitializer &operator=(FieldInitializer &&Initializer); 188 }; 189 190 struct StructInitializer { 191 std::vector<FieldInitializer> FieldInitializers; 192 }; 193 194 struct FieldInfo { 195 // Offset of the field within the containing STRUCT. 196 unsigned Offset = 0; 197 198 // Total size of the field (= LengthOf * Type). 199 unsigned SizeOf = 0; 200 201 // Number of elements in the field (1 if scalar, >1 if an array). 202 unsigned LengthOf = 0; 203 204 // Size of a single entry in this field, in bytes ("type" in MASM standards). 205 unsigned Type = 0; 206 207 FieldInitializer Contents; 208 209 FieldInfo(FieldType FT) : Contents(FT) {} 210 }; 211 212 StructFieldInfo::StructFieldInfo(std::vector<StructInitializer> V, 213 StructInfo S) { 214 Initializers = std::move(V); 215 Structure = S; 216 } 217 218 StructInfo::StructInfo(StringRef StructName, bool Union, 219 unsigned AlignmentValue) 220 : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {} 221 222 FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, 223 unsigned FieldAlignmentSize) { 224 if (!FieldName.empty()) 225 FieldsByName[FieldName.lower()] = Fields.size(); 226 Fields.emplace_back(FT); 227 FieldInfo &Field = Fields.back(); 228 Field.Offset = 229 llvm::alignTo(NextOffset, std::min(Alignment, FieldAlignmentSize)); 230 if (!IsUnion) { 231 NextOffset = std::max(NextOffset, Field.Offset); 232 } 233 AlignmentSize = std::max(AlignmentSize, FieldAlignmentSize); 234 return Field; 235 } 236 237 FieldInitializer::~FieldInitializer() { 238 switch (FT) { 239 case FT_INTEGRAL: 240 IntInfo.~IntFieldInfo(); 241 break; 242 case FT_REAL: 243 
RealInfo.~RealFieldInfo(); 244 break; 245 case FT_STRUCT: 246 StructInfo.~StructFieldInfo(); 247 break; 248 } 249 } 250 251 FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) { 252 switch (FT) { 253 case FT_INTEGRAL: 254 new (&IntInfo) IntFieldInfo(); 255 break; 256 case FT_REAL: 257 new (&RealInfo) RealFieldInfo(); 258 break; 259 case FT_STRUCT: 260 new (&StructInfo) StructFieldInfo(); 261 break; 262 } 263 } 264 265 FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values) 266 : FT(FT_INTEGRAL) { 267 new (&IntInfo) IntFieldInfo(std::move(Values)); 268 } 269 270 FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues) 271 : FT(FT_REAL) { 272 new (&RealInfo) RealFieldInfo(std::move(AsIntValues)); 273 } 274 275 FieldInitializer::FieldInitializer( 276 std::vector<StructInitializer> &&Initializers, struct StructInfo Structure) 277 : FT(FT_STRUCT) { 278 new (&StructInfo) StructFieldInfo(std::move(Initializers), Structure); 279 } 280 281 FieldInitializer::FieldInitializer(const FieldInitializer &Initializer) 282 : FT(Initializer.FT) { 283 switch (FT) { 284 case FT_INTEGRAL: 285 new (&IntInfo) IntFieldInfo(Initializer.IntInfo); 286 break; 287 case FT_REAL: 288 new (&RealInfo) RealFieldInfo(Initializer.RealInfo); 289 break; 290 case FT_STRUCT: 291 new (&StructInfo) StructFieldInfo(Initializer.StructInfo); 292 break; 293 } 294 } 295 296 FieldInitializer::FieldInitializer(FieldInitializer &&Initializer) 297 : FT(Initializer.FT) { 298 switch (FT) { 299 case FT_INTEGRAL: 300 new (&IntInfo) IntFieldInfo(Initializer.IntInfo); 301 break; 302 case FT_REAL: 303 new (&RealInfo) RealFieldInfo(Initializer.RealInfo); 304 break; 305 case FT_STRUCT: 306 new (&StructInfo) StructFieldInfo(Initializer.StructInfo); 307 break; 308 } 309 } 310 311 FieldInitializer & 312 FieldInitializer::operator=(const FieldInitializer &Initializer) { 313 if (FT != Initializer.FT) { 314 switch (FT) { 315 case FT_INTEGRAL: 316 IntInfo.~IntFieldInfo(); 317 break; 318 case 
FT_REAL: 319 RealInfo.~RealFieldInfo(); 320 break; 321 case FT_STRUCT: 322 StructInfo.~StructFieldInfo(); 323 break; 324 } 325 } 326 FT = Initializer.FT; 327 switch (FT) { 328 case FT_INTEGRAL: 329 IntInfo = Initializer.IntInfo; 330 break; 331 case FT_REAL: 332 RealInfo = Initializer.RealInfo; 333 break; 334 case FT_STRUCT: 335 StructInfo = Initializer.StructInfo; 336 break; 337 } 338 return *this; 339 } 340 341 FieldInitializer &FieldInitializer::operator=(FieldInitializer &&Initializer) { 342 if (FT != Initializer.FT) { 343 switch (FT) { 344 case FT_INTEGRAL: 345 IntInfo.~IntFieldInfo(); 346 break; 347 case FT_REAL: 348 RealInfo.~RealFieldInfo(); 349 break; 350 case FT_STRUCT: 351 StructInfo.~StructFieldInfo(); 352 break; 353 } 354 } 355 FT = Initializer.FT; 356 switch (FT) { 357 case FT_INTEGRAL: 358 IntInfo = Initializer.IntInfo; 359 break; 360 case FT_REAL: 361 RealInfo = Initializer.RealInfo; 362 break; 363 case FT_STRUCT: 364 StructInfo = Initializer.StructInfo; 365 break; 366 } 367 return *this; 368 } 369 370 /// The concrete assembly parser instance. 371 // Note that this is a full MCAsmParser, not an MCAsmParserExtension! 372 // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc. 373 class MasmParser : public MCAsmParser { 374 private: 375 SourceMgr::DiagHandlerTy SavedDiagHandler; 376 void *SavedDiagContext; 377 std::unique_ptr<MCAsmParserExtension> PlatformParser; 378 379 /// This is the current buffer index we're lexing from as managed by the 380 /// SourceMgr object. 381 unsigned CurBuffer; 382 383 /// time of assembly 384 struct tm TM; 385 386 BitVector EndStatementAtEOFStack; 387 388 AsmCond TheCondState; 389 std::vector<AsmCond> TheCondStack; 390 391 /// maps directive names to handler methods in parser 392 /// extensions. Extensions register themselves in this map by calling 393 /// addDirectiveHandler. 394 StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap; 395 396 /// maps assembly-time variable names to variables. 
397 struct Variable { 398 enum RedefinableKind { NOT_REDEFINABLE, WARN_ON_REDEFINITION, REDEFINABLE }; 399 400 StringRef Name; 401 RedefinableKind Redefinable = REDEFINABLE; 402 bool IsText = false; 403 std::string TextValue; 404 }; 405 StringMap<Variable> Variables; 406 407 /// Stack of active struct definitions. 408 SmallVector<StructInfo, 1> StructInProgress; 409 410 /// Maps struct tags to struct definitions. 411 StringMap<StructInfo> Structs; 412 413 /// Maps data location names to types. 414 StringMap<AsmTypeInfo> KnownType; 415 416 /// Stack of active macro instantiations. 417 std::vector<MacroInstantiation*> ActiveMacros; 418 419 /// List of bodies of anonymous macros. 420 std::deque<MCAsmMacro> MacroLikeBodies; 421 422 /// Keeps track of how many .macro's have been instantiated. 423 unsigned NumOfMacroInstantiations; 424 425 /// The values from the last parsed cpp hash file line comment if any. 426 struct CppHashInfoTy { 427 StringRef Filename; 428 int64_t LineNumber; 429 SMLoc Loc; 430 unsigned Buf; 431 CppHashInfoTy() : LineNumber(0), Buf(0) {} 432 }; 433 CppHashInfoTy CppHashInfo; 434 435 /// The filename from the first cpp hash file line comment, if any. 436 StringRef FirstCppHashFilename; 437 438 /// List of forward directional labels for diagnosis at the end. 439 SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels; 440 441 /// AssemblerDialect. ~0U means unset value and use value provided by MAI. 442 /// Defaults to 1U, meaning Intel. 443 unsigned AssemblerDialect = 1U; 444 445 /// Are we parsing ms-style inline assembly? 446 bool ParsingMSInlineAsm = false; 447 448 // Current <...> expression depth. 449 unsigned AngleBracketDepth = 0U; 450 451 // Number of locals defined. 
452 uint16_t LocalCounter = 0; 453 454 public: 455 MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, 456 const MCAsmInfo &MAI, struct tm TM, unsigned CB = 0); 457 MasmParser(const MasmParser &) = delete; 458 MasmParser &operator=(const MasmParser &) = delete; 459 ~MasmParser() override; 460 461 bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; 462 463 void addDirectiveHandler(StringRef Directive, 464 ExtensionDirectiveHandler Handler) override { 465 ExtensionDirectiveMap[Directive] = Handler; 466 DirectiveKindMap.try_emplace(Directive, DK_HANDLER_DIRECTIVE); 467 } 468 469 void addAliasForDirective(StringRef Directive, StringRef Alias) override { 470 DirectiveKindMap[Directive] = DirectiveKindMap[Alias]; 471 } 472 473 /// @name MCAsmParser Interface 474 /// { 475 476 unsigned getAssemblerDialect() override { 477 if (AssemblerDialect == ~0U) 478 return MAI.getAssemblerDialect(); 479 else 480 return AssemblerDialect; 481 } 482 void setAssemblerDialect(unsigned i) override { 483 AssemblerDialect = i; 484 } 485 486 void Note(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt) override; 487 bool Warning(SMLoc L, const Twine &Msg, 488 SMRange Range = std::nullopt) override; 489 bool printError(SMLoc L, const Twine &Msg, 490 SMRange Range = std::nullopt) override; 491 492 enum ExpandKind { ExpandMacros, DoNotExpandMacros }; 493 const AsmToken &Lex(ExpandKind ExpandNextToken); 494 const AsmToken &Lex() override { return Lex(ExpandMacros); } 495 496 void setParsingMSInlineAsm(bool V) override { 497 ParsingMSInlineAsm = V; 498 // When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and 499 // hex integer literals. 
500 Lexer.setLexMasmIntegers(V); 501 } 502 bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; } 503 504 bool isParsingMasm() const override { return true; } 505 506 bool defineMacro(StringRef Name, StringRef Value) override; 507 508 bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override; 509 bool lookUpField(StringRef Base, StringRef Member, 510 AsmFieldInfo &Info) const override; 511 512 bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override; 513 514 bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs, 515 unsigned &NumInputs, 516 SmallVectorImpl<std::pair<void *, bool>> &OpDecls, 517 SmallVectorImpl<std::string> &Constraints, 518 SmallVectorImpl<std::string> &Clobbers, 519 const MCInstrInfo *MII, MCInstPrinter *IP, 520 MCAsmParserSemaCallback &SI) override; 521 522 bool parseExpression(const MCExpr *&Res); 523 bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) override; 524 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, 525 AsmTypeInfo *TypeInfo) override; 526 bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) override; 527 bool parseAbsoluteExpression(int64_t &Res) override; 528 529 /// Parse a floating point expression using the float \p Semantics 530 /// and set \p Res to the value. 531 bool parseRealValue(const fltSemantics &Semantics, APInt &Res); 532 533 /// Parse an identifier or string (as a quoted identifier) 534 /// and set \p Res to the identifier contents. 
535 enum IdentifierPositionKind { StandardPosition, StartOfStatement }; 536 bool parseIdentifier(StringRef &Res, IdentifierPositionKind Position); 537 bool parseIdentifier(StringRef &Res) override { 538 return parseIdentifier(Res, StandardPosition); 539 } 540 void eatToEndOfStatement() override; 541 542 bool checkForValidSection() override; 543 544 /// } 545 546 private: 547 bool expandMacros(); 548 const AsmToken peekTok(bool ShouldSkipSpace = true); 549 550 bool parseStatement(ParseStatementInfo &Info, 551 MCAsmParserSemaCallback *SI); 552 bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites); 553 bool parseCppHashLineFilenameComment(SMLoc L); 554 555 bool expandMacro(raw_svector_ostream &OS, StringRef Body, 556 ArrayRef<MCAsmMacroParameter> Parameters, 557 ArrayRef<MCAsmMacroArgument> A, 558 const std::vector<std::string> &Locals, SMLoc L); 559 560 /// Are we inside a macro instantiation? 561 bool isInsideMacroInstantiation() {return !ActiveMacros.empty();} 562 563 /// Handle entry to macro instantiation. 564 /// 565 /// \param M The macro. 566 /// \param NameLoc Instantiation location. 567 bool handleMacroEntry( 568 const MCAsmMacro *M, SMLoc NameLoc, 569 AsmToken::TokenKind ArgumentEndTok = AsmToken::EndOfStatement); 570 571 /// Handle invocation of macro function. 572 /// 573 /// \param M The macro. 574 /// \param NameLoc Invocation location. 575 bool handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc); 576 577 /// Handle exit from macro instantiation. 578 void handleMacroExit(); 579 580 /// Extract AsmTokens for a macro argument. 581 bool 582 parseMacroArgument(const MCAsmMacroParameter *MP, MCAsmMacroArgument &MA, 583 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement); 584 585 /// Parse all macro arguments for a given macro. 
586 bool 587 parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A, 588 AsmToken::TokenKind EndTok = AsmToken::EndOfStatement); 589 590 void printMacroInstantiations(); 591 592 bool expandStatement(SMLoc Loc); 593 594 void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, 595 SMRange Range = std::nullopt) const { 596 ArrayRef<SMRange> Ranges(Range); 597 SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); 598 } 599 static void DiagHandler(const SMDiagnostic &Diag, void *Context); 600 601 bool lookUpField(const StructInfo &Structure, StringRef Member, 602 AsmFieldInfo &Info) const; 603 604 /// Enter the specified file. This returns true on failure. 605 bool enterIncludeFile(const std::string &Filename); 606 607 /// Reset the current lexer position to that given by \p Loc. The 608 /// current token is not set; clients should ensure Lex() is called 609 /// subsequently. 610 /// 611 /// \param InBuffer If not 0, should be the known buffer id that contains the 612 /// location. 613 void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0, 614 bool EndStatementAtEOF = true); 615 616 /// Parse up to a token of kind \p EndTok and return the contents from the 617 /// current token up to (but not including) this token; the current token on 618 /// exit will be either this kind or EOF. Reads through instantiated macro 619 /// functions and text macros. 620 SmallVector<StringRef, 1> parseStringRefsTo(AsmToken::TokenKind EndTok); 621 std::string parseStringTo(AsmToken::TokenKind EndTok); 622 623 /// Parse up to the end of statement and return the contents from the current 624 /// token until the end of the statement; the current token on exit will be 625 /// either the EndOfStatement or EOF. 
626 StringRef parseStringToEndOfStatement() override; 627 628 bool parseTextItem(std::string &Data); 629 630 unsigned getBinOpPrecedence(AsmToken::TokenKind K, 631 MCBinaryExpr::Opcode &Kind); 632 633 bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); 634 bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); 635 bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); 636 637 // Generic (target and platform independent) directive parsing. 638 enum DirectiveKind { 639 DK_NO_DIRECTIVE, // Placeholder 640 DK_HANDLER_DIRECTIVE, 641 DK_ASSIGN, 642 DK_EQU, 643 DK_TEXTEQU, 644 DK_ASCII, 645 DK_ASCIZ, 646 DK_STRING, 647 DK_BYTE, 648 DK_SBYTE, 649 DK_WORD, 650 DK_SWORD, 651 DK_DWORD, 652 DK_SDWORD, 653 DK_FWORD, 654 DK_QWORD, 655 DK_SQWORD, 656 DK_DB, 657 DK_DD, 658 DK_DF, 659 DK_DQ, 660 DK_DW, 661 DK_REAL4, 662 DK_REAL8, 663 DK_REAL10, 664 DK_ALIGN, 665 DK_EVEN, 666 DK_ORG, 667 DK_ENDR, 668 DK_EXTERN, 669 DK_PUBLIC, 670 DK_COMM, 671 DK_COMMENT, 672 DK_INCLUDE, 673 DK_REPEAT, 674 DK_WHILE, 675 DK_FOR, 676 DK_FORC, 677 DK_IF, 678 DK_IFE, 679 DK_IFB, 680 DK_IFNB, 681 DK_IFDEF, 682 DK_IFNDEF, 683 DK_IFDIF, 684 DK_IFDIFI, 685 DK_IFIDN, 686 DK_IFIDNI, 687 DK_ELSEIF, 688 DK_ELSEIFE, 689 DK_ELSEIFB, 690 DK_ELSEIFNB, 691 DK_ELSEIFDEF, 692 DK_ELSEIFNDEF, 693 DK_ELSEIFDIF, 694 DK_ELSEIFDIFI, 695 DK_ELSEIFIDN, 696 DK_ELSEIFIDNI, 697 DK_ELSE, 698 DK_ENDIF, 699 700 DK_MACRO, 701 DK_EXITM, 702 DK_ENDM, 703 DK_PURGE, 704 DK_ERR, 705 DK_ERRB, 706 DK_ERRNB, 707 DK_ERRDEF, 708 DK_ERRNDEF, 709 DK_ERRDIF, 710 DK_ERRDIFI, 711 DK_ERRIDN, 712 DK_ERRIDNI, 713 DK_ERRE, 714 DK_ERRNZ, 715 DK_ECHO, 716 DK_STRUCT, 717 DK_UNION, 718 DK_ENDS, 719 DK_END, 720 DK_PUSHFRAME, 721 DK_PUSHREG, 722 DK_SAVEREG, 723 DK_SAVEXMM128, 724 DK_SETFRAME, 725 DK_RADIX, 726 }; 727 728 /// Maps directive name --> DirectiveKind enum, for directives parsed by this 729 /// class. 
730 StringMap<DirectiveKind> DirectiveKindMap; 731 732 bool isMacroLikeDirective(); 733 734 // Generic (target and platform independent) directive parsing. 735 enum BuiltinSymbol { 736 BI_NO_SYMBOL, // Placeholder 737 BI_DATE, 738 BI_TIME, 739 BI_VERSION, 740 BI_FILECUR, 741 BI_FILENAME, 742 BI_LINE, 743 BI_CURSEG, 744 BI_CPU, 745 BI_INTERFACE, 746 BI_CODE, 747 BI_DATA, 748 BI_FARDATA, 749 BI_WORDSIZE, 750 BI_CODESIZE, 751 BI_DATASIZE, 752 BI_MODEL, 753 BI_STACK, 754 }; 755 756 /// Maps builtin name --> BuiltinSymbol enum, for builtins handled by this 757 /// class. 758 StringMap<BuiltinSymbol> BuiltinSymbolMap; 759 760 const MCExpr *evaluateBuiltinValue(BuiltinSymbol Symbol, SMLoc StartLoc); 761 762 std::optional<std::string> evaluateBuiltinTextMacro(BuiltinSymbol Symbol, 763 SMLoc StartLoc); 764 765 // Generic (target and platform independent) directive parsing. 766 enum BuiltinFunction { 767 BI_NO_FUNCTION, // Placeholder 768 BI_CATSTR, 769 }; 770 771 /// Maps builtin name --> BuiltinFunction enum, for builtins handled by this 772 /// class. 773 StringMap<BuiltinFunction> BuiltinFunctionMap; 774 775 bool evaluateBuiltinMacroFunction(BuiltinFunction Function, StringRef Name, 776 std::string &Res); 777 778 // ".ascii", ".asciz", ".string" 779 bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); 780 781 // "byte", "word", ... 
782 bool emitIntValue(const MCExpr *Value, unsigned Size); 783 bool parseScalarInitializer(unsigned Size, 784 SmallVectorImpl<const MCExpr *> &Values, 785 unsigned StringPadLength = 0); 786 bool parseScalarInstList( 787 unsigned Size, SmallVectorImpl<const MCExpr *> &Values, 788 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 789 bool emitIntegralValues(unsigned Size, unsigned *Count = nullptr); 790 bool addIntegralField(StringRef Name, unsigned Size); 791 bool parseDirectiveValue(StringRef IDVal, unsigned Size); 792 bool parseDirectiveNamedValue(StringRef TypeName, unsigned Size, 793 StringRef Name, SMLoc NameLoc); 794 795 // "real4", "real8", "real10" 796 bool emitRealValues(const fltSemantics &Semantics, unsigned *Count = nullptr); 797 bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); 798 bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, 799 size_t Size); 800 bool parseRealInstList( 801 const fltSemantics &Semantics, SmallVectorImpl<APInt> &Values, 802 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 803 bool parseDirectiveNamedRealValue(StringRef TypeName, 804 const fltSemantics &Semantics, 805 unsigned Size, StringRef Name, 806 SMLoc NameLoc); 807 808 bool parseOptionalAngleBracketOpen(); 809 bool parseAngleBracketClose(const Twine &Msg = "expected '>'"); 810 811 bool parseFieldInitializer(const FieldInfo &Field, 812 FieldInitializer &Initializer); 813 bool parseFieldInitializer(const FieldInfo &Field, 814 const IntFieldInfo &Contents, 815 FieldInitializer &Initializer); 816 bool parseFieldInitializer(const FieldInfo &Field, 817 const RealFieldInfo &Contents, 818 FieldInitializer &Initializer); 819 bool parseFieldInitializer(const FieldInfo &Field, 820 const StructFieldInfo &Contents, 821 FieldInitializer &Initializer); 822 823 bool parseStructInitializer(const StructInfo &Structure, 824 StructInitializer &Initializer); 825 bool parseStructInstList( 826 const StructInfo 
&Structure, std::vector<StructInitializer> &Initializers, 827 const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); 828 829 bool emitFieldValue(const FieldInfo &Field); 830 bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents); 831 bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents); 832 bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents); 833 834 bool emitFieldInitializer(const FieldInfo &Field, 835 const FieldInitializer &Initializer); 836 bool emitFieldInitializer(const FieldInfo &Field, 837 const IntFieldInfo &Contents, 838 const IntFieldInfo &Initializer); 839 bool emitFieldInitializer(const FieldInfo &Field, 840 const RealFieldInfo &Contents, 841 const RealFieldInfo &Initializer); 842 bool emitFieldInitializer(const FieldInfo &Field, 843 const StructFieldInfo &Contents, 844 const StructFieldInfo &Initializer); 845 846 bool emitStructInitializer(const StructInfo &Structure, 847 const StructInitializer &Initializer); 848 849 // User-defined types (structs, unions): 850 bool emitStructValues(const StructInfo &Structure, unsigned *Count = nullptr); 851 bool addStructField(StringRef Name, const StructInfo &Structure); 852 bool parseDirectiveStructValue(const StructInfo &Structure, 853 StringRef Directive, SMLoc DirLoc); 854 bool parseDirectiveNamedStructValue(const StructInfo &Structure, 855 StringRef Directive, SMLoc DirLoc, 856 StringRef Name); 857 858 // "=", "equ", "textequ" 859 bool parseDirectiveEquate(StringRef IDVal, StringRef Name, 860 DirectiveKind DirKind, SMLoc NameLoc); 861 862 bool parseDirectiveOrg(); // "org" 863 864 bool emitAlignTo(int64_t Alignment); 865 bool parseDirectiveAlign(); // "align" 866 bool parseDirectiveEven(); // "even" 867 868 // macro directives 869 bool parseDirectivePurgeMacro(SMLoc DirectiveLoc); 870 bool parseDirectiveExitMacro(SMLoc DirectiveLoc, StringRef Directive, 871 std::string &Value); 872 bool parseDirectiveEndMacro(StringRef Directive); 873 
bool parseDirectiveMacro(StringRef Name, SMLoc NameLoc);

// Structure definition directives. The "Nested" variants take no Name/NameLoc.
bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind,
                          StringRef Name, SMLoc NameLoc);
bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind);
bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc);
bool parseDirectiveNestedEnds();

bool parseDirectiveExtern();

/// Parse a directive like ".globl" which accepts a single symbol (which
/// should be a label or an external).
bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr);

bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"

bool parseDirectiveComment(SMLoc DirectiveLoc); // "comment"

bool parseDirectiveInclude(); // "include"

// Conditional assembly. These are dispatched from parseStatement before the
// "skip ignored code" check, so they run even inside a false conditional
// block. Where a handler serves two directives, the bool flag selects which
// one: parseStatement passes true for the first-named directive in each
// comment below (e.g. true for "ifb", false for "ifnb").

// "if" or "ife"
bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
// "ifb" or "ifnb", depending on ExpectBlank.
bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// "ifidn", "ifdif", "ifidni", or "ifdifi", depending on ExpectEqual and
// CaseInsensitive. ExpectEqual is true for the "ifidn*" forms and false for
// the "ifdif*" forms; CaseInsensitive is true for the forms ending in 'i'.
bool parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                         bool CaseInsensitive);
// "ifdef" or "ifndef", depending on expect_defined
bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
// "elseif" or "elseife"
bool parseDirectiveElseIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
// "elseifb" or "elseifnb", depending on ExpectBlank.
bool parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".elseifdef" or ".elseifndef", depending on expect_defined
// NOTE(review): the leading '.' here is inconsistent with the sibling
// comments ("elseif", "elseifb", ...); verify the spelling against the
// directive table.
bool parseDirectiveElseIfdef(SMLoc DirectiveLoc, bool expect_defined);
// "elseifidn", "elseifdif", "elseifidni", or "elseifdifi", depending on
// ExpectEqual and CaseInsensitive.
bool parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                             bool CaseInsensitive);
bool parseDirectiveElse(SMLoc DirectiveLoc);  // "else"
bool parseDirectiveEndIf(SMLoc DirectiveLoc); // "endif"

// String parsing hooks; both override virtuals declared in a base class and
// deliver the parsed text through Data.
bool parseEscapedString(std::string &Data) override;
bool parseAngleBracketString(std::string &Data) override;

// Macro-like directives
MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc);
// Two overloads: the second additionally takes an explicit ExitLoc.
// NOTE(review): presumably the first overload derives the exit location
// itself -- confirm against the definitions.
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
                              raw_svector_ostream &OS);
void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
                              SMLoc ExitLoc, raw_svector_ostream &OS);
bool parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Directive);
bool parseDirectiveFor(SMLoc DirectiveLoc, StringRef Directive);
bool parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive);
bool parseDirectiveWhile(SMLoc DirectiveLoc);

// "_emit" or "__emit"
bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info,
                          size_t Len);

// "align"
bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info);

// "end"
bool parseDirectiveEnd(SMLoc DirectiveLoc);

// ".err"
bool parseDirectiveError(SMLoc DirectiveLoc);
// ".errb" or ".errnb", depending on ExpectBlank.
bool parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".errdef" or ".errndef", depending on ExpectDefined.
bool parseDirectiveErrorIfdef(SMLoc DirectiveLoc, bool ExpectDefined);
// ".erridn", ".errdif", ".erridni", or ".errdifi", depending on ExpectEqual
// and CaseInsensitive.
bool parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual,
                              bool CaseInsensitive);
// ".erre" or ".errnz", depending on ExpectZero.
bool parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero);

// ".radix"
bool parseDirectiveRadix(SMLoc DirectiveLoc);

// "echo"
bool parseDirectiveEcho(SMLoc DirectiveLoc);

// One-time table setup; both are called from the constructor.
void initializeDirectiveKindMap();
void initializeBuiltinSymbolMaps();
};

} // end anonymous namespace

namespace llvm {

// Defined in another translation unit.
extern cl::opt<unsigned> AsmMacroMaxNestingDepth;

// Factory for the COFF platform parser extension; defined elsewhere.
extern MCAsmParserExtension *createCOFFMasmParser();

} // end namespace llvm

enum { DEFAULT_ADDRSPACE = 0 };

/// Construct a parser reading from buffer \p CB of \p SM, or from the main
/// file when \p CB is 0.
///
/// Installs this parser's DiagHandler on the source manager (remembering the
/// previous handler and context so the destructor can restore them), points
/// the lexer at the starting buffer, creates the platform parser, and
/// populates the directive and built-in symbol tables. Only COFF output is
/// supported; any other object file type is a fatal error.
MasmParser::MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
                       const MCAsmInfo &MAI, struct tm TM, unsigned CB)
    : MCAsmParser(Ctx, Out, SM, MAI), CurBuffer(CB ? CB : SM.getMainFileID()),
      TM(TM) {
  HadError = false;
  // Save the old handler.
  SavedDiagHandler = SrcMgr.getDiagHandler();
  SavedDiagContext = SrcMgr.getDiagContext();
  // Set our own handler which calls the saved handler.
  SrcMgr.setDiagHandler(DiagHandler, this);
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
  // One stack entry per buffer the lexer has entered; the starting buffer
  // uses 'true'.
  EndStatementAtEOFStack.push_back(true);

  // Initialize the platform / file format parser.
  switch (Ctx.getObjectFileType()) {
  case MCContext::IsCOFF:
    PlatformParser.reset(createCOFFMasmParser());
    break;
  default:
    report_fatal_error("llvm-ml currently supports only COFF output.");
    break;
  }

  // The platform parser is initialized between the two table setups; keep
  // this order.
  initializeDirectiveKindMap();
  PlatformParser->Initialize(*this);
  initializeBuiltinSymbolMaps();

  NumOfMacroInstantiations = 0;
}

/// Restore the diagnostic handler that was active before construction.
MasmParser::~MasmParser() {
  // Leftover macro instantiations are only acceptable if parsing failed.
  assert((HadError || ActiveMacros.empty()) &&
         "Unexpected active macro instantiation!");

  // Restore the saved diagnostics handler and context for use during
  // finalization.
  SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
}

/// Emit one "while in macro instantiation" note per entry of ActiveMacros,
/// walking the vector from its last element to its first.
void MasmParser::printMacroInstantiations() {
  // Print the active macro instantiation stack.
  for (std::vector<MacroInstantiation *>::const_reverse_iterator
           it = ActiveMacros.rbegin(),
           ie = ActiveMacros.rend();
       it != ie; ++it)
    printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
                 "while in macro instantiation");
}

/// Print a note at \p L, followed by the macro instantiation stack. Any
/// pending errors are flushed first so that they appear before the note.
void MasmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
  printPendingErrors();
  printMessage(L, SourceMgr::DK_Note, Msg, Range);
  printMacroInstantiations();
}

/// Report a warning at \p L.
///
/// Does nothing when the target options set MCNoWarn; is forwarded to Error()
/// (and returns its result) when they set MCFatalWarnings. Otherwise prints
/// the warning plus the macro instantiation stack and returns false.
bool MasmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
  if (getTargetParser().getTargetOptions().MCNoWarn)
    return false;
  if (getTargetParser().getTargetOptions().MCFatalWarnings)
    return Error(L, Msg, Range);
  printMessage(L, SourceMgr::DK_Warning, Msg, Range);
  printMacroInstantiations();
  return false;
}

/// Print an error at \p L immediately, record that an error occurred, and
/// print the macro instantiation stack. Always returns true.
bool MasmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
  HadError = true;
  printMessage(L, SourceMgr::DK_Error, Msg, Range);
  printMacroInstantiations();
  return true;
}

/// Switch the lexer to the include file \p Filename.
///
/// \returns true if the source manager could not add the file (it returned
/// buffer id 0), false once the lexer has been pointed at the new buffer.
bool MasmParser::enterIncludeFile(const std::string &Filename) {
  std::string IncludedFile;
  unsigned NewBuf =
      SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
  if (!NewBuf)
    return true;

  CurBuffer = NewBuf;
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
  // Matching pop_back happens when the end of this buffer is reached (see
  // Lex / peekTok / eatToEndOfStatement / parseStringRefsTo).
  EndStatementAtEOFStack.push_back(true);
  return false;
}

/// Reposition the lexer at \p Loc.
///
/// \p InBuffer names the buffer containing \p Loc; pass 0 to have the source
/// manager look it up. \p EndStatementAtEOF is forwarded to the lexer.
void MasmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer,
                           bool EndStatementAtEOF) {
  CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
                  Loc.getPointer(), EndStatementAtEOF);
}

/// Try to expand the current identifier token as a macro function, built-in
/// text macro, built-in macro function, or text variable (checked in that
/// order, by lower-cased name).
///
/// \returns false if an expansion was performed (or a macro function
/// invocation was attempted), true if the identifier is not expandable or an
/// error occurred while evaluating a built-in macro function.
bool MasmParser::expandMacros() {
  const AsmToken &Tok = getTok();
  const std::string IDLower = Tok.getIdentifier().lower();

  const llvm::MCAsmMacro *M = getContext().lookupMacro(IDLower);
  if (M && M->IsFunction && peekTok().is(AsmToken::LParen)) {
    // This is a macro function invocation; expand it in place.
    const SMLoc MacroLoc = Tok.getLoc();
    const StringRef MacroId = Tok.getIdentifier();
    Lexer.Lex();
    if (handleMacroInvocation(M, MacroLoc)) {
      // The invocation failed: put an Error token carrying the macro name in
      // front of the lexer and make it current.
      Lexer.UnLex(AsmToken(AsmToken::Error, MacroId));
      Lexer.Lex();
    }
    return false;
  }

  std::optional<std::string> ExpandedValue;

  if (auto BuiltinIt = BuiltinSymbolMap.find(IDLower);
      BuiltinIt != BuiltinSymbolMap.end()) {
    // NOTE(review): the assignment leaves ExpandedValue empty if the builtin
    // has no text form, in which case we fall out below as "not expandable"
    // -- confirm evaluateBuiltinTextMacro's return type.
    ExpandedValue =
        evaluateBuiltinTextMacro(BuiltinIt->getValue(), Tok.getLoc());
  } else if (auto BuiltinFuncIt = BuiltinFunctionMap.find(IDLower);
             BuiltinFuncIt != BuiltinFunctionMap.end()) {
    StringRef Name;
    if (parseIdentifier(Name)) {
      return true;
    }
    std::string Res;
    if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), Name, Res)) {
      return true;
    }
    ExpandedValue = Res;
  } else if (auto VarIt = Variables.find(IDLower);
             VarIt != Variables.end() && VarIt->getValue().IsText) {
    ExpandedValue = VarIt->getValue().TextValue;
  }

  if (!ExpandedValue)
    return true;
  // Lex the replacement text from its own source buffer.
  std::unique_ptr<MemoryBuffer> Instantiation =
      MemoryBuffer::getMemBufferCopy(*ExpandedValue, "<instantiation>");

  // Jump to the macro instantiation and prime the lexer.
  CurBuffer =
      SrcMgr.AddNewSourceBuffer(std::move(Instantiation), Tok.getEndLoc());
  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr,
                  /*EndStatementAtEOF=*/false);
  EndStatementAtEOFStack.push_back(false);
  Lexer.Lex();
  return false;
}

/// Advance to the next token and return it.
///
/// On top of the raw lexer this: reports a pending lexer error, forwards
/// comments to the streamer when the target preserves them, expands
/// identifiers via expandMacros() when \p ExpandNextToken is ExpandMacros,
/// skips backslash line continuations, and transparently resumes the parent
/// buffer when the end of an included buffer is reached.
const AsmToken &MasmParser::Lex(ExpandKind ExpandNextToken) {
  if (Lexer.getTok().is(AsmToken::Error))
    Error(Lexer.getErrLoc(), Lexer.getErr());
  bool StartOfStatement = false;

  // If the token we are leaving is an end-of-statement, the next token starts
  // a new statement.
  if (getTok().is(AsmToken::EndOfStatement)) {
    // An end-of-statement whose text does not begin with a newline character
    // carries a line comment; output it if comments are preserved.
    if (!getTok().getString().empty() && getTok().getString().front() != '\n' &&
        getTok().getString().front() != '\r' && MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(getTok().getString()));
    StartOfStatement = true;
  }

  const AsmToken *tok = &Lexer.Lex();

  // Keep expanding while the current token is an identifier; expandMacros()
  // returns true when there is nothing (more) to expand.
  while (ExpandNextToken == ExpandMacros && tok->is(AsmToken::Identifier)) {
    if (StartOfStatement) {
      AsmToken NextTok;
      MutableArrayRef<AsmToken> Buf(NextTok);
      size_t ReadCount = Lexer.peekTokens(Buf);
      if (ReadCount && NextTok.is(AsmToken::Identifier) &&
          (NextTok.getString().equals_insensitive("equ") ||
           NextTok.getString().equals_insensitive("textequ"))) {
        // This looks like an EQU or TEXTEQU directive; don't expand the
        // identifier, allowing for redefinitions.
        break;
      }
    }
    if (expandMacros())
      break;
  }

  // Parse comments here to be deferred until end of next statement.
  while (tok->is(AsmToken::Comment)) {
    if (MAI.preserveAsmComments())
      Out.addExplicitComment(Twine(tok->getString()));
    tok = &Lexer.Lex();
  }

  // Recognize and bypass line continuations.
  while (tok->is(AsmToken::BackSlash) &&
         peekTok().is(AsmToken::EndOfStatement)) {
    // Eat both the backslash and the end of statement.
    Lexer.Lex();
    tok = &Lexer.Lex();
  }

  if (tok->is(AsmToken::Eof)) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      // Note: the recursive call uses Lex()'s default ExpandNextToken, not
      // the value passed to this call.
      return Lex();
    }
    // End of the outermost buffer: the stack must now be empty.
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  return *tok;
}

/// Return a copy of the token after the current one without consuming it.
///
/// If nothing could be peeked and the current buffer has a parent include
/// location, the lexer is moved back to the parent buffer (a side effect that
/// persists) and the peek is retried there.
const AsmToken MasmParser::peekTok(bool ShouldSkipSpace) {
  AsmToken Tok;

  MutableArrayRef<AsmToken> Buf(Tok);
  size_t ReadCount = Lexer.peekTokens(Buf, ShouldSkipSpace);

  if (ReadCount == 0) {
    // If this is the end of an included file, pop the parent file off the
    // include stack.
    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
    if (ParentIncludeLoc != SMLoc()) {
      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      return peekTok(ShouldSkipSpace);
    }
    EndStatementAtEOFStack.pop_back();
    assert(EndStatementAtEOFStack.empty());
  }

  // NOTE(review): if ReadCount was 0 and there was no parent include, control
  // reaches this assert with ReadCount == 0, so it fires in asserts-enabled
  // builds and a default-constructed token is returned otherwise. Presumably
  // peekTokens never returns 0 at the end of the outermost buffer -- confirm.
  assert(ReadCount == 1);
  return Tok;
}

/// Parse the whole input, one statement at a time.
///
/// \param NoInitialTextSection  skip creating the initial section.
/// \param NoFinalize  skip the undefined-directional-label diagnostics and do
///        not finish the output stream.
/// \returns true if any error was reported by this parser or recorded on the
/// MCContext.
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
  // Create the initial section, if requested.
  if (!NoInitialTextSection)
    Out.initSections(false, getTargetParser().getSTI());

  // Prime the lexer.
  Lex();

  HadError = false;
  // Remember the conditional-assembly state so unbalanced conditionals can be
  // detected after the loop.
  AsmCond StartingCondState = TheCondState;
  SmallVector<AsmRewrite, 4> AsmStrRewrites;

  // While we have input, parse each statement. An Eof token only ends the
  // loop when the current buffer has no parent include location.
  while (Lexer.isNot(AsmToken::Eof) ||
         SrcMgr.getParentIncludeLoc(CurBuffer) != SMLoc()) {
    // Skip through the EOF at the end of an inclusion.
    if (Lexer.is(AsmToken::Eof))
      Lex();

    ParseStatementInfo Info(&AsmStrRewrites);
    bool HasError = parseStatement(Info, nullptr);

    // If we have a Lexer Error we are on an Error Token. Load in Lexer Error
    // for printing ErrMsg via Lex() only if no (presumably better) parser error
    // exists.
    if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error))
      Lex();

    // parseStatement returned true so may need to emit an error.
    printPendingErrors();

    // Skipping to the next line if needed.
    if (HasError && !getLexer().justConsumedEOL())
      eatToEndOfStatement();
  }

  printPendingErrors();

  // All errors should have been emitted.
  assert(!hasPendingError() && "unexpected error from parseStatement");

  if (TheCondState.TheCond != StartingCondState.TheCond ||
      TheCondState.Ignore != StartingCondState.Ignore)
    printError(getTok().getLoc(), "unmatched .ifs or .elses");

  // Diagnose directional labels that were referenced but never defined. Only
  // do this if we're finalizing, as otherwise we won't necessarily have seen
  // everything yet.
  if (!NoFinalize) {
    // Temporary symbols like the ones for directional jumps don't go in the
    // symbol table. They also need to be diagnosed in all (final) cases.
    for (std::tuple<SMLoc, CppHashInfoTy, MCSymbol *> &LocSym : DirLabels) {
      if (std::get<2>(LocSym)->isUndefined()) {
        // Reset the state of any "# line file" directives we've seen to the
        // context as it was at the diagnostic site.
        CppHashInfo = std::get<1>(LocSym);
        printError(std::get<0>(LocSym), "directional label undefined");
      }
    }
  }

  // Finalize the output stream if there are no errors and if the client wants
  // us to.
  if (!HadError && !NoFinalize)
    Out.finish(Lexer.getLoc());

  return HadError || getContext().hadError();
}

/// Ensure the streamer has a current section before emitting.
///
/// When not parsing MS inline asm and the streamer lacks a current fragment
/// or section, the default sections are initialized (so later statements can
/// proceed) and an error is reported.
///
/// \returns true if the error was reported, false if a section was in place.
bool MasmParser::checkForValidSection() {
  if (!ParsingMSInlineAsm && !(getStreamer().getCurrentFragment() &&
                               getStreamer().getCurrentSectionOnly())) {
    Out.initSections(false, getTargetParser().getSTI());
    return Error(getTok().getLoc(),
                 "expected section directive before assembly directive");
  }
  return false;
}

/// Discard tokens up to and including the next end-of-statement.
///
/// Uses the raw lexer, so no macro expansion or comment forwarding happens.
/// If the end of an included buffer is reached first, scanning continues in
/// the parent buffer; at the end of the outermost buffer it stops.
void MasmParser::eatToEndOfStatement() {
  while (Lexer.isNot(AsmToken::EndOfStatement)) {
    if (Lexer.is(AsmToken::Eof)) {
      SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      if (ParentIncludeLoc == SMLoc()) {
        break;
      }

      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
    }

    Lexer.Lex();
  }

  // Eat EOL.
  if (Lexer.is(AsmToken::EndOfStatement))
    Lexer.Lex();
}

/// Collect the raw source text from the current token up to (not including)
/// the next \p EndTok token.
///
/// The text is returned as one StringRef per buffer it spans: whenever the
/// end of an included buffer is hit, the piece gathered so far is recorded
/// and collection restarts in the parent buffer. The returned references
/// point into the source buffers; they are not copies.
SmallVector<StringRef, 1>
MasmParser::parseStringRefsTo(AsmToken::TokenKind EndTok) {
  SmallVector<StringRef, 1> Refs;
  const char *Start = getTok().getLoc().getPointer();
  while (Lexer.isNot(EndTok)) {
    if (Lexer.is(AsmToken::Eof)) {
      SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      if (ParentIncludeLoc == SMLoc()) {
        break;
      }
      // Close off the piece that lives in the buffer we are leaving.
      Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);

      EndStatementAtEOFStack.pop_back();
      jumpToLoc(ParentIncludeLoc, 0, EndStatementAtEOFStack.back());
      Lexer.Lex();
      Start = getTok().getLoc().getPointer();
    } else {
      Lexer.Lex();
    }
  }
  // Final piece: up to the start of the EndTok (or Eof) token.
  Refs.emplace_back(Start, getTok().getLoc().getPointer() - Start);
  return Refs;
}

/// Same as parseStringRefsTo(), but with the pieces concatenated into a
/// single owned string.
std::string MasmParser::parseStringTo(AsmToken::TokenKind EndTok) {
  SmallVector<StringRef, 1> Refs = parseStringRefsTo(EndTok);
  std::string Str;
  for (StringRef S : Refs) {
    Str.append(S.str());
  }
  return Str;
}

/// Return the raw source text from the current token up to the next
/// end-of-statement or Eof token, consuming the tokens in between. Unlike
/// parseStringRefsTo(), this does not continue into a parent buffer.
StringRef MasmParser::parseStringToEndOfStatement() {
  const char *Start = getTok().getLoc().getPointer();

  while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
    Lexer.Lex();

  const char *End = getTok().getLoc().getPointer();
  return StringRef(Start, End - Start);
}

/// Parse a paren expression and return it.
/// NOTE: This assumes the leading '(' has already been consumed.
///
/// parenexpr ::= expr)
///
/// EndLoc is set to the end of the token expected to be the ')', before that
/// token is checked. Returns true on error.
bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  if (parseExpression(Res))
    return true;
  EndLoc = Lexer.getTok().getEndLoc();
  return parseRParen();
}

/// Parse a bracket expression and return it.
/// NOTE: This assumes the leading '[' has already been consumed.
///
/// bracketexpr ::= expr]
///
/// EndLoc is set to the end of the token expected to be the ']', before that
/// token is checked. Returns true on error.
bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  if (parseExpression(Res))
    return true;
  EndLoc = getTok().getEndLoc();
  if (parseToken(AsmToken::RBrac, "expected ']' in brackets expression"))
    return true;
  return false;
}

/// Parse a primary expression and return it.
///  primaryexpr ::= (parenexpr
///  primaryexpr ::= [bracketexpr   (only if the platform parser allows it)
///  primaryexpr ::= symbol
///  primaryexpr ::= number
///  primaryexpr ::= '.'
///  primaryexpr ::= '$'            (only if the target treats '$' as the PC)
///  primaryexpr ::= !,~,+,-,'not' primaryexpr
///  primaryexpr ::= string
///          (a string is interpreted as a 64-bit number in big-endian base-256)
///
/// \param Res  receives the parsed expression on success.
/// \param EndLoc  receives the end location of the expression on most paths.
///        NOTE(review): the "@b"/"@f" and string paths return without
///        assigning it.
/// \param TypeInfo  if non-null, receives the type of a symbol reference when
///        one is known (from a field lookup or from KnownType).
/// \returns true on error.
bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
                                  AsmTypeInfo *TypeInfo) {
  SMLoc FirstTokenLoc = getLexer().getLoc();
  AsmToken::TokenKind FirstTokenKind = Lexer.getKind();
  switch (FirstTokenKind) {
  default:
    return TokError("unknown token in expression");
  // If we have an error assume that we've already handled it.
  case AsmToken::Error:
    return true;
  case AsmToken::Exclaim:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Dollar:
  case AsmToken::At:
  case AsmToken::Identifier: {
    StringRef Identifier;
    if (parseIdentifier(Identifier)) {
      // We may have failed but $ may be a valid token.
      if (getTok().is(AsmToken::Dollar)) {
        if (Lexer.getMAI().getDollarIsPC()) {
          Lex();
          // This is a '$' reference, which references the current PC. Emit a
          // temporary label to the streamer and refer to it.
          MCSymbol *Sym = Ctx.createTempSymbol();
          Out.emitLabel(Sym);
          Res = MCSymbolRefExpr::create(Sym, getContext());
          EndLoc = FirstTokenLoc;
          return false;
        }
        return Error(FirstTokenLoc, "invalid token in expression");
      }
      // Any other failure falls through with Identifier empty and is
      // reported by the "expected a symbol reference" check below.
    }
    // Parse named bitwise negation.
    if (Identifier.equals_insensitive("not")) {
      if (parsePrimaryExpr(Res, EndLoc, nullptr))
        return true;
      Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
      return false;
    }
    // Parse directional local label references.
    if (Identifier.equals_insensitive("@b") ||
        Identifier.equals_insensitive("@f")) {
      bool Before = Identifier.equals_insensitive("@b");
      MCSymbol *Sym = getContext().getDirectionalLocalSymbol(0, Before);
      // A backward reference needs an already-defined label; a forward
      // reference may legitimately still be undefined here.
      if (Before && Sym->isUndefined())
        return Error(FirstTokenLoc, "Expected @@ label before @B reference");
      Res = MCSymbolRefExpr::create(Sym, getContext());
      return false;
    }

    EndLoc = SMLoc::getFromPointer(Identifier.end());

    // This is a symbol reference.
    StringRef SymbolName = Identifier;
    if (SymbolName.empty())
      return Error(getLexer().getLoc(), "expected a symbol reference");

    // Find the field offset if used. "A.B..." is split at the first '.'; the
    // part before it becomes the symbol name.
    AsmFieldInfo Info;
    auto Split = SymbolName.split('.');
    if (Split.second.empty()) {
    } else {
      SymbolName = Split.first;
      // NOTE(review): lookUpField appears to return true on failure (the
      // first branch retries with a different base) -- confirm.
      if (lookUpField(SymbolName, Split.second, Info)) {
        // Retry, treating the text after the first '.' as "Base.Member". The
        // result of this second lookup is not checked.
        std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
        StringRef Base = BaseMember.first, Member = BaseMember.second;
        lookUpField(Base, Member, Info);
      } else if (Structs.count(SymbolName.lower())) {
        // This is actually a reference to a field offset.
        Res = MCConstantExpr::create(Info.Offset, getContext());
        return false;
      }
    }

    MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
    if (!Sym) {
      // If this is a built-in numeric value, treat it as a constant.
      auto BuiltinIt = BuiltinSymbolMap.find(SymbolName.lower());
      const BuiltinSymbol Symbol = (BuiltinIt == BuiltinSymbolMap.end())
                                       ? BI_NO_SYMBOL
                                       : BuiltinIt->getValue();
      if (Symbol != BI_NO_SYMBOL) {
        const MCExpr *Value = evaluateBuiltinValue(Symbol, FirstTokenLoc);
        if (Value) {
          Res = Value;
          return false;
        }
      }

      // Variables use case-insensitive symbol names; if this is a variable, we
      // find the symbol using its canonical name.
      auto VarIt = Variables.find(SymbolName.lower());
      if (VarIt != Variables.end())
        SymbolName = VarIt->second.Name;
      Sym = getContext().parseSymbol(SymbolName);
    }

    // If this is an absolute variable reference, substitute it now to preserve
    // semantics in the face of reassignment.
    if (Sym->isVariable()) {
      auto V = Sym->getVariableValue();
      // Constants are always inlined; target expressions decide for
      // themselves.
      bool DoInline = isa<MCConstantExpr>(V);
      if (auto TV = dyn_cast<MCTargetExpr>(V))
        DoInline = TV->inlineAssignedExpr();
      if (DoInline) {
        Res = Sym->getVariableValue();
        return false;
      }
    }

    // Otherwise create a symbol ref, plus the field offset if there is one.
    const MCExpr *SymRef =
        MCSymbolRefExpr::create(Sym, getContext(), FirstTokenLoc);
    if (Info.Offset) {
      Res = MCBinaryExpr::create(
          MCBinaryExpr::Add, SymRef,
          MCConstantExpr::create(Info.Offset, getContext()), getContext());
    } else {
      Res = SymRef;
    }
    if (TypeInfo) {
      // No type from a field lookup: fall back to the type recorded for the
      // whole identifier (lower-cased), if any.
      if (Info.Type.Name.empty()) {
        auto TypeIt = KnownType.find(Identifier.lower());
        if (TypeIt != KnownType.end()) {
          Info.Type = TypeIt->second;
        }
      }

      *TypeInfo = Info.Type;
    }
    return false;
  }
  case AsmToken::BigNum:
    return TokError("literal value out of range for directive");
  case AsmToken::Integer: {
    int64_t IntVal = getTok().getIntVal();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::String: {
    // MASM strings (used as constants) are interpreted as big-endian base-256.
    SMLoc ValueLoc = getTok().getLoc();
    std::string Value;
    if (parseEscapedString(Value))
      return true;
    // More than 8 characters cannot fit in the 64-bit accumulator below.
    if (Value.size() > 8)
      return Error(ValueLoc, "literal value out of range");
    uint64_t IntValue = 0;
    // Iterate as unsigned char so characters >= 0x80 are not sign-extended.
    for (const unsigned char CharVal : Value)
      IntValue = (IntValue << 8) | CharVal;
    Res = MCConstantExpr::create(IntValue, getContext());
    return false;
  }
  case AsmToken::Real: {
    // The literal is parsed as an IEEE double and its bit pattern is used as
    // the integer value of the expression.
    APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
    Res = MCConstantExpr::create(IntVal, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat token.
    return false;
  }
  case AsmToken::Dot: {
    // This is a '.' reference, which references the current PC. Emit a
    // temporary label to the streamer and refer to it.
    MCSymbol *Sym = Ctx.createTempSymbol();
    Out.emitLabel(Sym);
    Res = MCSymbolRefExpr::create(Sym, getContext());
    EndLoc = Lexer.getTok().getEndLoc();
    Lex(); // Eat the '.'.
    return false;
  }
  case AsmToken::LParen:
    Lex(); // Eat the '('.
    return parseParenExpr(Res, EndLoc);
  case AsmToken::LBrac:
    if (!PlatformParser->HasBracketExpressions())
      return TokError("brackets expression not supported on this target");
    Lex(); // Eat the '['.
    return parseBracketExpr(Res, EndLoc);
  case AsmToken::Minus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Plus:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
    return false;
  case AsmToken::Tilde:
    Lex(); // Eat the operator.
    if (parsePrimaryExpr(Res, EndLoc, nullptr))
      return true;
    Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
    return false;
  }
}

/// Convenience overload for callers that do not need the end location.
bool MasmParser::parseExpression(const MCExpr *&Res) {
  SMLoc EndLoc;
  return parseExpression(Res, EndLoc);
}

/// Determine whether the text starting at StrLoc is an angle-bracket string
/// (<string>) rather than an arithmetic expression. A string that begins with
/// '<' must be closed by '>' before the end of the line; otherwise the '<' is
/// taken to be arithmetic.
/// When the function returns true, EndLoc is set to the location immediately
/// after the closing '>' character.
1602 static bool isAngleBracketString(SMLoc &StrLoc, SMLoc &EndLoc) { 1603 assert((StrLoc.getPointer() != nullptr) && 1604 "Argument to the function cannot be a NULL value"); 1605 const char *CharPtr = StrLoc.getPointer(); 1606 while ((*CharPtr != '>') && (*CharPtr != '\n') && (*CharPtr != '\r') && 1607 (*CharPtr != '\0')) { 1608 if (*CharPtr == '!') 1609 CharPtr++; 1610 CharPtr++; 1611 } 1612 if (*CharPtr == '>') { 1613 EndLoc = StrLoc.getFromPointer(CharPtr + 1); 1614 return true; 1615 } 1616 return false; 1617 } 1618 1619 /// creating a string without the escape characters '!'. 1620 static std::string angleBracketString(StringRef BracketContents) { 1621 std::string Res; 1622 for (size_t Pos = 0; Pos < BracketContents.size(); Pos++) { 1623 if (BracketContents[Pos] == '!') 1624 Pos++; 1625 Res += BracketContents[Pos]; 1626 } 1627 return Res; 1628 } 1629 1630 /// Parse an expression and return it. 1631 /// 1632 /// expr ::= expr &&,|| expr -> lowest. 1633 /// expr ::= expr |,^,&,! expr 1634 /// expr ::= expr ==,!=,<>,<,<=,>,>= expr 1635 /// expr ::= expr <<,>> expr 1636 /// expr ::= expr +,- expr 1637 /// expr ::= expr *,/,% expr -> highest. 1638 /// expr ::= primaryexpr 1639 /// 1640 bool MasmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { 1641 // Parse the expression. 1642 Res = nullptr; 1643 if (getTargetParser().parsePrimaryExpr(Res, EndLoc) || 1644 parseBinOpRHS(1, Res, EndLoc)) 1645 return true; 1646 1647 // Try to constant fold it up front, if possible. Do not exploit 1648 // assembler here. 
1649 int64_t Value; 1650 if (Res->evaluateAsAbsolute(Value)) 1651 Res = MCConstantExpr::create(Value, getContext()); 1652 1653 return false; 1654 } 1655 1656 bool MasmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { 1657 Res = nullptr; 1658 return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc); 1659 } 1660 1661 bool MasmParser::parseAbsoluteExpression(int64_t &Res) { 1662 const MCExpr *Expr; 1663 1664 SMLoc StartLoc = Lexer.getLoc(); 1665 if (parseExpression(Expr)) 1666 return true; 1667 1668 if (!Expr->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr())) 1669 return Error(StartLoc, "expected absolute expression"); 1670 1671 return false; 1672 } 1673 1674 static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K, 1675 MCBinaryExpr::Opcode &Kind, 1676 bool ShouldUseLogicalShr, 1677 bool EndExpressionAtGreater) { 1678 switch (K) { 1679 default: 1680 return 0; // not a binop. 1681 1682 // Lowest Precedence: &&, || 1683 case AsmToken::AmpAmp: 1684 Kind = MCBinaryExpr::LAnd; 1685 return 2; 1686 case AsmToken::PipePipe: 1687 Kind = MCBinaryExpr::LOr; 1688 return 1; 1689 1690 // Low Precedence: ==, !=, <>, <, <=, >, >= 1691 case AsmToken::EqualEqual: 1692 Kind = MCBinaryExpr::EQ; 1693 return 3; 1694 case AsmToken::ExclaimEqual: 1695 case AsmToken::LessGreater: 1696 Kind = MCBinaryExpr::NE; 1697 return 3; 1698 case AsmToken::Less: 1699 Kind = MCBinaryExpr::LT; 1700 return 3; 1701 case AsmToken::LessEqual: 1702 Kind = MCBinaryExpr::LTE; 1703 return 3; 1704 case AsmToken::Greater: 1705 if (EndExpressionAtGreater) 1706 return 0; 1707 Kind = MCBinaryExpr::GT; 1708 return 3; 1709 case AsmToken::GreaterEqual: 1710 Kind = MCBinaryExpr::GTE; 1711 return 3; 1712 1713 // Low Intermediate Precedence: +, - 1714 case AsmToken::Plus: 1715 Kind = MCBinaryExpr::Add; 1716 return 4; 1717 case AsmToken::Minus: 1718 Kind = MCBinaryExpr::Sub; 1719 return 4; 1720 1721 // High Intermediate Precedence: |, &, ^ 1722 case AsmToken::Pipe: 1723 Kind = 
MCBinaryExpr::Or; 1724 return 5; 1725 case AsmToken::Caret: 1726 Kind = MCBinaryExpr::Xor; 1727 return 5; 1728 case AsmToken::Amp: 1729 Kind = MCBinaryExpr::And; 1730 return 5; 1731 1732 // Highest Precedence: *, /, %, <<, >> 1733 case AsmToken::Star: 1734 Kind = MCBinaryExpr::Mul; 1735 return 6; 1736 case AsmToken::Slash: 1737 Kind = MCBinaryExpr::Div; 1738 return 6; 1739 case AsmToken::Percent: 1740 Kind = MCBinaryExpr::Mod; 1741 return 6; 1742 case AsmToken::LessLess: 1743 Kind = MCBinaryExpr::Shl; 1744 return 6; 1745 case AsmToken::GreaterGreater: 1746 if (EndExpressionAtGreater) 1747 return 0; 1748 Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; 1749 return 6; 1750 } 1751 } 1752 1753 unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K, 1754 MCBinaryExpr::Opcode &Kind) { 1755 bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr(); 1756 return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr, 1757 AngleBracketDepth > 0); 1758 } 1759 1760 /// Parse all binary operators with precedence >= 'Precedence'. 1761 /// Res contains the LHS of the expression on input. 
1762 bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, 1763 SMLoc &EndLoc) { 1764 SMLoc StartLoc = Lexer.getLoc(); 1765 while (true) { 1766 AsmToken::TokenKind TokKind = Lexer.getKind(); 1767 if (Lexer.getKind() == AsmToken::Identifier) { 1768 TokKind = StringSwitch<AsmToken::TokenKind>(Lexer.getTok().getString()) 1769 .CaseLower("and", AsmToken::Amp) 1770 .CaseLower("not", AsmToken::Exclaim) 1771 .CaseLower("or", AsmToken::Pipe) 1772 .CaseLower("xor", AsmToken::Caret) 1773 .CaseLower("shl", AsmToken::LessLess) 1774 .CaseLower("shr", AsmToken::GreaterGreater) 1775 .CaseLower("eq", AsmToken::EqualEqual) 1776 .CaseLower("ne", AsmToken::ExclaimEqual) 1777 .CaseLower("lt", AsmToken::Less) 1778 .CaseLower("le", AsmToken::LessEqual) 1779 .CaseLower("gt", AsmToken::Greater) 1780 .CaseLower("ge", AsmToken::GreaterEqual) 1781 .Default(TokKind); 1782 } 1783 MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; 1784 unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); 1785 1786 // If the next token is lower precedence than we are allowed to eat, return 1787 // successfully with what we ate already. 1788 if (TokPrec < Precedence) 1789 return false; 1790 1791 Lex(); 1792 1793 // Eat the next primary expression. 1794 const MCExpr *RHS; 1795 if (getTargetParser().parsePrimaryExpr(RHS, EndLoc)) 1796 return true; 1797 1798 // If BinOp binds less tightly with RHS than the operator after RHS, let 1799 // the pending operator take RHS as its LHS. 1800 MCBinaryExpr::Opcode Dummy; 1801 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); 1802 if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc)) 1803 return true; 1804 1805 // Merge LHS and RHS according to operator. 1806 Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc); 1807 } 1808 } 1809 1810 /// ParseStatement: 1811 /// ::= % statement 1812 /// ::= EndOfStatement 1813 /// ::= Label* Directive ...Operands... 
///                                           EndOfStatement
///   ::= Label* Identifier OperandList* EndOfStatement
///
/// Parse a single statement. Handles, in order: blank statements, the '%'
/// expansion operator, conditional-assembly directives (which must be seen
/// even while skipping), labels, macro instantiations, prefix directives,
/// infix ("name DIRECTIVE ...") directives, MS inline-asm specials and finally
/// ordinary instructions.
///
/// \param Info receives the parsed operands, opcode, parse-error flag and any
///             macro exit value produced by the statement.
/// \param SI   optional inline-asm semantic callback; only consulted for
///             label rewriting when ParsingMSInlineAsm is set.
/// \returns true on error, false on success.
bool MasmParser::parseStatement(ParseStatementInfo &Info,
                                MCAsmParserSemaCallback *SI) {
  assert(!hasPendingError() && "parseStatement started with pending error");
  // Eat initial spaces and comments.
  while (Lexer.is(AsmToken::Space))
    Lex();
  if (Lexer.is(AsmToken::EndOfStatement)) {
    // If this is a line comment we can drop it safely.
    if (getTok().getString().empty() || getTok().getString().front() == '\r' ||
        getTok().getString().front() == '\n')
      Out.addBlankLine();
    Lex();
    return false;
  }

  // If preceded by an expansion operator, first expand all text macros and
  // macro functions.
  if (getTok().is(AsmToken::Percent)) {
    SMLoc ExpansionLoc = getTok().getLoc();
    if (parseToken(AsmToken::Percent) || expandStatement(ExpansionLoc))
      return true;
  }

  // Statements always start with an identifier, unless we're dealing with a
  // processor directive (.386, .686, etc.) that lexes as a real.
  AsmToken ID = getTok();
  SMLoc IDLoc = ID.getLoc();
  StringRef IDVal;
  if (Lexer.is(AsmToken::HashDirective))
    return parseCppHashLineFilenameComment(IDLoc);
  if (Lexer.is(AsmToken::Dot)) {
    // Treat '.' as a valid identifier in this context.
    Lex();
    IDVal = ".";
  } else if (Lexer.is(AsmToken::Real)) {
    // Treat ".<number>" as a valid identifier in this context.
    IDVal = getTok().getString();
    Lex(); // always eat a token
    if (!IDVal.starts_with("."))
      return Error(IDLoc, "unexpected token at start of statement");
  } else if (parseIdentifier(IDVal, StartOfStatement)) {
    // Not an identifier. This is only an error when the statement is live;
    // inside an inactive conditional block it is skipped below.
    if (!TheCondState.Ignore) {
      Lex(); // always eat a token
      return Error(IDLoc, "unexpected token at start of statement");
    }
    IDVal = "";
  }

  // Handle conditional assembly here before checking for skipping.  We
  // have to do this so that .endif isn't skipped in a ".if 0" block for
  // example.
  StringMap<DirectiveKind>::const_iterator DirKindIt =
      DirectiveKindMap.find(IDVal.lower());
  DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
                              ? DK_NO_DIRECTIVE
                              : DirKindIt->getValue();
  switch (DirKind) {
  default:
    break;
  case DK_IF:
  case DK_IFE:
    return parseDirectiveIf(IDLoc, DirKind);
  case DK_IFB:
    return parseDirectiveIfb(IDLoc, true);
  case DK_IFNB:
    return parseDirectiveIfb(IDLoc, false);
  case DK_IFDEF:
    return parseDirectiveIfdef(IDLoc, true);
  case DK_IFNDEF:
    return parseDirectiveIfdef(IDLoc, false);
  case DK_IFDIF:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
                               /*CaseInsensitive=*/false);
  case DK_IFDIFI:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/false,
                               /*CaseInsensitive=*/true);
  case DK_IFIDN:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
                               /*CaseInsensitive=*/false);
  case DK_IFIDNI:
    return parseDirectiveIfidn(IDLoc, /*ExpectEqual=*/true,
                               /*CaseInsensitive=*/true);
  case DK_ELSEIF:
  case DK_ELSEIFE:
    return parseDirectiveElseIf(IDLoc, DirKind);
  case DK_ELSEIFB:
    return parseDirectiveElseIfb(IDLoc, true);
  case DK_ELSEIFNB:
    return parseDirectiveElseIfb(IDLoc, false);
  case DK_ELSEIFDEF:
    return parseDirectiveElseIfdef(IDLoc, true);
  case DK_ELSEIFNDEF:
    return parseDirectiveElseIfdef(IDLoc, false);
  case DK_ELSEIFDIF:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
                                   /*CaseInsensitive=*/false);
  case DK_ELSEIFDIFI:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/false,
                                   /*CaseInsensitive=*/true);
  case DK_ELSEIFIDN:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
                                   /*CaseInsensitive=*/false);
  case DK_ELSEIFIDNI:
    return parseDirectiveElseIfidn(IDLoc, /*ExpectEqual=*/true,
                                   /*CaseInsensitive=*/true);
  case DK_ELSE:
    return parseDirectiveElse(IDLoc);
  case DK_ENDIF:
    return parseDirectiveEndIf(IDLoc);
  }

  // Ignore the statement if in the middle of inactive conditional
  // (e.g. ".if 0").
  if (TheCondState.Ignore) {
    eatToEndOfStatement();
    return false;
  }

  // FIXME: Recurse on local labels?

  // Check for a label.
  //   ::= identifier ':'
  //   ::= number ':'
  if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) {
    if (checkForValidSection())
      return true;

    // identifier ':'   -> Label.
    Lex();

    // Diagnose attempt to use '.' as a label.
    if (IDVal == ".")
      return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label");

    // Diagnose attempt to use a variable as a label.
    //
    // FIXME: Diagnostics. Note the location of the definition as a label.
    // FIXME: This doesn't diagnose assignment to a symbol which has been
    // implicitly marked as external.
    MCSymbol *Sym;
    if (ParsingMSInlineAsm && SI) {
      // In MS inline asm the front end supplies the internal label name;
      // record a rewrite so the emitted asm string uses it.
      StringRef RewrittenLabel =
          SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
      assert(!RewrittenLabel.empty() &&
             "We should have an internal name here.");
      Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
                                     RewrittenLabel);
      IDVal = RewrittenLabel;
    }
    // Handle directional local labels
    if (IDVal == "@@") {
      Sym = Ctx.createDirectionalLocalSymbol(0);
    } else {
      Sym = getContext().parseSymbol(IDVal);
    }

    // End of Labels should be treated as end of line for lexing
    // purposes but that information is not available to the Lexer who
    // does not understand Labels. This may cause us to see a Hash
    // here instead of a preprocessor line comment.
    if (getTok().is(AsmToken::Hash)) {
      std::string CommentStr = parseStringTo(AsmToken::EndOfStatement);
      Lexer.Lex();
      Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
    }

    // Consume any end of statement token, if present, to avoid spurious
    // addBlankLine calls().
    if (getTok().is(AsmToken::EndOfStatement)) {
      Lex();
    }

    // Emit the label.
    if (!getTargetParser().isParsingMSInlineAsm())
      Out.emitLabel(Sym, IDLoc);
    return false;
  }

  // If macros are enabled, check to see if this is a macro instantiation.
  if (const MCAsmMacro *M = getContext().lookupMacro(IDVal.lower())) {
    // Arguments may optionally be wrapped in parentheses; that decides which
    // token terminates the argument list.
    AsmToken::TokenKind ArgumentEndTok = parseOptionalToken(AsmToken::LParen)
                                             ? AsmToken::RParen
                                             : AsmToken::EndOfStatement;
    return handleMacroEntry(M, IDLoc, ArgumentEndTok);
  }

  // Otherwise, we have a normal instruction or directive.

  if (DirKind != DK_NO_DIRECTIVE) {
    // There are several entities interested in parsing directives:
    //
    // 1. Asm parser extensions. For example, platform-specific parsers
    //    (like the ELF parser) register themselves as extensions.
    // 2. The target-specific assembly parser. Some directives are target
    //    specific or may potentially behave differently on certain targets.
    // 3. The generic directive parser implemented by this class. These are
    //    all the directives that behave in a target and platform independent
    //    manner, or at least have a default behavior that's shared between
    //    all targets and platforms.

    // Special-case handling of structure-end directives at higher priority,
    // since ENDS is overloaded as a segment-end directive.
    if (IDVal.equals_insensitive("ends") && StructInProgress.size() > 1 &&
        getTok().is(AsmToken::EndOfStatement)) {
      return parseDirectiveNestedEnds();
    }

    // First, check the extension directive map to see if any extension has
    // registered itself to parse this directive.
    std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
        ExtensionDirectiveMap.lookup(IDVal.lower());
    if (Handler.first)
      return (*Handler.second)(Handler.first, IDVal, IDLoc);

    // Next, let the target-specific assembly parser try.
    if (ID.isNot(AsmToken::Identifier))
      return false;

    ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
    assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
           "Should only return Failure iff there was an error");
    if (TPDirectiveReturn.isFailure())
      return true;
    if (TPDirectiveReturn.isSuccess())
      return false;

    // Finally, if no one else is interested in this directive, it must be
    // generic and familiar to this class.
    switch (DirKind) {
    default:
      break;
    case DK_ASCII:
      return parseDirectiveAscii(IDVal, false);
    case DK_ASCIZ:
    case DK_STRING:
      return parseDirectiveAscii(IDVal, true);
    case DK_BYTE:
    case DK_SBYTE:
    case DK_DB:
      return parseDirectiveValue(IDVal, 1);
    case DK_WORD:
    case DK_SWORD:
    case DK_DW:
      return parseDirectiveValue(IDVal, 2);
    case DK_DWORD:
    case DK_SDWORD:
    case DK_DD:
      return parseDirectiveValue(IDVal, 4);
    case DK_FWORD:
    case DK_DF:
      return parseDirectiveValue(IDVal, 6);
    case DK_QWORD:
    case DK_SQWORD:
    case DK_DQ:
      return parseDirectiveValue(IDVal, 8);
    case DK_REAL4:
      return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4);
    case DK_REAL8:
      return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8);
    case DK_REAL10:
      return parseDirectiveRealValue(IDVal, APFloat::x87DoubleExtended(), 10);
    case DK_STRUCT:
    case DK_UNION:
      return parseDirectiveNestedStruct(IDVal, DirKind);
    case DK_ENDS:
      return parseDirectiveNestedEnds();
    case DK_ALIGN:
      return parseDirectiveAlign();
    case DK_EVEN:
      return parseDirectiveEven();
    case DK_ORG:
      return parseDirectiveOrg();
    case DK_EXTERN:
      return parseDirectiveExtern();
    case DK_PUBLIC:
      return parseDirectiveSymbolAttribute(MCSA_Global);
    case DK_COMM:
      return parseDirectiveComm(/*IsLocal=*/false);
    case DK_COMMENT:
      return parseDirectiveComment(IDLoc);
    case DK_INCLUDE:
      return parseDirectiveInclude();
    case DK_REPEAT:
      return parseDirectiveRepeat(IDLoc, IDVal);
    case DK_WHILE:
      return parseDirectiveWhile(IDLoc);
    case DK_FOR:
      return parseDirectiveFor(IDLoc, IDVal);
    case DK_FORC:
      return parseDirectiveForc(IDLoc, IDVal);
    case DK_EXITM:
      // Engage ExitValue so the caller can tell a macro exit happened; the
      // directive parser fills in the actual value.
      Info.ExitValue = "";
      return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
    case DK_ENDM:
      // Reaching ENDM also ends the macro, with an empty exit value.
      Info.ExitValue = "";
      return parseDirectiveEndMacro(IDVal);
    case DK_PURGE:
      return parseDirectivePurgeMacro(IDLoc);
    case DK_END:
      return parseDirectiveEnd(IDLoc);
    case DK_ERR:
      return parseDirectiveError(IDLoc);
    case DK_ERRB:
      return parseDirectiveErrorIfb(IDLoc, true);
    case DK_ERRNB:
      return parseDirectiveErrorIfb(IDLoc, false);
    case DK_ERRDEF:
      return parseDirectiveErrorIfdef(IDLoc, true);
    case DK_ERRNDEF:
      return parseDirectiveErrorIfdef(IDLoc, false);
    case DK_ERRDIF:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
                                      /*CaseInsensitive=*/false);
    case DK_ERRDIFI:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/false,
                                      /*CaseInsensitive=*/true);
    case DK_ERRIDN:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
                                      /*CaseInsensitive=*/false);
    case DK_ERRIDNI:
      return parseDirectiveErrorIfidn(IDLoc, /*ExpectEqual=*/true,
                                      /*CaseInsensitive=*/true);
    case DK_ERRE:
      return parseDirectiveErrorIfe(IDLoc, true);
    case DK_ERRNZ:
      return parseDirectiveErrorIfe(IDLoc, false);
    case DK_RADIX:
      return parseDirectiveRadix(IDLoc);
    case DK_ECHO:
      return parseDirectiveEcho(IDLoc);
    }

    return Error(IDLoc, "unknown directive");
  }

  // We also check if this is allocating memory with user-defined type.
  auto IDIt = Structs.find(IDVal.lower());
  if (IDIt != Structs.end())
    return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal,
                                     IDLoc);

  // Non-conditional Microsoft directives sometimes follow their first argument.
  const AsmToken nextTok = getTok();
  const StringRef nextVal = nextTok.getString();
  const SMLoc nextLoc = nextTok.getLoc();

  // One more token of lookahead, used below to tell "BYTE PTR"-style size
  // operators apart from data-definition directives.
  const AsmToken afterNextTok = peekTok();

  // There are several entities interested in parsing infix directives:
  //
  // 1. Asm parser extensions. For example, platform-specific parsers
  //    (like the ELF parser) register themselves as extensions.
  // 2. The generic directive parser implemented by this class. These are
  //    all the directives that behave in a target and platform independent
  //    manner, or at least have a default behavior that's shared between
  //    all targets and platforms.

  getTargetParser().flushPendingInstructions(getStreamer());

  // Special-case handling of structure-end directives at higher priority, since
  // ENDS is overloaded as a segment-end directive.
  if (nextVal.equals_insensitive("ends") && StructInProgress.size() == 1) {
    Lex();
    return parseDirectiveEnds(IDVal, IDLoc);
  }

  // First, check the extension directive map to see if any extension has
  // registered itself to parse this directive.
  std::pair<MCAsmParserExtension *, DirectiveHandler> Handler =
      ExtensionDirectiveMap.lookup(nextVal.lower());
  if (Handler.first) {
    // Consume the directive token and push the leading identifier back so the
    // extension handler sees it as the next token.
    Lex();
    Lexer.UnLex(ID);
    return (*Handler.second)(Handler.first, nextVal, nextLoc);
  }

  // If no one else is interested in this directive, it must be
  // generic and familiar to this class.
  DirKindIt = DirectiveKindMap.find(nextVal.lower());
  DirKind = (DirKindIt == DirectiveKindMap.end())
                ? DK_NO_DIRECTIVE
                : DirKindIt->getValue();
  switch (DirKind) {
  default:
    break;
  case DK_ASSIGN:
  case DK_EQU:
  case DK_TEXTEQU:
    Lex();
    return parseDirectiveEquate(nextVal, IDVal, DirKind, IDLoc);
  case DK_BYTE:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SBYTE:
  case DK_DB:
    Lex();
    return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
  case DK_WORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SWORD:
  case DK_DW:
    Lex();
    return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
  case DK_DWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SDWORD:
  case DK_DD:
    Lex();
    return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
  case DK_FWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_DF:
    Lex();
    return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
  case DK_QWORD:
    if (afterNextTok.is(AsmToken::Identifier) &&
        afterNextTok.getString().equals_insensitive("ptr")) {
      // Size directive; part of an instruction.
      break;
    }
    [[fallthrough]];
  case DK_SQWORD:
  case DK_DQ:
    Lex();
    return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
  case DK_REAL4:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4,
                                        IDVal, IDLoc);
  case DK_REAL8:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8,
                                        IDVal, IDLoc);
  case DK_REAL10:
    Lex();
    return parseDirectiveNamedRealValue(nextVal, APFloat::x87DoubleExtended(),
                                        10, IDVal, IDLoc);
  case DK_STRUCT:
  case DK_UNION:
    Lex();
    return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc);
  case DK_ENDS:
    Lex();
    return parseDirectiveEnds(IDVal, IDLoc);
  case DK_MACRO:
    Lex();
    return parseDirectiveMacro(IDVal, IDLoc);
  }

  // Finally, we check if this is allocating a variable with user-defined type.
  auto NextIt = Structs.find(nextVal.lower());
  if (NextIt != Structs.end()) {
    Lex();
    return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(),
                                          nextVal, nextLoc, IDVal);
  }

  // __asm _emit or __asm __emit
  if (ParsingMSInlineAsm && (IDVal == "_emit" || IDVal == "__emit" ||
                             IDVal == "_EMIT" || IDVal == "__EMIT"))
    return parseDirectiveMSEmit(IDLoc, Info, IDVal.size());

  // __asm align
  if (ParsingMSInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
    return parseDirectiveMSAlign(IDLoc, Info);

  // __asm even: only a rewrite is recorded here; parsing then continues as
  // for an ordinary instruction.
  if (ParsingMSInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
    Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
  if (checkForValidSection())
    return true;

  // Canonicalize the opcode to lower case.
  std::string OpcodeStr = IDVal.lower();
  ParseInstructionInfo IInfo(Info.AsmRewrites);
  bool ParseHadError = getTargetParser().parseInstruction(IInfo, OpcodeStr, ID,
                                                          Info.ParsedOperands);
  Info.ParseError = ParseHadError;

  // Dump the parsed representation, if requested.
  if (getShowParsedOperands()) {
    SmallString<256> Str;
    raw_svector_ostream OS(Str);
    OS << "parsed instruction: [";
    for (unsigned i = 0; i != Info.ParsedOperands.size(); ++i) {
      if (i != 0)
        OS << ", ";
      Info.ParsedOperands[i]->print(OS, MAI);
    }
    OS << "]";

    printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
  }

  // Fail even if ParseInstruction erroneously returns false.
  if (hasPendingError() || ParseHadError)
    return true;

  // If parsing succeeded, match the instruction.
  // (ParseHadError is necessarily false here because of the early return
  // above; the check is kept as written.)
  if (!ParseHadError) {
    uint64_t ErrorInfo;
    if (getTargetParser().matchAndEmitInstruction(
            IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
            getTargetParser().isParsingMSInlineAsm()))
      return true;
  }
  return false;
}

/// Parse and erase a curly brace marking a block start/end.
///
/// If the current token is '{' or '}', consume it (and an EndOfStatement
/// token directly following it) and append an AOK_Skip rewrite covering the
/// consumed text to \p AsmStrRewrites.
///
/// \returns true if a brace was consumed, false if the current token is not a
///          brace. Note that this is not an error indicator.
bool MasmParser::parseCurlyBlockScope(
    SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
  // Identify curly brace marking block start/end.
  if (Lexer.isNot(AsmToken::LCurly) && Lexer.isNot(AsmToken::RCurly))
    return false;

  SMLoc StartLoc = Lexer.getLoc();
  Lex(); // Eat the brace.
  if (Lexer.is(AsmToken::EndOfStatement))
    Lex(); // Eat EndOfStatement following the brace.

  // Erase the block start/end brace from the output asm string.
  AsmStrRewrites.emplace_back(AOK_Skip, StartLoc, Lexer.getLoc().getPointer() -
                                                      StartLoc.getPointer());
  return true;
}

/// parseCppHashLineFilenameComment as this:
///   ::= # number "filename"
///
/// Records the line number, the filename (with its enclosing quotes removed),
/// the location \p L and the current buffer in CppHashInfo, and remembers the
/// first filename ever seen in FirstCppHashFilename. Always returns false.
bool MasmParser::parseCppHashLineFilenameComment(SMLoc L) {
  Lex(); // Eat the hash token.
  // The lexer only emits HashDirective once it has checked that the directive
  // is fully formed, so a mismatch here is an internal error.
  assert(getTok().is(AsmToken::Integer) &&
         "Lexing Cpp line comment: Expected Integer");
  int64_t LineNumber = getTok().getIntVal();
  Lex();
  assert(getTok().is(AsmToken::String) &&
         "Lexing Cpp line comment: Expected String");
  StringRef Filename = getTok().getString();
  Lex();

  // Get rid of the enclosing quotes.
  Filename = Filename.substr(1, Filename.size() - 2);

  // Save the SMLoc, Filename and LineNumber for later use by diagnostics
  // and possibly DWARF file info.
  CppHashInfo.Loc = L;
  CppHashInfo.Filename = Filename;
  CppHashInfo.LineNumber = LineNumber;
  CppHashInfo.Buf = CurBuffer;
  if (FirstCppHashFilename.empty())
    FirstCppHashFilename = Filename;
  return false;
}

/// Diagnostic handler that rewrites filename and line number using the last
/// parsed cpp hash line filename comment, if any.
///
/// \param Diag    the diagnostic to report.
/// \param Context the MasmParser instance, passed as an opaque pointer.
///
/// The diagnostic is forwarded unchanged when no hash line comment has been
/// recorded, when it comes from a different source manager, or when it lies
/// in a different buffer than the recorded comment. Output goes to the saved
/// diagnostic handler when one is set, otherwise to errs().
void MasmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
  const MasmParser *Parser = static_cast<const MasmParser *>(Context);
  raw_ostream &OS = errs();

  const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
  SMLoc DiagLoc = Diag.getLoc();
  unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
  unsigned CppHashBuf =
      Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashInfo.Loc);

  // Like SourceMgr::printMessage() we need to print the include stack if any
  // before printing the message.
  unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
  if (!Parser->SavedDiagHandler && DiagCurBuffer &&
      DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
    SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
    DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
  }

  // If we have not parsed a cpp hash line filename comment or the source
  // manager changed or buffer changed (like in a nested include) then just
  // print the normal diagnostic using its Filename and LineNo.
  if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr ||
      DiagBuf != CppHashBuf) {
    if (Parser->SavedDiagHandler)
      Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
    else
      Diag.print(nullptr, OS);
    return;
  }

  // Use the CppHashFilename and calculate a line number based on the
  // CppHashInfo.Loc and CppHashInfo.LineNumber relative to this Diag's SMLoc
  // for the diagnostic.
  const std::string &Filename = std::string(Parser->CppHashInfo.Filename);

  int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf);
  int CppHashLocLineNo =
      Parser->SrcMgr.FindLineNumber(Parser->CppHashInfo.Loc, CppHashBuf);
  // The hash line comment names the line that follows it, hence the "- 1"
  // before adding the distance between the two locations.
  int LineNo =
      Parser->CppHashInfo.LineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo);

  SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo,
                       Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(),
                       Diag.getLineContents(), Diag.getRanges());

  if (Parser->SavedDiagHandler)
    Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
  else
    NewDiag.print(nullptr, OS);
}

// This is similar to the IsIdentifierChar function in AsmLexer.cpp, but does
// not accept '.'.
2445 static bool isMacroParameterChar(char C) { 2446 return isAlnum(C) || C == '_' || C == '$' || C == '@' || C == '?'; 2447 } 2448 2449 bool MasmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, 2450 ArrayRef<MCAsmMacroParameter> Parameters, 2451 ArrayRef<MCAsmMacroArgument> A, 2452 const std::vector<std::string> &Locals, SMLoc L) { 2453 unsigned NParameters = Parameters.size(); 2454 if (NParameters != A.size()) 2455 return Error(L, "Wrong number of arguments"); 2456 StringMap<std::string> LocalSymbols; 2457 std::string Name; 2458 Name.reserve(6); 2459 for (StringRef Local : Locals) { 2460 raw_string_ostream LocalName(Name); 2461 LocalName << "??" 2462 << format_hex_no_prefix(LocalCounter++, 4, /*Upper=*/true); 2463 LocalSymbols.insert({Local, Name}); 2464 Name.clear(); 2465 } 2466 2467 std::optional<char> CurrentQuote; 2468 while (!Body.empty()) { 2469 // Scan for the next substitution. 2470 std::size_t End = Body.size(), Pos = 0; 2471 std::size_t IdentifierPos = End; 2472 for (; Pos != End; ++Pos) { 2473 // Find the next possible macro parameter, including preceding a '&' 2474 // inside quotes. 2475 if (Body[Pos] == '&') 2476 break; 2477 if (isMacroParameterChar(Body[Pos])) { 2478 if (!CurrentQuote) 2479 break; 2480 if (IdentifierPos == End) 2481 IdentifierPos = Pos; 2482 } else { 2483 IdentifierPos = End; 2484 } 2485 2486 // Track quotation status 2487 if (!CurrentQuote) { 2488 if (Body[Pos] == '\'' || Body[Pos] == '"') 2489 CurrentQuote = Body[Pos]; 2490 } else if (Body[Pos] == CurrentQuote) { 2491 if (Pos + 1 != End && Body[Pos + 1] == CurrentQuote) { 2492 // Escaped quote, and quotes aren't identifier chars; skip 2493 ++Pos; 2494 continue; 2495 } else { 2496 CurrentQuote.reset(); 2497 } 2498 } 2499 } 2500 if (IdentifierPos != End) { 2501 // We've recognized an identifier before an apostrophe inside quotes; 2502 // check once to see if we can expand it. 2503 Pos = IdentifierPos; 2504 IdentifierPos = End; 2505 } 2506 2507 // Add the prefix. 
2508 OS << Body.slice(0, Pos); 2509 2510 // Check if we reached the end. 2511 if (Pos == End) 2512 break; 2513 2514 unsigned I = Pos; 2515 bool InitialAmpersand = (Body[I] == '&'); 2516 if (InitialAmpersand) { 2517 ++I; 2518 ++Pos; 2519 } 2520 while (I < End && isMacroParameterChar(Body[I])) 2521 ++I; 2522 2523 const char *Begin = Body.data() + Pos; 2524 StringRef Argument(Begin, I - Pos); 2525 const std::string ArgumentLower = Argument.lower(); 2526 unsigned Index = 0; 2527 2528 for (; Index < NParameters; ++Index) 2529 if (Parameters[Index].Name.equals_insensitive(ArgumentLower)) 2530 break; 2531 2532 if (Index == NParameters) { 2533 if (InitialAmpersand) 2534 OS << '&'; 2535 auto it = LocalSymbols.find(ArgumentLower); 2536 if (it != LocalSymbols.end()) 2537 OS << it->second; 2538 else 2539 OS << Argument; 2540 Pos = I; 2541 } else { 2542 for (const AsmToken &Token : A[Index]) { 2543 // In MASM, you can write '%expr'. 2544 // The prefix '%' evaluates the expression 'expr' 2545 // and uses the result as a string (e.g. replace %(1+2) with the 2546 // string "3"). 2547 // Here, we identify the integer token which is the result of the 2548 // absolute expression evaluation and replace it with its string 2549 // representation. 2550 if (Token.getString().front() == '%' && Token.is(AsmToken::Integer)) 2551 // Emit an integer value to the buffer. 2552 OS << Token.getIntVal(); 2553 else 2554 OS << Token.getString(); 2555 } 2556 2557 Pos += Argument.size(); 2558 if (Pos < End && Body[Pos] == '&') { 2559 ++Pos; 2560 } 2561 } 2562 // Update the scan point. 
2563 Body = Body.substr(Pos); 2564 } 2565 2566 return false; 2567 } 2568 2569 bool MasmParser::parseMacroArgument(const MCAsmMacroParameter *MP, 2570 MCAsmMacroArgument &MA, 2571 AsmToken::TokenKind EndTok) { 2572 if (MP && MP->Vararg) { 2573 if (Lexer.isNot(EndTok)) { 2574 SmallVector<StringRef, 1> Str = parseStringRefsTo(EndTok); 2575 for (StringRef S : Str) { 2576 MA.emplace_back(AsmToken::String, S); 2577 } 2578 } 2579 return false; 2580 } 2581 2582 SMLoc StrLoc = Lexer.getLoc(), EndLoc; 2583 if (Lexer.is(AsmToken::Less) && isAngleBracketString(StrLoc, EndLoc)) { 2584 const char *StrChar = StrLoc.getPointer() + 1; 2585 const char *EndChar = EndLoc.getPointer() - 1; 2586 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); 2587 /// Eat from '<' to '>'. 2588 Lex(); 2589 MA.emplace_back(AsmToken::String, StringRef(StrChar, EndChar - StrChar)); 2590 return false; 2591 } 2592 2593 unsigned ParenLevel = 0; 2594 2595 while (true) { 2596 if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) 2597 return TokError("unexpected token"); 2598 2599 if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) 2600 break; 2601 2602 // handleMacroEntry relies on not advancing the lexer here 2603 // to be able to fill in the remaining default parameter values 2604 if (Lexer.is(EndTok) && (EndTok != AsmToken::RParen || ParenLevel == 0)) 2605 break; 2606 2607 // Adjust the current parentheses level. 2608 if (Lexer.is(AsmToken::LParen)) 2609 ++ParenLevel; 2610 else if (Lexer.is(AsmToken::RParen) && ParenLevel) 2611 --ParenLevel; 2612 2613 // Append the token to the current argument list. 
2614 MA.push_back(getTok()); 2615 Lex(); 2616 } 2617 2618 if (ParenLevel != 0) 2619 return TokError("unbalanced parentheses in argument"); 2620 2621 if (MA.empty() && MP) { 2622 if (MP->Required) { 2623 return TokError("missing value for required parameter '" + MP->Name + 2624 "'"); 2625 } else { 2626 MA = MP->Value; 2627 } 2628 } 2629 return false; 2630 } 2631 2632 // Parse the macro instantiation arguments. 2633 bool MasmParser::parseMacroArguments(const MCAsmMacro *M, 2634 MCAsmMacroArguments &A, 2635 AsmToken::TokenKind EndTok) { 2636 const unsigned NParameters = M ? M->Parameters.size() : 0; 2637 bool NamedParametersFound = false; 2638 SmallVector<SMLoc, 4> FALocs; 2639 2640 A.resize(NParameters); 2641 FALocs.resize(NParameters); 2642 2643 // Parse two kinds of macro invocations: 2644 // - macros defined without any parameters accept an arbitrary number of them 2645 // - macros defined with parameters accept at most that many of them 2646 for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; 2647 ++Parameter) { 2648 SMLoc IDLoc = Lexer.getLoc(); 2649 MCAsmMacroParameter FA; 2650 2651 if (Lexer.is(AsmToken::Identifier) && peekTok().is(AsmToken::Equal)) { 2652 if (parseIdentifier(FA.Name)) 2653 return Error(IDLoc, "invalid argument identifier for formal argument"); 2654 2655 if (Lexer.isNot(AsmToken::Equal)) 2656 return TokError("expected '=' after formal parameter identifier"); 2657 2658 Lex(); 2659 2660 NamedParametersFound = true; 2661 } 2662 2663 if (NamedParametersFound && FA.Name.empty()) 2664 return Error(IDLoc, "cannot mix positional and keyword arguments"); 2665 2666 unsigned PI = Parameter; 2667 if (!FA.Name.empty()) { 2668 assert(M && "expected macro to be defined"); 2669 unsigned FAI = 0; 2670 for (FAI = 0; FAI < NParameters; ++FAI) 2671 if (M->Parameters[FAI].Name == FA.Name) 2672 break; 2673 2674 if (FAI >= NParameters) { 2675 return Error(IDLoc, "parameter named '" + FA.Name + 2676 "' does not exist for macro '" + M->Name + "'"); 
2677 } 2678 PI = FAI; 2679 } 2680 const MCAsmMacroParameter *MP = nullptr; 2681 if (M && PI < NParameters) 2682 MP = &M->Parameters[PI]; 2683 2684 SMLoc StrLoc = Lexer.getLoc(); 2685 SMLoc EndLoc; 2686 if (Lexer.is(AsmToken::Percent)) { 2687 const MCExpr *AbsoluteExp; 2688 int64_t Value; 2689 /// Eat '%'. 2690 Lex(); 2691 if (parseExpression(AbsoluteExp, EndLoc)) 2692 return false; 2693 if (!AbsoluteExp->evaluateAsAbsolute(Value, 2694 getStreamer().getAssemblerPtr())) 2695 return Error(StrLoc, "expected absolute expression"); 2696 const char *StrChar = StrLoc.getPointer(); 2697 const char *EndChar = EndLoc.getPointer(); 2698 AsmToken newToken(AsmToken::Integer, 2699 StringRef(StrChar, EndChar - StrChar), Value); 2700 FA.Value.push_back(newToken); 2701 } else if (parseMacroArgument(MP, FA.Value, EndTok)) { 2702 if (M) 2703 return addErrorSuffix(" in '" + M->Name + "' macro"); 2704 else 2705 return true; 2706 } 2707 2708 if (!FA.Value.empty()) { 2709 if (A.size() <= PI) 2710 A.resize(PI + 1); 2711 A[PI] = FA.Value; 2712 2713 if (FALocs.size() <= PI) 2714 FALocs.resize(PI + 1); 2715 2716 FALocs[PI] = Lexer.getLoc(); 2717 } 2718 2719 // At the end of the statement, fill in remaining arguments that have 2720 // default values. If there aren't any, then the next argument is 2721 // required but missing 2722 if (Lexer.is(EndTok)) { 2723 bool Failure = false; 2724 for (unsigned FAI = 0; FAI < NParameters; ++FAI) { 2725 if (A[FAI].empty()) { 2726 if (M->Parameters[FAI].Required) { 2727 Error(FALocs[FAI].isValid() ? 
FALocs[FAI] : Lexer.getLoc(), 2728 "missing value for required parameter " 2729 "'" + 2730 M->Parameters[FAI].Name + "' in macro '" + M->Name + "'"); 2731 Failure = true; 2732 } 2733 2734 if (!M->Parameters[FAI].Value.empty()) 2735 A[FAI] = M->Parameters[FAI].Value; 2736 } 2737 } 2738 return Failure; 2739 } 2740 2741 if (Lexer.is(AsmToken::Comma)) 2742 Lex(); 2743 } 2744 2745 return TokError("too many positional arguments"); 2746 } 2747 2748 bool MasmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc, 2749 AsmToken::TokenKind ArgumentEndTok) { 2750 // Arbitrarily limit macro nesting depth (default matches 'as'). We can 2751 // eliminate this, although we should protect against infinite loops. 2752 unsigned MaxNestingDepth = AsmMacroMaxNestingDepth; 2753 if (ActiveMacros.size() == MaxNestingDepth) { 2754 std::ostringstream MaxNestingDepthError; 2755 MaxNestingDepthError << "macros cannot be nested more than " 2756 << MaxNestingDepth << " levels deep." 2757 << " Use -asm-macro-max-nesting-depth to increase " 2758 "this limit."; 2759 return TokError(MaxNestingDepthError.str()); 2760 } 2761 2762 MCAsmMacroArguments A; 2763 if (parseMacroArguments(M, A, ArgumentEndTok) || parseToken(ArgumentEndTok)) 2764 return true; 2765 2766 // Macro instantiation is lexical, unfortunately. We construct a new buffer 2767 // to hold the macro body with substitutions. 2768 SmallString<256> Buf; 2769 StringRef Body = M->Body; 2770 raw_svector_ostream OS(Buf); 2771 2772 if (expandMacro(OS, Body, M->Parameters, A, M->Locals, getTok().getLoc())) 2773 return true; 2774 2775 // We include the endm in the buffer as our cue to exit the macro 2776 // instantiation. 2777 OS << "endm\n"; 2778 2779 std::unique_ptr<MemoryBuffer> Instantiation = 2780 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); 2781 2782 // Create the macro instantiation object and add to the current macro 2783 // instantiation stack. 
2784 MacroInstantiation *MI = new MacroInstantiation{ 2785 NameLoc, CurBuffer, getTok().getLoc(), TheCondStack.size()}; 2786 ActiveMacros.push_back(MI); 2787 2788 ++NumOfMacroInstantiations; 2789 2790 // Jump to the macro instantiation and prime the lexer. 2791 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); 2792 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 2793 EndStatementAtEOFStack.push_back(true); 2794 Lex(); 2795 2796 return false; 2797 } 2798 2799 void MasmParser::handleMacroExit() { 2800 // Jump to the token we should return to, and consume it. 2801 EndStatementAtEOFStack.pop_back(); 2802 jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer, 2803 EndStatementAtEOFStack.back()); 2804 Lex(); 2805 2806 // Pop the instantiation entry. 2807 delete ActiveMacros.back(); 2808 ActiveMacros.pop_back(); 2809 } 2810 2811 bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) { 2812 if (!M->IsFunction) 2813 return Error(NameLoc, "cannot invoke macro procedure as function"); 2814 2815 if (parseToken(AsmToken::LParen, "invoking macro function '" + M->Name + 2816 "' requires arguments in parentheses") || 2817 handleMacroEntry(M, NameLoc, AsmToken::RParen)) 2818 return true; 2819 2820 // Parse all statements in the macro, retrieving the exit value when it ends. 2821 std::string ExitValue; 2822 SmallVector<AsmRewrite, 4> AsmStrRewrites; 2823 while (Lexer.isNot(AsmToken::Eof)) { 2824 ParseStatementInfo Info(&AsmStrRewrites); 2825 bool HasError = parseStatement(Info, nullptr); 2826 2827 if (!HasError && Info.ExitValue) { 2828 ExitValue = std::move(*Info.ExitValue); 2829 break; 2830 } 2831 2832 // If we have a Lexer Error we are on an Error Token. Load in Lexer Error 2833 // for printing ErrMsg via Lex() only if no (presumably better) parser error 2834 // exists. 
2835 if (HasError && !hasPendingError() && Lexer.getTok().is(AsmToken::Error)) 2836 Lex(); 2837 2838 // parseStatement returned true so may need to emit an error. 2839 printPendingErrors(); 2840 2841 // Skipping to the next line if needed. 2842 if (HasError && !getLexer().justConsumedEOL()) 2843 eatToEndOfStatement(); 2844 } 2845 2846 // Exit values may require lexing, unfortunately. We construct a new buffer to 2847 // hold the exit value. 2848 std::unique_ptr<MemoryBuffer> MacroValue = 2849 MemoryBuffer::getMemBufferCopy(ExitValue, "<macro-value>"); 2850 2851 // Jump from this location to the instantiated exit value, and prime the 2852 // lexer. 2853 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(MacroValue), Lexer.getLoc()); 2854 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), nullptr, 2855 /*EndStatementAtEOF=*/false); 2856 EndStatementAtEOFStack.push_back(false); 2857 Lex(); 2858 2859 return false; 2860 } 2861 2862 /// parseIdentifier: 2863 /// ::= identifier 2864 /// ::= string 2865 bool MasmParser::parseIdentifier(StringRef &Res, 2866 IdentifierPositionKind Position) { 2867 // The assembler has relaxed rules for accepting identifiers, in particular we 2868 // allow things like '.globl $foo' and '.def @feat.00', which would normally 2869 // be separate tokens. At this level, we have already lexed so we cannot 2870 // (currently) handle this as a context dependent token, instead we detect 2871 // adjacent tokens and return the combined identifier. 2872 if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) { 2873 SMLoc PrefixLoc = getLexer().getLoc(); 2874 2875 // Consume the prefix character, and check for a following identifier. 2876 2877 AsmToken nextTok = peekTok(false); 2878 2879 if (nextTok.isNot(AsmToken::Identifier)) 2880 return true; 2881 2882 // We have a '$' or '@' followed by an identifier, make sure they are adjacent. 
2883 if (PrefixLoc.getPointer() + 1 != nextTok.getLoc().getPointer()) 2884 return true; 2885 2886 // eat $ or @ 2887 Lexer.Lex(); // Lexer's Lex guarantees consecutive token. 2888 // Construct the joined identifier and consume the token. 2889 Res = 2890 StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); 2891 Lex(); // Parser Lex to maintain invariants. 2892 return false; 2893 } 2894 2895 if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String)) 2896 return true; 2897 2898 Res = getTok().getIdentifier(); 2899 2900 // Consume the identifier token - but if parsing certain directives, avoid 2901 // lexical expansion of the next token. 2902 ExpandKind ExpandNextToken = ExpandMacros; 2903 if (Position == StartOfStatement && 2904 StringSwitch<bool>(Res) 2905 .CaseLower("echo", true) 2906 .CasesLower("ifdef", "ifndef", "elseifdef", "elseifndef", true) 2907 .Default(false)) { 2908 ExpandNextToken = DoNotExpandMacros; 2909 } 2910 Lex(ExpandNextToken); 2911 2912 return false; 2913 } 2914 2915 /// parseDirectiveEquate: 2916 /// ::= name "=" expression 2917 /// | name "equ" expression (not redefinable) 2918 /// | name "equ" text-list 2919 /// | name "textequ" text-list (redefinability unspecified) 2920 bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, 2921 DirectiveKind DirKind, SMLoc NameLoc) { 2922 auto BuiltinIt = BuiltinSymbolMap.find(Name.lower()); 2923 if (BuiltinIt != BuiltinSymbolMap.end()) 2924 return Error(NameLoc, "cannot redefine a built-in symbol"); 2925 2926 Variable &Var = Variables[Name.lower()]; 2927 if (Var.Name.empty()) { 2928 Var.Name = Name; 2929 } 2930 2931 SMLoc StartLoc = Lexer.getLoc(); 2932 if (DirKind == DK_EQU || DirKind == DK_TEXTEQU) { 2933 // "equ" and "textequ" both allow text expressions. 2934 std::string Value; 2935 std::string TextItem; 2936 if (!parseTextItem(TextItem)) { 2937 Value += TextItem; 2938 2939 // Accept a text-list, not just one text-item. 
2940 auto parseItem = [&]() -> bool { 2941 if (parseTextItem(TextItem)) 2942 return TokError("expected text item"); 2943 Value += TextItem; 2944 return false; 2945 }; 2946 if (parseOptionalToken(AsmToken::Comma) && parseMany(parseItem)) 2947 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 2948 2949 if (!Var.IsText || Var.TextValue != Value) { 2950 switch (Var.Redefinable) { 2951 case Variable::NOT_REDEFINABLE: 2952 return Error(getTok().getLoc(), "invalid variable redefinition"); 2953 case Variable::WARN_ON_REDEFINITION: 2954 if (Warning(NameLoc, "redefining '" + Name + 2955 "', already defined on the command line")) { 2956 return true; 2957 } 2958 break; 2959 default: 2960 break; 2961 } 2962 } 2963 Var.IsText = true; 2964 Var.TextValue = Value; 2965 Var.Redefinable = Variable::REDEFINABLE; 2966 2967 return false; 2968 } 2969 } 2970 if (DirKind == DK_TEXTEQU) 2971 return TokError("expected <text> in '" + Twine(IDVal) + "' directive"); 2972 2973 // Parse as expression assignment. 2974 const MCExpr *Expr; 2975 SMLoc EndLoc; 2976 if (parseExpression(Expr, EndLoc)) 2977 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 2978 StringRef ExprAsString = StringRef( 2979 StartLoc.getPointer(), EndLoc.getPointer() - StartLoc.getPointer()); 2980 2981 int64_t Value; 2982 if (!Expr->evaluateAsAbsolute(Value, getStreamer().getAssemblerPtr())) { 2983 if (DirKind == DK_ASSIGN) 2984 return Error( 2985 StartLoc, 2986 "expected absolute expression; not all symbols have known values", 2987 {StartLoc, EndLoc}); 2988 2989 // Not an absolute expression; define as a text replacement. 
2990 if (!Var.IsText || Var.TextValue != ExprAsString) { 2991 switch (Var.Redefinable) { 2992 case Variable::NOT_REDEFINABLE: 2993 return Error(getTok().getLoc(), "invalid variable redefinition"); 2994 case Variable::WARN_ON_REDEFINITION: 2995 if (Warning(NameLoc, "redefining '" + Name + 2996 "', already defined on the command line")) { 2997 return true; 2998 } 2999 break; 3000 default: 3001 break; 3002 } 3003 } 3004 3005 Var.IsText = true; 3006 Var.TextValue = ExprAsString.str(); 3007 Var.Redefinable = Variable::REDEFINABLE; 3008 3009 return false; 3010 } 3011 3012 auto *Sym = getContext().parseSymbol(Var.Name); 3013 const MCConstantExpr *PrevValue = 3014 Sym->isVariable() 3015 ? dyn_cast_or_null<MCConstantExpr>(Sym->getVariableValue()) 3016 : nullptr; 3017 if (Var.IsText || !PrevValue || PrevValue->getValue() != Value) { 3018 switch (Var.Redefinable) { 3019 case Variable::NOT_REDEFINABLE: 3020 return Error(getTok().getLoc(), "invalid variable redefinition"); 3021 case Variable::WARN_ON_REDEFINITION: 3022 if (Warning(NameLoc, "redefining '" + Name + 3023 "', already defined on the command line")) { 3024 return true; 3025 } 3026 break; 3027 default: 3028 break; 3029 } 3030 } 3031 3032 Var.IsText = false; 3033 Var.TextValue.clear(); 3034 Var.Redefinable = (DirKind == DK_ASSIGN) ? 
Variable::REDEFINABLE 3035 : Variable::NOT_REDEFINABLE; 3036 3037 Sym->setRedefinable(Var.Redefinable != Variable::NOT_REDEFINABLE); 3038 Sym->setVariableValue(Expr); 3039 Sym->setExternal(false); 3040 3041 return false; 3042 } 3043 3044 bool MasmParser::parseEscapedString(std::string &Data) { 3045 if (check(getTok().isNot(AsmToken::String), "expected string")) 3046 return true; 3047 3048 Data = ""; 3049 char Quote = getTok().getString().front(); 3050 StringRef Str = getTok().getStringContents(); 3051 Data.reserve(Str.size()); 3052 for (size_t i = 0, e = Str.size(); i != e; ++i) { 3053 Data.push_back(Str[i]); 3054 if (Str[i] == Quote) { 3055 // MASM treats doubled delimiting quotes as an escaped delimiting quote. 3056 // If we're escaping the string's trailing delimiter, we're definitely 3057 // missing a quotation mark. 3058 if (i + 1 == Str.size()) 3059 return Error(getTok().getLoc(), "missing quotation mark in string"); 3060 if (Str[i + 1] == Quote) 3061 ++i; 3062 } 3063 } 3064 3065 Lex(); 3066 return false; 3067 } 3068 3069 bool MasmParser::parseAngleBracketString(std::string &Data) { 3070 SMLoc EndLoc, StartLoc = getTok().getLoc(); 3071 if (isAngleBracketString(StartLoc, EndLoc)) { 3072 const char *StartChar = StartLoc.getPointer() + 1; 3073 const char *EndChar = EndLoc.getPointer() - 1; 3074 jumpToLoc(EndLoc, CurBuffer, EndStatementAtEOFStack.back()); 3075 // Eat from '<' to '>'. 
3076 Lex(); 3077 3078 Data = angleBracketString(StringRef(StartChar, EndChar - StartChar)); 3079 return false; 3080 } 3081 return true; 3082 } 3083 3084 /// textItem ::= textLiteral | textMacroID | % constExpr 3085 bool MasmParser::parseTextItem(std::string &Data) { 3086 switch (getTok().getKind()) { 3087 default: 3088 return true; 3089 case AsmToken::Percent: { 3090 int64_t Res; 3091 if (parseToken(AsmToken::Percent) || parseAbsoluteExpression(Res)) 3092 return true; 3093 Data = std::to_string(Res); 3094 return false; 3095 } 3096 case AsmToken::Less: 3097 case AsmToken::LessEqual: 3098 case AsmToken::LessLess: 3099 case AsmToken::LessGreater: 3100 return parseAngleBracketString(Data); 3101 case AsmToken::Identifier: { 3102 // This must be a text macro; we need to expand it accordingly. 3103 StringRef ID; 3104 SMLoc StartLoc = getTok().getLoc(); 3105 if (parseIdentifier(ID)) 3106 return true; 3107 Data = ID.str(); 3108 3109 bool Expanded = false; 3110 while (true) { 3111 // Try to resolve as a built-in text macro 3112 auto BuiltinIt = BuiltinSymbolMap.find(ID.lower()); 3113 if (BuiltinIt != BuiltinSymbolMap.end()) { 3114 std::optional<std::string> BuiltinText = 3115 evaluateBuiltinTextMacro(BuiltinIt->getValue(), StartLoc); 3116 if (!BuiltinText) { 3117 // Not a text macro; break without substituting 3118 break; 3119 } 3120 Data = std::move(*BuiltinText); 3121 ID = StringRef(Data); 3122 Expanded = true; 3123 continue; 3124 } 3125 3126 // Try to resolve as a built-in macro function 3127 auto BuiltinFuncIt = BuiltinFunctionMap.find(ID.lower()); 3128 if (BuiltinFuncIt != BuiltinFunctionMap.end()) { 3129 Data.clear(); 3130 if (evaluateBuiltinMacroFunction(BuiltinFuncIt->getValue(), ID, Data)) { 3131 return true; 3132 } 3133 ID = StringRef(Data); 3134 Expanded = true; 3135 continue; 3136 } 3137 3138 // Try to resolve as a variable text macro 3139 auto VarIt = Variables.find(ID.lower()); 3140 if (VarIt != Variables.end()) { 3141 const Variable &Var = VarIt->getValue(); 
3142 if (!Var.IsText) { 3143 // Not a text macro; break without substituting 3144 break; 3145 } 3146 Data = Var.TextValue; 3147 ID = StringRef(Data); 3148 Expanded = true; 3149 continue; 3150 } 3151 3152 break; 3153 } 3154 3155 if (!Expanded) { 3156 // Not a text macro; not usable in TextItem context. Since we haven't used 3157 // the token, put it back for better error recovery. 3158 getLexer().UnLex(AsmToken(AsmToken::Identifier, ID)); 3159 return true; 3160 } 3161 return false; 3162 } 3163 } 3164 llvm_unreachable("unhandled token kind"); 3165 } 3166 3167 /// parseDirectiveAscii: 3168 /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ] 3169 bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { 3170 auto parseOp = [&]() -> bool { 3171 std::string Data; 3172 if (checkForValidSection() || parseEscapedString(Data)) 3173 return true; 3174 getStreamer().emitBytes(Data); 3175 if (ZeroTerminated) 3176 getStreamer().emitBytes(StringRef("\0", 1)); 3177 return false; 3178 }; 3179 3180 if (parseMany(parseOp)) 3181 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3182 return false; 3183 } 3184 3185 bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) { 3186 // Special case constant expressions to match code generator. 3187 if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { 3188 assert(Size <= 8 && "Invalid size"); 3189 int64_t IntValue = MCE->getValue(); 3190 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) 3191 return Error(MCE->getLoc(), "out of range literal value"); 3192 getStreamer().emitIntValue(IntValue, Size); 3193 } else { 3194 const MCSymbolRefExpr *MSE = dyn_cast<MCSymbolRefExpr>(Value); 3195 if (MSE && MSE->getSymbol().getName() == "?") { 3196 // ? initializer; treat as 0. 
3197 getStreamer().emitIntValue(0, Size); 3198 } else { 3199 getStreamer().emitValue(Value, Size, Value->getLoc()); 3200 } 3201 } 3202 return false; 3203 } 3204 3205 bool MasmParser::parseScalarInitializer(unsigned Size, 3206 SmallVectorImpl<const MCExpr *> &Values, 3207 unsigned StringPadLength) { 3208 if (Size == 1 && getTok().is(AsmToken::String)) { 3209 std::string Value; 3210 if (parseEscapedString(Value)) 3211 return true; 3212 // Treat each character as an initializer. 3213 for (const unsigned char CharVal : Value) 3214 Values.push_back(MCConstantExpr::create(CharVal, getContext())); 3215 3216 // Pad the string with spaces to the specified length. 3217 for (size_t i = Value.size(); i < StringPadLength; ++i) 3218 Values.push_back(MCConstantExpr::create(' ', getContext())); 3219 } else { 3220 const MCExpr *Value; 3221 if (parseExpression(Value)) 3222 return true; 3223 if (getTok().is(AsmToken::Identifier) && 3224 getTok().getString().equals_insensitive("dup")) { 3225 Lex(); // Eat 'dup'. 
3226 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 3227 if (!MCE) 3228 return Error(Value->getLoc(), 3229 "cannot repeat value a non-constant number of times"); 3230 const int64_t Repetitions = MCE->getValue(); 3231 if (Repetitions < 0) 3232 return Error(Value->getLoc(), 3233 "cannot repeat value a negative number of times"); 3234 3235 SmallVector<const MCExpr *, 1> DuplicatedValues; 3236 if (parseToken(AsmToken::LParen, 3237 "parentheses required for 'dup' contents") || 3238 parseScalarInstList(Size, DuplicatedValues) || parseRParen()) 3239 return true; 3240 3241 for (int i = 0; i < Repetitions; ++i) 3242 Values.append(DuplicatedValues.begin(), DuplicatedValues.end()); 3243 } else { 3244 Values.push_back(Value); 3245 } 3246 } 3247 return false; 3248 } 3249 3250 bool MasmParser::parseScalarInstList(unsigned Size, 3251 SmallVectorImpl<const MCExpr *> &Values, 3252 const AsmToken::TokenKind EndToken) { 3253 while (getTok().isNot(EndToken) && 3254 (EndToken != AsmToken::Greater || 3255 getTok().isNot(AsmToken::GreaterGreater))) { 3256 parseScalarInitializer(Size, Values); 3257 3258 // If we see a comma, continue, and allow line continuation. 3259 if (!parseOptionalToken(AsmToken::Comma)) 3260 break; 3261 parseOptionalToken(AsmToken::EndOfStatement); 3262 } 3263 return false; 3264 } 3265 3266 bool MasmParser::emitIntegralValues(unsigned Size, unsigned *Count) { 3267 SmallVector<const MCExpr *, 1> Values; 3268 if (checkForValidSection() || parseScalarInstList(Size, Values)) 3269 return true; 3270 3271 for (const auto *Value : Values) { 3272 emitIntValue(Value, Size); 3273 } 3274 if (Count) 3275 *Count = Values.size(); 3276 return false; 3277 } 3278 3279 // Add a field to the current structure. 
3280 bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { 3281 StructInfo &Struct = StructInProgress.back(); 3282 FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); 3283 IntFieldInfo &IntInfo = Field.Contents.IntInfo; 3284 3285 Field.Type = Size; 3286 3287 if (parseScalarInstList(Size, IntInfo.Values)) 3288 return true; 3289 3290 Field.SizeOf = Field.Type * IntInfo.Values.size(); 3291 Field.LengthOf = IntInfo.Values.size(); 3292 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 3293 if (!Struct.IsUnion) { 3294 Struct.NextOffset = FieldEnd; 3295 } 3296 Struct.Size = std::max(Struct.Size, FieldEnd); 3297 return false; 3298 } 3299 3300 /// parseDirectiveValue 3301 /// ::= (byte | word | ... ) [ expression (, expression)* ] 3302 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) { 3303 if (StructInProgress.empty()) { 3304 // Initialize data value. 3305 if (emitIntegralValues(Size)) 3306 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3307 } else if (addIntegralField("", Size)) { 3308 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3309 } 3310 3311 return false; 3312 } 3313 3314 /// parseDirectiveNamedValue 3315 /// ::= name (byte | word | ... ) [ expression (, expression)* ] 3316 bool MasmParser::parseDirectiveNamedValue(StringRef TypeName, unsigned Size, 3317 StringRef Name, SMLoc NameLoc) { 3318 if (StructInProgress.empty()) { 3319 // Initialize named data value. 
3320 MCSymbol *Sym = getContext().parseSymbol(Name); 3321 getStreamer().emitLabel(Sym); 3322 unsigned Count; 3323 if (emitIntegralValues(Size, &Count)) 3324 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); 3325 3326 AsmTypeInfo Type; 3327 Type.Name = TypeName; 3328 Type.Size = Size * Count; 3329 Type.ElementSize = Size; 3330 Type.Length = Count; 3331 KnownType[Name.lower()] = Type; 3332 } else if (addIntegralField(Name, Size)) { 3333 return addErrorSuffix(" in '" + Twine(TypeName) + "' directive"); 3334 } 3335 3336 return false; 3337 } 3338 3339 bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) { 3340 // We don't truly support arithmetic on floating point expressions, so we 3341 // have to manually parse unary prefixes. 3342 bool IsNeg = false; 3343 SMLoc SignLoc; 3344 if (getLexer().is(AsmToken::Minus)) { 3345 SignLoc = getLexer().getLoc(); 3346 Lexer.Lex(); 3347 IsNeg = true; 3348 } else if (getLexer().is(AsmToken::Plus)) { 3349 SignLoc = getLexer().getLoc(); 3350 Lexer.Lex(); 3351 } 3352 3353 if (Lexer.is(AsmToken::Error)) 3354 return TokError(Lexer.getErr()); 3355 if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) && 3356 Lexer.isNot(AsmToken::Identifier)) 3357 return TokError("unexpected token in directive"); 3358 3359 // Convert to an APFloat. 3360 APFloat Value(Semantics); 3361 StringRef IDVal = getTok().getString(); 3362 if (getLexer().is(AsmToken::Identifier)) { 3363 if (IDVal.equals_insensitive("infinity") || IDVal.equals_insensitive("inf")) 3364 Value = APFloat::getInf(Semantics); 3365 else if (IDVal.equals_insensitive("nan")) 3366 Value = APFloat::getNaN(Semantics, false, ~0); 3367 else if (IDVal.equals_insensitive("?")) 3368 Value = APFloat::getZero(Semantics); 3369 else 3370 return TokError("invalid floating point literal"); 3371 } else if (IDVal.consume_back("r") || IDVal.consume_back("R")) { 3372 // MASM hexadecimal floating-point literal; no APFloat conversion needed. 
3373 // To match ML64.exe, ignore the initial sign. 3374 unsigned SizeInBits = Value.getSizeInBits(Semantics); 3375 if (SizeInBits != (IDVal.size() << 2)) 3376 return TokError("invalid floating point literal"); 3377 3378 // Consume the numeric token. 3379 Lex(); 3380 3381 Res = APInt(SizeInBits, IDVal, 16); 3382 if (SignLoc.isValid()) 3383 return Warning(SignLoc, "MASM-style hex floats ignore explicit sign"); 3384 return false; 3385 } else if (errorToBool( 3386 Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) 3387 .takeError())) { 3388 return TokError("invalid floating point literal"); 3389 } 3390 if (IsNeg) 3391 Value.changeSign(); 3392 3393 // Consume the numeric token. 3394 Lex(); 3395 3396 Res = Value.bitcastToAPInt(); 3397 3398 return false; 3399 } 3400 3401 bool MasmParser::parseRealInstList(const fltSemantics &Semantics, 3402 SmallVectorImpl<APInt> &ValuesAsInt, 3403 const AsmToken::TokenKind EndToken) { 3404 while (getTok().isNot(EndToken) || 3405 (EndToken == AsmToken::Greater && 3406 getTok().isNot(AsmToken::GreaterGreater))) { 3407 const AsmToken NextTok = peekTok(); 3408 if (NextTok.is(AsmToken::Identifier) && 3409 NextTok.getString().equals_insensitive("dup")) { 3410 const MCExpr *Value; 3411 if (parseExpression(Value) || parseToken(AsmToken::Identifier)) 3412 return true; 3413 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 3414 if (!MCE) 3415 return Error(Value->getLoc(), 3416 "cannot repeat value a non-constant number of times"); 3417 const int64_t Repetitions = MCE->getValue(); 3418 if (Repetitions < 0) 3419 return Error(Value->getLoc(), 3420 "cannot repeat value a negative number of times"); 3421 3422 SmallVector<APInt, 1> DuplicatedValues; 3423 if (parseToken(AsmToken::LParen, 3424 "parentheses required for 'dup' contents") || 3425 parseRealInstList(Semantics, DuplicatedValues) || parseRParen()) 3426 return true; 3427 3428 for (int i = 0; i < Repetitions; ++i) 3429 ValuesAsInt.append(DuplicatedValues.begin(), 
DuplicatedValues.end()); 3430 } else { 3431 APInt AsInt; 3432 if (parseRealValue(Semantics, AsInt)) 3433 return true; 3434 ValuesAsInt.push_back(AsInt); 3435 } 3436 3437 // Continue if we see a comma. (Also, allow line continuation.) 3438 if (!parseOptionalToken(AsmToken::Comma)) 3439 break; 3440 parseOptionalToken(AsmToken::EndOfStatement); 3441 } 3442 3443 return false; 3444 } 3445 3446 // Initialize real data values. 3447 bool MasmParser::emitRealValues(const fltSemantics &Semantics, 3448 unsigned *Count) { 3449 if (checkForValidSection()) 3450 return true; 3451 3452 SmallVector<APInt, 1> ValuesAsInt; 3453 if (parseRealInstList(Semantics, ValuesAsInt)) 3454 return true; 3455 3456 for (const APInt &AsInt : ValuesAsInt) { 3457 getStreamer().emitIntValue(AsInt); 3458 } 3459 if (Count) 3460 *Count = ValuesAsInt.size(); 3461 return false; 3462 } 3463 3464 // Add a real field to the current struct. 3465 bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, 3466 size_t Size) { 3467 StructInfo &Struct = StructInProgress.back(); 3468 FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); 3469 RealFieldInfo &RealInfo = Field.Contents.RealInfo; 3470 3471 Field.SizeOf = 0; 3472 3473 if (parseRealInstList(Semantics, RealInfo.AsIntValues)) 3474 return true; 3475 3476 Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; 3477 Field.LengthOf = RealInfo.AsIntValues.size(); 3478 Field.SizeOf = Field.Type * Field.LengthOf; 3479 3480 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 3481 if (!Struct.IsUnion) { 3482 Struct.NextOffset = FieldEnd; 3483 } 3484 Struct.Size = std::max(Struct.Size, FieldEnd); 3485 return false; 3486 } 3487 3488 /// parseDirectiveRealValue 3489 /// ::= (real4 | real8 | real10) [ expression (, expression)* ] 3490 bool MasmParser::parseDirectiveRealValue(StringRef IDVal, 3491 const fltSemantics &Semantics, 3492 size_t Size) { 3493 if (StructInProgress.empty()) { 3494 // Initialize data value. 
3495 if (emitRealValues(Semantics)) 3496 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3497 } else if (addRealField("", Semantics, Size)) { 3498 return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); 3499 } 3500 return false; 3501 } 3502 3503 /// parseDirectiveNamedRealValue 3504 /// ::= name (real4 | real8 | real10) [ expression (, expression)* ] 3505 bool MasmParser::parseDirectiveNamedRealValue(StringRef TypeName, 3506 const fltSemantics &Semantics, 3507 unsigned Size, StringRef Name, 3508 SMLoc NameLoc) { 3509 if (StructInProgress.empty()) { 3510 // Initialize named data value. 3511 MCSymbol *Sym = getContext().parseSymbol(Name); 3512 getStreamer().emitLabel(Sym); 3513 unsigned Count; 3514 if (emitRealValues(Semantics, &Count)) 3515 return addErrorSuffix(" in '" + TypeName + "' directive"); 3516 3517 AsmTypeInfo Type; 3518 Type.Name = TypeName; 3519 Type.Size = Size * Count; 3520 Type.ElementSize = Size; 3521 Type.Length = Count; 3522 KnownType[Name.lower()] = Type; 3523 } else if (addRealField(Name, Semantics, Size)) { 3524 return addErrorSuffix(" in '" + TypeName + "' directive"); 3525 } 3526 return false; 3527 } 3528 3529 bool MasmParser::parseOptionalAngleBracketOpen() { 3530 const AsmToken Tok = getTok(); 3531 if (parseOptionalToken(AsmToken::LessLess)) { 3532 AngleBracketDepth++; 3533 Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1))); 3534 return true; 3535 } else if (parseOptionalToken(AsmToken::LessGreater)) { 3536 AngleBracketDepth++; 3537 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); 3538 return true; 3539 } else if (parseOptionalToken(AsmToken::Less)) { 3540 AngleBracketDepth++; 3541 return true; 3542 } 3543 3544 return false; 3545 } 3546 3547 bool MasmParser::parseAngleBracketClose(const Twine &Msg) { 3548 const AsmToken Tok = getTok(); 3549 if (parseOptionalToken(AsmToken::GreaterGreater)) { 3550 Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); 3551 } else if 
(parseToken(AsmToken::Greater, Msg)) { 3552 return true; 3553 } 3554 AngleBracketDepth--; 3555 return false; 3556 } 3557 3558 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 3559 const IntFieldInfo &Contents, 3560 FieldInitializer &Initializer) { 3561 SMLoc Loc = getTok().getLoc(); 3562 3563 SmallVector<const MCExpr *, 1> Values; 3564 if (parseOptionalToken(AsmToken::LCurly)) { 3565 if (Field.LengthOf == 1 && Field.Type > 1) 3566 return Error(Loc, "Cannot initialize scalar field with array value"); 3567 if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) || 3568 parseToken(AsmToken::RCurly)) 3569 return true; 3570 } else if (parseOptionalAngleBracketOpen()) { 3571 if (Field.LengthOf == 1 && Field.Type > 1) 3572 return Error(Loc, "Cannot initialize scalar field with array value"); 3573 if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) || 3574 parseAngleBracketClose()) 3575 return true; 3576 } else if (Field.LengthOf > 1 && Field.Type > 1) { 3577 return Error(Loc, "Cannot initialize array field with scalar value"); 3578 } else if (parseScalarInitializer(Field.Type, Values, 3579 /*StringPadLength=*/Field.LengthOf)) { 3580 return true; 3581 } 3582 3583 if (Values.size() > Field.LengthOf) { 3584 return Error(Loc, "Initializer too long for field; expected at most " + 3585 std::to_string(Field.LengthOf) + " elements, got " + 3586 std::to_string(Values.size())); 3587 } 3588 // Default-initialize all remaining values. 
3589 Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end()); 3590 3591 Initializer = FieldInitializer(std::move(Values)); 3592 return false; 3593 } 3594 3595 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 3596 const RealFieldInfo &Contents, 3597 FieldInitializer &Initializer) { 3598 const fltSemantics *Semantics; 3599 switch (Field.Type) { 3600 case 4: 3601 Semantics = &APFloat::IEEEsingle(); 3602 break; 3603 case 8: 3604 Semantics = &APFloat::IEEEdouble(); 3605 break; 3606 case 10: 3607 Semantics = &APFloat::x87DoubleExtended(); 3608 break; 3609 default: 3610 llvm_unreachable("unknown real field type"); 3611 } 3612 3613 SMLoc Loc = getTok().getLoc(); 3614 3615 SmallVector<APInt, 1> AsIntValues; 3616 if (parseOptionalToken(AsmToken::LCurly)) { 3617 if (Field.LengthOf == 1) 3618 return Error(Loc, "Cannot initialize scalar field with array value"); 3619 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::RCurly) || 3620 parseToken(AsmToken::RCurly)) 3621 return true; 3622 } else if (parseOptionalAngleBracketOpen()) { 3623 if (Field.LengthOf == 1) 3624 return Error(Loc, "Cannot initialize scalar field with array value"); 3625 if (parseRealInstList(*Semantics, AsIntValues, AsmToken::Greater) || 3626 parseAngleBracketClose()) 3627 return true; 3628 } else if (Field.LengthOf > 1) { 3629 return Error(Loc, "Cannot initialize array field with scalar value"); 3630 } else { 3631 AsIntValues.emplace_back(); 3632 if (parseRealValue(*Semantics, AsIntValues.back())) 3633 return true; 3634 } 3635 3636 if (AsIntValues.size() > Field.LengthOf) { 3637 return Error(Loc, "Initializer too long for field; expected at most " + 3638 std::to_string(Field.LengthOf) + " elements, got " + 3639 std::to_string(AsIntValues.size())); 3640 } 3641 // Default-initialize all remaining values. 
3642 AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(), 3643 Contents.AsIntValues.end()); 3644 3645 Initializer = FieldInitializer(std::move(AsIntValues)); 3646 return false; 3647 } 3648 3649 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 3650 const StructFieldInfo &Contents, 3651 FieldInitializer &Initializer) { 3652 SMLoc Loc = getTok().getLoc(); 3653 3654 std::vector<StructInitializer> Initializers; 3655 if (Field.LengthOf > 1) { 3656 if (parseOptionalToken(AsmToken::LCurly)) { 3657 if (parseStructInstList(Contents.Structure, Initializers, 3658 AsmToken::RCurly) || 3659 parseToken(AsmToken::RCurly)) 3660 return true; 3661 } else if (parseOptionalAngleBracketOpen()) { 3662 if (parseStructInstList(Contents.Structure, Initializers, 3663 AsmToken::Greater) || 3664 parseAngleBracketClose()) 3665 return true; 3666 } else { 3667 return Error(Loc, "Cannot initialize array field with scalar value"); 3668 } 3669 } else { 3670 Initializers.emplace_back(); 3671 if (parseStructInitializer(Contents.Structure, Initializers.back())) 3672 return true; 3673 } 3674 3675 if (Initializers.size() > Field.LengthOf) { 3676 return Error(Loc, "Initializer too long for field; expected at most " + 3677 std::to_string(Field.LengthOf) + " elements, got " + 3678 std::to_string(Initializers.size())); 3679 } 3680 // Default-initialize all remaining values. 
3681 llvm::append_range(Initializers, llvm::drop_begin(Contents.Initializers, 3682 Initializers.size())); 3683 3684 Initializer = FieldInitializer(std::move(Initializers), Contents.Structure); 3685 return false; 3686 } 3687 3688 bool MasmParser::parseFieldInitializer(const FieldInfo &Field, 3689 FieldInitializer &Initializer) { 3690 switch (Field.Contents.FT) { 3691 case FT_INTEGRAL: 3692 return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer); 3693 case FT_REAL: 3694 return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer); 3695 case FT_STRUCT: 3696 return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer); 3697 } 3698 llvm_unreachable("Unhandled FieldType enum"); 3699 } 3700 3701 bool MasmParser::parseStructInitializer(const StructInfo &Structure, 3702 StructInitializer &Initializer) { 3703 const AsmToken FirstToken = getTok(); 3704 3705 std::optional<AsmToken::TokenKind> EndToken; 3706 if (parseOptionalToken(AsmToken::LCurly)) { 3707 EndToken = AsmToken::RCurly; 3708 } else if (parseOptionalAngleBracketOpen()) { 3709 EndToken = AsmToken::Greater; 3710 AngleBracketDepth++; 3711 } else if (FirstToken.is(AsmToken::Identifier) && 3712 FirstToken.getString() == "?") { 3713 // ? initializer; leave EndToken uninitialized to treat as empty. 3714 if (parseToken(AsmToken::Identifier)) 3715 return true; 3716 } else { 3717 return Error(FirstToken.getLoc(), "Expected struct initializer"); 3718 } 3719 3720 auto &FieldInitializers = Initializer.FieldInitializers; 3721 size_t FieldIndex = 0; 3722 if (EndToken) { 3723 // Initialize all fields with given initializers. 3724 while (getTok().isNot(*EndToken) && FieldIndex < Structure.Fields.size()) { 3725 const FieldInfo &Field = Structure.Fields[FieldIndex++]; 3726 if (parseOptionalToken(AsmToken::Comma)) { 3727 // Empty initializer; use the default and continue. (Also, allow line 3728 // continuation.) 
3729 FieldInitializers.push_back(Field.Contents); 3730 parseOptionalToken(AsmToken::EndOfStatement); 3731 continue; 3732 } 3733 FieldInitializers.emplace_back(Field.Contents.FT); 3734 if (parseFieldInitializer(Field, FieldInitializers.back())) 3735 return true; 3736 3737 // Continue if we see a comma. (Also, allow line continuation.) 3738 SMLoc CommaLoc = getTok().getLoc(); 3739 if (!parseOptionalToken(AsmToken::Comma)) 3740 break; 3741 if (FieldIndex == Structure.Fields.size()) 3742 return Error(CommaLoc, "'" + Structure.Name + 3743 "' initializer initializes too many fields"); 3744 parseOptionalToken(AsmToken::EndOfStatement); 3745 } 3746 } 3747 // Default-initialize all remaining fields. 3748 for (const FieldInfo &Field : llvm::drop_begin(Structure.Fields, FieldIndex)) 3749 FieldInitializers.push_back(Field.Contents); 3750 3751 if (EndToken) { 3752 if (*EndToken == AsmToken::Greater) 3753 return parseAngleBracketClose(); 3754 3755 return parseToken(*EndToken); 3756 } 3757 3758 return false; 3759 } 3760 3761 bool MasmParser::parseStructInstList( 3762 const StructInfo &Structure, std::vector<StructInitializer> &Initializers, 3763 const AsmToken::TokenKind EndToken) { 3764 while (getTok().isNot(EndToken) || 3765 (EndToken == AsmToken::Greater && 3766 getTok().isNot(AsmToken::GreaterGreater))) { 3767 const AsmToken NextTok = peekTok(); 3768 if (NextTok.is(AsmToken::Identifier) && 3769 NextTok.getString().equals_insensitive("dup")) { 3770 const MCExpr *Value; 3771 if (parseExpression(Value) || parseToken(AsmToken::Identifier)) 3772 return true; 3773 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 3774 if (!MCE) 3775 return Error(Value->getLoc(), 3776 "cannot repeat value a non-constant number of times"); 3777 const int64_t Repetitions = MCE->getValue(); 3778 if (Repetitions < 0) 3779 return Error(Value->getLoc(), 3780 "cannot repeat value a negative number of times"); 3781 3782 std::vector<StructInitializer> DuplicatedValues; 3783 if 
(parseToken(AsmToken::LParen, 3784 "parentheses required for 'dup' contents") || 3785 parseStructInstList(Structure, DuplicatedValues) || parseRParen()) 3786 return true; 3787 3788 for (int i = 0; i < Repetitions; ++i) 3789 llvm::append_range(Initializers, DuplicatedValues); 3790 } else { 3791 Initializers.emplace_back(); 3792 if (parseStructInitializer(Structure, Initializers.back())) 3793 return true; 3794 } 3795 3796 // Continue if we see a comma. (Also, allow line continuation.) 3797 if (!parseOptionalToken(AsmToken::Comma)) 3798 break; 3799 parseOptionalToken(AsmToken::EndOfStatement); 3800 } 3801 3802 return false; 3803 } 3804 3805 bool MasmParser::emitFieldValue(const FieldInfo &Field, 3806 const IntFieldInfo &Contents) { 3807 // Default-initialize all values. 3808 for (const MCExpr *Value : Contents.Values) { 3809 if (emitIntValue(Value, Field.Type)) 3810 return true; 3811 } 3812 return false; 3813 } 3814 3815 bool MasmParser::emitFieldValue(const FieldInfo &Field, 3816 const RealFieldInfo &Contents) { 3817 for (const APInt &AsInt : Contents.AsIntValues) { 3818 getStreamer().emitIntValue(AsInt.getLimitedValue(), 3819 AsInt.getBitWidth() / 8); 3820 } 3821 return false; 3822 } 3823 3824 bool MasmParser::emitFieldValue(const FieldInfo &Field, 3825 const StructFieldInfo &Contents) { 3826 for (const auto &Initializer : Contents.Initializers) { 3827 size_t Index = 0, Offset = 0; 3828 for (const auto &SubField : Contents.Structure.Fields) { 3829 getStreamer().emitZeros(SubField.Offset - Offset); 3830 Offset = SubField.Offset + SubField.SizeOf; 3831 emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]); 3832 } 3833 } 3834 return false; 3835 } 3836 3837 bool MasmParser::emitFieldValue(const FieldInfo &Field) { 3838 switch (Field.Contents.FT) { 3839 case FT_INTEGRAL: 3840 return emitFieldValue(Field, Field.Contents.IntInfo); 3841 case FT_REAL: 3842 return emitFieldValue(Field, Field.Contents.RealInfo); 3843 case FT_STRUCT: 3844 return 
emitFieldValue(Field, Field.Contents.StructInfo); 3845 } 3846 llvm_unreachable("Unhandled FieldType enum"); 3847 } 3848 3849 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 3850 const IntFieldInfo &Contents, 3851 const IntFieldInfo &Initializer) { 3852 for (const auto &Value : Initializer.Values) { 3853 if (emitIntValue(Value, Field.Type)) 3854 return true; 3855 } 3856 // Default-initialize all remaining values. 3857 for (const auto &Value : 3858 llvm::drop_begin(Contents.Values, Initializer.Values.size())) { 3859 if (emitIntValue(Value, Field.Type)) 3860 return true; 3861 } 3862 return false; 3863 } 3864 3865 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 3866 const RealFieldInfo &Contents, 3867 const RealFieldInfo &Initializer) { 3868 for (const auto &AsInt : Initializer.AsIntValues) { 3869 getStreamer().emitIntValue(AsInt.getLimitedValue(), 3870 AsInt.getBitWidth() / 8); 3871 } 3872 // Default-initialize all remaining values. 3873 for (const auto &AsInt : 3874 llvm::drop_begin(Contents.AsIntValues, Initializer.AsIntValues.size())) { 3875 getStreamer().emitIntValue(AsInt.getLimitedValue(), 3876 AsInt.getBitWidth() / 8); 3877 } 3878 return false; 3879 } 3880 3881 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 3882 const StructFieldInfo &Contents, 3883 const StructFieldInfo &Initializer) { 3884 for (const auto &Init : Initializer.Initializers) { 3885 if (emitStructInitializer(Contents.Structure, Init)) 3886 return true; 3887 } 3888 // Default-initialize all remaining values. 
3889 for (const auto &Init : llvm::drop_begin(Contents.Initializers, 3890 Initializer.Initializers.size())) { 3891 if (emitStructInitializer(Contents.Structure, Init)) 3892 return true; 3893 } 3894 return false; 3895 } 3896 3897 bool MasmParser::emitFieldInitializer(const FieldInfo &Field, 3898 const FieldInitializer &Initializer) { 3899 switch (Field.Contents.FT) { 3900 case FT_INTEGRAL: 3901 return emitFieldInitializer(Field, Field.Contents.IntInfo, 3902 Initializer.IntInfo); 3903 case FT_REAL: 3904 return emitFieldInitializer(Field, Field.Contents.RealInfo, 3905 Initializer.RealInfo); 3906 case FT_STRUCT: 3907 return emitFieldInitializer(Field, Field.Contents.StructInfo, 3908 Initializer.StructInfo); 3909 } 3910 llvm_unreachable("Unhandled FieldType enum"); 3911 } 3912 3913 bool MasmParser::emitStructInitializer(const StructInfo &Structure, 3914 const StructInitializer &Initializer) { 3915 if (!Structure.Initializable) 3916 return Error(getLexer().getLoc(), 3917 "cannot initialize a value of type '" + Structure.Name + 3918 "'; 'org' was used in the type's declaration"); 3919 size_t Index = 0, Offset = 0; 3920 for (const auto &Init : Initializer.FieldInitializers) { 3921 const auto &Field = Structure.Fields[Index++]; 3922 getStreamer().emitZeros(Field.Offset - Offset); 3923 Offset = Field.Offset + Field.SizeOf; 3924 if (emitFieldInitializer(Field, Init)) 3925 return true; 3926 } 3927 // Default-initialize all remaining fields. 3928 for (const auto &Field : llvm::drop_begin( 3929 Structure.Fields, Initializer.FieldInitializers.size())) { 3930 getStreamer().emitZeros(Field.Offset - Offset); 3931 Offset = Field.Offset + Field.SizeOf; 3932 if (emitFieldValue(Field)) 3933 return true; 3934 } 3935 // Add final padding. 3936 if (Offset != Structure.Size) 3937 getStreamer().emitZeros(Structure.Size - Offset); 3938 return false; 3939 } 3940 3941 // Set data values from initializers. 
3942 bool MasmParser::emitStructValues(const StructInfo &Structure, 3943 unsigned *Count) { 3944 std::vector<StructInitializer> Initializers; 3945 if (parseStructInstList(Structure, Initializers)) 3946 return true; 3947 3948 for (const auto &Initializer : Initializers) { 3949 if (emitStructInitializer(Structure, Initializer)) 3950 return true; 3951 } 3952 3953 if (Count) 3954 *Count = Initializers.size(); 3955 return false; 3956 } 3957 3958 // Declare a field in the current struct. 3959 bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { 3960 StructInfo &OwningStruct = StructInProgress.back(); 3961 FieldInfo &Field = 3962 OwningStruct.addField(Name, FT_STRUCT, Structure.AlignmentSize); 3963 StructFieldInfo &StructInfo = Field.Contents.StructInfo; 3964 3965 StructInfo.Structure = Structure; 3966 Field.Type = Structure.Size; 3967 3968 if (parseStructInstList(Structure, StructInfo.Initializers)) 3969 return true; 3970 3971 Field.LengthOf = StructInfo.Initializers.size(); 3972 Field.SizeOf = Field.Type * Field.LengthOf; 3973 3974 const unsigned FieldEnd = Field.Offset + Field.SizeOf; 3975 if (!OwningStruct.IsUnion) { 3976 OwningStruct.NextOffset = FieldEnd; 3977 } 3978 OwningStruct.Size = std::max(OwningStruct.Size, FieldEnd); 3979 3980 return false; 3981 } 3982 3983 /// parseDirectiveStructValue 3984 /// ::= struct-id (<struct-initializer> | {struct-initializer}) 3985 /// [, (<struct-initializer> | {struct-initializer})]* 3986 bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure, 3987 StringRef Directive, SMLoc DirLoc) { 3988 if (StructInProgress.empty()) { 3989 if (emitStructValues(Structure)) 3990 return true; 3991 } else if (addStructField("", Structure)) { 3992 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 3993 } 3994 3995 return false; 3996 } 3997 3998 /// parseDirectiveNamedValue 3999 /// ::= name (byte | word | ... 
) [ expression (, expression)* ] 4000 bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure, 4001 StringRef Directive, 4002 SMLoc DirLoc, StringRef Name) { 4003 if (StructInProgress.empty()) { 4004 // Initialize named data value. 4005 MCSymbol *Sym = getContext().parseSymbol(Name); 4006 getStreamer().emitLabel(Sym); 4007 unsigned Count; 4008 if (emitStructValues(Structure, &Count)) 4009 return true; 4010 AsmTypeInfo Type; 4011 Type.Name = Structure.Name; 4012 Type.Size = Structure.Size * Count; 4013 Type.ElementSize = Structure.Size; 4014 Type.Length = Count; 4015 KnownType[Name.lower()] = Type; 4016 } else if (addStructField(Name, Structure)) { 4017 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4018 } 4019 4020 return false; 4021 } 4022 4023 /// parseDirectiveStruct 4024 /// ::= <name> (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE] 4025 /// (dataDir | generalDir | offsetDir | nestedStruct)+ 4026 /// <name> ENDS 4027 ////// dataDir = data declaration 4028 ////// offsetDir = EVEN, ORG, ALIGN 4029 bool MasmParser::parseDirectiveStruct(StringRef Directive, 4030 DirectiveKind DirKind, StringRef Name, 4031 SMLoc NameLoc) { 4032 // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS 4033 // anyway, so all field accesses must be qualified. 
4034 AsmToken NextTok = getTok(); 4035 int64_t AlignmentValue = 1; 4036 if (NextTok.isNot(AsmToken::Comma) && 4037 NextTok.isNot(AsmToken::EndOfStatement) && 4038 parseAbsoluteExpression(AlignmentValue)) { 4039 return addErrorSuffix(" in alignment value for '" + Twine(Directive) + 4040 "' directive"); 4041 } 4042 if (!isPowerOf2_64(AlignmentValue)) { 4043 return Error(NextTok.getLoc(), "alignment must be a power of two; was " + 4044 std::to_string(AlignmentValue)); 4045 } 4046 4047 StringRef Qualifier; 4048 SMLoc QualifierLoc; 4049 if (parseOptionalToken(AsmToken::Comma)) { 4050 QualifierLoc = getTok().getLoc(); 4051 if (parseIdentifier(Qualifier)) 4052 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4053 if (!Qualifier.equals_insensitive("nonunique")) 4054 return Error(QualifierLoc, "Unrecognized qualifier for '" + 4055 Twine(Directive) + 4056 "' directive; expected none or NONUNIQUE"); 4057 } 4058 4059 if (parseEOL()) 4060 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4061 4062 StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue); 4063 return false; 4064 } 4065 4066 /// parseDirectiveNestedStruct 4067 /// ::= (STRUC | STRUCT | UNION) [name] 4068 /// (dataDir | generalDir | offsetDir | nestedStruct)+ 4069 /// ENDS 4070 bool MasmParser::parseDirectiveNestedStruct(StringRef Directive, 4071 DirectiveKind DirKind) { 4072 if (StructInProgress.empty()) 4073 return TokError("missing name in top-level '" + Twine(Directive) + 4074 "' directive"); 4075 4076 StringRef Name; 4077 if (getTok().is(AsmToken::Identifier)) { 4078 Name = getTok().getIdentifier(); 4079 parseToken(AsmToken::Identifier); 4080 } 4081 if (parseEOL()) 4082 return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); 4083 4084 // Reserve space to ensure Alignment doesn't get invalidated when 4085 // StructInProgress grows. 
4086 StructInProgress.reserve(StructInProgress.size() + 1); 4087 StructInProgress.emplace_back(Name, DirKind == DK_UNION, 4088 StructInProgress.back().Alignment); 4089 return false; 4090 } 4091 4092 bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { 4093 if (StructInProgress.empty()) 4094 return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION"); 4095 if (StructInProgress.size() > 1) 4096 return Error(NameLoc, "unexpected name in nested ENDS directive"); 4097 if (StructInProgress.back().Name.compare_insensitive(Name)) 4098 return Error(NameLoc, "mismatched name in ENDS directive; expected '" + 4099 StructInProgress.back().Name + "'"); 4100 StructInfo Structure = StructInProgress.pop_back_val(); 4101 // Pad to make the structure's size divisible by the smaller of its alignment 4102 // and the size of its largest field. 4103 Structure.Size = llvm::alignTo( 4104 Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize)); 4105 Structs[Name.lower()] = Structure; 4106 4107 if (parseEOL()) 4108 return addErrorSuffix(" in ENDS directive"); 4109 4110 return false; 4111 } 4112 4113 bool MasmParser::parseDirectiveNestedEnds() { 4114 if (StructInProgress.empty()) 4115 return TokError("ENDS directive without matching STRUC/STRUCT/UNION"); 4116 if (StructInProgress.size() == 1) 4117 return TokError("missing name in top-level ENDS directive"); 4118 4119 if (parseEOL()) 4120 return addErrorSuffix(" in nested ENDS directive"); 4121 4122 StructInfo Structure = StructInProgress.pop_back_val(); 4123 // Pad to make the structure's size divisible by its alignment. 4124 Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment); 4125 4126 StructInfo &ParentStruct = StructInProgress.back(); 4127 if (Structure.Name.empty()) { 4128 // Anonymous substructures' fields are addressed as if they belong to the 4129 // parent structure - so we transfer them to the parent here. 
4130 const size_t OldFields = ParentStruct.Fields.size(); 4131 ParentStruct.Fields.insert( 4132 ParentStruct.Fields.end(), 4133 std::make_move_iterator(Structure.Fields.begin()), 4134 std::make_move_iterator(Structure.Fields.end())); 4135 for (const auto &FieldByName : Structure.FieldsByName) { 4136 ParentStruct.FieldsByName[FieldByName.getKey()] = 4137 FieldByName.getValue() + OldFields; 4138 } 4139 4140 unsigned FirstFieldOffset = 0; 4141 if (!Structure.Fields.empty() && !ParentStruct.IsUnion) { 4142 FirstFieldOffset = llvm::alignTo( 4143 ParentStruct.NextOffset, 4144 std::min(ParentStruct.Alignment, Structure.AlignmentSize)); 4145 } 4146 4147 if (ParentStruct.IsUnion) { 4148 ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size); 4149 } else { 4150 for (auto &Field : llvm::drop_begin(ParentStruct.Fields, OldFields)) 4151 Field.Offset += FirstFieldOffset; 4152 4153 const unsigned StructureEnd = FirstFieldOffset + Structure.Size; 4154 if (!ParentStruct.IsUnion) { 4155 ParentStruct.NextOffset = StructureEnd; 4156 } 4157 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); 4158 } 4159 } else { 4160 FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT, 4161 Structure.AlignmentSize); 4162 StructFieldInfo &StructInfo = Field.Contents.StructInfo; 4163 Field.Type = Structure.Size; 4164 Field.LengthOf = 1; 4165 Field.SizeOf = Structure.Size; 4166 4167 const unsigned StructureEnd = Field.Offset + Field.SizeOf; 4168 if (!ParentStruct.IsUnion) { 4169 ParentStruct.NextOffset = StructureEnd; 4170 } 4171 ParentStruct.Size = std::max(ParentStruct.Size, StructureEnd); 4172 4173 StructInfo.Structure = Structure; 4174 StructInfo.Initializers.emplace_back(); 4175 auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers; 4176 for (const auto &SubField : Structure.Fields) { 4177 FieldInitializers.push_back(SubField.Contents); 4178 } 4179 } 4180 4181 return false; 4182 } 4183 4184 /// parseDirectiveOrg 4185 /// ::= org expression 4186 
bool MasmParser::parseDirectiveOrg() { 4187 const MCExpr *Offset; 4188 SMLoc OffsetLoc = Lexer.getLoc(); 4189 if (checkForValidSection() || parseExpression(Offset)) 4190 return true; 4191 if (parseEOL()) 4192 return addErrorSuffix(" in 'org' directive"); 4193 4194 if (StructInProgress.empty()) { 4195 // Not in a struct; change the offset for the next instruction or data 4196 if (checkForValidSection()) 4197 return addErrorSuffix(" in 'org' directive"); 4198 4199 getStreamer().emitValueToOffset(Offset, 0, OffsetLoc); 4200 } else { 4201 // Offset the next field of this struct 4202 StructInfo &Structure = StructInProgress.back(); 4203 int64_t OffsetRes; 4204 if (!Offset->evaluateAsAbsolute(OffsetRes, getStreamer().getAssemblerPtr())) 4205 return Error(OffsetLoc, 4206 "expected absolute expression in 'org' directive"); 4207 if (OffsetRes < 0) 4208 return Error( 4209 OffsetLoc, 4210 "expected non-negative value in struct's 'org' directive; was " + 4211 std::to_string(OffsetRes)); 4212 Structure.NextOffset = static_cast<unsigned>(OffsetRes); 4213 4214 // ORG-affected structures cannot be initialized 4215 Structure.Initializable = false; 4216 } 4217 4218 return false; 4219 } 4220 4221 bool MasmParser::emitAlignTo(int64_t Alignment) { 4222 if (StructInProgress.empty()) { 4223 // Not in a struct; align the next instruction or data 4224 if (checkForValidSection()) 4225 return true; 4226 4227 // Check whether we should use optimal code alignment for this align 4228 // directive. 4229 const MCSection *Section = getStreamer().getCurrentSectionOnly(); 4230 assert(Section && "must have section to emit alignment"); 4231 if (Section->useCodeAlign()) { 4232 getStreamer().emitCodeAlignment(Align(Alignment), 4233 &getTargetParser().getSTI(), 4234 /*MaxBytesToEmit=*/0); 4235 } else { 4236 // FIXME: Target specific behavior about how the "extra" bytes are filled. 
4237 getStreamer().emitValueToAlignment(Align(Alignment), /*Value=*/0, 4238 /*ValueSize=*/1, 4239 /*MaxBytesToEmit=*/0); 4240 } 4241 } else { 4242 // Align the next field of this struct 4243 StructInfo &Structure = StructInProgress.back(); 4244 Structure.NextOffset = llvm::alignTo(Structure.NextOffset, Alignment); 4245 } 4246 4247 return false; 4248 } 4249 4250 /// parseDirectiveAlign 4251 /// ::= align expression 4252 bool MasmParser::parseDirectiveAlign() { 4253 SMLoc AlignmentLoc = getLexer().getLoc(); 4254 int64_t Alignment; 4255 4256 // Ignore empty 'align' directives. 4257 if (getTok().is(AsmToken::EndOfStatement)) { 4258 return Warning(AlignmentLoc, 4259 "align directive with no operand is ignored") && 4260 parseEOL(); 4261 } 4262 if (parseAbsoluteExpression(Alignment) || parseEOL()) 4263 return addErrorSuffix(" in align directive"); 4264 4265 // Always emit an alignment here even if we throw an error. 4266 bool ReturnVal = false; 4267 4268 // Reject alignments that aren't either a power of two or zero, for ML.exe 4269 // compatibility. Alignment of zero is silently rounded up to one. 
4270 if (Alignment == 0) 4271 Alignment = 1; 4272 if (!isPowerOf2_64(Alignment)) 4273 ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2; was " + 4274 std::to_string(Alignment)); 4275 4276 if (emitAlignTo(Alignment)) 4277 ReturnVal |= addErrorSuffix(" in align directive"); 4278 4279 return ReturnVal; 4280 } 4281 4282 /// parseDirectiveEven 4283 /// ::= even 4284 bool MasmParser::parseDirectiveEven() { 4285 if (parseEOL() || emitAlignTo(2)) 4286 return addErrorSuffix(" in even directive"); 4287 4288 return false; 4289 } 4290 4291 /// parseDirectiveMacro 4292 /// ::= name macro [parameters] 4293 /// ["LOCAL" identifiers] 4294 /// parameters ::= parameter [, parameter]* 4295 /// parameter ::= name ":" qualifier 4296 /// qualifier ::= "req" | "vararg" | "=" macro_argument 4297 bool MasmParser::parseDirectiveMacro(StringRef Name, SMLoc NameLoc) { 4298 MCAsmMacroParameters Parameters; 4299 while (getLexer().isNot(AsmToken::EndOfStatement)) { 4300 if (!Parameters.empty() && Parameters.back().Vararg) 4301 return Error(Lexer.getLoc(), 4302 "Vararg parameter '" + Parameters.back().Name + 4303 "' should be last in the list of parameters"); 4304 4305 MCAsmMacroParameter Parameter; 4306 if (parseIdentifier(Parameter.Name)) 4307 return TokError("expected identifier in 'macro' directive"); 4308 4309 // Emit an error if two (or more) named parameters share the same name. 
4310 for (const MCAsmMacroParameter& CurrParam : Parameters) 4311 if (CurrParam.Name.equals_insensitive(Parameter.Name)) 4312 return TokError("macro '" + Name + "' has multiple parameters" 4313 " named '" + Parameter.Name + "'"); 4314 4315 if (Lexer.is(AsmToken::Colon)) { 4316 Lex(); // consume ':' 4317 4318 if (parseOptionalToken(AsmToken::Equal)) { 4319 // Default value 4320 SMLoc ParamLoc; 4321 4322 ParamLoc = Lexer.getLoc(); 4323 if (parseMacroArgument(nullptr, Parameter.Value)) 4324 return true; 4325 } else { 4326 SMLoc QualLoc; 4327 StringRef Qualifier; 4328 4329 QualLoc = Lexer.getLoc(); 4330 if (parseIdentifier(Qualifier)) 4331 return Error(QualLoc, "missing parameter qualifier for " 4332 "'" + 4333 Parameter.Name + "' in macro '" + Name + 4334 "'"); 4335 4336 if (Qualifier.equals_insensitive("req")) 4337 Parameter.Required = true; 4338 else if (Qualifier.equals_insensitive("vararg")) 4339 Parameter.Vararg = true; 4340 else 4341 return Error(QualLoc, 4342 Qualifier + " is not a valid parameter qualifier for '" + 4343 Parameter.Name + "' in macro '" + Name + "'"); 4344 } 4345 } 4346 4347 Parameters.push_back(std::move(Parameter)); 4348 4349 if (getLexer().is(AsmToken::Comma)) 4350 Lex(); 4351 } 4352 4353 // Eat just the end of statement. 4354 Lexer.Lex(); 4355 4356 std::vector<std::string> Locals; 4357 if (getTok().is(AsmToken::Identifier) && 4358 getTok().getIdentifier().equals_insensitive("local")) { 4359 Lex(); // Eat the LOCAL directive. 4360 4361 StringRef ID; 4362 while (true) { 4363 if (parseIdentifier(ID)) 4364 return true; 4365 Locals.push_back(ID.lower()); 4366 4367 // If we see a comma, continue (and allow line continuation). 4368 if (!parseOptionalToken(AsmToken::Comma)) 4369 break; 4370 parseOptionalToken(AsmToken::EndOfStatement); 4371 } 4372 } 4373 4374 // Consuming deferred text, so use Lexer.Lex to ignore Lexing Errors. 
4375 AsmToken EndToken, StartToken = getTok(); 4376 unsigned MacroDepth = 0; 4377 bool IsMacroFunction = false; 4378 // Lex the macro definition. 4379 while (true) { 4380 // Ignore Lexing errors in macros. 4381 while (Lexer.is(AsmToken::Error)) { 4382 Lexer.Lex(); 4383 } 4384 4385 // Check whether we have reached the end of the file. 4386 if (getLexer().is(AsmToken::Eof)) 4387 return Error(NameLoc, "no matching 'endm' in definition"); 4388 4389 // Otherwise, check whether we have reached the 'endm'... and determine if 4390 // this is a macro function. 4391 if (getLexer().is(AsmToken::Identifier)) { 4392 if (getTok().getIdentifier().equals_insensitive("endm")) { 4393 if (MacroDepth == 0) { // Outermost macro. 4394 EndToken = getTok(); 4395 Lexer.Lex(); 4396 if (getLexer().isNot(AsmToken::EndOfStatement)) 4397 return TokError("unexpected token in '" + EndToken.getIdentifier() + 4398 "' directive"); 4399 break; 4400 } else { 4401 // Otherwise we just found the end of an inner macro. 4402 --MacroDepth; 4403 } 4404 } else if (getTok().getIdentifier().equals_insensitive("exitm")) { 4405 if (MacroDepth == 0 && peekTok().isNot(AsmToken::EndOfStatement)) { 4406 IsMacroFunction = true; 4407 } 4408 } else if (isMacroLikeDirective()) { 4409 // We allow nested macros. Those aren't instantiated until the 4410 // outermost macro is expanded so just ignore them for now. 4411 ++MacroDepth; 4412 } 4413 } 4414 4415 // Otherwise, scan til the end of the statement. 
4416 eatToEndOfStatement(); 4417 } 4418 4419 if (getContext().lookupMacro(Name.lower())) { 4420 return Error(NameLoc, "macro '" + Name + "' is already defined"); 4421 } 4422 4423 const char *BodyStart = StartToken.getLoc().getPointer(); 4424 const char *BodyEnd = EndToken.getLoc().getPointer(); 4425 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); 4426 MCAsmMacro Macro(Name, Body, std::move(Parameters), std::move(Locals), 4427 IsMacroFunction); 4428 DEBUG_WITH_TYPE("asm-macros", dbgs() << "Defining new macro:\n"; 4429 Macro.dump()); 4430 getContext().defineMacro(Name.lower(), std::move(Macro)); 4431 return false; 4432 } 4433 4434 /// parseDirectiveExitMacro 4435 /// ::= "exitm" [textitem] 4436 bool MasmParser::parseDirectiveExitMacro(SMLoc DirectiveLoc, 4437 StringRef Directive, 4438 std::string &Value) { 4439 SMLoc EndLoc = getTok().getLoc(); 4440 if (getTok().isNot(AsmToken::EndOfStatement) && parseTextItem(Value)) 4441 return Error(EndLoc, 4442 "unable to parse text item in '" + Directive + "' directive"); 4443 eatToEndOfStatement(); 4444 4445 if (!isInsideMacroInstantiation()) 4446 return TokError("unexpected '" + Directive + "' in file, " 4447 "no current macro definition"); 4448 4449 // Exit all conditionals that are active in the current macro. 4450 while (TheCondStack.size() != ActiveMacros.back()->CondStackDepth) { 4451 TheCondState = TheCondStack.back(); 4452 TheCondStack.pop_back(); 4453 } 4454 4455 handleMacroExit(); 4456 return false; 4457 } 4458 4459 /// parseDirectiveEndMacro 4460 /// ::= endm 4461 bool MasmParser::parseDirectiveEndMacro(StringRef Directive) { 4462 if (getLexer().isNot(AsmToken::EndOfStatement)) 4463 return TokError("unexpected token in '" + Directive + "' directive"); 4464 4465 // If we are inside a macro instantiation, terminate the current 4466 // instantiation. 
4467 if (isInsideMacroInstantiation()) { 4468 handleMacroExit(); 4469 return false; 4470 } 4471 4472 // Otherwise, this .endmacro is a stray entry in the file; well formed 4473 // .endmacro directives are handled during the macro definition parsing. 4474 return TokError("unexpected '" + Directive + "' in file, " 4475 "no current macro definition"); 4476 } 4477 4478 /// parseDirectivePurgeMacro 4479 /// ::= purge identifier ( , identifier )* 4480 bool MasmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) { 4481 StringRef Name; 4482 while (true) { 4483 SMLoc NameLoc; 4484 if (parseTokenLoc(NameLoc) || 4485 check(parseIdentifier(Name), NameLoc, 4486 "expected identifier in 'purge' directive")) 4487 return true; 4488 4489 DEBUG_WITH_TYPE("asm-macros", dbgs() 4490 << "Un-defining macro: " << Name << "\n"); 4491 if (!getContext().lookupMacro(Name.lower())) 4492 return Error(NameLoc, "macro '" + Name + "' is not defined"); 4493 getContext().undefineMacro(Name.lower()); 4494 4495 if (!parseOptionalToken(AsmToken::Comma)) 4496 break; 4497 parseOptionalToken(AsmToken::EndOfStatement); 4498 } 4499 4500 return false; 4501 } 4502 4503 bool MasmParser::parseDirectiveExtern() { 4504 // .extern is the default - but we still need to take any provided type info. 
4505 auto parseOp = [&]() -> bool { 4506 MCSymbol *Sym; 4507 SMLoc NameLoc = getTok().getLoc(); 4508 if (parseSymbol(Sym)) 4509 return Error(NameLoc, "expected name"); 4510 if (parseToken(AsmToken::Colon)) 4511 return true; 4512 4513 StringRef TypeName; 4514 SMLoc TypeLoc = getTok().getLoc(); 4515 if (parseIdentifier(TypeName)) 4516 return Error(TypeLoc, "expected type"); 4517 if (!TypeName.equals_insensitive("proc")) { 4518 AsmTypeInfo Type; 4519 if (lookUpType(TypeName, Type)) 4520 return Error(TypeLoc, "unrecognized type"); 4521 KnownType[Sym->getName().lower()] = Type; 4522 } 4523 4524 Sym->setExternal(true); 4525 getStreamer().emitSymbolAttribute(Sym, MCSA_Extern); 4526 4527 return false; 4528 }; 4529 4530 if (parseMany(parseOp)) 4531 return addErrorSuffix(" in directive 'extern'"); 4532 return false; 4533 } 4534 4535 /// parseDirectiveSymbolAttribute 4536 /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] 4537 bool MasmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { 4538 auto parseOp = [&]() -> bool { 4539 SMLoc Loc = getTok().getLoc(); 4540 MCSymbol *Sym; 4541 if (parseSymbol(Sym)) 4542 return Error(Loc, "expected identifier"); 4543 4544 // Assembler local symbols don't make any sense here. Complain loudly. 
4545 if (Sym->isTemporary()) 4546 return Error(Loc, "non-local symbol required"); 4547 4548 if (!getStreamer().emitSymbolAttribute(Sym, Attr)) 4549 return Error(Loc, "unable to emit symbol attribute"); 4550 return false; 4551 }; 4552 4553 if (parseMany(parseOp)) 4554 return addErrorSuffix(" in directive"); 4555 return false; 4556 } 4557 4558 /// parseDirectiveComm 4559 /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] 4560 bool MasmParser::parseDirectiveComm(bool IsLocal) { 4561 if (checkForValidSection()) 4562 return true; 4563 4564 SMLoc IDLoc = getLexer().getLoc(); 4565 MCSymbol *Sym; 4566 if (parseSymbol(Sym)) 4567 return TokError("expected identifier in directive"); 4568 4569 if (getLexer().isNot(AsmToken::Comma)) 4570 return TokError("unexpected token in directive"); 4571 Lex(); 4572 4573 int64_t Size; 4574 SMLoc SizeLoc = getLexer().getLoc(); 4575 if (parseAbsoluteExpression(Size)) 4576 return true; 4577 4578 int64_t Pow2Alignment = 0; 4579 SMLoc Pow2AlignmentLoc; 4580 if (getLexer().is(AsmToken::Comma)) { 4581 Lex(); 4582 Pow2AlignmentLoc = getLexer().getLoc(); 4583 if (parseAbsoluteExpression(Pow2Alignment)) 4584 return true; 4585 4586 LCOMM::LCOMMType LCOMM = Lexer.getMAI().getLCOMMDirectiveAlignmentType(); 4587 if (IsLocal && LCOMM == LCOMM::NoAlignment) 4588 return Error(Pow2AlignmentLoc, "alignment not supported on this target"); 4589 4590 // If this target takes alignments in bytes (not log) validate and convert. 4591 if ((!IsLocal && Lexer.getMAI().getCOMMDirectiveAlignmentIsInBytes()) || 4592 (IsLocal && LCOMM == LCOMM::ByteAlignment)) { 4593 if (!isPowerOf2_64(Pow2Alignment)) 4594 return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); 4595 Pow2Alignment = Log2_64(Pow2Alignment); 4596 } 4597 } 4598 4599 if (parseEOL()) 4600 return true; 4601 4602 // NOTE: a size of zero for a .comm should create a undefined symbol 4603 // but a size of .lcomm creates a bss symbol of size zero. 
4604 if (Size < 0) 4605 return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " 4606 "be less than zero"); 4607 4608 // NOTE: The alignment in the directive is a power of 2 value, the assembler 4609 // may internally end up wanting an alignment in bytes. 4610 // FIXME: Diagnose overflow. 4611 if (Pow2Alignment < 0) 4612 return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " 4613 "alignment, can't be less than zero"); 4614 4615 Sym->redefineIfPossible(); 4616 if (!Sym->isUndefined()) 4617 return Error(IDLoc, "invalid symbol redefinition"); 4618 4619 // Create the Symbol as a common or local common with Size and Pow2Alignment. 4620 if (IsLocal) { 4621 getStreamer().emitLocalCommonSymbol(Sym, Size, 4622 Align(1ULL << Pow2Alignment)); 4623 return false; 4624 } 4625 4626 getStreamer().emitCommonSymbol(Sym, Size, Align(1ULL << Pow2Alignment)); 4627 return false; 4628 } 4629 4630 /// parseDirectiveComment 4631 /// ::= comment delimiter [[text]] 4632 /// [[text]] 4633 /// [[text]] delimiter [[text]] 4634 bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) { 4635 std::string FirstLine = parseStringTo(AsmToken::EndOfStatement); 4636 size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A "); 4637 assert(DelimiterEnd != std::string::npos); 4638 StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd); 4639 if (Delimiter.empty()) 4640 return Error(DirectiveLoc, "no delimiter in 'comment' directive"); 4641 do { 4642 if (getTok().is(AsmToken::Eof)) 4643 return Error(DirectiveLoc, "unmatched delimiter in 'comment' directive"); 4644 Lex(); // eat end of statement 4645 } while ( 4646 !StringRef(parseStringTo(AsmToken::EndOfStatement)).contains(Delimiter)); 4647 return parseEOL(); 4648 } 4649 4650 /// parseDirectiveInclude 4651 /// ::= include <filename> 4652 /// | include filename 4653 bool MasmParser::parseDirectiveInclude() { 4654 // Allow the strings to have escaped octal character sequence. 
4655 std::string Filename; 4656 SMLoc IncludeLoc = getTok().getLoc(); 4657 4658 if (parseAngleBracketString(Filename)) 4659 Filename = parseStringTo(AsmToken::EndOfStatement); 4660 if (check(Filename.empty(), "missing filename in 'include' directive") || 4661 check(getTok().isNot(AsmToken::EndOfStatement), 4662 "unexpected token in 'include' directive") || 4663 // Attempt to switch the lexer to the included file before consuming the 4664 // end of statement to avoid losing it when we switch. 4665 check(enterIncludeFile(Filename), IncludeLoc, 4666 "Could not find include file '" + Filename + "'")) 4667 return true; 4668 4669 return false; 4670 } 4671 4672 /// parseDirectiveIf 4673 /// ::= .if{,eq,ge,gt,le,lt,ne} expression 4674 bool MasmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { 4675 TheCondStack.push_back(TheCondState); 4676 TheCondState.TheCond = AsmCond::IfCond; 4677 if (TheCondState.Ignore) { 4678 eatToEndOfStatement(); 4679 } else { 4680 int64_t ExprValue; 4681 if (parseAbsoluteExpression(ExprValue) || parseEOL()) 4682 return true; 4683 4684 switch (DirKind) { 4685 default: 4686 llvm_unreachable("unsupported directive"); 4687 case DK_IF: 4688 break; 4689 case DK_IFE: 4690 ExprValue = ExprValue == 0; 4691 break; 4692 } 4693 4694 TheCondState.CondMet = ExprValue; 4695 TheCondState.Ignore = !TheCondState.CondMet; 4696 } 4697 4698 return false; 4699 } 4700 4701 /// parseDirectiveIfb 4702 /// ::= .ifb textitem 4703 bool MasmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 4704 TheCondStack.push_back(TheCondState); 4705 TheCondState.TheCond = AsmCond::IfCond; 4706 4707 if (TheCondState.Ignore) { 4708 eatToEndOfStatement(); 4709 } else { 4710 std::string Str; 4711 if (parseTextItem(Str)) 4712 return TokError("expected text item parameter for 'ifb' directive"); 4713 4714 if (parseEOL()) 4715 return true; 4716 4717 TheCondState.CondMet = ExpectBlank == Str.empty(); 4718 TheCondState.Ignore = !TheCondState.CondMet; 4719 } 4720 
4721 return false; 4722 } 4723 4724 /// parseDirectiveIfidn 4725 /// ::= ifidn textitem, textitem 4726 bool MasmParser::parseDirectiveIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 4727 bool CaseInsensitive) { 4728 std::string String1, String2; 4729 4730 if (parseTextItem(String1)) { 4731 if (ExpectEqual) 4732 return TokError("expected text item parameter for 'ifidn' directive"); 4733 return TokError("expected text item parameter for 'ifdif' directive"); 4734 } 4735 4736 if (Lexer.isNot(AsmToken::Comma)) { 4737 if (ExpectEqual) 4738 return TokError( 4739 "expected comma after first string for 'ifidn' directive"); 4740 return TokError("expected comma after first string for 'ifdif' directive"); 4741 } 4742 Lex(); 4743 4744 if (parseTextItem(String2)) { 4745 if (ExpectEqual) 4746 return TokError("expected text item parameter for 'ifidn' directive"); 4747 return TokError("expected text item parameter for 'ifdif' directive"); 4748 } 4749 4750 TheCondStack.push_back(TheCondState); 4751 TheCondState.TheCond = AsmCond::IfCond; 4752 if (CaseInsensitive) 4753 TheCondState.CondMet = 4754 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 4755 else 4756 TheCondState.CondMet = ExpectEqual == (String1 == String2); 4757 TheCondState.Ignore = !TheCondState.CondMet; 4758 4759 return false; 4760 } 4761 4762 /// parseDirectiveIfdef 4763 /// ::= ifdef symbol 4764 /// | ifdef variable 4765 bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { 4766 TheCondStack.push_back(TheCondState); 4767 TheCondState.TheCond = AsmCond::IfCond; 4768 4769 if (TheCondState.Ignore) { 4770 eatToEndOfStatement(); 4771 } else { 4772 bool is_defined = false; 4773 MCRegister Reg; 4774 SMLoc StartLoc, EndLoc; 4775 is_defined = 4776 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess(); 4777 if (!is_defined) { 4778 StringRef Name; 4779 if (check(parseIdentifier(Name), "expected identifier after 'ifdef'") || 4780 parseEOL()) 4781 return true; 4782 4783 if 
(BuiltinSymbolMap.contains(Name.lower())) { 4784 is_defined = true; 4785 } else if (Variables.contains(Name.lower())) { 4786 is_defined = true; 4787 } else { 4788 MCSymbol *Sym = getContext().lookupSymbol(Name.lower()); 4789 is_defined = (Sym && !Sym->isUndefined()); 4790 } 4791 } 4792 4793 TheCondState.CondMet = (is_defined == expect_defined); 4794 TheCondState.Ignore = !TheCondState.CondMet; 4795 } 4796 4797 return false; 4798 } 4799 4800 /// parseDirectiveElseIf 4801 /// ::= elseif expression 4802 bool MasmParser::parseDirectiveElseIf(SMLoc DirectiveLoc, 4803 DirectiveKind DirKind) { 4804 if (TheCondState.TheCond != AsmCond::IfCond && 4805 TheCondState.TheCond != AsmCond::ElseIfCond) 4806 return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an" 4807 " .if or an .elseif"); 4808 TheCondState.TheCond = AsmCond::ElseIfCond; 4809 4810 bool LastIgnoreState = false; 4811 if (!TheCondStack.empty()) 4812 LastIgnoreState = TheCondStack.back().Ignore; 4813 if (LastIgnoreState || TheCondState.CondMet) { 4814 TheCondState.Ignore = true; 4815 eatToEndOfStatement(); 4816 } else { 4817 int64_t ExprValue; 4818 if (parseAbsoluteExpression(ExprValue)) 4819 return true; 4820 4821 if (parseEOL()) 4822 return true; 4823 4824 switch (DirKind) { 4825 default: 4826 llvm_unreachable("unsupported directive"); 4827 case DK_ELSEIF: 4828 break; 4829 case DK_ELSEIFE: 4830 ExprValue = ExprValue == 0; 4831 break; 4832 } 4833 4834 TheCondState.CondMet = ExprValue; 4835 TheCondState.Ignore = !TheCondState.CondMet; 4836 } 4837 4838 return false; 4839 } 4840 4841 /// parseDirectiveElseIfb 4842 /// ::= elseifb textitem 4843 bool MasmParser::parseDirectiveElseIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 4844 if (TheCondState.TheCond != AsmCond::IfCond && 4845 TheCondState.TheCond != AsmCond::ElseIfCond) 4846 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 4847 " if or an elseif"); 4848 TheCondState.TheCond = AsmCond::ElseIfCond; 4849 4850 bool LastIgnoreState 
= false; 4851 if (!TheCondStack.empty()) 4852 LastIgnoreState = TheCondStack.back().Ignore; 4853 if (LastIgnoreState || TheCondState.CondMet) { 4854 TheCondState.Ignore = true; 4855 eatToEndOfStatement(); 4856 } else { 4857 std::string Str; 4858 if (parseTextItem(Str)) { 4859 if (ExpectBlank) 4860 return TokError("expected text item parameter for 'elseifb' directive"); 4861 return TokError("expected text item parameter for 'elseifnb' directive"); 4862 } 4863 4864 if (parseEOL()) 4865 return true; 4866 4867 TheCondState.CondMet = ExpectBlank == Str.empty(); 4868 TheCondState.Ignore = !TheCondState.CondMet; 4869 } 4870 4871 return false; 4872 } 4873 4874 /// parseDirectiveElseIfdef 4875 /// ::= elseifdef symbol 4876 /// | elseifdef variable 4877 bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc, 4878 bool expect_defined) { 4879 if (TheCondState.TheCond != AsmCond::IfCond && 4880 TheCondState.TheCond != AsmCond::ElseIfCond) 4881 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 4882 " if or an elseif"); 4883 TheCondState.TheCond = AsmCond::ElseIfCond; 4884 4885 bool LastIgnoreState = false; 4886 if (!TheCondStack.empty()) 4887 LastIgnoreState = TheCondStack.back().Ignore; 4888 if (LastIgnoreState || TheCondState.CondMet) { 4889 TheCondState.Ignore = true; 4890 eatToEndOfStatement(); 4891 } else { 4892 bool is_defined = false; 4893 MCRegister Reg; 4894 SMLoc StartLoc, EndLoc; 4895 is_defined = 4896 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess(); 4897 if (!is_defined) { 4898 StringRef Name; 4899 if (check(parseIdentifier(Name), 4900 "expected identifier after 'elseifdef'") || 4901 parseEOL()) 4902 return true; 4903 4904 if (BuiltinSymbolMap.contains(Name.lower())) { 4905 is_defined = true; 4906 } else if (Variables.contains(Name.lower())) { 4907 is_defined = true; 4908 } else { 4909 MCSymbol *Sym = getContext().lookupSymbol(Name); 4910 is_defined = (Sym && !Sym->isUndefined()); 4911 } 4912 } 4913 4914 
TheCondState.CondMet = (is_defined == expect_defined); 4915 TheCondState.Ignore = !TheCondState.CondMet; 4916 } 4917 4918 return false; 4919 } 4920 4921 /// parseDirectiveElseIfidn 4922 /// ::= elseifidn textitem, textitem 4923 bool MasmParser::parseDirectiveElseIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 4924 bool CaseInsensitive) { 4925 if (TheCondState.TheCond != AsmCond::IfCond && 4926 TheCondState.TheCond != AsmCond::ElseIfCond) 4927 return Error(DirectiveLoc, "Encountered an elseif that doesn't follow an" 4928 " if or an elseif"); 4929 TheCondState.TheCond = AsmCond::ElseIfCond; 4930 4931 bool LastIgnoreState = false; 4932 if (!TheCondStack.empty()) 4933 LastIgnoreState = TheCondStack.back().Ignore; 4934 if (LastIgnoreState || TheCondState.CondMet) { 4935 TheCondState.Ignore = true; 4936 eatToEndOfStatement(); 4937 } else { 4938 std::string String1, String2; 4939 4940 if (parseTextItem(String1)) { 4941 if (ExpectEqual) 4942 return TokError( 4943 "expected text item parameter for 'elseifidn' directive"); 4944 return TokError("expected text item parameter for 'elseifdif' directive"); 4945 } 4946 4947 if (Lexer.isNot(AsmToken::Comma)) { 4948 if (ExpectEqual) 4949 return TokError( 4950 "expected comma after first string for 'elseifidn' directive"); 4951 return TokError( 4952 "expected comma after first string for 'elseifdif' directive"); 4953 } 4954 Lex(); 4955 4956 if (parseTextItem(String2)) { 4957 if (ExpectEqual) 4958 return TokError( 4959 "expected text item parameter for 'elseifidn' directive"); 4960 return TokError("expected text item parameter for 'elseifdif' directive"); 4961 } 4962 4963 if (CaseInsensitive) 4964 TheCondState.CondMet = 4965 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 4966 else 4967 TheCondState.CondMet = ExpectEqual == (String1 == String2); 4968 TheCondState.Ignore = !TheCondState.CondMet; 4969 } 4970 4971 return false; 4972 } 4973 4974 /// parseDirectiveElse 4975 /// ::= else 4976 bool 
MasmParser::parseDirectiveElse(SMLoc DirectiveLoc) { 4977 if (parseEOL()) 4978 return true; 4979 4980 if (TheCondState.TheCond != AsmCond::IfCond && 4981 TheCondState.TheCond != AsmCond::ElseIfCond) 4982 return Error(DirectiveLoc, "Encountered an else that doesn't follow an if" 4983 " or an elseif"); 4984 TheCondState.TheCond = AsmCond::ElseCond; 4985 bool LastIgnoreState = false; 4986 if (!TheCondStack.empty()) 4987 LastIgnoreState = TheCondStack.back().Ignore; 4988 if (LastIgnoreState || TheCondState.CondMet) 4989 TheCondState.Ignore = true; 4990 else 4991 TheCondState.Ignore = false; 4992 4993 return false; 4994 } 4995 4996 /// parseDirectiveEnd 4997 /// ::= end 4998 bool MasmParser::parseDirectiveEnd(SMLoc DirectiveLoc) { 4999 if (parseEOL()) 5000 return true; 5001 5002 while (Lexer.isNot(AsmToken::Eof)) 5003 Lexer.Lex(); 5004 5005 return false; 5006 } 5007 5008 /// parseDirectiveError 5009 /// ::= .err [message] 5010 bool MasmParser::parseDirectiveError(SMLoc DirectiveLoc) { 5011 if (!TheCondStack.empty()) { 5012 if (TheCondStack.back().Ignore) { 5013 eatToEndOfStatement(); 5014 return false; 5015 } 5016 } 5017 5018 std::string Message = ".err directive invoked in source file"; 5019 if (Lexer.isNot(AsmToken::EndOfStatement)) 5020 Message = parseStringTo(AsmToken::EndOfStatement); 5021 Lex(); 5022 5023 return Error(DirectiveLoc, Message); 5024 } 5025 5026 /// parseDirectiveErrorIfb 5027 /// ::= .errb textitem[, message] 5028 bool MasmParser::parseDirectiveErrorIfb(SMLoc DirectiveLoc, bool ExpectBlank) { 5029 if (!TheCondStack.empty()) { 5030 if (TheCondStack.back().Ignore) { 5031 eatToEndOfStatement(); 5032 return false; 5033 } 5034 } 5035 5036 std::string Text; 5037 if (parseTextItem(Text)) 5038 return Error(getTok().getLoc(), "missing text item in '.errb' directive"); 5039 5040 std::string Message = ".errb directive invoked in source file"; 5041 if (Lexer.isNot(AsmToken::EndOfStatement)) { 5042 if (parseToken(AsmToken::Comma)) 5043 return addErrorSuffix(" in 
'.errb' directive"); 5044 Message = parseStringTo(AsmToken::EndOfStatement); 5045 } 5046 Lex(); 5047 5048 if (Text.empty() == ExpectBlank) 5049 return Error(DirectiveLoc, Message); 5050 return false; 5051 } 5052 5053 /// parseDirectiveErrorIfdef 5054 /// ::= .errdef name[, message] 5055 bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc, 5056 bool ExpectDefined) { 5057 if (!TheCondStack.empty()) { 5058 if (TheCondStack.back().Ignore) { 5059 eatToEndOfStatement(); 5060 return false; 5061 } 5062 } 5063 5064 bool IsDefined = false; 5065 MCRegister Reg; 5066 SMLoc StartLoc, EndLoc; 5067 IsDefined = 5068 getTargetParser().tryParseRegister(Reg, StartLoc, EndLoc).isSuccess(); 5069 if (!IsDefined) { 5070 StringRef Name; 5071 if (check(parseIdentifier(Name), "expected identifier after '.errdef'")) 5072 return true; 5073 5074 if (BuiltinSymbolMap.contains(Name.lower())) { 5075 IsDefined = true; 5076 } else if (Variables.contains(Name.lower())) { 5077 IsDefined = true; 5078 } else { 5079 MCSymbol *Sym = getContext().lookupSymbol(Name); 5080 IsDefined = (Sym && !Sym->isUndefined()); 5081 } 5082 } 5083 5084 std::string Message = ".errdef directive invoked in source file"; 5085 if (Lexer.isNot(AsmToken::EndOfStatement)) { 5086 if (parseToken(AsmToken::Comma)) 5087 return addErrorSuffix(" in '.errdef' directive"); 5088 Message = parseStringTo(AsmToken::EndOfStatement); 5089 } 5090 Lex(); 5091 5092 if (IsDefined == ExpectDefined) 5093 return Error(DirectiveLoc, Message); 5094 return false; 5095 } 5096 5097 /// parseDirectiveErrorIfidn 5098 /// ::= .erridn textitem, textitem[, message] 5099 bool MasmParser::parseDirectiveErrorIfidn(SMLoc DirectiveLoc, bool ExpectEqual, 5100 bool CaseInsensitive) { 5101 if (!TheCondStack.empty()) { 5102 if (TheCondStack.back().Ignore) { 5103 eatToEndOfStatement(); 5104 return false; 5105 } 5106 } 5107 5108 std::string String1, String2; 5109 5110 if (parseTextItem(String1)) { 5111 if (ExpectEqual) 5112 return TokError("expected string 
parameter for '.erridn' directive"); 5113 return TokError("expected string parameter for '.errdif' directive"); 5114 } 5115 5116 if (Lexer.isNot(AsmToken::Comma)) { 5117 if (ExpectEqual) 5118 return TokError( 5119 "expected comma after first string for '.erridn' directive"); 5120 return TokError( 5121 "expected comma after first string for '.errdif' directive"); 5122 } 5123 Lex(); 5124 5125 if (parseTextItem(String2)) { 5126 if (ExpectEqual) 5127 return TokError("expected string parameter for '.erridn' directive"); 5128 return TokError("expected string parameter for '.errdif' directive"); 5129 } 5130 5131 std::string Message; 5132 if (ExpectEqual) 5133 Message = ".erridn directive invoked in source file"; 5134 else 5135 Message = ".errdif directive invoked in source file"; 5136 if (Lexer.isNot(AsmToken::EndOfStatement)) { 5137 if (parseToken(AsmToken::Comma)) 5138 return addErrorSuffix(" in '.erridn' directive"); 5139 Message = parseStringTo(AsmToken::EndOfStatement); 5140 } 5141 Lex(); 5142 5143 if (CaseInsensitive) 5144 TheCondState.CondMet = 5145 ExpectEqual == (StringRef(String1).equals_insensitive(String2)); 5146 else 5147 TheCondState.CondMet = ExpectEqual == (String1 == String2); 5148 TheCondState.Ignore = !TheCondState.CondMet; 5149 5150 if ((CaseInsensitive && 5151 ExpectEqual == StringRef(String1).equals_insensitive(String2)) || 5152 (ExpectEqual == (String1 == String2))) 5153 return Error(DirectiveLoc, Message); 5154 return false; 5155 } 5156 5157 /// parseDirectiveErrorIfe 5158 /// ::= .erre expression[, message] 5159 bool MasmParser::parseDirectiveErrorIfe(SMLoc DirectiveLoc, bool ExpectZero) { 5160 if (!TheCondStack.empty()) { 5161 if (TheCondStack.back().Ignore) { 5162 eatToEndOfStatement(); 5163 return false; 5164 } 5165 } 5166 5167 int64_t ExprValue; 5168 if (parseAbsoluteExpression(ExprValue)) 5169 return addErrorSuffix(" in '.erre' directive"); 5170 5171 std::string Message = ".erre directive invoked in source file"; 5172 if 
(Lexer.isNot(AsmToken::EndOfStatement)) { 5173 if (parseToken(AsmToken::Comma)) 5174 return addErrorSuffix(" in '.erre' directive"); 5175 Message = parseStringTo(AsmToken::EndOfStatement); 5176 } 5177 Lex(); 5178 5179 if ((ExprValue == 0) == ExpectZero) 5180 return Error(DirectiveLoc, Message); 5181 return false; 5182 } 5183 5184 /// parseDirectiveEndIf 5185 /// ::= .endif 5186 bool MasmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) { 5187 if (parseEOL()) 5188 return true; 5189 5190 if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty()) 5191 return Error(DirectiveLoc, "Encountered a .endif that doesn't follow " 5192 "an .if or .else"); 5193 if (!TheCondStack.empty()) { 5194 TheCondState = TheCondStack.back(); 5195 TheCondStack.pop_back(); 5196 } 5197 5198 return false; 5199 } 5200 5201 void MasmParser::initializeDirectiveKindMap() { 5202 DirectiveKindMap["="] = DK_ASSIGN; 5203 DirectiveKindMap["equ"] = DK_EQU; 5204 DirectiveKindMap["textequ"] = DK_TEXTEQU; 5205 // DirectiveKindMap[".ascii"] = DK_ASCII; 5206 // DirectiveKindMap[".asciz"] = DK_ASCIZ; 5207 // DirectiveKindMap[".string"] = DK_STRING; 5208 DirectiveKindMap["byte"] = DK_BYTE; 5209 DirectiveKindMap["sbyte"] = DK_SBYTE; 5210 DirectiveKindMap["word"] = DK_WORD; 5211 DirectiveKindMap["sword"] = DK_SWORD; 5212 DirectiveKindMap["dword"] = DK_DWORD; 5213 DirectiveKindMap["sdword"] = DK_SDWORD; 5214 DirectiveKindMap["fword"] = DK_FWORD; 5215 DirectiveKindMap["qword"] = DK_QWORD; 5216 DirectiveKindMap["sqword"] = DK_SQWORD; 5217 DirectiveKindMap["real4"] = DK_REAL4; 5218 DirectiveKindMap["real8"] = DK_REAL8; 5219 DirectiveKindMap["real10"] = DK_REAL10; 5220 DirectiveKindMap["align"] = DK_ALIGN; 5221 DirectiveKindMap["even"] = DK_EVEN; 5222 DirectiveKindMap["org"] = DK_ORG; 5223 DirectiveKindMap["extern"] = DK_EXTERN; 5224 DirectiveKindMap["extrn"] = DK_EXTERN; 5225 DirectiveKindMap["public"] = DK_PUBLIC; 5226 // DirectiveKindMap[".comm"] = DK_COMM; 5227 DirectiveKindMap["comment"] = DK_COMMENT; 
DirectiveKindMap["include"] = DK_INCLUDE;
  // Looping directives; several spellings share one kind.
  DirectiveKindMap["repeat"] = DK_REPEAT;
  DirectiveKindMap["rept"] = DK_REPEAT;
  DirectiveKindMap["while"] = DK_WHILE;
  DirectiveKindMap["for"] = DK_FOR;
  DirectiveKindMap["irp"] = DK_FOR;
  DirectiveKindMap["forc"] = DK_FORC;
  DirectiveKindMap["irpc"] = DK_FORC;
  // Conditional-assembly directives.
  DirectiveKindMap["if"] = DK_IF;
  DirectiveKindMap["ife"] = DK_IFE;
  DirectiveKindMap["ifb"] = DK_IFB;
  DirectiveKindMap["ifnb"] = DK_IFNB;
  DirectiveKindMap["ifdef"] = DK_IFDEF;
  DirectiveKindMap["ifndef"] = DK_IFNDEF;
  DirectiveKindMap["ifdif"] = DK_IFDIF;
  DirectiveKindMap["ifdifi"] = DK_IFDIFI;
  DirectiveKindMap["ifidn"] = DK_IFIDN;
  DirectiveKindMap["ifidni"] = DK_IFIDNI;
  // NOTE(review): parseDirectiveElseIf handles DK_ELSEIFE, but no "elseife"
  // spelling is registered here - confirm whether that is intentional.
  DirectiveKindMap["elseif"] = DK_ELSEIF;
  DirectiveKindMap["elseifdef"] = DK_ELSEIFDEF;
  DirectiveKindMap["elseifndef"] = DK_ELSEIFNDEF;
  DirectiveKindMap["elseifdif"] = DK_ELSEIFDIF;
  DirectiveKindMap["elseifidn"] = DK_ELSEIFIDN;
  DirectiveKindMap["else"] = DK_ELSE;
  DirectiveKindMap["end"] = DK_END;
  DirectiveKindMap["endif"] = DK_ENDIF;
  // DirectiveKindMap[".file"] = DK_FILE;
  // DirectiveKindMap[".line"] = DK_LINE;
  // DirectiveKindMap[".loc"] = DK_LOC;
  // DirectiveKindMap[".stabs"] = DK_STABS;
  // DirectiveKindMap[".cv_file"] = DK_CV_FILE;
  // DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
  // DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
  // DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
  // DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
  // DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
  // DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
  // DirectiveKindMap[".cv_string"] = DK_CV_STRING;
  // DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
  // DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
  // DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET;
  // DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA;
  // DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS;
  // DirectiveKindMap[".cfi_startproc"] = DK_CFI_STARTPROC;
  // DirectiveKindMap[".cfi_endproc"] = DK_CFI_ENDPROC;
  // DirectiveKindMap[".cfi_def_cfa"] = DK_CFI_DEF_CFA;
  // DirectiveKindMap[".cfi_def_cfa_offset"] = DK_CFI_DEF_CFA_OFFSET;
  // DirectiveKindMap[".cfi_adjust_cfa_offset"] = DK_CFI_ADJUST_CFA_OFFSET;
  // DirectiveKindMap[".cfi_def_cfa_register"] = DK_CFI_DEF_CFA_REGISTER;
  // DirectiveKindMap[".cfi_offset"] = DK_CFI_OFFSET;
  // DirectiveKindMap[".cfi_rel_offset"] = DK_CFI_REL_OFFSET;
  // DirectiveKindMap[".cfi_personality"] = DK_CFI_PERSONALITY;
  // DirectiveKindMap[".cfi_lsda"] = DK_CFI_LSDA;
  // DirectiveKindMap[".cfi_remember_state"] = DK_CFI_REMEMBER_STATE;
  // DirectiveKindMap[".cfi_restore_state"] = DK_CFI_RESTORE_STATE;
  // DirectiveKindMap[".cfi_same_value"] = DK_CFI_SAME_VALUE;
  // DirectiveKindMap[".cfi_restore"] = DK_CFI_RESTORE;
  // DirectiveKindMap[".cfi_escape"] = DK_CFI_ESCAPE;
  // DirectiveKindMap[".cfi_return_column"] = DK_CFI_RETURN_COLUMN;
  // DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME;
  // DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED;
  // DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER;
  // DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE;
  // DirectiveKindMap[".cfi_b_key_frame"] = DK_CFI_B_KEY_FRAME;
  // DirectiveKindMap[".cfi_val_offset"] = DK_CFI_VAL_OFFSET;
  // Macro definition and control directives.
  DirectiveKindMap["macro"] = DK_MACRO;
  DirectiveKindMap["exitm"] = DK_EXITM;
  DirectiveKindMap["endm"] = DK_ENDM;
  DirectiveKindMap["purge"] = DK_PURGE;
  // Conditional error directives.
  DirectiveKindMap[".err"] = DK_ERR;
  DirectiveKindMap[".errb"] = DK_ERRB;
  DirectiveKindMap[".errnb"] = DK_ERRNB;
  DirectiveKindMap[".errdef"] = DK_ERRDEF;
  DirectiveKindMap[".errndef"] = DK_ERRNDEF;
DirectiveKindMap[".errdif"] = DK_ERRDIF; 5303 DirectiveKindMap[".errdifi"] = DK_ERRDIFI; 5304 DirectiveKindMap[".erridn"] = DK_ERRIDN; 5305 DirectiveKindMap[".erridni"] = DK_ERRIDNI; 5306 DirectiveKindMap[".erre"] = DK_ERRE; 5307 DirectiveKindMap[".errnz"] = DK_ERRNZ; 5308 DirectiveKindMap[".pushframe"] = DK_PUSHFRAME; 5309 DirectiveKindMap[".pushreg"] = DK_PUSHREG; 5310 DirectiveKindMap[".savereg"] = DK_SAVEREG; 5311 DirectiveKindMap[".savexmm128"] = DK_SAVEXMM128; 5312 DirectiveKindMap[".setframe"] = DK_SETFRAME; 5313 DirectiveKindMap[".radix"] = DK_RADIX; 5314 DirectiveKindMap["db"] = DK_DB; 5315 DirectiveKindMap["dd"] = DK_DD; 5316 DirectiveKindMap["df"] = DK_DF; 5317 DirectiveKindMap["dq"] = DK_DQ; 5318 DirectiveKindMap["dw"] = DK_DW; 5319 DirectiveKindMap["echo"] = DK_ECHO; 5320 DirectiveKindMap["struc"] = DK_STRUCT; 5321 DirectiveKindMap["struct"] = DK_STRUCT; 5322 DirectiveKindMap["union"] = DK_UNION; 5323 DirectiveKindMap["ends"] = DK_ENDS; 5324 } 5325 5326 bool MasmParser::isMacroLikeDirective() { 5327 if (getLexer().is(AsmToken::Identifier)) { 5328 bool IsMacroLike = StringSwitch<bool>(getTok().getIdentifier()) 5329 .CasesLower("repeat", "rept", true) 5330 .CaseLower("while", true) 5331 .CasesLower("for", "irp", true) 5332 .CasesLower("forc", "irpc", true) 5333 .Default(false); 5334 if (IsMacroLike) 5335 return true; 5336 } 5337 if (peekTok().is(AsmToken::Identifier) && 5338 peekTok().getIdentifier().equals_insensitive("macro")) 5339 return true; 5340 5341 return false; 5342 } 5343 5344 MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { 5345 AsmToken EndToken, StartToken = getTok(); 5346 5347 unsigned NestLevel = 0; 5348 while (true) { 5349 // Check whether we have reached the end of the file. 
5350 if (getLexer().is(AsmToken::Eof)) { 5351 printError(DirectiveLoc, "no matching 'endm' in definition"); 5352 return nullptr; 5353 } 5354 5355 if (isMacroLikeDirective()) 5356 ++NestLevel; 5357 5358 // Otherwise, check whether we have reached the endm. 5359 if (Lexer.is(AsmToken::Identifier) && 5360 getTok().getIdentifier().equals_insensitive("endm")) { 5361 if (NestLevel == 0) { 5362 EndToken = getTok(); 5363 Lex(); 5364 if (Lexer.isNot(AsmToken::EndOfStatement)) { 5365 printError(getTok().getLoc(), "unexpected token in 'endm' directive"); 5366 return nullptr; 5367 } 5368 break; 5369 } 5370 --NestLevel; 5371 } 5372 5373 // Otherwise, scan till the end of the statement. 5374 eatToEndOfStatement(); 5375 } 5376 5377 const char *BodyStart = StartToken.getLoc().getPointer(); 5378 const char *BodyEnd = EndToken.getLoc().getPointer(); 5379 StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); 5380 5381 // We Are Anonymous. 5382 MacroLikeBodies.emplace_back(StringRef(), Body, MCAsmMacroParameters()); 5383 return &MacroLikeBodies.back(); 5384 } 5385 5386 bool MasmParser::expandStatement(SMLoc Loc) { 5387 std::string Body = parseStringTo(AsmToken::EndOfStatement); 5388 SMLoc EndLoc = getTok().getLoc(); 5389 5390 MCAsmMacroParameters Parameters; 5391 MCAsmMacroArguments Arguments; 5392 5393 StringMap<std::string> BuiltinValues; 5394 for (const auto &S : BuiltinSymbolMap) { 5395 const BuiltinSymbol &Sym = S.getValue(); 5396 if (std::optional<std::string> Text = evaluateBuiltinTextMacro(Sym, Loc)) { 5397 BuiltinValues[S.getKey().lower()] = std::move(*Text); 5398 } 5399 } 5400 for (const auto &B : BuiltinValues) { 5401 MCAsmMacroParameter P; 5402 MCAsmMacroArgument A; 5403 P.Name = B.getKey(); 5404 P.Required = true; 5405 A.push_back(AsmToken(AsmToken::String, B.getValue())); 5406 5407 Parameters.push_back(std::move(P)); 5408 Arguments.push_back(std::move(A)); 5409 } 5410 5411 for (const auto &V : Variables) { 5412 const Variable &Var = V.getValue(); 5413 if 
(Var.IsText) { 5414 MCAsmMacroParameter P; 5415 MCAsmMacroArgument A; 5416 P.Name = Var.Name; 5417 P.Required = true; 5418 A.push_back(AsmToken(AsmToken::String, Var.TextValue)); 5419 5420 Parameters.push_back(std::move(P)); 5421 Arguments.push_back(std::move(A)); 5422 } 5423 } 5424 MacroLikeBodies.emplace_back(StringRef(), Body, Parameters); 5425 MCAsmMacro M = MacroLikeBodies.back(); 5426 5427 // Expand the statement in a new buffer. 5428 SmallString<80> Buf; 5429 raw_svector_ostream OS(Buf); 5430 if (expandMacro(OS, M.Body, M.Parameters, Arguments, M.Locals, EndLoc)) 5431 return true; 5432 std::unique_ptr<MemoryBuffer> Expansion = 5433 MemoryBuffer::getMemBufferCopy(OS.str(), "<expansion>"); 5434 5435 // Jump to the expanded statement and prime the lexer. 5436 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Expansion), EndLoc); 5437 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 5438 EndStatementAtEOFStack.push_back(false); 5439 Lex(); 5440 return false; 5441 } 5442 5443 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, 5444 raw_svector_ostream &OS) { 5445 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/getTok().getLoc(), OS); 5446 } 5447 void MasmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, 5448 SMLoc ExitLoc, 5449 raw_svector_ostream &OS) { 5450 OS << "endm\n"; 5451 5452 std::unique_ptr<MemoryBuffer> Instantiation = 5453 MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); 5454 5455 // Create the macro instantiation object and add to the current macro 5456 // instantiation stack. 5457 MacroInstantiation *MI = new MacroInstantiation{DirectiveLoc, CurBuffer, 5458 ExitLoc, TheCondStack.size()}; 5459 ActiveMacros.push_back(MI); 5460 5461 // Jump to the macro instantiation and prime the lexer. 
5462 CurBuffer = SrcMgr.AddNewSourceBuffer(std::move(Instantiation), SMLoc()); 5463 Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); 5464 EndStatementAtEOFStack.push_back(true); 5465 Lex(); 5466 } 5467 5468 /// parseDirectiveRepeat 5469 /// ::= ("repeat" | "rept") count 5470 /// body 5471 /// endm 5472 bool MasmParser::parseDirectiveRepeat(SMLoc DirectiveLoc, StringRef Dir) { 5473 const MCExpr *CountExpr; 5474 SMLoc CountLoc = getTok().getLoc(); 5475 if (parseExpression(CountExpr)) 5476 return true; 5477 5478 int64_t Count; 5479 if (!CountExpr->evaluateAsAbsolute(Count, getStreamer().getAssemblerPtr())) { 5480 return Error(CountLoc, "unexpected token in '" + Dir + "' directive"); 5481 } 5482 5483 if (check(Count < 0, CountLoc, "Count is negative") || parseEOL()) 5484 return true; 5485 5486 // Lex the repeat definition. 5487 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 5488 if (!M) 5489 return true; 5490 5491 // Macro instantiation is lexical, unfortunately. We construct a new buffer 5492 // to hold the macro body with substitutions. 5493 SmallString<256> Buf; 5494 raw_svector_ostream OS(Buf); 5495 while (Count--) { 5496 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc())) 5497 return true; 5498 } 5499 instantiateMacroLikeBody(M, DirectiveLoc, OS); 5500 5501 return false; 5502 } 5503 5504 /// parseDirectiveWhile 5505 /// ::= "while" expression 5506 /// body 5507 /// endm 5508 bool MasmParser::parseDirectiveWhile(SMLoc DirectiveLoc) { 5509 const MCExpr *CondExpr; 5510 SMLoc CondLoc = getTok().getLoc(); 5511 if (parseExpression(CondExpr)) 5512 return true; 5513 5514 // Lex the repeat definition. 5515 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 5516 if (!M) 5517 return true; 5518 5519 // Macro instantiation is lexical, unfortunately. We construct a new buffer 5520 // to hold the macro body with substitutions. 
5521 SmallString<256> Buf; 5522 raw_svector_ostream OS(Buf); 5523 int64_t Condition; 5524 if (!CondExpr->evaluateAsAbsolute(Condition, getStreamer().getAssemblerPtr())) 5525 return Error(CondLoc, "expected absolute expression in 'while' directive"); 5526 if (Condition) { 5527 // Instantiate the macro, then resume at this directive to recheck the 5528 // condition. 5529 if (expandMacro(OS, M->Body, {}, {}, M->Locals, getTok().getLoc())) 5530 return true; 5531 instantiateMacroLikeBody(M, DirectiveLoc, /*ExitLoc=*/DirectiveLoc, OS); 5532 } 5533 5534 return false; 5535 } 5536 5537 /// parseDirectiveFor 5538 /// ::= ("for" | "irp") symbol [":" qualifier], <values> 5539 /// body 5540 /// endm 5541 bool MasmParser::parseDirectiveFor(SMLoc DirectiveLoc, StringRef Dir) { 5542 MCAsmMacroParameter Parameter; 5543 MCAsmMacroArguments A; 5544 if (check(parseIdentifier(Parameter.Name), 5545 "expected identifier in '" + Dir + "' directive")) 5546 return true; 5547 5548 // Parse optional qualifier (default value, or "req") 5549 if (parseOptionalToken(AsmToken::Colon)) { 5550 if (parseOptionalToken(AsmToken::Equal)) { 5551 // Default value 5552 SMLoc ParamLoc; 5553 5554 ParamLoc = Lexer.getLoc(); 5555 if (parseMacroArgument(nullptr, Parameter.Value)) 5556 return true; 5557 } else { 5558 SMLoc QualLoc; 5559 StringRef Qualifier; 5560 5561 QualLoc = Lexer.getLoc(); 5562 if (parseIdentifier(Qualifier)) 5563 return Error(QualLoc, "missing parameter qualifier for " 5564 "'" + 5565 Parameter.Name + "' in '" + Dir + 5566 "' directive"); 5567 5568 if (Qualifier.equals_insensitive("req")) 5569 Parameter.Required = true; 5570 else 5571 return Error(QualLoc, 5572 Qualifier + " is not a valid parameter qualifier for '" + 5573 Parameter.Name + "' in '" + Dir + "' directive"); 5574 } 5575 } 5576 5577 if (parseToken(AsmToken::Comma, 5578 "expected comma in '" + Dir + "' directive") || 5579 parseToken(AsmToken::Less, 5580 "values in '" + Dir + 5581 "' directive must be enclosed in angle brackets")) 
5582 return true; 5583 5584 while (true) { 5585 A.emplace_back(); 5586 if (parseMacroArgument(&Parameter, A.back(), /*EndTok=*/AsmToken::Greater)) 5587 return addErrorSuffix(" in arguments for '" + Dir + "' directive"); 5588 5589 // If we see a comma, continue, and allow line continuation. 5590 if (!parseOptionalToken(AsmToken::Comma)) 5591 break; 5592 parseOptionalToken(AsmToken::EndOfStatement); 5593 } 5594 5595 if (parseToken(AsmToken::Greater, 5596 "values in '" + Dir + 5597 "' directive must be enclosed in angle brackets") || 5598 parseEOL()) 5599 return true; 5600 5601 // Lex the for definition. 5602 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 5603 if (!M) 5604 return true; 5605 5606 // Macro instantiation is lexical, unfortunately. We construct a new buffer 5607 // to hold the macro body with substitutions. 5608 SmallString<256> Buf; 5609 raw_svector_ostream OS(Buf); 5610 5611 for (const MCAsmMacroArgument &Arg : A) { 5612 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc())) 5613 return true; 5614 } 5615 5616 instantiateMacroLikeBody(M, DirectiveLoc, OS); 5617 5618 return false; 5619 } 5620 5621 /// parseDirectiveForc 5622 /// ::= ("forc" | "irpc") symbol, <string> 5623 /// body 5624 /// endm 5625 bool MasmParser::parseDirectiveForc(SMLoc DirectiveLoc, StringRef Directive) { 5626 MCAsmMacroParameter Parameter; 5627 5628 std::string Argument; 5629 if (check(parseIdentifier(Parameter.Name), 5630 "expected identifier in '" + Directive + "' directive") || 5631 parseToken(AsmToken::Comma, 5632 "expected comma in '" + Directive + "' directive")) 5633 return true; 5634 if (parseAngleBracketString(Argument)) { 5635 // Match ml64.exe; treat all characters to end of statement as a string, 5636 // ignoring comment markers, then discard anything following a space (using 5637 // the C locale). 
5638 Argument = parseStringTo(AsmToken::EndOfStatement); 5639 if (getTok().is(AsmToken::EndOfStatement)) 5640 Argument += getTok().getString(); 5641 size_t End = 0; 5642 for (; End < Argument.size(); ++End) { 5643 if (isSpace(Argument[End])) 5644 break; 5645 } 5646 Argument.resize(End); 5647 } 5648 if (parseEOL()) 5649 return true; 5650 5651 // Lex the irpc definition. 5652 MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); 5653 if (!M) 5654 return true; 5655 5656 // Macro instantiation is lexical, unfortunately. We construct a new buffer 5657 // to hold the macro body with substitutions. 5658 SmallString<256> Buf; 5659 raw_svector_ostream OS(Buf); 5660 5661 StringRef Values(Argument); 5662 for (std::size_t I = 0, End = Values.size(); I != End; ++I) { 5663 MCAsmMacroArgument Arg; 5664 Arg.emplace_back(AsmToken::Identifier, Values.substr(I, 1)); 5665 5666 if (expandMacro(OS, M->Body, Parameter, Arg, M->Locals, getTok().getLoc())) 5667 return true; 5668 } 5669 5670 instantiateMacroLikeBody(M, DirectiveLoc, OS); 5671 5672 return false; 5673 } 5674 5675 bool MasmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, 5676 size_t Len) { 5677 const MCExpr *Value; 5678 SMLoc ExprLoc = getLexer().getLoc(); 5679 if (parseExpression(Value)) 5680 return true; 5681 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 5682 if (!MCE) 5683 return Error(ExprLoc, "unexpected expression in _emit"); 5684 uint64_t IntValue = MCE->getValue(); 5685 if (!isUInt<8>(IntValue) && !isInt<8>(IntValue)) 5686 return Error(ExprLoc, "literal value out of range for directive"); 5687 5688 Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len); 5689 return false; 5690 } 5691 5692 bool MasmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { 5693 const MCExpr *Value; 5694 SMLoc ExprLoc = getLexer().getLoc(); 5695 if (parseExpression(Value)) 5696 return true; 5697 const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value); 5698 if (!MCE) 5699 return Error(ExprLoc, 
"unexpected expression in align"); 5700 uint64_t IntValue = MCE->getValue(); 5701 if (!isPowerOf2_64(IntValue)) 5702 return Error(ExprLoc, "literal value not a power of two greater then zero"); 5703 5704 Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue)); 5705 return false; 5706 } 5707 5708 bool MasmParser::parseDirectiveRadix(SMLoc DirectiveLoc) { 5709 const SMLoc Loc = getLexer().getLoc(); 5710 std::string RadixStringRaw = parseStringTo(AsmToken::EndOfStatement); 5711 StringRef RadixString = StringRef(RadixStringRaw).trim(); 5712 unsigned Radix; 5713 if (RadixString.getAsInteger(10, Radix)) { 5714 return Error(Loc, 5715 "radix must be a decimal number in the range 2 to 16; was " + 5716 RadixString); 5717 } 5718 if (Radix < 2 || Radix > 16) 5719 return Error(Loc, "radix must be in the range 2 to 16; was " + 5720 std::to_string(Radix)); 5721 getLexer().setMasmDefaultRadix(Radix); 5722 return false; 5723 } 5724 5725 /// parseDirectiveEcho 5726 /// ::= "echo" message 5727 bool MasmParser::parseDirectiveEcho(SMLoc DirectiveLoc) { 5728 std::string Message = parseStringTo(AsmToken::EndOfStatement); 5729 llvm::outs() << Message; 5730 if (!StringRef(Message).ends_with("\n")) 5731 llvm::outs() << '\n'; 5732 return false; 5733 } 5734 5735 // We are comparing pointers, but the pointers are relative to a single string. 5736 // Thus, this should always be deterministic. 5737 static int rewritesSort(const AsmRewrite *AsmRewriteA, 5738 const AsmRewrite *AsmRewriteB) { 5739 if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer()) 5740 return -1; 5741 if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) 5742 return 1; 5743 5744 // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output 5745 // rewrite to the same location. Make sure the SizeDirective rewrite is 5746 // performed first, then the Imm/ImmPrefix and finally the Input/Output. This 5747 // ensures the sort algorithm is stable. 
5748 if (AsmRewritePrecedence[AsmRewriteA->Kind] > 5749 AsmRewritePrecedence[AsmRewriteB->Kind]) 5750 return -1; 5751 5752 if (AsmRewritePrecedence[AsmRewriteA->Kind] < 5753 AsmRewritePrecedence[AsmRewriteB->Kind]) 5754 return 1; 5755 llvm_unreachable("Unstable rewrite sort."); 5756 } 5757 5758 bool MasmParser::defineMacro(StringRef Name, StringRef Value) { 5759 Variable &Var = Variables[Name.lower()]; 5760 if (Var.Name.empty()) { 5761 Var.Name = Name; 5762 } else if (Var.Redefinable == Variable::NOT_REDEFINABLE) { 5763 return Error(SMLoc(), "invalid variable redefinition"); 5764 } else if (Var.Redefinable == Variable::WARN_ON_REDEFINITION && 5765 Warning(SMLoc(), "redefining '" + Name + 5766 "', already defined on the command line")) { 5767 return true; 5768 } 5769 Var.Redefinable = Variable::WARN_ON_REDEFINITION; 5770 Var.IsText = true; 5771 Var.TextValue = Value.str(); 5772 return false; 5773 } 5774 5775 bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const { 5776 const std::pair<StringRef, StringRef> BaseMember = Name.split('.'); 5777 const StringRef Base = BaseMember.first, Member = BaseMember.second; 5778 return lookUpField(Base, Member, Info); 5779 } 5780 5781 bool MasmParser::lookUpField(StringRef Base, StringRef Member, 5782 AsmFieldInfo &Info) const { 5783 if (Base.empty()) 5784 return true; 5785 5786 AsmFieldInfo BaseInfo; 5787 if (Base.contains('.') && !lookUpField(Base, BaseInfo)) 5788 Base = BaseInfo.Type.Name; 5789 5790 auto StructIt = Structs.find(Base.lower()); 5791 auto TypeIt = KnownType.find(Base.lower()); 5792 if (TypeIt != KnownType.end()) { 5793 StructIt = Structs.find(TypeIt->second.Name.lower()); 5794 } 5795 if (StructIt != Structs.end()) 5796 return lookUpField(StructIt->second, Member, Info); 5797 5798 return true; 5799 } 5800 5801 bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, 5802 AsmFieldInfo &Info) const { 5803 if (Member.empty()) { 5804 Info.Type.Name = Structure.Name; 5805 Info.Type.Size 
= Structure.Size; 5806 Info.Type.ElementSize = Structure.Size; 5807 Info.Type.Length = 1; 5808 return false; 5809 } 5810 5811 std::pair<StringRef, StringRef> Split = Member.split('.'); 5812 const StringRef FieldName = Split.first, FieldMember = Split.second; 5813 5814 auto StructIt = Structs.find(FieldName.lower()); 5815 if (StructIt != Structs.end()) 5816 return lookUpField(StructIt->second, FieldMember, Info); 5817 5818 auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); 5819 if (FieldIt == Structure.FieldsByName.end()) 5820 return true; 5821 5822 const FieldInfo &Field = Structure.Fields[FieldIt->second]; 5823 if (FieldMember.empty()) { 5824 Info.Offset += Field.Offset; 5825 Info.Type.Size = Field.SizeOf; 5826 Info.Type.ElementSize = Field.Type; 5827 Info.Type.Length = Field.LengthOf; 5828 if (Field.Contents.FT == FT_STRUCT) 5829 Info.Type.Name = Field.Contents.StructInfo.Structure.Name; 5830 else 5831 Info.Type.Name = ""; 5832 return false; 5833 } 5834 5835 if (Field.Contents.FT != FT_STRUCT) 5836 return true; 5837 const StructFieldInfo &StructInfo = Field.Contents.StructInfo; 5838 5839 if (lookUpField(StructInfo.Structure, FieldMember, Info)) 5840 return true; 5841 5842 Info.Offset += Field.Offset; 5843 return false; 5844 } 5845 5846 bool MasmParser::lookUpType(StringRef Name, AsmTypeInfo &Info) const { 5847 unsigned Size = StringSwitch<unsigned>(Name) 5848 .CasesLower("byte", "db", "sbyte", 1) 5849 .CasesLower("word", "dw", "sword", 2) 5850 .CasesLower("dword", "dd", "sdword", 4) 5851 .CasesLower("fword", "df", 6) 5852 .CasesLower("qword", "dq", "sqword", 8) 5853 .CaseLower("real4", 4) 5854 .CaseLower("real8", 8) 5855 .CaseLower("real10", 10) 5856 .Default(0); 5857 if (Size) { 5858 Info.Name = Name; 5859 Info.ElementSize = Size; 5860 Info.Length = 1; 5861 Info.Size = Size; 5862 return false; 5863 } 5864 5865 auto StructIt = Structs.find(Name.lower()); 5866 if (StructIt != Structs.end()) { 5867 const StructInfo &Structure = StructIt->second; 5868 
Info.Name = Name; 5869 Info.ElementSize = Structure.Size; 5870 Info.Length = 1; 5871 Info.Size = Structure.Size; 5872 return false; 5873 } 5874 5875 return true; 5876 } 5877 5878 bool MasmParser::parseMSInlineAsm( 5879 std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, 5880 SmallVectorImpl<std::pair<void *, bool>> &OpDecls, 5881 SmallVectorImpl<std::string> &Constraints, 5882 SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, 5883 MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { 5884 SmallVector<void *, 4> InputDecls; 5885 SmallVector<void *, 4> OutputDecls; 5886 SmallVector<bool, 4> InputDeclsAddressOf; 5887 SmallVector<bool, 4> OutputDeclsAddressOf; 5888 SmallVector<std::string, 4> InputConstraints; 5889 SmallVector<std::string, 4> OutputConstraints; 5890 SmallVector<MCRegister, 4> ClobberRegs; 5891 5892 SmallVector<AsmRewrite, 4> AsmStrRewrites; 5893 5894 // Prime the lexer. 5895 Lex(); 5896 5897 // While we have input, parse each statement. 5898 unsigned InputIdx = 0; 5899 unsigned OutputIdx = 0; 5900 while (getLexer().isNot(AsmToken::Eof)) { 5901 // Parse curly braces marking block start/end. 5902 if (parseCurlyBlockScope(AsmStrRewrites)) 5903 continue; 5904 5905 ParseStatementInfo Info(&AsmStrRewrites); 5906 bool StatementErr = parseStatement(Info, &SI); 5907 5908 if (StatementErr || Info.ParseError) { 5909 // Emit pending errors if any exist. 5910 printPendingErrors(); 5911 return true; 5912 } 5913 5914 // No pending error should exist here. 5915 assert(!hasPendingError() && "unexpected error from parseStatement"); 5916 5917 if (Info.Opcode == ~0U) 5918 continue; 5919 5920 const MCInstrDesc &Desc = MII->get(Info.Opcode); 5921 5922 // Build the list of clobbers, outputs and inputs. 5923 for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) { 5924 MCParsedAsmOperand &Operand = *Info.ParsedOperands[i]; 5925 5926 // Register operand. 
5927 if (Operand.isReg() && !Operand.needAddressOf() && 5928 !getTargetParser().omitRegisterFromClobberLists(Operand.getReg())) { 5929 unsigned NumDefs = Desc.getNumDefs(); 5930 // Clobber. 5931 if (NumDefs && Operand.getMCOperandNum() < NumDefs) 5932 ClobberRegs.push_back(Operand.getReg()); 5933 continue; 5934 } 5935 5936 // Expr/Input or Output. 5937 StringRef SymName = Operand.getSymName(); 5938 if (SymName.empty()) 5939 continue; 5940 5941 void *OpDecl = Operand.getOpDecl(); 5942 if (!OpDecl) 5943 continue; 5944 5945 StringRef Constraint = Operand.getConstraint(); 5946 if (Operand.isImm()) { 5947 // Offset as immediate. 5948 if (Operand.isOffsetOfLocal()) 5949 Constraint = "r"; 5950 else 5951 Constraint = "i"; 5952 } 5953 5954 bool isOutput = (i == 1) && Desc.mayStore(); 5955 SMLoc Start = SMLoc::getFromPointer(SymName.data()); 5956 if (isOutput) { 5957 ++InputIdx; 5958 OutputDecls.push_back(OpDecl); 5959 OutputDeclsAddressOf.push_back(Operand.needAddressOf()); 5960 OutputConstraints.push_back(("=" + Constraint).str()); 5961 AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size()); 5962 } else { 5963 InputDecls.push_back(OpDecl); 5964 InputDeclsAddressOf.push_back(Operand.needAddressOf()); 5965 InputConstraints.push_back(Constraint.str()); 5966 if (Desc.operands()[i - 1].isBranchTarget()) 5967 AsmStrRewrites.emplace_back(AOK_CallInput, Start, SymName.size()); 5968 else 5969 AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size()); 5970 } 5971 } 5972 5973 // Consider implicit defs to be clobbers. Think of cpuid and push. 5974 llvm::append_range(ClobberRegs, Desc.implicit_defs()); 5975 } 5976 5977 // Set the number of Outputs and Inputs. 5978 NumOutputs = OutputDecls.size(); 5979 NumInputs = InputDecls.size(); 5980 5981 // Set the unique clobbers. 
5982 array_pod_sort(ClobberRegs.begin(), ClobberRegs.end()); 5983 ClobberRegs.erase(llvm::unique(ClobberRegs), ClobberRegs.end()); 5984 Clobbers.assign(ClobberRegs.size(), std::string()); 5985 for (unsigned I = 0, E = ClobberRegs.size(); I != E; ++I) { 5986 raw_string_ostream OS(Clobbers[I]); 5987 IP->printRegName(OS, ClobberRegs[I]); 5988 } 5989 5990 // Merge the various outputs and inputs. Output are expected first. 5991 if (NumOutputs || NumInputs) { 5992 unsigned NumExprs = NumOutputs + NumInputs; 5993 OpDecls.resize(NumExprs); 5994 Constraints.resize(NumExprs); 5995 for (unsigned i = 0; i < NumOutputs; ++i) { 5996 OpDecls[i] = std::make_pair(OutputDecls[i], OutputDeclsAddressOf[i]); 5997 Constraints[i] = OutputConstraints[i]; 5998 } 5999 for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) { 6000 OpDecls[j] = std::make_pair(InputDecls[i], InputDeclsAddressOf[i]); 6001 Constraints[j] = InputConstraints[i]; 6002 } 6003 } 6004 6005 // Build the IR assembly string. 6006 std::string AsmStringIR; 6007 raw_string_ostream OS(AsmStringIR); 6008 StringRef ASMString = 6009 SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer(); 6010 const char *AsmStart = ASMString.begin(); 6011 const char *AsmEnd = ASMString.end(); 6012 array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort); 6013 for (auto I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { 6014 const AsmRewrite &AR = *I; 6015 // Check if this has already been covered by another rewrite... 6016 if (AR.Done) 6017 continue; 6018 AsmRewriteKind Kind = AR.Kind; 6019 6020 const char *Loc = AR.Loc.getPointer(); 6021 assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); 6022 6023 // Emit everything up to the immediate/expression. 6024 if (unsigned Len = Loc - AsmStart) 6025 OS << StringRef(AsmStart, Len); 6026 6027 // Skip the original expression. 
6028 if (Kind == AOK_Skip) { 6029 AsmStart = Loc + AR.Len; 6030 continue; 6031 } 6032 6033 unsigned AdditionalSkip = 0; 6034 // Rewrite expressions in $N notation. 6035 switch (Kind) { 6036 default: 6037 break; 6038 case AOK_IntelExpr: 6039 assert(AR.IntelExp.isValid() && "cannot write invalid intel expression"); 6040 if (AR.IntelExp.NeedBracs) 6041 OS << "["; 6042 if (AR.IntelExp.hasBaseReg()) 6043 OS << AR.IntelExp.BaseReg; 6044 if (AR.IntelExp.hasIndexReg()) 6045 OS << (AR.IntelExp.hasBaseReg() ? " + " : "") 6046 << AR.IntelExp.IndexReg; 6047 if (AR.IntelExp.Scale > 1) 6048 OS << " * $$" << AR.IntelExp.Scale; 6049 if (AR.IntelExp.hasOffset()) { 6050 if (AR.IntelExp.hasRegs()) 6051 OS << " + "; 6052 // Fuse this rewrite with a rewrite of the offset name, if present. 6053 StringRef OffsetName = AR.IntelExp.OffsetName; 6054 SMLoc OffsetLoc = SMLoc::getFromPointer(AR.IntelExp.OffsetName.data()); 6055 size_t OffsetLen = OffsetName.size(); 6056 auto rewrite_it = std::find_if( 6057 I, AsmStrRewrites.end(), [&](const AsmRewrite &FusingAR) { 6058 return FusingAR.Loc == OffsetLoc && FusingAR.Len == OffsetLen && 6059 (FusingAR.Kind == AOK_Input || 6060 FusingAR.Kind == AOK_CallInput); 6061 }); 6062 if (rewrite_it == AsmStrRewrites.end()) { 6063 OS << "offset " << OffsetName; 6064 } else if (rewrite_it->Kind == AOK_CallInput) { 6065 OS << "${" << InputIdx++ << ":P}"; 6066 rewrite_it->Done = true; 6067 } else { 6068 OS << '$' << InputIdx++; 6069 rewrite_it->Done = true; 6070 } 6071 } 6072 if (AR.IntelExp.Imm || AR.IntelExp.emitImm()) 6073 OS << (AR.IntelExp.emitImm() ? 
"$$" : " + $$") << AR.IntelExp.Imm; 6074 if (AR.IntelExp.NeedBracs) 6075 OS << "]"; 6076 break; 6077 case AOK_Label: 6078 OS << Ctx.getAsmInfo()->getPrivateLabelPrefix() << AR.Label; 6079 break; 6080 case AOK_Input: 6081 OS << '$' << InputIdx++; 6082 break; 6083 case AOK_CallInput: 6084 OS << "${" << InputIdx++ << ":P}"; 6085 break; 6086 case AOK_Output: 6087 OS << '$' << OutputIdx++; 6088 break; 6089 case AOK_SizeDirective: 6090 switch (AR.Val) { 6091 default: break; 6092 case 8: OS << "byte ptr "; break; 6093 case 16: OS << "word ptr "; break; 6094 case 32: OS << "dword ptr "; break; 6095 case 64: OS << "qword ptr "; break; 6096 case 80: OS << "xword ptr "; break; 6097 case 128: OS << "xmmword ptr "; break; 6098 case 256: OS << "ymmword ptr "; break; 6099 } 6100 break; 6101 case AOK_Emit: 6102 OS << ".byte"; 6103 break; 6104 case AOK_Align: { 6105 // MS alignment directives are measured in bytes. If the native assembler 6106 // measures alignment in bytes, we can pass it straight through. 6107 OS << ".align"; 6108 if (getContext().getAsmInfo()->getAlignmentIsInBytes()) 6109 break; 6110 6111 // Alignment is in log2 form, so print that instead and skip the original 6112 // immediate. 6113 unsigned Val = AR.Val; 6114 OS << ' ' << Val; 6115 assert(Val < 10 && "Expected alignment less then 2^10."); 6116 AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4; 6117 break; 6118 } 6119 case AOK_EVEN: 6120 OS << ".even"; 6121 break; 6122 case AOK_EndOfStatement: 6123 OS << "\n\t"; 6124 break; 6125 } 6126 6127 // Skip the original expression. 6128 AsmStart = Loc + AR.Len + AdditionalSkip; 6129 } 6130 6131 // Emit the remainder of the asm string. 
6132 if (AsmStart != AsmEnd) 6133 OS << StringRef(AsmStart, AsmEnd - AsmStart); 6134 6135 AsmString = OS.str(); 6136 return false; 6137 } 6138 6139 void MasmParser::initializeBuiltinSymbolMaps() { 6140 // Numeric built-ins (supported in all versions) 6141 BuiltinSymbolMap["@version"] = BI_VERSION; 6142 BuiltinSymbolMap["@line"] = BI_LINE; 6143 6144 // Text built-ins (supported in all versions) 6145 BuiltinSymbolMap["@date"] = BI_DATE; 6146 BuiltinSymbolMap["@time"] = BI_TIME; 6147 BuiltinSymbolMap["@filecur"] = BI_FILECUR; 6148 BuiltinSymbolMap["@filename"] = BI_FILENAME; 6149 BuiltinSymbolMap["@curseg"] = BI_CURSEG; 6150 6151 // Function built-ins (supported in all versions) 6152 BuiltinFunctionMap["@catstr"] = BI_CATSTR; 6153 6154 // Some built-ins exist only for MASM32 (32-bit x86) 6155 if (getContext().getSubtargetInfo()->getTargetTriple().getArch() == 6156 Triple::x86) { 6157 // Numeric built-ins 6158 // BuiltinSymbolMap["@cpu"] = BI_CPU; 6159 // BuiltinSymbolMap["@interface"] = BI_INTERFACE; 6160 // BuiltinSymbolMap["@wordsize"] = BI_WORDSIZE; 6161 // BuiltinSymbolMap["@codesize"] = BI_CODESIZE; 6162 // BuiltinSymbolMap["@datasize"] = BI_DATASIZE; 6163 // BuiltinSymbolMap["@model"] = BI_MODEL; 6164 6165 // Text built-ins 6166 // BuiltinSymbolMap["@code"] = BI_CODE; 6167 // BuiltinSymbolMap["@data"] = BI_DATA; 6168 // BuiltinSymbolMap["@fardata?"] = BI_FARDATA; 6169 // BuiltinSymbolMap["@stack"] = BI_STACK; 6170 } 6171 } 6172 6173 const MCExpr *MasmParser::evaluateBuiltinValue(BuiltinSymbol Symbol, 6174 SMLoc StartLoc) { 6175 switch (Symbol) { 6176 default: 6177 return nullptr; 6178 case BI_VERSION: 6179 // Match a recent version of ML.EXE. 
6180 return MCConstantExpr::create(1427, getContext()); 6181 case BI_LINE: { 6182 int64_t Line; 6183 if (ActiveMacros.empty()) 6184 Line = SrcMgr.FindLineNumber(StartLoc, CurBuffer); 6185 else 6186 Line = SrcMgr.FindLineNumber(ActiveMacros.front()->InstantiationLoc, 6187 ActiveMacros.front()->ExitBuffer); 6188 return MCConstantExpr::create(Line, getContext()); 6189 } 6190 } 6191 llvm_unreachable("unhandled built-in symbol"); 6192 } 6193 6194 std::optional<std::string> 6195 MasmParser::evaluateBuiltinTextMacro(BuiltinSymbol Symbol, SMLoc StartLoc) { 6196 switch (Symbol) { 6197 default: 6198 return {}; 6199 case BI_DATE: { 6200 // Current local date, formatted MM/DD/YY 6201 char TmpBuffer[sizeof("mm/dd/yy")]; 6202 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%D", &TM); 6203 return std::string(TmpBuffer, Len); 6204 } 6205 case BI_TIME: { 6206 // Current local time, formatted HH:MM:SS (24-hour clock) 6207 char TmpBuffer[sizeof("hh:mm:ss")]; 6208 const size_t Len = strftime(TmpBuffer, sizeof(TmpBuffer), "%T", &TM); 6209 return std::string(TmpBuffer, Len); 6210 } 6211 case BI_FILECUR: 6212 return SrcMgr 6213 .getMemoryBuffer( 6214 ActiveMacros.empty() ? 
CurBuffer : ActiveMacros.front()->ExitBuffer) 6215 ->getBufferIdentifier() 6216 .str(); 6217 case BI_FILENAME: 6218 return sys::path::stem(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID()) 6219 ->getBufferIdentifier()) 6220 .upper(); 6221 case BI_CURSEG: 6222 return getStreamer().getCurrentSectionOnly()->getName().str(); 6223 } 6224 llvm_unreachable("unhandled built-in symbol"); 6225 } 6226 6227 bool MasmParser::evaluateBuiltinMacroFunction(BuiltinFunction Function, 6228 StringRef Name, 6229 std::string &Res) { 6230 if (parseToken(AsmToken::LParen, "invoking macro function '" + Name + 6231 "' requires arguments in parentheses")) { 6232 return true; 6233 } 6234 6235 MCAsmMacroParameters P; 6236 switch (Function) { 6237 default: 6238 return true; 6239 case BI_CATSTR: 6240 break; 6241 } 6242 MCAsmMacro M(Name, "", P, {}, true); 6243 6244 MCAsmMacroArguments A; 6245 if (parseMacroArguments(&M, A, AsmToken::RParen) || parseRParen()) { 6246 return true; 6247 } 6248 6249 switch (Function) { 6250 default: 6251 llvm_unreachable("unhandled built-in function"); 6252 case BI_CATSTR: { 6253 for (const MCAsmMacroArgument &Arg : A) { 6254 for (const AsmToken &Tok : Arg) { 6255 if (Tok.is(AsmToken::String)) { 6256 Res.append(Tok.getStringContents()); 6257 } else { 6258 Res.append(Tok.getString()); 6259 } 6260 } 6261 } 6262 return false; 6263 } 6264 } 6265 llvm_unreachable("unhandled built-in function"); 6266 return true; 6267 } 6268 6269 /// Create an MCAsmParser instance. 6270 MCAsmParser *llvm::createMCMasmParser(SourceMgr &SM, MCContext &C, 6271 MCStreamer &Out, const MCAsmInfo &MAI, 6272 struct tm TM, unsigned CB) { 6273 return new MasmParser(SM, C, Out, MAI, TM, CB); 6274 } 6275