1 //===---- ParseStmtAsm.cpp - Assembly Statement Parser --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing for GCC and Microsoft inline assembly. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Parse/Parser.h" 14 #include "clang/AST/ASTContext.h" 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "clang/Parse/RAIIObjectsForParser.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/MC/MCAsmInfo.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCInstPrinter.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCObjectFileInfo.h" 25 #include "llvm/MC/MCParser/MCAsmParser.h" 26 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/MC/MCTargetOptions.h" 31 #include "llvm/Support/SourceMgr.h" 32 #include "llvm/Support/TargetRegistry.h" 33 #include "llvm/Support/TargetSelect.h" 34 using namespace clang; 35 36 namespace { 37 class ClangAsmParserCallback : public llvm::MCAsmParserSemaCallback { 38 Parser &TheParser; 39 SourceLocation AsmLoc; 40 StringRef AsmString; 41 42 /// The tokens we streamed into AsmString and handed off to MC. 43 ArrayRef<Token> AsmToks; 44 45 /// The offset of each token in AsmToks within AsmString. 46 ArrayRef<unsigned> AsmTokOffsets; 47 48 public: 49 ClangAsmParserCallback(Parser &P, SourceLocation Loc, StringRef AsmString, 50 ArrayRef<Token> Toks, ArrayRef<unsigned> Offsets) 51 : TheParser(P), AsmLoc(Loc), AsmString(AsmString), AsmToks(Toks), 52 AsmTokOffsets(Offsets) { 53 assert(AsmToks.size() == AsmTokOffsets.size()); 54 } 55 56 void LookupInlineAsmIdentifier(StringRef &LineBuf, 57 llvm::InlineAsmIdentifierInfo &Info, 58 bool IsUnevaluatedContext) override; 59 60 StringRef LookupInlineAsmLabel(StringRef Identifier, llvm::SourceMgr &LSM, 61 llvm::SMLoc Location, 62 bool Create) override; 63 64 bool LookupInlineAsmField(StringRef Base, StringRef Member, 65 unsigned &Offset) override { 66 return TheParser.getActions().LookupInlineAsmField(Base, Member, Offset, 67 AsmLoc); 68 } 69 70 static void DiagHandlerCallback(const llvm::SMDiagnostic &D, void *Context) { 71 ((ClangAsmParserCallback *)Context)->handleDiagnostic(D); 72 } 73 74 private: 75 /// Collect the appropriate tokens for the given string. 76 void findTokensForString(StringRef Str, SmallVectorImpl<Token> &TempToks, 77 const Token *&FirstOrigToken) const; 78 79 SourceLocation translateLocation(const llvm::SourceMgr &LSM, 80 llvm::SMLoc SMLoc); 81 82 void handleDiagnostic(const llvm::SMDiagnostic &D); 83 }; 84 } 85 86 void ClangAsmParserCallback::LookupInlineAsmIdentifier( 87 StringRef &LineBuf, llvm::InlineAsmIdentifierInfo &Info, 88 bool IsUnevaluatedContext) { 89 // Collect the desired tokens. 90 SmallVector<Token, 16> LineToks; 91 const Token *FirstOrigToken = nullptr; 92 findTokensForString(LineBuf, LineToks, FirstOrigToken); 93 94 unsigned NumConsumedToks; 95 ExprResult Result = TheParser.ParseMSAsmIdentifier(LineToks, NumConsumedToks, 96 IsUnevaluatedContext); 97 98 // If we consumed the entire line, tell MC that. 99 // Also do this if we consumed nothing as a way of reporting failure. 100 if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) { 101 // By not modifying LineBuf, we're implicitly consuming it all. 102 103 // Otherwise, consume up to the original tokens. 104 } else { 105 assert(FirstOrigToken && "not using original tokens?"); 106 107 // Since we're using original tokens, apply that offset. 108 assert(FirstOrigToken[NumConsumedToks].getLocation() == 109 LineToks[NumConsumedToks].getLocation()); 110 unsigned FirstIndex = FirstOrigToken - AsmToks.begin(); 111 unsigned LastIndex = FirstIndex + NumConsumedToks - 1; 112 113 // The total length we've consumed is the relative offset 114 // of the last token we consumed plus its length. 115 unsigned TotalOffset = 116 (AsmTokOffsets[LastIndex] + AsmToks[LastIndex].getLength() - 117 AsmTokOffsets[FirstIndex]); 118 LineBuf = LineBuf.substr(0, TotalOffset); 119 } 120 121 // Initialize Info with the lookup result. 122 if (!Result.isUsable()) 123 return; 124 TheParser.getActions().FillInlineAsmIdentifierInfo(Result.get(), Info); 125 } 126 127 StringRef ClangAsmParserCallback::LookupInlineAsmLabel(StringRef Identifier, 128 llvm::SourceMgr &LSM, 129 llvm::SMLoc Location, 130 bool Create) { 131 SourceLocation Loc = translateLocation(LSM, Location); 132 LabelDecl *Label = 133 TheParser.getActions().GetOrCreateMSAsmLabel(Identifier, Loc, Create); 134 return Label->getMSAsmLabel(); 135 } 136 137 void ClangAsmParserCallback::findTokensForString( 138 StringRef Str, SmallVectorImpl<Token> &TempToks, 139 const Token *&FirstOrigToken) const { 140 // For now, assert that the string we're working with is a substring 141 // of what we gave to MC. This lets us use the original tokens. 142 assert(!std::less<const char *>()(Str.begin(), AsmString.begin()) && 143 !std::less<const char *>()(AsmString.end(), Str.end())); 144 145 // Try to find a token whose offset matches the first token. 146 unsigned FirstCharOffset = Str.begin() - AsmString.begin(); 147 const unsigned *FirstTokOffset = 148 llvm::lower_bound(AsmTokOffsets, FirstCharOffset); 149 150 // For now, assert that the start of the string exactly 151 // corresponds to the start of a token. 152 assert(*FirstTokOffset == FirstCharOffset); 153 154 // Use all the original tokens for this line. (We assume the 155 // end of the line corresponds cleanly to a token break.) 156 unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin(); 157 FirstOrigToken = &AsmToks[FirstTokIndex]; 158 unsigned LastCharOffset = Str.end() - AsmString.begin(); 159 for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) { 160 if (AsmTokOffsets[i] >= LastCharOffset) 161 break; 162 TempToks.push_back(AsmToks[i]); 163 } 164 } 165 166 SourceLocation 167 ClangAsmParserCallback::translateLocation(const llvm::SourceMgr &LSM, 168 llvm::SMLoc SMLoc) { 169 // Compute an offset into the inline asm buffer. 170 // FIXME: This isn't right if .macro is involved (but hopefully, no 171 // real-world code does that). 172 const llvm::MemoryBuffer *LBuf = 173 LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(SMLoc)); 174 unsigned Offset = SMLoc.getPointer() - LBuf->getBufferStart(); 175 176 // Figure out which token that offset points into. 177 const unsigned *TokOffsetPtr = llvm::lower_bound(AsmTokOffsets, Offset); 178 unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin(); 179 unsigned TokOffset = *TokOffsetPtr; 180 181 // If we come up with an answer which seems sane, use it; otherwise, 182 // just point at the __asm keyword. 183 // FIXME: Assert the answer is sane once we handle .macro correctly. 184 SourceLocation Loc = AsmLoc; 185 if (TokIndex < AsmToks.size()) { 186 const Token &Tok = AsmToks[TokIndex]; 187 Loc = Tok.getLocation(); 188 Loc = Loc.getLocWithOffset(Offset - TokOffset); 189 } 190 return Loc; 191 } 192 193 void ClangAsmParserCallback::handleDiagnostic(const llvm::SMDiagnostic &D) { 194 const llvm::SourceMgr &LSM = *D.getSourceMgr(); 195 SourceLocation Loc = translateLocation(LSM, D.getLoc()); 196 TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing) << D.getMessage(); 197 } 198 199 /// Parse an identifier in an MS-style inline assembly block. 200 ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks, 201 unsigned &NumLineToksConsumed, 202 bool IsUnevaluatedContext) { 203 // Push a fake token on the end so that we don't overrun the token 204 // stream. We use ';' because it expression-parsing should never 205 // overrun it. 206 const tok::TokenKind EndOfStream = tok::semi; 207 Token EndOfStreamTok; 208 EndOfStreamTok.startToken(); 209 EndOfStreamTok.setKind(EndOfStream); 210 LineToks.push_back(EndOfStreamTok); 211 212 // Also copy the current token over. 213 LineToks.push_back(Tok); 214 215 PP.EnterTokenStream(LineToks, /*DisableMacroExpansions*/ true, 216 /*IsReinject*/ true); 217 218 // Clear the current token and advance to the first token in LineToks. 219 ConsumeAnyToken(); 220 221 // Parse an optional scope-specifier if we're in C++. 222 CXXScopeSpec SS; 223 if (getLangOpts().CPlusPlus) 224 ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, 225 /*ObjectHadErrors=*/false, 226 /*EnteringContext=*/false); 227 228 // Require an identifier here. 229 SourceLocation TemplateKWLoc; 230 UnqualifiedId Id; 231 bool Invalid = true; 232 ExprResult Result; 233 if (Tok.is(tok::kw_this)) { 234 Result = ParseCXXThis(); 235 Invalid = false; 236 } else { 237 Invalid = 238 ParseUnqualifiedId(SS, /*ObjectType=*/nullptr, 239 /*ObjectHadErrors=*/false, 240 /*EnteringContext=*/false, 241 /*AllowDestructorName=*/false, 242 /*AllowConstructorName=*/false, 243 /*AllowDeductionGuide=*/false, &TemplateKWLoc, Id); 244 // Perform the lookup. 245 Result = Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id, 246 IsUnevaluatedContext); 247 } 248 // While the next two tokens are 'period' 'identifier', repeatedly parse it as 249 // a field access. We have to avoid consuming assembler directives that look 250 // like '.' 'else'. 251 while (Result.isUsable() && Tok.is(tok::period)) { 252 Token IdTok = PP.LookAhead(0); 253 if (IdTok.isNot(tok::identifier)) 254 break; 255 ConsumeToken(); // Consume the period. 256 IdentifierInfo *Id = Tok.getIdentifierInfo(); 257 ConsumeToken(); // Consume the identifier. 258 Result = Actions.LookupInlineAsmVarDeclField(Result.get(), Id->getName(), 259 Tok.getLocation()); 260 } 261 262 // Figure out how many tokens we are into LineToks. 263 unsigned LineIndex = 0; 264 if (Tok.is(EndOfStream)) { 265 LineIndex = LineToks.size() - 2; 266 } else { 267 while (LineToks[LineIndex].getLocation() != Tok.getLocation()) { 268 LineIndex++; 269 assert(LineIndex < LineToks.size() - 2); // we added two extra tokens 270 } 271 } 272 273 // If we've run into the poison token we inserted before, or there 274 // was a parsing error, then claim the entire line. 275 if (Invalid || Tok.is(EndOfStream)) { 276 NumLineToksConsumed = LineToks.size() - 2; 277 } else { 278 // Otherwise, claim up to the start of the next token. 279 NumLineToksConsumed = LineIndex; 280 } 281 282 // Finally, restore the old parsing state by consuming all the tokens we 283 // staged before, implicitly killing off the token-lexer we pushed. 284 for (unsigned i = 0, e = LineToks.size() - LineIndex - 2; i != e; ++i) { 285 ConsumeAnyToken(); 286 } 287 assert(Tok.is(EndOfStream)); 288 ConsumeToken(); 289 290 // Leave LineToks in its original state. 291 LineToks.pop_back(); 292 LineToks.pop_back(); 293 294 return Result; 295 } 296 297 /// Turn a sequence of our tokens back into a string that we can hand 298 /// to the MC asm parser. 299 static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc, 300 ArrayRef<Token> AsmToks, 301 SmallVectorImpl<unsigned> &TokOffsets, 302 SmallString<512> &Asm) { 303 assert(!AsmToks.empty() && "Didn't expect an empty AsmToks!"); 304 305 // Is this the start of a new assembly statement? 306 bool isNewStatement = true; 307 308 for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) { 309 const Token &Tok = AsmToks[i]; 310 311 // Start each new statement with a newline and a tab. 312 if (!isNewStatement && (Tok.is(tok::kw_asm) || Tok.isAtStartOfLine())) { 313 Asm += "\n\t"; 314 isNewStatement = true; 315 } 316 317 // Preserve the existence of leading whitespace except at the 318 // start of a statement. 319 if (!isNewStatement && Tok.hasLeadingSpace()) 320 Asm += ' '; 321 322 // Remember the offset of this token. 323 TokOffsets.push_back(Asm.size()); 324 325 // Don't actually write '__asm' into the assembly stream. 326 if (Tok.is(tok::kw_asm)) { 327 // Complain about __asm at the end of the stream. 328 if (i + 1 == e) { 329 PP.Diag(AsmLoc, diag::err_asm_empty); 330 return true; 331 } 332 333 continue; 334 } 335 336 // Append the spelling of the token. 337 SmallString<32> SpellingBuffer; 338 bool SpellingInvalid = false; 339 Asm += PP.getSpelling(Tok, SpellingBuffer, &SpellingInvalid); 340 assert(!SpellingInvalid && "spelling was invalid after correct parse?"); 341 342 // We are no longer at the start of a statement. 343 isNewStatement = false; 344 } 345 346 // Ensure that the buffer is null-terminated. 347 Asm.push_back('\0'); 348 Asm.pop_back(); 349 350 assert(TokOffsets.size() == AsmToks.size()); 351 return false; 352 } 353 354 // Determine if this is a GCC-style asm statement. 355 bool Parser::isGCCAsmStatement(const Token &TokAfterAsm) const { 356 return TokAfterAsm.is(tok::l_paren) || isGNUAsmQualifier(TokAfterAsm); 357 } 358 359 bool Parser::isGNUAsmQualifier(const Token &TokAfterAsm) const { 360 return getGNUAsmQualifier(TokAfterAsm) != GNUAsmQualifiers::AQ_unspecified; 361 } 362 363 /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, 364 /// this routine is called to collect the tokens for an MS asm statement. 365 /// 366 /// [MS] ms-asm-statement: 367 /// ms-asm-block 368 /// ms-asm-block ms-asm-statement 369 /// 370 /// [MS] ms-asm-block: 371 /// '__asm' ms-asm-line '\n' 372 /// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] 373 /// 374 /// [MS] ms-asm-instruction-block 375 /// ms-asm-line 376 /// ms-asm-line '\n' ms-asm-instruction-block 377 /// 378 StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { 379 SourceManager &SrcMgr = PP.getSourceManager(); 380 SourceLocation EndLoc = AsmLoc; 381 SmallVector<Token, 4> AsmToks; 382 383 bool SingleLineMode = true; 384 unsigned BraceNesting = 0; 385 unsigned short savedBraceCount = BraceCount; 386 bool InAsmComment = false; 387 FileID FID; 388 unsigned LineNo = 0; 389 unsigned NumTokensRead = 0; 390 SmallVector<SourceLocation, 4> LBraceLocs; 391 bool SkippedStartOfLine = false; 392 393 if (Tok.is(tok::l_brace)) { 394 // Braced inline asm: consume the opening brace. 395 SingleLineMode = false; 396 BraceNesting = 1; 397 EndLoc = ConsumeBrace(); 398 LBraceLocs.push_back(EndLoc); 399 ++NumTokensRead; 400 } else { 401 // Single-line inline asm; compute which line it is on. 402 std::pair<FileID, unsigned> ExpAsmLoc = 403 SrcMgr.getDecomposedExpansionLoc(EndLoc); 404 FID = ExpAsmLoc.first; 405 LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); 406 LBraceLocs.push_back(SourceLocation()); 407 } 408 409 SourceLocation TokLoc = Tok.getLocation(); 410 do { 411 // If we hit EOF, we're done, period. 412 if (isEofOrEom()) 413 break; 414 415 if (!InAsmComment && Tok.is(tok::l_brace)) { 416 // Consume the opening brace. 417 SkippedStartOfLine = Tok.isAtStartOfLine(); 418 AsmToks.push_back(Tok); 419 EndLoc = ConsumeBrace(); 420 BraceNesting++; 421 LBraceLocs.push_back(EndLoc); 422 TokLoc = Tok.getLocation(); 423 ++NumTokensRead; 424 continue; 425 } else if (!InAsmComment && Tok.is(tok::semi)) { 426 // A semicolon in an asm is the start of a comment. 427 InAsmComment = true; 428 if (!SingleLineMode) { 429 // Compute which line the comment is on. 430 std::pair<FileID, unsigned> ExpSemiLoc = 431 SrcMgr.getDecomposedExpansionLoc(TokLoc); 432 FID = ExpSemiLoc.first; 433 LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); 434 } 435 } else if (SingleLineMode || InAsmComment) { 436 // If end-of-line is significant, check whether this token is on a 437 // new line. 438 std::pair<FileID, unsigned> ExpLoc = 439 SrcMgr.getDecomposedExpansionLoc(TokLoc); 440 if (ExpLoc.first != FID || 441 SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { 442 // If this is a single-line __asm, we're done, except if the next 443 // line is MS-style asm too, in which case we finish a comment 444 // if needed and then keep processing the next line as a single 445 // line __asm. 446 bool isAsm = Tok.is(tok::kw_asm); 447 if (SingleLineMode && (!isAsm || isGCCAsmStatement(NextToken()))) 448 break; 449 // We're no longer in a comment. 450 InAsmComment = false; 451 if (isAsm) { 452 // If this is a new __asm {} block we want to process it separately 453 // from the single-line __asm statements 454 if (PP.LookAhead(0).is(tok::l_brace)) 455 break; 456 LineNo = SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second); 457 SkippedStartOfLine = Tok.isAtStartOfLine(); 458 } else if (Tok.is(tok::semi)) { 459 // A multi-line asm-statement, where next line is a comment 460 InAsmComment = true; 461 FID = ExpLoc.first; 462 LineNo = SrcMgr.getLineNumber(FID, ExpLoc.second); 463 } 464 } else if (!InAsmComment && Tok.is(tok::r_brace)) { 465 // In MSVC mode, braces only participate in brace matching and 466 // separating the asm statements. This is an intentional 467 // departure from the Apple gcc behavior. 468 if (!BraceNesting) 469 break; 470 } 471 } 472 if (!InAsmComment && BraceNesting && Tok.is(tok::r_brace) && 473 BraceCount == (savedBraceCount + BraceNesting)) { 474 // Consume the closing brace. 475 SkippedStartOfLine = Tok.isAtStartOfLine(); 476 // Don't want to add the closing brace of the whole asm block 477 if (SingleLineMode || BraceNesting > 1) { 478 Tok.clearFlag(Token::LeadingSpace); 479 AsmToks.push_back(Tok); 480 } 481 EndLoc = ConsumeBrace(); 482 BraceNesting--; 483 // Finish if all of the opened braces in the inline asm section were 484 // consumed. 485 if (BraceNesting == 0 && !SingleLineMode) 486 break; 487 else { 488 LBraceLocs.pop_back(); 489 TokLoc = Tok.getLocation(); 490 ++NumTokensRead; 491 continue; 492 } 493 } 494 495 // Consume the next token; make sure we don't modify the brace count etc. 496 // if we are in a comment. 497 EndLoc = TokLoc; 498 if (InAsmComment) 499 PP.Lex(Tok); 500 else { 501 // Set the token as the start of line if we skipped the original start 502 // of line token in case it was a nested brace. 503 if (SkippedStartOfLine) 504 Tok.setFlag(Token::StartOfLine); 505 AsmToks.push_back(Tok); 506 ConsumeAnyToken(); 507 } 508 TokLoc = Tok.getLocation(); 509 ++NumTokensRead; 510 SkippedStartOfLine = false; 511 } while (1); 512 513 if (BraceNesting && BraceCount != savedBraceCount) { 514 // __asm without closing brace (this can happen at EOF). 515 for (unsigned i = 0; i < BraceNesting; ++i) { 516 Diag(Tok, diag::err_expected) << tok::r_brace; 517 Diag(LBraceLocs.back(), diag::note_matching) << tok::l_brace; 518 LBraceLocs.pop_back(); 519 } 520 return StmtError(); 521 } else if (NumTokensRead == 0) { 522 // Empty __asm. 523 Diag(Tok, diag::err_expected) << tok::l_brace; 524 return StmtError(); 525 } 526 527 // Okay, prepare to use MC to parse the assembly. 528 SmallVector<StringRef, 4> ConstraintRefs; 529 SmallVector<Expr *, 4> Exprs; 530 SmallVector<StringRef, 4> ClobberRefs; 531 532 // We need an actual supported target. 533 const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple(); 534 const std::string &TT = TheTriple.getTriple(); 535 const llvm::Target *TheTarget = nullptr; 536 if (!TheTriple.isX86()) { 537 Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName(); 538 } else { 539 std::string Error; 540 TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error); 541 if (!TheTarget) 542 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error; 543 } 544 545 assert(!LBraceLocs.empty() && "Should have at least one location here"); 546 547 SmallString<512> AsmString; 548 auto EmptyStmt = [&] { 549 return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmString, 550 /*NumOutputs*/ 0, /*NumInputs*/ 0, 551 ConstraintRefs, ClobberRefs, Exprs, EndLoc); 552 }; 553 // If we don't support assembly, or the assembly is empty, we don't 554 // need to instantiate the AsmParser, etc. 555 if (!TheTarget || AsmToks.empty()) { 556 return EmptyStmt(); 557 } 558 559 // Expand the tokens into a string buffer. 560 SmallVector<unsigned, 8> TokOffsets; 561 if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString)) 562 return StmtError(); 563 564 const TargetOptions &TO = Actions.Context.getTargetInfo().getTargetOpts(); 565 std::string FeaturesStr = 566 llvm::join(TO.Features.begin(), TO.Features.end(), ","); 567 568 std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); 569 if (!MRI) { 570 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 571 << "target MC unavailable"; 572 return EmptyStmt(); 573 } 574 // FIXME: init MCOptions from sanitizer flags here. 575 llvm::MCTargetOptions MCOptions; 576 std::unique_ptr<llvm::MCAsmInfo> MAI( 577 TheTarget->createMCAsmInfo(*MRI, TT, MCOptions)); 578 // Get the instruction descriptor. 579 std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 580 std::unique_ptr<llvm::MCSubtargetInfo> STI( 581 TheTarget->createMCSubtargetInfo(TT, TO.CPU, FeaturesStr)); 582 // Target MCTargetDesc may not be linked in clang-based tools. 583 584 if (!MAI || !MII || !STI) { 585 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 586 << "target MC unavailable"; 587 return EmptyStmt(); 588 } 589 590 llvm::SourceMgr TempSrcMgr; 591 llvm::MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &TempSrcMgr); 592 std::unique_ptr<llvm::MCObjectFileInfo> MOFI( 593 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); 594 Ctx.setObjectFileInfo(MOFI.get()); 595 596 std::unique_ptr<llvm::MemoryBuffer> Buffer = 597 llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>"); 598 599 // Tell SrcMgr about this buffer, which is what the parser will pick up. 600 TempSrcMgr.AddNewSourceBuffer(std::move(Buffer), llvm::SMLoc()); 601 602 std::unique_ptr<llvm::MCStreamer> Str(createNullStreamer(Ctx)); 603 std::unique_ptr<llvm::MCAsmParser> Parser( 604 createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI)); 605 606 std::unique_ptr<llvm::MCTargetAsmParser> TargetParser( 607 TheTarget->createMCAsmParser(*STI, *Parser, *MII, MCOptions)); 608 // Target AsmParser may not be linked in clang-based tools. 609 if (!TargetParser) { 610 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 611 << "target ASM parser unavailable"; 612 return EmptyStmt(); 613 } 614 615 std::unique_ptr<llvm::MCInstPrinter> IP( 616 TheTarget->createMCInstPrinter(llvm::Triple(TT), 1, *MAI, *MII, *MRI)); 617 618 // Change to the Intel dialect. 619 Parser->setAssemblerDialect(1); 620 Parser->setTargetParser(*TargetParser.get()); 621 Parser->setParsingMSInlineAsm(true); 622 TargetParser->setParsingMSInlineAsm(true); 623 624 ClangAsmParserCallback Callback(*this, AsmLoc, AsmString, AsmToks, 625 TokOffsets); 626 TargetParser->setSemaCallback(&Callback); 627 TempSrcMgr.setDiagHandler(ClangAsmParserCallback::DiagHandlerCallback, 628 &Callback); 629 630 unsigned NumOutputs; 631 unsigned NumInputs; 632 std::string AsmStringIR; 633 SmallVector<std::pair<void *, bool>, 4> OpExprs; 634 SmallVector<std::string, 4> Constraints; 635 SmallVector<std::string, 4> Clobbers; 636 if (Parser->parseMSInlineAsm(AsmStringIR, NumOutputs, NumInputs, OpExprs, 637 Constraints, Clobbers, MII.get(), IP.get(), 638 Callback)) 639 return StmtError(); 640 641 // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber 642 // constraints. Clang always adds fpsr to the clobber list anyway. 643 llvm::erase_if(Clobbers, [](const std::string &C) { 644 return C == "fpsr" || C == "mxcsr"; 645 }); 646 647 // Build the vector of clobber StringRefs. 648 ClobberRefs.insert(ClobberRefs.end(), Clobbers.begin(), Clobbers.end()); 649 650 // Recast the void pointers and build the vector of constraint StringRefs. 651 unsigned NumExprs = NumOutputs + NumInputs; 652 ConstraintRefs.resize(NumExprs); 653 Exprs.resize(NumExprs); 654 for (unsigned i = 0, e = NumExprs; i != e; ++i) { 655 Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first); 656 if (!OpExpr) 657 return StmtError(); 658 659 // Need address of variable. 660 if (OpExprs[i].second) 661 OpExpr = 662 Actions.BuildUnaryOp(getCurScope(), AsmLoc, UO_AddrOf, OpExpr).get(); 663 664 ConstraintRefs[i] = StringRef(Constraints[i]); 665 Exprs[i] = OpExpr; 666 } 667 668 // FIXME: We should be passing source locations for better diagnostics. 669 return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmStringIR, 670 NumOutputs, NumInputs, ConstraintRefs, 671 ClobberRefs, Exprs, EndLoc); 672 } 673 674 /// parseGNUAsmQualifierListOpt - Parse a GNU extended asm qualifier list. 675 /// asm-qualifier: 676 /// volatile 677 /// inline 678 /// goto 679 /// 680 /// asm-qualifier-list: 681 /// asm-qualifier 682 /// asm-qualifier-list asm-qualifier 683 bool Parser::parseGNUAsmQualifierListOpt(GNUAsmQualifiers &AQ) { 684 while (1) { 685 const GNUAsmQualifiers::AQ A = getGNUAsmQualifier(Tok); 686 if (A == GNUAsmQualifiers::AQ_unspecified) { 687 if (Tok.isNot(tok::l_paren)) { 688 Diag(Tok.getLocation(), diag::err_asm_qualifier_ignored); 689 SkipUntil(tok::r_paren, StopAtSemi); 690 return true; 691 } 692 return false; 693 } 694 if (AQ.setAsmQualifier(A)) 695 Diag(Tok.getLocation(), diag::err_asm_duplicate_qual) 696 << GNUAsmQualifiers::getQualifierName(A); 697 ConsumeToken(); 698 } 699 return false; 700 } 701 702 /// ParseAsmStatement - Parse a GNU extended asm statement. 703 /// asm-statement: 704 /// gnu-asm-statement 705 /// ms-asm-statement 706 /// 707 /// [GNU] gnu-asm-statement: 708 /// 'asm' asm-qualifier-list[opt] '(' asm-argument ')' ';' 709 /// 710 /// [GNU] asm-argument: 711 /// asm-string-literal 712 /// asm-string-literal ':' asm-operands[opt] 713 /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] 714 /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] 715 /// ':' asm-clobbers 716 /// 717 /// [GNU] asm-clobbers: 718 /// asm-string-literal 719 /// asm-clobbers ',' asm-string-literal 720 /// 721 StmtResult Parser::ParseAsmStatement(bool &msAsm) { 722 assert(Tok.is(tok::kw_asm) && "Not an asm stmt"); 723 SourceLocation AsmLoc = ConsumeToken(); 724 725 if (getLangOpts().AsmBlocks && !isGCCAsmStatement(Tok)) { 726 msAsm = true; 727 return ParseMicrosoftAsmStatement(AsmLoc); 728 } 729 730 SourceLocation Loc = Tok.getLocation(); 731 GNUAsmQualifiers GAQ; 732 if (parseGNUAsmQualifierListOpt(GAQ)) 733 return StmtError(); 734 735 if (GAQ.isGoto() && getLangOpts().SpeculativeLoadHardening) 736 Diag(Loc, diag::warn_slh_does_not_support_asm_goto); 737 738 BalancedDelimiterTracker T(*this, tok::l_paren); 739 T.consumeOpen(); 740 741 ExprResult AsmString(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 742 743 // Check if GNU-style InlineAsm is disabled. 744 // Error on anything other than empty string. 745 if (!(getLangOpts().GNUAsm || AsmString.isInvalid())) { 746 const auto *SL = cast<StringLiteral>(AsmString.get()); 747 if (!SL->getString().trim().empty()) 748 Diag(Loc, diag::err_gnu_inline_asm_disabled); 749 } 750 751 if (AsmString.isInvalid()) { 752 // Consume up to and including the closing paren. 753 T.skipToEnd(); 754 return StmtError(); 755 } 756 757 SmallVector<IdentifierInfo *, 4> Names; 758 ExprVector Constraints; 759 ExprVector Exprs; 760 ExprVector Clobbers; 761 762 if (Tok.is(tok::r_paren)) { 763 // We have a simple asm expression like 'asm("foo")'. 764 T.consumeClose(); 765 return Actions.ActOnGCCAsmStmt( 766 AsmLoc, /*isSimple*/ true, GAQ.isVolatile(), 767 /*NumOutputs*/ 0, /*NumInputs*/ 0, nullptr, Constraints, Exprs, 768 AsmString.get(), Clobbers, /*NumLabels*/ 0, T.getCloseLocation()); 769 } 770 771 // Parse Outputs, if present. 772 bool AteExtraColon = false; 773 if (Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 774 // In C++ mode, parse "::" like ": :". 775 AteExtraColon = Tok.is(tok::coloncolon); 776 ConsumeToken(); 777 778 if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) 779 return StmtError(); 780 } 781 782 unsigned NumOutputs = Names.size(); 783 784 // Parse Inputs, if present. 785 if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 786 // In C++ mode, parse "::" like ": :". 787 if (AteExtraColon) 788 AteExtraColon = false; 789 else { 790 AteExtraColon = Tok.is(tok::coloncolon); 791 ConsumeToken(); 792 } 793 794 if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) 795 return StmtError(); 796 } 797 798 assert(Names.size() == Constraints.size() && 799 Constraints.size() == Exprs.size() && "Input operand size mismatch!"); 800 801 unsigned NumInputs = Names.size() - NumOutputs; 802 803 // Parse the clobbers, if present. 804 if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 805 if (AteExtraColon) 806 AteExtraColon = false; 807 else { 808 AteExtraColon = Tok.is(tok::coloncolon); 809 ConsumeToken(); 810 } 811 // Parse the asm-string list for clobbers if present. 812 if (!AteExtraColon && isTokenStringLiteral()) { 813 while (1) { 814 ExprResult Clobber(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 815 816 if (Clobber.isInvalid()) 817 break; 818 819 Clobbers.push_back(Clobber.get()); 820 821 if (!TryConsumeToken(tok::comma)) 822 break; 823 } 824 } 825 } 826 if (!GAQ.isGoto() && (Tok.isNot(tok::r_paren) || AteExtraColon)) { 827 Diag(Tok, diag::err_expected) << tok::r_paren; 828 SkipUntil(tok::r_paren, StopAtSemi); 829 return StmtError(); 830 } 831 832 // Parse the goto label, if present. 833 unsigned NumLabels = 0; 834 if (AteExtraColon || Tok.is(tok::colon)) { 835 if (!AteExtraColon) 836 ConsumeToken(); 837 838 while (true) { 839 if (Tok.isNot(tok::identifier)) { 840 Diag(Tok, diag::err_expected) << tok::identifier; 841 SkipUntil(tok::r_paren, StopAtSemi); 842 return StmtError(); 843 } 844 LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(), 845 Tok.getLocation()); 846 Names.push_back(Tok.getIdentifierInfo()); 847 if (!LD) { 848 SkipUntil(tok::r_paren, StopAtSemi); 849 return StmtError(); 850 } 851 ExprResult Res = 852 Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD); 853 Exprs.push_back(Res.get()); 854 NumLabels++; 855 ConsumeToken(); 856 if (!TryConsumeToken(tok::comma)) 857 break; 858 } 859 } else if (GAQ.isGoto()) { 860 Diag(Tok, diag::err_expected) << tok::colon; 861 SkipUntil(tok::r_paren, StopAtSemi); 862 return StmtError(); 863 } 864 T.consumeClose(); 865 return Actions.ActOnGCCAsmStmt(AsmLoc, false, GAQ.isVolatile(), NumOutputs, 866 NumInputs, Names.data(), Constraints, Exprs, 867 AsmString.get(), Clobbers, NumLabels, 868 T.getCloseLocation()); 869 } 870 871 /// ParseAsmOperands - Parse the asm-operands production as used by 872 /// asm-statement, assuming the leading ':' token was eaten. 873 /// 874 /// [GNU] asm-operands: 875 /// asm-operand 876 /// asm-operands ',' asm-operand 877 /// 878 /// [GNU] asm-operand: 879 /// asm-string-literal '(' expression ')' 880 /// '[' identifier ']' asm-string-literal '(' expression ')' 881 /// 882 // 883 // FIXME: Avoid unnecessary std::string trashing. 884 bool Parser::ParseAsmOperandsOpt(SmallVectorImpl<IdentifierInfo *> &Names, 885 SmallVectorImpl<Expr *> &Constraints, 886 SmallVectorImpl<Expr *> &Exprs) { 887 // 'asm-operands' isn't present? 888 if (!isTokenStringLiteral() && Tok.isNot(tok::l_square)) 889 return false; 890 891 while (1) { 892 // Read the [id] if present. 893 if (Tok.is(tok::l_square)) { 894 BalancedDelimiterTracker T(*this, tok::l_square); 895 T.consumeOpen(); 896 897 if (Tok.isNot(tok::identifier)) { 898 Diag(Tok, diag::err_expected) << tok::identifier; 899 SkipUntil(tok::r_paren, StopAtSemi); 900 return true; 901 } 902 903 IdentifierInfo *II = Tok.getIdentifierInfo(); 904 ConsumeToken(); 905 906 Names.push_back(II); 907 T.consumeClose(); 908 } else 909 Names.push_back(nullptr); 910 911 ExprResult Constraint(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 912 if (Constraint.isInvalid()) { 913 SkipUntil(tok::r_paren, StopAtSemi); 914 return true; 915 } 916 Constraints.push_back(Constraint.get()); 917 918 if (Tok.isNot(tok::l_paren)) { 919 Diag(Tok, diag::err_expected_lparen_after) << "asm operand"; 920 SkipUntil(tok::r_paren, StopAtSemi); 921 return true; 922 } 923 924 // Read the parenthesized expression. 925 BalancedDelimiterTracker T(*this, tok::l_paren); 926 T.consumeOpen(); 927 ExprResult Res = Actions.CorrectDelayedTyposInExpr(ParseExpression()); 928 T.consumeClose(); 929 if (Res.isInvalid()) { 930 SkipUntil(tok::r_paren, StopAtSemi); 931 return true; 932 } 933 Exprs.push_back(Res.get()); 934 // Eat the comma and continue parsing if it exists. 935 if (!TryConsumeToken(tok::comma)) 936 return false; 937 } 938 } 939 940 const char *Parser::GNUAsmQualifiers::getQualifierName(AQ Qualifier) { 941 switch (Qualifier) { 942 case AQ_volatile: return "volatile"; 943 case AQ_inline: return "inline"; 944 case AQ_goto: return "goto"; 945 case AQ_unspecified: return "unspecified"; 946 } 947 llvm_unreachable("Unknown GNUAsmQualifier"); 948 } 949 950 Parser::GNUAsmQualifiers::AQ 951 Parser::getGNUAsmQualifier(const Token &Tok) const { 952 switch (Tok.getKind()) { 953 case tok::kw_volatile: return GNUAsmQualifiers::AQ_volatile; 954 case tok::kw_inline: return GNUAsmQualifiers::AQ_inline; 955 case tok::kw_goto: return GNUAsmQualifiers::AQ_goto; 956 default: return GNUAsmQualifiers::AQ_unspecified; 957 } 958 } 959 bool Parser::GNUAsmQualifiers::setAsmQualifier(AQ Qualifier) { 960 bool IsDuplicate = Qualifiers & Qualifier; 961 Qualifiers |= Qualifier; 962 return IsDuplicate; 963 } 964