1 //===---- ParseStmtAsm.cpp - Assembly Statement Parser --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing for GCC and Microsoft inline assembly. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/Parse/Parser.h" 14 #include "clang/AST/ASTContext.h" 15 #include "clang/Basic/Diagnostic.h" 16 #include "clang/Basic/TargetInfo.h" 17 #include "clang/Parse/RAIIObjectsForParser.h" 18 #include "llvm/ADT/SmallString.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/MC/MCAsmInfo.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCInstPrinter.h" 23 #include "llvm/MC/MCInstrInfo.h" 24 #include "llvm/MC/MCObjectFileInfo.h" 25 #include "llvm/MC/MCParser/MCAsmParser.h" 26 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include "llvm/MC/MCStreamer.h" 29 #include "llvm/MC/MCSubtargetInfo.h" 30 #include "llvm/MC/MCTargetOptions.h" 31 #include "llvm/Support/SourceMgr.h" 32 #include "llvm/Support/TargetRegistry.h" 33 #include "llvm/Support/TargetSelect.h" 34 using namespace clang; 35 36 namespace { 37 class ClangAsmParserCallback : public llvm::MCAsmParserSemaCallback { 38 Parser &TheParser; 39 SourceLocation AsmLoc; 40 StringRef AsmString; 41 42 /// The tokens we streamed into AsmString and handed off to MC. 43 ArrayRef<Token> AsmToks; 44 45 /// The offset of each token in AsmToks within AsmString. 46 ArrayRef<unsigned> AsmTokOffsets; 47 48 public: 49 ClangAsmParserCallback(Parser &P, SourceLocation Loc, StringRef AsmString, 50 ArrayRef<Token> Toks, ArrayRef<unsigned> Offsets) 51 : TheParser(P), AsmLoc(Loc), AsmString(AsmString), AsmToks(Toks), 52 AsmTokOffsets(Offsets) { 53 assert(AsmToks.size() == AsmTokOffsets.size()); 54 } 55 56 void LookupInlineAsmIdentifier(StringRef &LineBuf, 57 llvm::InlineAsmIdentifierInfo &Info, 58 bool IsUnevaluatedContext) override; 59 60 StringRef LookupInlineAsmLabel(StringRef Identifier, llvm::SourceMgr &LSM, 61 llvm::SMLoc Location, 62 bool Create) override; 63 64 bool LookupInlineAsmField(StringRef Base, StringRef Member, 65 unsigned &Offset) override { 66 return TheParser.getActions().LookupInlineAsmField(Base, Member, Offset, 67 AsmLoc); 68 } 69 70 static void DiagHandlerCallback(const llvm::SMDiagnostic &D, void *Context) { 71 ((ClangAsmParserCallback *)Context)->handleDiagnostic(D); 72 } 73 74 private: 75 /// Collect the appropriate tokens for the given string. 76 void findTokensForString(StringRef Str, SmallVectorImpl<Token> &TempToks, 77 const Token *&FirstOrigToken) const; 78 79 SourceLocation translateLocation(const llvm::SourceMgr &LSM, 80 llvm::SMLoc SMLoc); 81 82 void handleDiagnostic(const llvm::SMDiagnostic &D); 83 }; 84 } 85 86 void ClangAsmParserCallback::LookupInlineAsmIdentifier( 87 StringRef &LineBuf, llvm::InlineAsmIdentifierInfo &Info, 88 bool IsUnevaluatedContext) { 89 // Collect the desired tokens. 90 SmallVector<Token, 16> LineToks; 91 const Token *FirstOrigToken = nullptr; 92 findTokensForString(LineBuf, LineToks, FirstOrigToken); 93 94 unsigned NumConsumedToks; 95 ExprResult Result = TheParser.ParseMSAsmIdentifier(LineToks, NumConsumedToks, 96 IsUnevaluatedContext); 97 98 // If we consumed the entire line, tell MC that. 99 // Also do this if we consumed nothing as a way of reporting failure. 100 if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) { 101 // By not modifying LineBuf, we're implicitly consuming it all. 102 103 // Otherwise, consume up to the original tokens. 104 } else { 105 assert(FirstOrigToken && "not using original tokens?"); 106 107 // Since we're using original tokens, apply that offset. 108 assert(FirstOrigToken[NumConsumedToks].getLocation() == 109 LineToks[NumConsumedToks].getLocation()); 110 unsigned FirstIndex = FirstOrigToken - AsmToks.begin(); 111 unsigned LastIndex = FirstIndex + NumConsumedToks - 1; 112 113 // The total length we've consumed is the relative offset 114 // of the last token we consumed plus its length. 115 unsigned TotalOffset = 116 (AsmTokOffsets[LastIndex] + AsmToks[LastIndex].getLength() - 117 AsmTokOffsets[FirstIndex]); 118 LineBuf = LineBuf.substr(0, TotalOffset); 119 } 120 121 // Initialize Info with the lookup result. 122 if (!Result.isUsable()) 123 return; 124 TheParser.getActions().FillInlineAsmIdentifierInfo(Result.get(), Info); 125 } 126 127 StringRef ClangAsmParserCallback::LookupInlineAsmLabel(StringRef Identifier, 128 llvm::SourceMgr &LSM, 129 llvm::SMLoc Location, 130 bool Create) { 131 SourceLocation Loc = translateLocation(LSM, Location); 132 LabelDecl *Label = 133 TheParser.getActions().GetOrCreateMSAsmLabel(Identifier, Loc, Create); 134 return Label->getMSAsmLabel(); 135 } 136 137 void ClangAsmParserCallback::findTokensForString( 138 StringRef Str, SmallVectorImpl<Token> &TempToks, 139 const Token *&FirstOrigToken) const { 140 // For now, assert that the string we're working with is a substring 141 // of what we gave to MC. This lets us use the original tokens. 142 assert(!std::less<const char *>()(Str.begin(), AsmString.begin()) && 143 !std::less<const char *>()(AsmString.end(), Str.end())); 144 145 // Try to find a token whose offset matches the first token. 146 unsigned FirstCharOffset = Str.begin() - AsmString.begin(); 147 const unsigned *FirstTokOffset = 148 llvm::lower_bound(AsmTokOffsets, FirstCharOffset); 149 150 // For now, assert that the start of the string exactly 151 // corresponds to the start of a token. 152 assert(*FirstTokOffset == FirstCharOffset); 153 154 // Use all the original tokens for this line. (We assume the 155 // end of the line corresponds cleanly to a token break.) 156 unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin(); 157 FirstOrigToken = &AsmToks[FirstTokIndex]; 158 unsigned LastCharOffset = Str.end() - AsmString.begin(); 159 for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) { 160 if (AsmTokOffsets[i] >= LastCharOffset) 161 break; 162 TempToks.push_back(AsmToks[i]); 163 } 164 } 165 166 SourceLocation 167 ClangAsmParserCallback::translateLocation(const llvm::SourceMgr &LSM, 168 llvm::SMLoc SMLoc) { 169 // Compute an offset into the inline asm buffer. 170 // FIXME: This isn't right if .macro is involved (but hopefully, no 171 // real-world code does that). 172 const llvm::MemoryBuffer *LBuf = 173 LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(SMLoc)); 174 unsigned Offset = SMLoc.getPointer() - LBuf->getBufferStart(); 175 176 // Figure out which token that offset points into. 177 const unsigned *TokOffsetPtr = llvm::lower_bound(AsmTokOffsets, Offset); 178 unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin(); 179 unsigned TokOffset = *TokOffsetPtr; 180 181 // If we come up with an answer which seems sane, use it; otherwise, 182 // just point at the __asm keyword. 183 // FIXME: Assert the answer is sane once we handle .macro correctly. 184 SourceLocation Loc = AsmLoc; 185 if (TokIndex < AsmToks.size()) { 186 const Token &Tok = AsmToks[TokIndex]; 187 Loc = Tok.getLocation(); 188 Loc = Loc.getLocWithOffset(Offset - TokOffset); 189 } 190 return Loc; 191 } 192 193 void ClangAsmParserCallback::handleDiagnostic(const llvm::SMDiagnostic &D) { 194 const llvm::SourceMgr &LSM = *D.getSourceMgr(); 195 SourceLocation Loc = translateLocation(LSM, D.getLoc()); 196 TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing) << D.getMessage(); 197 } 198 199 /// Parse an identifier in an MS-style inline assembly block. 200 ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks, 201 unsigned &NumLineToksConsumed, 202 bool IsUnevaluatedContext) { 203 // Push a fake token on the end so that we don't overrun the token 204 // stream. We use ';' because it expression-parsing should never 205 // overrun it. 206 const tok::TokenKind EndOfStream = tok::semi; 207 Token EndOfStreamTok; 208 EndOfStreamTok.startToken(); 209 EndOfStreamTok.setKind(EndOfStream); 210 LineToks.push_back(EndOfStreamTok); 211 212 // Also copy the current token over. 213 LineToks.push_back(Tok); 214 215 PP.EnterTokenStream(LineToks, /*DisableMacroExpansions*/ true, 216 /*IsReinject*/ true); 217 218 // Clear the current token and advance to the first token in LineToks. 219 ConsumeAnyToken(); 220 221 // Parse an optional scope-specifier if we're in C++. 222 CXXScopeSpec SS; 223 if (getLangOpts().CPlusPlus) 224 ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, 225 /*ObjectHadErrors=*/false, 226 /*EnteringContext=*/false); 227 228 // Require an identifier here. 229 SourceLocation TemplateKWLoc; 230 UnqualifiedId Id; 231 bool Invalid = true; 232 ExprResult Result; 233 if (Tok.is(tok::kw_this)) { 234 Result = ParseCXXThis(); 235 Invalid = false; 236 } else { 237 Invalid = 238 ParseUnqualifiedId(SS, /*ObjectType=*/nullptr, 239 /*ObjectHadErrors=*/false, 240 /*EnteringContext=*/false, 241 /*AllowDestructorName=*/false, 242 /*AllowConstructorName=*/false, 243 /*AllowDeductionGuide=*/false, &TemplateKWLoc, Id); 244 // Perform the lookup. 245 Result = Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id, 246 IsUnevaluatedContext); 247 } 248 // While the next two tokens are 'period' 'identifier', repeatedly parse it as 249 // a field access. We have to avoid consuming assembler directives that look 250 // like '.' 'else'. 251 while (Result.isUsable() && Tok.is(tok::period)) { 252 Token IdTok = PP.LookAhead(0); 253 if (IdTok.isNot(tok::identifier)) 254 break; 255 ConsumeToken(); // Consume the period. 256 IdentifierInfo *Id = Tok.getIdentifierInfo(); 257 ConsumeToken(); // Consume the identifier. 258 Result = Actions.LookupInlineAsmVarDeclField(Result.get(), Id->getName(), 259 Tok.getLocation()); 260 } 261 262 // Figure out how many tokens we are into LineToks. 263 unsigned LineIndex = 0; 264 if (Tok.is(EndOfStream)) { 265 LineIndex = LineToks.size() - 2; 266 } else { 267 while (LineToks[LineIndex].getLocation() != Tok.getLocation()) { 268 LineIndex++; 269 assert(LineIndex < LineToks.size() - 2); // we added two extra tokens 270 } 271 } 272 273 // If we've run into the poison token we inserted before, or there 274 // was a parsing error, then claim the entire line. 275 if (Invalid || Tok.is(EndOfStream)) { 276 NumLineToksConsumed = LineToks.size() - 2; 277 } else { 278 // Otherwise, claim up to the start of the next token. 279 NumLineToksConsumed = LineIndex; 280 } 281 282 // Finally, restore the old parsing state by consuming all the tokens we 283 // staged before, implicitly killing off the token-lexer we pushed. 284 for (unsigned i = 0, e = LineToks.size() - LineIndex - 2; i != e; ++i) { 285 ConsumeAnyToken(); 286 } 287 assert(Tok.is(EndOfStream)); 288 ConsumeToken(); 289 290 // Leave LineToks in its original state. 291 LineToks.pop_back(); 292 LineToks.pop_back(); 293 294 return Result; 295 } 296 297 /// Turn a sequence of our tokens back into a string that we can hand 298 /// to the MC asm parser. 299 static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc, 300 ArrayRef<Token> AsmToks, 301 SmallVectorImpl<unsigned> &TokOffsets, 302 SmallString<512> &Asm) { 303 assert(!AsmToks.empty() && "Didn't expect an empty AsmToks!"); 304 305 // Is this the start of a new assembly statement? 306 bool isNewStatement = true; 307 308 for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) { 309 const Token &Tok = AsmToks[i]; 310 311 // Start each new statement with a newline and a tab. 312 if (!isNewStatement && (Tok.is(tok::kw_asm) || Tok.isAtStartOfLine())) { 313 Asm += "\n\t"; 314 isNewStatement = true; 315 } 316 317 // Preserve the existence of leading whitespace except at the 318 // start of a statement. 319 if (!isNewStatement && Tok.hasLeadingSpace()) 320 Asm += ' '; 321 322 // Remember the offset of this token. 323 TokOffsets.push_back(Asm.size()); 324 325 // Don't actually write '__asm' into the assembly stream. 326 if (Tok.is(tok::kw_asm)) { 327 // Complain about __asm at the end of the stream. 328 if (i + 1 == e) { 329 PP.Diag(AsmLoc, diag::err_asm_empty); 330 return true; 331 } 332 333 continue; 334 } 335 336 // Append the spelling of the token. 337 SmallString<32> SpellingBuffer; 338 bool SpellingInvalid = false; 339 Asm += PP.getSpelling(Tok, SpellingBuffer, &SpellingInvalid); 340 assert(!SpellingInvalid && "spelling was invalid after correct parse?"); 341 342 // We are no longer at the start of a statement. 343 isNewStatement = false; 344 } 345 346 // Ensure that the buffer is null-terminated. 347 Asm.push_back('\0'); 348 Asm.pop_back(); 349 350 assert(TokOffsets.size() == AsmToks.size()); 351 return false; 352 } 353 354 // Determine if this is a GCC-style asm statement. 355 bool Parser::isGCCAsmStatement(const Token &TokAfterAsm) const { 356 return TokAfterAsm.is(tok::l_paren) || isGNUAsmQualifier(TokAfterAsm); 357 } 358 359 bool Parser::isGNUAsmQualifier(const Token &TokAfterAsm) const { 360 return getGNUAsmQualifier(TokAfterAsm) != GNUAsmQualifiers::AQ_unspecified; 361 } 362 363 /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, 364 /// this routine is called to collect the tokens for an MS asm statement. 365 /// 366 /// [MS] ms-asm-statement: 367 /// ms-asm-block 368 /// ms-asm-block ms-asm-statement 369 /// 370 /// [MS] ms-asm-block: 371 /// '__asm' ms-asm-line '\n' 372 /// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] 373 /// 374 /// [MS] ms-asm-instruction-block 375 /// ms-asm-line 376 /// ms-asm-line '\n' ms-asm-instruction-block 377 /// 378 StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { 379 SourceManager &SrcMgr = PP.getSourceManager(); 380 SourceLocation EndLoc = AsmLoc; 381 SmallVector<Token, 4> AsmToks; 382 383 bool SingleLineMode = true; 384 unsigned BraceNesting = 0; 385 unsigned short savedBraceCount = BraceCount; 386 bool InAsmComment = false; 387 FileID FID; 388 unsigned LineNo = 0; 389 unsigned NumTokensRead = 0; 390 SmallVector<SourceLocation, 4> LBraceLocs; 391 bool SkippedStartOfLine = false; 392 393 if (Tok.is(tok::l_brace)) { 394 // Braced inline asm: consume the opening brace. 395 SingleLineMode = false; 396 BraceNesting = 1; 397 EndLoc = ConsumeBrace(); 398 LBraceLocs.push_back(EndLoc); 399 ++NumTokensRead; 400 } else { 401 // Single-line inline asm; compute which line it is on. 402 std::pair<FileID, unsigned> ExpAsmLoc = 403 SrcMgr.getDecomposedExpansionLoc(EndLoc); 404 FID = ExpAsmLoc.first; 405 LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); 406 LBraceLocs.push_back(SourceLocation()); 407 } 408 409 SourceLocation TokLoc = Tok.getLocation(); 410 do { 411 // If we hit EOF, we're done, period. 412 if (isEofOrEom()) 413 break; 414 415 if (!InAsmComment && Tok.is(tok::l_brace)) { 416 // Consume the opening brace. 417 SkippedStartOfLine = Tok.isAtStartOfLine(); 418 AsmToks.push_back(Tok); 419 EndLoc = ConsumeBrace(); 420 BraceNesting++; 421 LBraceLocs.push_back(EndLoc); 422 TokLoc = Tok.getLocation(); 423 ++NumTokensRead; 424 continue; 425 } else if (!InAsmComment && Tok.is(tok::semi)) { 426 // A semicolon in an asm is the start of a comment. 427 InAsmComment = true; 428 if (!SingleLineMode) { 429 // Compute which line the comment is on. 430 std::pair<FileID, unsigned> ExpSemiLoc = 431 SrcMgr.getDecomposedExpansionLoc(TokLoc); 432 FID = ExpSemiLoc.first; 433 LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); 434 } 435 } else if (SingleLineMode || InAsmComment) { 436 // If end-of-line is significant, check whether this token is on a 437 // new line. 438 std::pair<FileID, unsigned> ExpLoc = 439 SrcMgr.getDecomposedExpansionLoc(TokLoc); 440 if (ExpLoc.first != FID || 441 SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { 442 // If this is a single-line __asm, we're done, except if the next 443 // line is MS-style asm too, in which case we finish a comment 444 // if needed and then keep processing the next line as a single 445 // line __asm. 446 bool isAsm = Tok.is(tok::kw_asm); 447 if (SingleLineMode && (!isAsm || isGCCAsmStatement(NextToken()))) 448 break; 449 // We're no longer in a comment. 450 InAsmComment = false; 451 if (isAsm) { 452 // If this is a new __asm {} block we want to process it separately 453 // from the single-line __asm statements 454 if (PP.LookAhead(0).is(tok::l_brace)) 455 break; 456 LineNo = SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second); 457 SkippedStartOfLine = Tok.isAtStartOfLine(); 458 } else if (Tok.is(tok::semi)) { 459 // A multi-line asm-statement, where next line is a comment 460 InAsmComment = true; 461 FID = ExpLoc.first; 462 LineNo = SrcMgr.getLineNumber(FID, ExpLoc.second); 463 } 464 } else if (!InAsmComment && Tok.is(tok::r_brace)) { 465 // In MSVC mode, braces only participate in brace matching and 466 // separating the asm statements. This is an intentional 467 // departure from the Apple gcc behavior. 468 if (!BraceNesting) 469 break; 470 } 471 } 472 if (!InAsmComment && BraceNesting && Tok.is(tok::r_brace) && 473 BraceCount == (savedBraceCount + BraceNesting)) { 474 // Consume the closing brace. 475 SkippedStartOfLine = Tok.isAtStartOfLine(); 476 // Don't want to add the closing brace of the whole asm block 477 if (SingleLineMode || BraceNesting > 1) { 478 Tok.clearFlag(Token::LeadingSpace); 479 AsmToks.push_back(Tok); 480 } 481 EndLoc = ConsumeBrace(); 482 BraceNesting--; 483 // Finish if all of the opened braces in the inline asm section were 484 // consumed. 485 if (BraceNesting == 0 && !SingleLineMode) 486 break; 487 else { 488 LBraceLocs.pop_back(); 489 TokLoc = Tok.getLocation(); 490 ++NumTokensRead; 491 continue; 492 } 493 } 494 495 // Consume the next token; make sure we don't modify the brace count etc. 496 // if we are in a comment. 497 EndLoc = TokLoc; 498 if (InAsmComment) 499 PP.Lex(Tok); 500 else { 501 // Set the token as the start of line if we skipped the original start 502 // of line token in case it was a nested brace. 503 if (SkippedStartOfLine) 504 Tok.setFlag(Token::StartOfLine); 505 AsmToks.push_back(Tok); 506 ConsumeAnyToken(); 507 } 508 TokLoc = Tok.getLocation(); 509 ++NumTokensRead; 510 SkippedStartOfLine = false; 511 } while (1); 512 513 if (BraceNesting && BraceCount != savedBraceCount) { 514 // __asm without closing brace (this can happen at EOF). 515 for (unsigned i = 0; i < BraceNesting; ++i) { 516 Diag(Tok, diag::err_expected) << tok::r_brace; 517 Diag(LBraceLocs.back(), diag::note_matching) << tok::l_brace; 518 LBraceLocs.pop_back(); 519 } 520 return StmtError(); 521 } else if (NumTokensRead == 0) { 522 // Empty __asm. 523 Diag(Tok, diag::err_expected) << tok::l_brace; 524 return StmtError(); 525 } 526 527 // Okay, prepare to use MC to parse the assembly. 528 SmallVector<StringRef, 4> ConstraintRefs; 529 SmallVector<Expr *, 4> Exprs; 530 SmallVector<StringRef, 4> ClobberRefs; 531 532 // We need an actual supported target. 533 const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple(); 534 const std::string &TT = TheTriple.getTriple(); 535 const llvm::Target *TheTarget = nullptr; 536 if (!TheTriple.isX86()) { 537 Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName(); 538 } else { 539 std::string Error; 540 TheTarget = llvm::TargetRegistry::lookupTarget(TT, Error); 541 if (!TheTarget) 542 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error; 543 } 544 545 assert(!LBraceLocs.empty() && "Should have at least one location here"); 546 547 SmallString<512> AsmString; 548 auto EmptyStmt = [&] { 549 return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmString, 550 /*NumOutputs*/ 0, /*NumInputs*/ 0, 551 ConstraintRefs, ClobberRefs, Exprs, EndLoc); 552 }; 553 // If we don't support assembly, or the assembly is empty, we don't 554 // need to instantiate the AsmParser, etc. 555 if (!TheTarget || AsmToks.empty()) { 556 return EmptyStmt(); 557 } 558 559 // Expand the tokens into a string buffer. 560 SmallVector<unsigned, 8> TokOffsets; 561 if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, AsmString)) 562 return StmtError(); 563 564 const TargetOptions &TO = Actions.Context.getTargetInfo().getTargetOpts(); 565 std::string FeaturesStr = 566 llvm::join(TO.Features.begin(), TO.Features.end(), ","); 567 568 std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); 569 if (!MRI) { 570 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 571 << "target MC unavailable"; 572 return EmptyStmt(); 573 } 574 // FIXME: init MCOptions from sanitizer flags here. 575 llvm::MCTargetOptions MCOptions; 576 std::unique_ptr<llvm::MCAsmInfo> MAI( 577 TheTarget->createMCAsmInfo(*MRI, TT, MCOptions)); 578 // Get the instruction descriptor. 579 std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo()); 580 std::unique_ptr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo()); 581 std::unique_ptr<llvm::MCSubtargetInfo> STI( 582 TheTarget->createMCSubtargetInfo(TT, TO.CPU, FeaturesStr)); 583 // Target MCTargetDesc may not be linked in clang-based tools. 584 if (!MAI || !MII | !MOFI || !STI) { 585 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 586 << "target MC unavailable"; 587 return EmptyStmt(); 588 } 589 590 llvm::SourceMgr TempSrcMgr; 591 llvm::MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &TempSrcMgr); 592 MOFI->InitMCObjectFileInfo(TheTriple, /*PIC*/ false, Ctx); 593 std::unique_ptr<llvm::MemoryBuffer> Buffer = 594 llvm::MemoryBuffer::getMemBuffer(AsmString, "<MS inline asm>"); 595 596 // Tell SrcMgr about this buffer, which is what the parser will pick up. 597 TempSrcMgr.AddNewSourceBuffer(std::move(Buffer), llvm::SMLoc()); 598 599 std::unique_ptr<llvm::MCStreamer> Str(createNullStreamer(Ctx)); 600 std::unique_ptr<llvm::MCAsmParser> Parser( 601 createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI)); 602 603 std::unique_ptr<llvm::MCTargetAsmParser> TargetParser( 604 TheTarget->createMCAsmParser(*STI, *Parser, *MII, MCOptions)); 605 // Target AsmParser may not be linked in clang-based tools. 606 if (!TargetParser) { 607 Diag(AsmLoc, diag::err_msasm_unable_to_create_target) 608 << "target ASM parser unavailable"; 609 return EmptyStmt(); 610 } 611 612 std::unique_ptr<llvm::MCInstPrinter> IP( 613 TheTarget->createMCInstPrinter(llvm::Triple(TT), 1, *MAI, *MII, *MRI)); 614 615 // Change to the Intel dialect. 616 Parser->setAssemblerDialect(1); 617 Parser->setTargetParser(*TargetParser.get()); 618 Parser->setParsingMSInlineAsm(true); 619 TargetParser->setParsingMSInlineAsm(true); 620 621 ClangAsmParserCallback Callback(*this, AsmLoc, AsmString, AsmToks, 622 TokOffsets); 623 TargetParser->setSemaCallback(&Callback); 624 TempSrcMgr.setDiagHandler(ClangAsmParserCallback::DiagHandlerCallback, 625 &Callback); 626 627 unsigned NumOutputs; 628 unsigned NumInputs; 629 std::string AsmStringIR; 630 SmallVector<std::pair<void *, bool>, 4> OpExprs; 631 SmallVector<std::string, 4> Constraints; 632 SmallVector<std::string, 4> Clobbers; 633 if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR, NumOutputs, 634 NumInputs, OpExprs, Constraints, Clobbers, 635 MII.get(), IP.get(), Callback)) 636 return StmtError(); 637 638 // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber 639 // constraints. Clang always adds fpsr to the clobber list anyway. 640 llvm::erase_if(Clobbers, [](const std::string &C) { 641 return C == "fpsr" || C == "mxcsr"; 642 }); 643 644 // Build the vector of clobber StringRefs. 645 ClobberRefs.insert(ClobberRefs.end(), Clobbers.begin(), Clobbers.end()); 646 647 // Recast the void pointers and build the vector of constraint StringRefs. 648 unsigned NumExprs = NumOutputs + NumInputs; 649 ConstraintRefs.resize(NumExprs); 650 Exprs.resize(NumExprs); 651 for (unsigned i = 0, e = NumExprs; i != e; ++i) { 652 Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first); 653 if (!OpExpr) 654 return StmtError(); 655 656 // Need address of variable. 657 if (OpExprs[i].second) 658 OpExpr = 659 Actions.BuildUnaryOp(getCurScope(), AsmLoc, UO_AddrOf, OpExpr).get(); 660 661 ConstraintRefs[i] = StringRef(Constraints[i]); 662 Exprs[i] = OpExpr; 663 } 664 665 // FIXME: We should be passing source locations for better diagnostics. 666 return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLocs[0], AsmToks, AsmStringIR, 667 NumOutputs, NumInputs, ConstraintRefs, 668 ClobberRefs, Exprs, EndLoc); 669 } 670 671 /// parseGNUAsmQualifierListOpt - Parse a GNU extended asm qualifier list. 672 /// asm-qualifier: 673 /// volatile 674 /// inline 675 /// goto 676 /// 677 /// asm-qualifier-list: 678 /// asm-qualifier 679 /// asm-qualifier-list asm-qualifier 680 bool Parser::parseGNUAsmQualifierListOpt(GNUAsmQualifiers &AQ) { 681 while (1) { 682 const GNUAsmQualifiers::AQ A = getGNUAsmQualifier(Tok); 683 if (A == GNUAsmQualifiers::AQ_unspecified) { 684 if (Tok.isNot(tok::l_paren)) { 685 Diag(Tok.getLocation(), diag::err_asm_qualifier_ignored); 686 SkipUntil(tok::r_paren, StopAtSemi); 687 return true; 688 } 689 return false; 690 } 691 if (AQ.setAsmQualifier(A)) 692 Diag(Tok.getLocation(), diag::err_asm_duplicate_qual) 693 << GNUAsmQualifiers::getQualifierName(A); 694 ConsumeToken(); 695 } 696 return false; 697 } 698 699 /// ParseAsmStatement - Parse a GNU extended asm statement. 700 /// asm-statement: 701 /// gnu-asm-statement 702 /// ms-asm-statement 703 /// 704 /// [GNU] gnu-asm-statement: 705 /// 'asm' asm-qualifier-list[opt] '(' asm-argument ')' ';' 706 /// 707 /// [GNU] asm-argument: 708 /// asm-string-literal 709 /// asm-string-literal ':' asm-operands[opt] 710 /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] 711 /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] 712 /// ':' asm-clobbers 713 /// 714 /// [GNU] asm-clobbers: 715 /// asm-string-literal 716 /// asm-clobbers ',' asm-string-literal 717 /// 718 StmtResult Parser::ParseAsmStatement(bool &msAsm) { 719 assert(Tok.is(tok::kw_asm) && "Not an asm stmt"); 720 SourceLocation AsmLoc = ConsumeToken(); 721 722 if (getLangOpts().AsmBlocks && !isGCCAsmStatement(Tok)) { 723 msAsm = true; 724 return ParseMicrosoftAsmStatement(AsmLoc); 725 } 726 727 SourceLocation Loc = Tok.getLocation(); 728 GNUAsmQualifiers GAQ; 729 if (parseGNUAsmQualifierListOpt(GAQ)) 730 return StmtError(); 731 732 if (GAQ.isGoto() && getLangOpts().SpeculativeLoadHardening) 733 Diag(Loc, diag::warn_slh_does_not_support_asm_goto); 734 735 BalancedDelimiterTracker T(*this, tok::l_paren); 736 T.consumeOpen(); 737 738 ExprResult AsmString(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 739 740 // Check if GNU-style InlineAsm is disabled. 741 // Error on anything other than empty string. 742 if (!(getLangOpts().GNUAsm || AsmString.isInvalid())) { 743 const auto *SL = cast<StringLiteral>(AsmString.get()); 744 if (!SL->getString().trim().empty()) 745 Diag(Loc, diag::err_gnu_inline_asm_disabled); 746 } 747 748 if (AsmString.isInvalid()) { 749 // Consume up to and including the closing paren. 750 T.skipToEnd(); 751 return StmtError(); 752 } 753 754 SmallVector<IdentifierInfo *, 4> Names; 755 ExprVector Constraints; 756 ExprVector Exprs; 757 ExprVector Clobbers; 758 759 if (Tok.is(tok::r_paren)) { 760 // We have a simple asm expression like 'asm("foo")'. 761 T.consumeClose(); 762 return Actions.ActOnGCCAsmStmt( 763 AsmLoc, /*isSimple*/ true, GAQ.isVolatile(), 764 /*NumOutputs*/ 0, /*NumInputs*/ 0, nullptr, Constraints, Exprs, 765 AsmString.get(), Clobbers, /*NumLabels*/ 0, T.getCloseLocation()); 766 } 767 768 // Parse Outputs, if present. 769 bool AteExtraColon = false; 770 if (Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 771 // In C++ mode, parse "::" like ": :". 772 AteExtraColon = Tok.is(tok::coloncolon); 773 ConsumeToken(); 774 775 if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) 776 return StmtError(); 777 } 778 779 unsigned NumOutputs = Names.size(); 780 781 // Parse Inputs, if present. 782 if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 783 // In C++ mode, parse "::" like ": :". 784 if (AteExtraColon) 785 AteExtraColon = false; 786 else { 787 AteExtraColon = Tok.is(tok::coloncolon); 788 ConsumeToken(); 789 } 790 791 if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) 792 return StmtError(); 793 } 794 795 assert(Names.size() == Constraints.size() && 796 Constraints.size() == Exprs.size() && "Input operand size mismatch!"); 797 798 unsigned NumInputs = Names.size() - NumOutputs; 799 800 // Parse the clobbers, if present. 801 if (AteExtraColon || Tok.is(tok::colon) || Tok.is(tok::coloncolon)) { 802 if (AteExtraColon) 803 AteExtraColon = false; 804 else { 805 AteExtraColon = Tok.is(tok::coloncolon); 806 ConsumeToken(); 807 } 808 // Parse the asm-string list for clobbers if present. 809 if (!AteExtraColon && isTokenStringLiteral()) { 810 while (1) { 811 ExprResult Clobber(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 812 813 if (Clobber.isInvalid()) 814 break; 815 816 Clobbers.push_back(Clobber.get()); 817 818 if (!TryConsumeToken(tok::comma)) 819 break; 820 } 821 } 822 } 823 if (!GAQ.isGoto() && (Tok.isNot(tok::r_paren) || AteExtraColon)) { 824 Diag(Tok, diag::err_expected) << tok::r_paren; 825 SkipUntil(tok::r_paren, StopAtSemi); 826 return StmtError(); 827 } 828 829 // Parse the goto label, if present. 830 unsigned NumLabels = 0; 831 if (AteExtraColon || Tok.is(tok::colon)) { 832 if (!AteExtraColon) 833 ConsumeToken(); 834 835 while (true) { 836 if (Tok.isNot(tok::identifier)) { 837 Diag(Tok, diag::err_expected) << tok::identifier; 838 SkipUntil(tok::r_paren, StopAtSemi); 839 return StmtError(); 840 } 841 LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(), 842 Tok.getLocation()); 843 Names.push_back(Tok.getIdentifierInfo()); 844 if (!LD) { 845 SkipUntil(tok::r_paren, StopAtSemi); 846 return StmtError(); 847 } 848 ExprResult Res = 849 Actions.ActOnAddrLabel(Tok.getLocation(), Tok.getLocation(), LD); 850 Exprs.push_back(Res.get()); 851 NumLabels++; 852 ConsumeToken(); 853 if (!TryConsumeToken(tok::comma)) 854 break; 855 } 856 } else if (GAQ.isGoto()) { 857 Diag(Tok, diag::err_expected) << tok::colon; 858 SkipUntil(tok::r_paren, StopAtSemi); 859 return StmtError(); 860 } 861 T.consumeClose(); 862 return Actions.ActOnGCCAsmStmt(AsmLoc, false, GAQ.isVolatile(), NumOutputs, 863 NumInputs, Names.data(), Constraints, Exprs, 864 AsmString.get(), Clobbers, NumLabels, 865 T.getCloseLocation()); 866 } 867 868 /// ParseAsmOperands - Parse the asm-operands production as used by 869 /// asm-statement, assuming the leading ':' token was eaten. 870 /// 871 /// [GNU] asm-operands: 872 /// asm-operand 873 /// asm-operands ',' asm-operand 874 /// 875 /// [GNU] asm-operand: 876 /// asm-string-literal '(' expression ')' 877 /// '[' identifier ']' asm-string-literal '(' expression ')' 878 /// 879 // 880 // FIXME: Avoid unnecessary std::string trashing. 881 bool Parser::ParseAsmOperandsOpt(SmallVectorImpl<IdentifierInfo *> &Names, 882 SmallVectorImpl<Expr *> &Constraints, 883 SmallVectorImpl<Expr *> &Exprs) { 884 // 'asm-operands' isn't present? 885 if (!isTokenStringLiteral() && Tok.isNot(tok::l_square)) 886 return false; 887 888 while (1) { 889 // Read the [id] if present. 890 if (Tok.is(tok::l_square)) { 891 BalancedDelimiterTracker T(*this, tok::l_square); 892 T.consumeOpen(); 893 894 if (Tok.isNot(tok::identifier)) { 895 Diag(Tok, diag::err_expected) << tok::identifier; 896 SkipUntil(tok::r_paren, StopAtSemi); 897 return true; 898 } 899 900 IdentifierInfo *II = Tok.getIdentifierInfo(); 901 ConsumeToken(); 902 903 Names.push_back(II); 904 T.consumeClose(); 905 } else 906 Names.push_back(nullptr); 907 908 ExprResult Constraint(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); 909 if (Constraint.isInvalid()) { 910 SkipUntil(tok::r_paren, StopAtSemi); 911 return true; 912 } 913 Constraints.push_back(Constraint.get()); 914 915 if (Tok.isNot(tok::l_paren)) { 916 Diag(Tok, diag::err_expected_lparen_after) << "asm operand"; 917 SkipUntil(tok::r_paren, StopAtSemi); 918 return true; 919 } 920 921 // Read the parenthesized expression. 922 BalancedDelimiterTracker T(*this, tok::l_paren); 923 T.consumeOpen(); 924 ExprResult Res = Actions.CorrectDelayedTyposInExpr(ParseExpression()); 925 T.consumeClose(); 926 if (Res.isInvalid()) { 927 SkipUntil(tok::r_paren, StopAtSemi); 928 return true; 929 } 930 Exprs.push_back(Res.get()); 931 // Eat the comma and continue parsing if it exists. 932 if (!TryConsumeToken(tok::comma)) 933 return false; 934 } 935 } 936 937 const char *Parser::GNUAsmQualifiers::getQualifierName(AQ Qualifier) { 938 switch (Qualifier) { 939 case AQ_volatile: return "volatile"; 940 case AQ_inline: return "inline"; 941 case AQ_goto: return "goto"; 942 case AQ_unspecified: return "unspecified"; 943 } 944 llvm_unreachable("Unknown GNUAsmQualifier"); 945 } 946 947 Parser::GNUAsmQualifiers::AQ 948 Parser::getGNUAsmQualifier(const Token &Tok) const { 949 switch (Tok.getKind()) { 950 case tok::kw_volatile: return GNUAsmQualifiers::AQ_volatile; 951 case tok::kw_inline: return GNUAsmQualifiers::AQ_inline; 952 case tok::kw_goto: return GNUAsmQualifiers::AQ_goto; 953 default: return GNUAsmQualifiers::AQ_unspecified; 954 } 955 } 956 bool Parser::GNUAsmQualifiers::setAsmQualifier(AQ Qualifier) { 957 bool IsDuplicate = Qualifiers & Qualifier; 958 Qualifiers |= Qualifier; 959 return IsDuplicate; 960 } 961