1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This code simply runs the preprocessor on the input file and prints out the 10 // result. This is the traditional behavior of the -E option. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Frontend/Utils.h" 15 #include "clang/Basic/CharInfo.h" 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Frontend/PreprocessorOutputOptions.h" 19 #include "clang/Lex/MacroInfo.h" 20 #include "clang/Lex/PPCallbacks.h" 21 #include "clang/Lex/Pragma.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "clang/Lex/TokenConcatenation.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/ErrorHandling.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cstdio> 30 using namespace clang; 31 32 /// PrintMacroDefinition - Print a macro definition in a form that will be 33 /// properly accepted back as a definition. 34 static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, 35 Preprocessor &PP, raw_ostream *OS) { 36 *OS << "#define " << II.getName(); 37 38 if (MI.isFunctionLike()) { 39 *OS << '('; 40 if (!MI.param_empty()) { 41 MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); 42 for (; AI+1 != E; ++AI) { 43 *OS << (*AI)->getName(); 44 *OS << ','; 45 } 46 47 // Last argument. 48 if ((*AI)->getName() == "__VA_ARGS__") 49 *OS << "..."; 50 else 51 *OS << (*AI)->getName(); 52 } 53 54 if (MI.isGNUVarargs()) 55 *OS << "..."; // #define foo(x...) 56 57 *OS << ')'; 58 } 59 60 // GCC always emits a space, even if the macro body is empty. However, do not 61 // want to emit two spaces if the first token has a leading space. 62 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace()) 63 *OS << ' '; 64 65 SmallString<128> SpellingBuffer; 66 for (const auto &T : MI.tokens()) { 67 if (T.hasLeadingSpace()) 68 *OS << ' '; 69 70 *OS << PP.getSpelling(T, SpellingBuffer); 71 } 72 } 73 74 //===----------------------------------------------------------------------===// 75 // Preprocessed token printer 76 //===----------------------------------------------------------------------===// 77 78 namespace { 79 class PrintPPOutputPPCallbacks : public PPCallbacks { 80 Preprocessor &PP; 81 SourceManager &SM; 82 TokenConcatenation ConcatInfo; 83 public: 84 raw_ostream *OS; 85 private: 86 unsigned CurLine; 87 88 bool EmittedTokensOnThisLine; 89 bool EmittedDirectiveOnThisLine; 90 SrcMgr::CharacteristicKind FileType; 91 SmallString<512> CurFilename; 92 bool Initialized; 93 bool DisableLineMarkers; 94 bool DumpDefines; 95 bool DumpIncludeDirectives; 96 bool UseLineDirectives; 97 bool IsFirstFileEntered; 98 bool MinimizeWhitespace; 99 bool DirectivesOnly; 100 bool KeepSystemIncludes; 101 raw_ostream *OrigOS; 102 std::unique_ptr<llvm::raw_null_ostream> NullOS; 103 104 Token PrevTok; 105 Token PrevPrevTok; 106 107 public: 108 PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, 109 bool defines, bool DumpIncludeDirectives, 110 bool UseLineDirectives, bool MinimizeWhitespace, 111 bool DirectivesOnly, bool KeepSystemIncludes) 112 : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), 113 DisableLineMarkers(lineMarkers), DumpDefines(defines), 114 DumpIncludeDirectives(DumpIncludeDirectives), 115 UseLineDirectives(UseLineDirectives), 116 MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), 117 KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { 118 CurLine = 0; 119 CurFilename += "<uninit>"; 120 EmittedTokensOnThisLine = false; 121 EmittedDirectiveOnThisLine = false; 122 FileType = SrcMgr::C_User; 123 Initialized = false; 124 IsFirstFileEntered = false; 125 if (KeepSystemIncludes) 126 NullOS = std::make_unique<llvm::raw_null_ostream>(); 127 128 PrevTok.startToken(); 129 PrevPrevTok.startToken(); 130 } 131 132 bool isMinimizeWhitespace() const { return MinimizeWhitespace; } 133 134 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } 135 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } 136 137 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; } 138 bool hasEmittedDirectiveOnThisLine() const { 139 return EmittedDirectiveOnThisLine; 140 } 141 142 /// Ensure that the output stream position is at the beginning of a new line 143 /// and inserts one if it does not. It is intended to ensure that directives 144 /// inserted by the directives not from the input source (such as #line) are 145 /// in the first column. To insert newlines that represent the input, use 146 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true). 147 void startNewLineIfNeeded(); 148 149 void FileChanged(SourceLocation Loc, FileChangeReason Reason, 150 SrcMgr::CharacteristicKind FileType, 151 FileID PrevFID) override; 152 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, 153 StringRef FileName, bool IsAngled, 154 CharSourceRange FilenameRange, 155 OptionalFileEntryRef File, StringRef SearchPath, 156 StringRef RelativePath, const Module *Imported, 157 SrcMgr::CharacteristicKind FileType) override; 158 void Ident(SourceLocation Loc, StringRef str) override; 159 void PragmaMessage(SourceLocation Loc, StringRef Namespace, 160 PragmaMessageKind Kind, StringRef Str) override; 161 void PragmaDebug(SourceLocation Loc, StringRef DebugType) override; 162 void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override; 163 void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override; 164 void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, 165 diag::Severity Map, StringRef Str) override; 166 void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec, 167 ArrayRef<int> Ids) override; 168 void PragmaWarningPush(SourceLocation Loc, int Level) override; 169 void PragmaWarningPop(SourceLocation Loc) override; 170 void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override; 171 void PragmaExecCharsetPop(SourceLocation Loc) override; 172 void PragmaAssumeNonNullBegin(SourceLocation Loc) override; 173 void PragmaAssumeNonNullEnd(SourceLocation Loc) override; 174 175 /// Insert whitespace before emitting the next token. 176 /// 177 /// @param Tok Next token to be emitted. 178 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful 179 /// if non-tokens have been emitted to the stream. 180 /// @param RequireSameLine Never emit newlines. Useful when semantics depend 181 /// on being on the same line, such as directives. 182 void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace, 183 bool RequireSameLine); 184 185 /// Move to the line of the provided source location. This will 186 /// return true if a newline was inserted or if 187 /// the requested location is the first token on the first line. 188 /// In these cases the next output will be the first column on the line and 189 /// make it possible to insert indention. The newline was inserted 190 /// implicitly when at the beginning of the file. 191 /// 192 /// @param Tok Token where to move to. 193 /// @param RequireStartOfLine Whether the next line depends on being in the 194 /// first column, such as a directive. 195 /// 196 /// @return Whether column adjustments are necessary. 197 bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { 198 PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); 199 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; 200 bool IsFirstInFile = 201 Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1; 202 return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile; 203 } 204 205 /// Move to the line of the provided source location. Returns true if a new 206 /// line was inserted. 207 bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { 208 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 209 unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; 210 return MoveToLine(TargetLine, RequireStartOfLine); 211 } 212 bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); 213 214 bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok, 215 const Token &Tok) { 216 return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok); 217 } 218 void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr, 219 unsigned ExtraLen=0); 220 bool LineMarkersAreDisabled() const { return DisableLineMarkers; } 221 void HandleNewlinesInToken(const char *TokStr, unsigned Len); 222 223 /// MacroDefined - This hook is called whenever a macro definition is seen. 224 void MacroDefined(const Token &MacroNameTok, 225 const MacroDirective *MD) override; 226 227 /// MacroUndefined - This hook is called whenever a macro #undef is seen. 228 void MacroUndefined(const Token &MacroNameTok, 229 const MacroDefinition &MD, 230 const MacroDirective *Undef) override; 231 232 void BeginModule(const Module *M); 233 void EndModule(const Module *M); 234 }; 235 } // end anonymous namespace 236 237 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, 238 const char *Extra, 239 unsigned ExtraLen) { 240 startNewLineIfNeeded(); 241 242 // Emit #line directives or GNU line markers depending on what mode we're in. 243 if (UseLineDirectives) { 244 *OS << "#line" << ' ' << LineNo << ' ' << '"'; 245 OS->write_escaped(CurFilename); 246 *OS << '"'; 247 } else { 248 *OS << '#' << ' ' << LineNo << ' ' << '"'; 249 OS->write_escaped(CurFilename); 250 *OS << '"'; 251 252 if (ExtraLen) 253 OS->write(Extra, ExtraLen); 254 255 if (FileType == SrcMgr::C_System) 256 OS->write(" 3", 2); 257 else if (FileType == SrcMgr::C_ExternCSystem) 258 OS->write(" 3 4", 4); 259 } 260 *OS << '\n'; 261 } 262 263 /// MoveToLine - Move the output to the source line specified by the location 264 /// object. We can do this by emitting some number of \n's, or be emitting a 265 /// #line directive. This returns false if already at the specified line, true 266 /// if some newlines were emitted. 267 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo, 268 bool RequireStartOfLine) { 269 // If it is required to start a new line or finish the current, insert 270 // vertical whitespace now and take it into account when moving to the 271 // expected line. 272 bool StartedNewLine = false; 273 if ((RequireStartOfLine && EmittedTokensOnThisLine) || 274 EmittedDirectiveOnThisLine) { 275 *OS << '\n'; 276 StartedNewLine = true; 277 CurLine += 1; 278 EmittedTokensOnThisLine = false; 279 EmittedDirectiveOnThisLine = false; 280 } 281 282 // If this line is "close enough" to the original line, just print newlines, 283 // otherwise print a #line directive. 284 if (CurLine == LineNo) { 285 // Nothing to do if we are already on the correct line. 286 } else if (MinimizeWhitespace && DisableLineMarkers) { 287 // With -E -P -fminimize-whitespace, don't emit anything if not necessary. 288 } else if (!StartedNewLine && LineNo - CurLine == 1) { 289 // Printing a single line has priority over printing a #line directive, even 290 // when minimizing whitespace which otherwise would print #line directives 291 // for every single line. 292 *OS << '\n'; 293 StartedNewLine = true; 294 } else if (!DisableLineMarkers) { 295 if (LineNo - CurLine <= 8) { 296 const char *NewLines = "\n\n\n\n\n\n\n\n"; 297 OS->write(NewLines, LineNo - CurLine); 298 } else { 299 // Emit a #line or line marker. 300 WriteLineInfo(LineNo, nullptr, 0); 301 } 302 StartedNewLine = true; 303 } else if (EmittedTokensOnThisLine) { 304 // If we are not on the correct line and don't need to be line-correct, 305 // at least ensure we start on a new line. 306 *OS << '\n'; 307 StartedNewLine = true; 308 } 309 310 if (StartedNewLine) { 311 EmittedTokensOnThisLine = false; 312 EmittedDirectiveOnThisLine = false; 313 } 314 315 CurLine = LineNo; 316 return StartedNewLine; 317 } 318 319 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() { 320 if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { 321 *OS << '\n'; 322 EmittedTokensOnThisLine = false; 323 EmittedDirectiveOnThisLine = false; 324 } 325 } 326 327 /// FileChanged - Whenever the preprocessor enters or exits a #include file 328 /// it invokes this handler. Update our conception of the current source 329 /// position. 330 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, 331 FileChangeReason Reason, 332 SrcMgr::CharacteristicKind NewFileType, 333 FileID PrevFID) { 334 // Unless we are exiting a #include, make sure to skip ahead to the line the 335 // #include directive was at. 336 SourceManager &SourceMgr = SM; 337 338 PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc); 339 if (UserLoc.isInvalid()) 340 return; 341 342 unsigned NewLine = UserLoc.getLine(); 343 344 if (Reason == PPCallbacks::EnterFile) { 345 SourceLocation IncludeLoc = UserLoc.getIncludeLoc(); 346 if (IncludeLoc.isValid()) 347 MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false); 348 } else if (Reason == PPCallbacks::SystemHeaderPragma) { 349 // GCC emits the # directive for this directive on the line AFTER the 350 // directive and emits a bunch of spaces that aren't needed. This is because 351 // otherwise we will emit a line marker for THIS line, which requires an 352 // extra blank line after the directive to avoid making all following lines 353 // off by one. We can do better by simply incrementing NewLine here. 354 NewLine += 1; 355 } 356 357 CurLine = NewLine; 358 359 // In KeepSystemIncludes mode, redirect OS as needed. 360 if (KeepSystemIncludes && (isSystem(FileType) != isSystem(NewFileType))) 361 OS = isSystem(FileType) ? OrigOS : NullOS.get(); 362 363 CurFilename.clear(); 364 CurFilename += UserLoc.getFilename(); 365 FileType = NewFileType; 366 367 if (DisableLineMarkers) { 368 if (!MinimizeWhitespace) 369 startNewLineIfNeeded(); 370 return; 371 } 372 373 if (!Initialized) { 374 WriteLineInfo(CurLine); 375 Initialized = true; 376 } 377 378 // Do not emit an enter marker for the main file (which we expect is the first 379 // entered file). This matches gcc, and improves compatibility with some tools 380 // which track the # line markers as a way to determine when the preprocessed 381 // output is in the context of the main file. 382 if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) { 383 IsFirstFileEntered = true; 384 return; 385 } 386 387 switch (Reason) { 388 case PPCallbacks::EnterFile: 389 WriteLineInfo(CurLine, " 1", 2); 390 break; 391 case PPCallbacks::ExitFile: 392 WriteLineInfo(CurLine, " 2", 2); 393 break; 394 case PPCallbacks::SystemHeaderPragma: 395 case PPCallbacks::RenameFile: 396 WriteLineInfo(CurLine); 397 break; 398 } 399 } 400 401 void PrintPPOutputPPCallbacks::InclusionDirective( 402 SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, 403 bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, 404 StringRef SearchPath, StringRef RelativePath, const Module *Imported, 405 SrcMgr::CharacteristicKind FileType) { 406 // In -dI mode, dump #include directives prior to dumping their content or 407 // interpretation. Similar for -fkeep-system-includes. 408 if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(FileType))) { 409 MoveToLine(HashLoc, /*RequireStartOfLine=*/true); 410 const std::string TokenText = PP.getSpelling(IncludeTok); 411 assert(!TokenText.empty()); 412 *OS << "#" << TokenText << " " 413 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') 414 << " /* clang -E " 415 << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes") 416 << " */"; 417 setEmittedDirectiveOnThisLine(); 418 } 419 420 // When preprocessing, turn implicit imports into module import pragmas. 421 if (Imported) { 422 switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) { 423 case tok::pp_include: 424 case tok::pp_import: 425 case tok::pp_include_next: 426 MoveToLine(HashLoc, /*RequireStartOfLine=*/true); 427 *OS << "#pragma clang module import " 428 << Imported->getFullModuleName(true) 429 << " /* clang -E: implicit import for " 430 << "#" << PP.getSpelling(IncludeTok) << " " 431 << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"') 432 << " */"; 433 setEmittedDirectiveOnThisLine(); 434 break; 435 436 case tok::pp___include_macros: 437 // #__include_macros has no effect on a user of a preprocessed source 438 // file; the only effect is on preprocessing. 439 // 440 // FIXME: That's not *quite* true: it causes the module in question to 441 // be loaded, which can affect downstream diagnostics. 442 break; 443 444 default: 445 llvm_unreachable("unknown include directive kind"); 446 break; 447 } 448 } 449 } 450 451 /// Handle entering the scope of a module during a module compilation. 452 void PrintPPOutputPPCallbacks::BeginModule(const Module *M) { 453 startNewLineIfNeeded(); 454 *OS << "#pragma clang module begin " << M->getFullModuleName(true); 455 setEmittedDirectiveOnThisLine(); 456 } 457 458 /// Handle leaving the scope of a module during a module compilation. 459 void PrintPPOutputPPCallbacks::EndModule(const Module *M) { 460 startNewLineIfNeeded(); 461 *OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/"; 462 setEmittedDirectiveOnThisLine(); 463 } 464 465 /// Ident - Handle #ident directives when read by the preprocessor. 466 /// 467 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) { 468 MoveToLine(Loc, /*RequireStartOfLine=*/true); 469 470 OS->write("#ident ", strlen("#ident ")); 471 OS->write(S.begin(), S.size()); 472 setEmittedTokensOnThisLine(); 473 } 474 475 /// MacroDefined - This hook is called whenever a macro definition is seen. 476 void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, 477 const MacroDirective *MD) { 478 const MacroInfo *MI = MD->getMacroInfo(); 479 // Print out macro definitions in -dD mode and when we have -fdirectives-only 480 // for C++20 header units. 481 if ((!DumpDefines && !DirectivesOnly) || 482 // Ignore __FILE__ etc. 483 MI->isBuiltinMacro()) 484 return; 485 486 SourceLocation DefLoc = MI->getDefinitionLoc(); 487 if (DirectivesOnly && !MI->isUsed()) { 488 SourceManager &SM = PP.getSourceManager(); 489 if (SM.isWrittenInBuiltinFile(DefLoc) || 490 SM.isWrittenInCommandLineFile(DefLoc)) 491 return; 492 } 493 MoveToLine(DefLoc, /*RequireStartOfLine=*/true); 494 PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); 495 setEmittedDirectiveOnThisLine(); 496 } 497 498 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, 499 const MacroDefinition &MD, 500 const MacroDirective *Undef) { 501 // Print out macro definitions in -dD mode and when we have -fdirectives-only 502 // for C++20 header units. 503 if (!DumpDefines && !DirectivesOnly) 504 return; 505 506 MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true); 507 *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); 508 setEmittedDirectiveOnThisLine(); 509 } 510 511 static void outputPrintable(raw_ostream *OS, StringRef Str) { 512 for (unsigned char Char : Str) { 513 if (isPrintable(Char) && Char != '\\' && Char != '"') 514 *OS << (char)Char; 515 else // Output anything hard as an octal escape. 516 *OS << '\\' 517 << (char)('0' + ((Char >> 6) & 7)) 518 << (char)('0' + ((Char >> 3) & 7)) 519 << (char)('0' + ((Char >> 0) & 7)); 520 } 521 } 522 523 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, 524 StringRef Namespace, 525 PragmaMessageKind Kind, 526 StringRef Str) { 527 MoveToLine(Loc, /*RequireStartOfLine=*/true); 528 *OS << "#pragma "; 529 if (!Namespace.empty()) 530 *OS << Namespace << ' '; 531 switch (Kind) { 532 case PMK_Message: 533 *OS << "message(\""; 534 break; 535 case PMK_Warning: 536 *OS << "warning \""; 537 break; 538 case PMK_Error: 539 *OS << "error \""; 540 break; 541 } 542 543 outputPrintable(OS, Str); 544 *OS << '"'; 545 if (Kind == PMK_Message) 546 *OS << ')'; 547 setEmittedDirectiveOnThisLine(); 548 } 549 550 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc, 551 StringRef DebugType) { 552 MoveToLine(Loc, /*RequireStartOfLine=*/true); 553 554 *OS << "#pragma clang __debug "; 555 *OS << DebugType; 556 557 setEmittedDirectiveOnThisLine(); 558 } 559 560 void PrintPPOutputPPCallbacks:: 561 PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { 562 MoveToLine(Loc, /*RequireStartOfLine=*/true); 563 *OS << "#pragma " << Namespace << " diagnostic push"; 564 setEmittedDirectiveOnThisLine(); 565 } 566 567 void PrintPPOutputPPCallbacks:: 568 PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { 569 MoveToLine(Loc, /*RequireStartOfLine=*/true); 570 *OS << "#pragma " << Namespace << " diagnostic pop"; 571 setEmittedDirectiveOnThisLine(); 572 } 573 574 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc, 575 StringRef Namespace, 576 diag::Severity Map, 577 StringRef Str) { 578 MoveToLine(Loc, /*RequireStartOfLine=*/true); 579 *OS << "#pragma " << Namespace << " diagnostic "; 580 switch (Map) { 581 case diag::Severity::Remark: 582 *OS << "remark"; 583 break; 584 case diag::Severity::Warning: 585 *OS << "warning"; 586 break; 587 case diag::Severity::Error: 588 *OS << "error"; 589 break; 590 case diag::Severity::Ignored: 591 *OS << "ignored"; 592 break; 593 case diag::Severity::Fatal: 594 *OS << "fatal"; 595 break; 596 } 597 *OS << " \"" << Str << '"'; 598 setEmittedDirectiveOnThisLine(); 599 } 600 601 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc, 602 PragmaWarningSpecifier WarningSpec, 603 ArrayRef<int> Ids) { 604 MoveToLine(Loc, /*RequireStartOfLine=*/true); 605 606 *OS << "#pragma warning("; 607 switch(WarningSpec) { 608 case PWS_Default: *OS << "default"; break; 609 case PWS_Disable: *OS << "disable"; break; 610 case PWS_Error: *OS << "error"; break; 611 case PWS_Once: *OS << "once"; break; 612 case PWS_Suppress: *OS << "suppress"; break; 613 case PWS_Level1: *OS << '1'; break; 614 case PWS_Level2: *OS << '2'; break; 615 case PWS_Level3: *OS << '3'; break; 616 case PWS_Level4: *OS << '4'; break; 617 } 618 *OS << ':'; 619 620 for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I) 621 *OS << ' ' << *I; 622 *OS << ')'; 623 setEmittedDirectiveOnThisLine(); 624 } 625 626 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc, 627 int Level) { 628 MoveToLine(Loc, /*RequireStartOfLine=*/true); 629 *OS << "#pragma warning(push"; 630 if (Level >= 0) 631 *OS << ", " << Level; 632 *OS << ')'; 633 setEmittedDirectiveOnThisLine(); 634 } 635 636 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) { 637 MoveToLine(Loc, /*RequireStartOfLine=*/true); 638 *OS << "#pragma warning(pop)"; 639 setEmittedDirectiveOnThisLine(); 640 } 641 642 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc, 643 StringRef Str) { 644 MoveToLine(Loc, /*RequireStartOfLine=*/true); 645 *OS << "#pragma character_execution_set(push"; 646 if (!Str.empty()) 647 *OS << ", " << Str; 648 *OS << ')'; 649 setEmittedDirectiveOnThisLine(); 650 } 651 652 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) { 653 MoveToLine(Loc, /*RequireStartOfLine=*/true); 654 *OS << "#pragma character_execution_set(pop)"; 655 setEmittedDirectiveOnThisLine(); 656 } 657 658 void PrintPPOutputPPCallbacks:: 659 PragmaAssumeNonNullBegin(SourceLocation Loc) { 660 MoveToLine(Loc, /*RequireStartOfLine=*/true); 661 *OS << "#pragma clang assume_nonnull begin"; 662 setEmittedDirectiveOnThisLine(); 663 } 664 665 void PrintPPOutputPPCallbacks:: 666 PragmaAssumeNonNullEnd(SourceLocation Loc) { 667 MoveToLine(Loc, /*RequireStartOfLine=*/true); 668 *OS << "#pragma clang assume_nonnull end"; 669 setEmittedDirectiveOnThisLine(); 670 } 671 672 void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, 673 bool RequireSpace, 674 bool RequireSameLine) { 675 // These tokens are not expanded to anything and don't need whitespace before 676 // them. 677 if (Tok.is(tok::eof) || 678 (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) && 679 !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) && 680 !Tok.is(tok::annot_repl_input_end))) 681 return; 682 683 // EmittedDirectiveOnThisLine takes priority over RequireSameLine. 684 if ((!RequireSameLine || EmittedDirectiveOnThisLine) && 685 MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) { 686 if (MinimizeWhitespace) { 687 // Avoid interpreting hash as a directive under -fpreprocessed. 688 if (Tok.is(tok::hash)) 689 *OS << ' '; 690 } else { 691 // Print out space characters so that the first token on a line is 692 // indented for easy reading. 693 unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation()); 694 695 // The first token on a line can have a column number of 1, yet still 696 // expect leading white space, if a macro expansion in column 1 starts 697 // with an empty macro argument, or an empty nested macro expansion. In 698 // this case, move the token to column 2. 699 if (ColNo == 1 && Tok.hasLeadingSpace()) 700 ColNo = 2; 701 702 // This hack prevents stuff like: 703 // #define HASH # 704 // HASH define foo bar 705 // From having the # character end up at column 1, which makes it so it 706 // is not handled as a #define next time through the preprocessor if in 707 // -fpreprocessed mode. 708 if (ColNo <= 1 && Tok.is(tok::hash)) 709 *OS << ' '; 710 711 // Otherwise, indent the appropriate number of spaces. 712 for (; ColNo > 1; --ColNo) 713 *OS << ' '; 714 } 715 } else { 716 // Insert whitespace between the previous and next token if either 717 // - The caller requires it 718 // - The input had whitespace between them and we are not in 719 // whitespace-minimization mode 720 // - The whitespace is necessary to keep the tokens apart and there is not 721 // already a newline between them 722 if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || 723 ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) && 724 AvoidConcat(PrevPrevTok, PrevTok, Tok))) 725 *OS << ' '; 726 } 727 728 PrevPrevTok = PrevTok; 729 PrevTok = Tok; 730 } 731 732 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr, 733 unsigned Len) { 734 unsigned NumNewlines = 0; 735 for (; Len; --Len, ++TokStr) { 736 if (*TokStr != '\n' && 737 *TokStr != '\r') 738 continue; 739 740 ++NumNewlines; 741 742 // If we have \n\r or \r\n, skip both and count as one line. 743 if (Len != 1 && 744 (TokStr[1] == '\n' || TokStr[1] == '\r') && 745 TokStr[0] != TokStr[1]) { 746 ++TokStr; 747 --Len; 748 } 749 } 750 751 if (NumNewlines == 0) return; 752 753 CurLine += NumNewlines; 754 } 755 756 757 namespace { 758 struct UnknownPragmaHandler : public PragmaHandler { 759 const char *Prefix; 760 PrintPPOutputPPCallbacks *Callbacks; 761 762 // Set to true if tokens should be expanded 763 bool ShouldExpandTokens; 764 765 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks, 766 bool RequireTokenExpansion) 767 : Prefix(prefix), Callbacks(callbacks), 768 ShouldExpandTokens(RequireTokenExpansion) {} 769 void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, 770 Token &PragmaTok) override { 771 // Figure out what line we went to and insert the appropriate number of 772 // newline characters. 773 Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true); 774 Callbacks->OS->write(Prefix, strlen(Prefix)); 775 Callbacks->setEmittedTokensOnThisLine(); 776 777 if (ShouldExpandTokens) { 778 // The first token does not have expanded macros. Expand them, if 779 // required. 780 auto Toks = std::make_unique<Token[]>(1); 781 Toks[0] = PragmaTok; 782 PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1, 783 /*DisableMacroExpansion=*/false, 784 /*IsReinject=*/false); 785 PP.Lex(PragmaTok); 786 } 787 788 // Read and print all of the pragma tokens. 789 bool IsFirst = true; 790 while (PragmaTok.isNot(tok::eod)) { 791 Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst, 792 /*RequireSameLine=*/true); 793 IsFirst = false; 794 std::string TokSpell = PP.getSpelling(PragmaTok); 795 Callbacks->OS->write(&TokSpell[0], TokSpell.size()); 796 Callbacks->setEmittedTokensOnThisLine(); 797 798 if (ShouldExpandTokens) 799 PP.Lex(PragmaTok); 800 else 801 PP.LexUnexpandedToken(PragmaTok); 802 } 803 Callbacks->setEmittedDirectiveOnThisLine(); 804 } 805 }; 806 } // end anonymous namespace 807 808 809 static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, 810 PrintPPOutputPPCallbacks *Callbacks) { 811 bool DropComments = PP.getLangOpts().TraditionalCPP && 812 !PP.getCommentRetentionState(); 813 814 bool IsStartOfLine = false; 815 char Buffer[256]; 816 while (true) { 817 // Two lines joined with line continuation ('\' as last character on the 818 // line) must be emitted as one line even though Tok.getLine() returns two 819 // different values. In this situation Tok.isAtStartOfLine() is false even 820 // though it may be the first token on the lexical line. When 821 // dropping/skipping a token that is at the start of a line, propagate the 822 // start-of-line-ness to the next token to not append it to the previous 823 // line. 824 IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine(); 825 826 Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false, 827 /*RequireSameLine=*/!IsStartOfLine); 828 829 if (DropComments && Tok.is(tok::comment)) { 830 // Skip comments. Normally the preprocessor does not generate 831 // tok::comment nodes at all when not keeping comments, but under 832 // -traditional-cpp the lexer keeps /all/ whitespace, including comments. 833 PP.Lex(Tok); 834 continue; 835 } else if (Tok.is(tok::annot_repl_input_end)) { 836 PP.Lex(Tok); 837 continue; 838 } else if (Tok.is(tok::eod)) { 839 // Don't print end of directive tokens, since they are typically newlines 840 // that mess up our line tracking. These come from unknown pre-processor 841 // directives or hash-prefixed comments in standalone assembly files. 842 PP.Lex(Tok); 843 // FIXME: The token on the next line after #include should have 844 // Tok.isAtStartOfLine() set. 845 IsStartOfLine = true; 846 continue; 847 } else if (Tok.is(tok::annot_module_include)) { 848 // PrintPPOutputPPCallbacks::InclusionDirective handles producing 849 // appropriate output here. Ignore this token entirely. 850 PP.Lex(Tok); 851 IsStartOfLine = true; 852 continue; 853 } else if (Tok.is(tok::annot_module_begin)) { 854 // FIXME: We retrieve this token after the FileChanged callback, and 855 // retrieve the module_end token before the FileChanged callback, so 856 // we render this within the file and render the module end outside the 857 // file, but this is backwards from the token locations: the module_begin 858 // token is at the include location (outside the file) and the module_end 859 // token is at the EOF location (within the file). 860 Callbacks->BeginModule( 861 reinterpret_cast<Module *>(Tok.getAnnotationValue())); 862 PP.Lex(Tok); 863 IsStartOfLine = true; 864 continue; 865 } else if (Tok.is(tok::annot_module_end)) { 866 Callbacks->EndModule( 867 reinterpret_cast<Module *>(Tok.getAnnotationValue())); 868 PP.Lex(Tok); 869 IsStartOfLine = true; 870 continue; 871 } else if (Tok.is(tok::annot_header_unit)) { 872 // This is a header-name that has been (effectively) converted into a 873 // module-name. 874 // FIXME: The module name could contain non-identifier module name 875 // components. We don't have a good way to round-trip those. 876 Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue()); 877 std::string Name = M->getFullModuleName(); 878 Callbacks->OS->write(Name.data(), Name.size()); 879 Callbacks->HandleNewlinesInToken(Name.data(), Name.size()); 880 } else if (Tok.isAnnotation()) { 881 // Ignore annotation tokens created by pragmas - the pragmas themselves 882 // will be reproduced in the preprocessed output. 883 PP.Lex(Tok); 884 continue; 885 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 886 *Callbacks->OS << II->getName(); 887 } else if (Tok.isLiteral() && !Tok.needsCleaning() && 888 Tok.getLiteralData()) { 889 Callbacks->OS->write(Tok.getLiteralData(), Tok.getLength()); 890 } else if (Tok.getLength() < std::size(Buffer)) { 891 const char *TokPtr = Buffer; 892 unsigned Len = PP.getSpelling(Tok, TokPtr); 893 Callbacks->OS->write(TokPtr, Len); 894 895 // Tokens that can contain embedded newlines need to adjust our current 896 // line number. 897 // FIXME: The token may end with a newline in which case 898 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is 899 // wrong. 900 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) 901 Callbacks->HandleNewlinesInToken(TokPtr, Len); 902 if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' && 903 TokPtr[1] == '/') { 904 // It's a line comment; 905 // Ensure that we don't concatenate anything behind it. 906 Callbacks->setEmittedDirectiveOnThisLine(); 907 } 908 } else { 909 std::string S = PP.getSpelling(Tok); 910 Callbacks->OS->write(S.data(), S.size()); 911 912 // Tokens that can contain embedded newlines need to adjust our current 913 // line number. 914 if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown) 915 Callbacks->HandleNewlinesInToken(S.data(), S.size()); 916 if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') { 917 // It's a line comment; 918 // Ensure that we don't concatenate anything behind it. 919 Callbacks->setEmittedDirectiveOnThisLine(); 920 } 921 } 922 Callbacks->setEmittedTokensOnThisLine(); 923 IsStartOfLine = false; 924 925 if (Tok.is(tok::eof)) break; 926 927 PP.Lex(Tok); 928 } 929 } 930 931 typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair; 932 static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) { 933 return LHS->first->getName().compare(RHS->first->getName()); 934 } 935 936 static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) { 937 // Ignore unknown pragmas. 938 PP.IgnorePragmas(); 939 940 // -dM mode just scans and ignores all tokens in the files, then dumps out 941 // the macro table at the end. 942 PP.EnterMainSourceFile(); 943 944 Token Tok; 945 do PP.Lex(Tok); 946 while (Tok.isNot(tok::eof)); 947 948 SmallVector<id_macro_pair, 128> MacrosByID; 949 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end(); 950 I != E; ++I) { 951 auto *MD = I->second.getLatest(); 952 if (MD && MD->isDefined()) 953 MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo())); 954 } 955 llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare); 956 957 for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) { 958 MacroInfo &MI = *MacrosByID[i].second; 959 // Ignore computed macros like __LINE__ and friends. 960 if (MI.isBuiltinMacro()) continue; 961 962 PrintMacroDefinition(*MacrosByID[i].first, MI, PP, OS); 963 *OS << '\n'; 964 } 965 } 966 967 /// DoPrintPreprocessedInput - This implements -E mode. 968 /// 969 void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, 970 const PreprocessorOutputOptions &Opts) { 971 // Show macros with no output is handled specially. 972 if (!Opts.ShowCPP) { 973 assert(Opts.ShowMacros && "Not yet implemented!"); 974 DoPrintMacros(PP, OS); 975 return; 976 } 977 978 // Inform the preprocessor whether we want it to retain comments or not, due 979 // to -C or -CC. 980 PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments); 981 982 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( 983 PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, 984 Opts.ShowIncludeDirectives, Opts.UseLineDirectives, 985 Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); 986 987 // Expand macros in pragmas with -fms-extensions. The assumption is that 988 // the majority of pragmas in such a file will be Microsoft pragmas. 989 // Remember the handlers we will add so that we can remove them later. 990 std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler( 991 new UnknownPragmaHandler( 992 "#pragma", Callbacks, 993 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 994 995 std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler( 996 "#pragma GCC", Callbacks, 997 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 998 999 std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler( 1000 "#pragma clang", Callbacks, 1001 /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt)); 1002 1003 PP.AddPragmaHandler(MicrosoftExtHandler.get()); 1004 PP.AddPragmaHandler("GCC", GCCHandler.get()); 1005 PP.AddPragmaHandler("clang", ClangHandler.get()); 1006 1007 // The tokens after pragma omp need to be expanded. 1008 // 1009 // OpenMP [2.1, Directive format] 1010 // Preprocessing tokens following the #pragma omp are subject to macro 1011 // replacement. 1012 std::unique_ptr<UnknownPragmaHandler> OpenMPHandler( 1013 new UnknownPragmaHandler("#pragma omp", Callbacks, 1014 /*RequireTokenExpansion=*/true)); 1015 PP.AddPragmaHandler("omp", OpenMPHandler.get()); 1016 1017 PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks)); 1018 1019 // After we have configured the preprocessor, enter the main file. 1020 PP.EnterMainSourceFile(); 1021 if (Opts.DirectivesOnly) 1022 PP.SetMacroExpansionOnlyInDirectives(); 1023 1024 // Consume all of the tokens that come from the predefines buffer. Those 1025 // should not be emitted into the output and are guaranteed to be at the 1026 // start. 1027 const SourceManager &SourceMgr = PP.getSourceManager(); 1028 Token Tok; 1029 do { 1030 PP.Lex(Tok); 1031 if (Tok.is(tok::eof) || !Tok.getLocation().isFileID()) 1032 break; 1033 1034 PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation()); 1035 if (PLoc.isInvalid()) 1036 break; 1037 1038 if (strcmp(PLoc.getFilename(), "<built-in>")) 1039 break; 1040 } while (true); 1041 1042 // Read all the preprocessed tokens, printing them out to the stream. 1043 PrintPreprocessedTokens(PP, Tok, Callbacks); 1044 *OS << '\n'; 1045 1046 // Remove the handlers we just added to leave the preprocessor in a sane state 1047 // so that it can be reused (for example by a clang::Parser instance). 1048 PP.RemovePragmaHandler(MicrosoftExtHandler.get()); 1049 PP.RemovePragmaHandler("GCC", GCCHandler.get()); 1050 PP.RemovePragmaHandler("clang", ClangHandler.get()); 1051 PP.RemovePragmaHandler("omp", OpenMPHandler.get()); 1052 } 1053