xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/PrintPreprocessedOutput.cpp (revision ef80df0a71912500ad84060334a24e903869f00b)
1  //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This code simply runs the preprocessor on the input file and prints out the
10  // result.  This is the traditional behavior of the -E option.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "clang/Frontend/Utils.h"
15  #include "clang/Basic/CharInfo.h"
16  #include "clang/Basic/Diagnostic.h"
17  #include "clang/Basic/SourceManager.h"
18  #include "clang/Frontend/PreprocessorOutputOptions.h"
19  #include "clang/Lex/MacroInfo.h"
20  #include "clang/Lex/PPCallbacks.h"
21  #include "clang/Lex/Pragma.h"
22  #include "clang/Lex/Preprocessor.h"
23  #include "clang/Lex/TokenConcatenation.h"
24  #include "llvm/ADT/STLExtras.h"
25  #include "llvm/ADT/SmallString.h"
26  #include "llvm/ADT/StringRef.h"
27  #include "llvm/Support/ErrorHandling.h"
28  #include "llvm/Support/raw_ostream.h"
29  #include <cstdio>
30  using namespace clang;
31  
32  /// PrintMacroDefinition - Print a macro definition in a form that will be
33  /// properly accepted back as a definition.
34  static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
35                                   Preprocessor &PP, raw_ostream *OS) {
36    *OS << "#define " << II.getName();
37  
38    if (MI.isFunctionLike()) {
39      *OS << '(';
40      if (!MI.param_empty()) {
41        MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
42        for (; AI+1 != E; ++AI) {
43          *OS << (*AI)->getName();
44          *OS << ',';
45        }
46  
47        // Last argument.
48        if ((*AI)->getName() == "__VA_ARGS__")
49          *OS << "...";
50        else
51          *OS << (*AI)->getName();
52      }
53  
54      if (MI.isGNUVarargs())
55        *OS << "...";  // #define foo(x...)
56  
57      *OS << ')';
58    }
59  
60    // GCC always emits a space, even if the macro body is empty.  However, do not
61    // want to emit two spaces if the first token has a leading space.
62    if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
63      *OS << ' ';
64  
65    SmallString<128> SpellingBuffer;
66    for (const auto &T : MI.tokens()) {
67      if (T.hasLeadingSpace())
68        *OS << ' ';
69  
70      *OS << PP.getSpelling(T, SpellingBuffer);
71    }
72  }
73  
74  //===----------------------------------------------------------------------===//
75  // Preprocessed token printer
76  //===----------------------------------------------------------------------===//
77  
78  namespace {
79  class PrintPPOutputPPCallbacks : public PPCallbacks {
80    Preprocessor &PP;
81    SourceManager &SM;
82    TokenConcatenation ConcatInfo;
83  public:
84    raw_ostream *OS;
85  private:
86    unsigned CurLine;
87  
88    bool EmittedTokensOnThisLine;
89    bool EmittedDirectiveOnThisLine;
90    SrcMgr::CharacteristicKind FileType;
91    SmallString<512> CurFilename;
92    bool Initialized;
93    bool DisableLineMarkers;
94    bool DumpDefines;
95    bool DumpIncludeDirectives;
96    bool UseLineDirectives;
97    bool IsFirstFileEntered;
98    bool MinimizeWhitespace;
99    bool DirectivesOnly;
100    bool KeepSystemIncludes;
101    raw_ostream *OrigOS;
102    std::unique_ptr<llvm::raw_null_ostream> NullOS;
103  
104    Token PrevTok;
105    Token PrevPrevTok;
106  
107  public:
108    PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers,
109                             bool defines, bool DumpIncludeDirectives,
110                             bool UseLineDirectives, bool MinimizeWhitespace,
111                             bool DirectivesOnly, bool KeepSystemIncludes)
112        : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
113          DisableLineMarkers(lineMarkers), DumpDefines(defines),
114          DumpIncludeDirectives(DumpIncludeDirectives),
115          UseLineDirectives(UseLineDirectives),
116          MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly),
117          KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) {
118      CurLine = 0;
119      CurFilename += "<uninit>";
120      EmittedTokensOnThisLine = false;
121      EmittedDirectiveOnThisLine = false;
122      FileType = SrcMgr::C_User;
123      Initialized = false;
124      IsFirstFileEntered = false;
125      if (KeepSystemIncludes)
126        NullOS = std::make_unique<llvm::raw_null_ostream>();
127  
128      PrevTok.startToken();
129      PrevPrevTok.startToken();
130    }
131  
132    bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
133  
134    void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
135    bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
136  
137    void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
138    bool hasEmittedDirectiveOnThisLine() const {
139      return EmittedDirectiveOnThisLine;
140    }
141  
142    /// Ensure that the output stream position is at the beginning of a new line
143    /// and inserts one if it does not. It is intended to ensure that directives
144    /// inserted by the directives not from the input source (such as #line) are
145    /// in the first column. To insert newlines that represent the input, use
146    /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
147    void startNewLineIfNeeded();
148  
149    void FileChanged(SourceLocation Loc, FileChangeReason Reason,
150                     SrcMgr::CharacteristicKind FileType,
151                     FileID PrevFID) override;
152    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
153                            StringRef FileName, bool IsAngled,
154                            CharSourceRange FilenameRange,
155                            OptionalFileEntryRef File, StringRef SearchPath,
156                            StringRef RelativePath, const Module *Imported,
157                            SrcMgr::CharacteristicKind FileType) override;
158    void Ident(SourceLocation Loc, StringRef str) override;
159    void PragmaMessage(SourceLocation Loc, StringRef Namespace,
160                       PragmaMessageKind Kind, StringRef Str) override;
161    void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
162    void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
163    void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
164    void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
165                          diag::Severity Map, StringRef Str) override;
166    void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
167                       ArrayRef<int> Ids) override;
168    void PragmaWarningPush(SourceLocation Loc, int Level) override;
169    void PragmaWarningPop(SourceLocation Loc) override;
170    void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
171    void PragmaExecCharsetPop(SourceLocation Loc) override;
172    void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
173    void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
174  
175    /// Insert whitespace before emitting the next token.
176    ///
177    /// @param Tok             Next token to be emitted.
178    /// @param RequireSpace    Ensure at least one whitespace is emitted. Useful
179    ///                        if non-tokens have been emitted to the stream.
180    /// @param RequireSameLine Never emit newlines. Useful when semantics depend
181    ///                        on being on the same line, such as directives.
182    void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
183                                   bool RequireSameLine);
184  
185    /// Move to the line of the provided source location. This will
186    /// return true if a newline was inserted or if
187    /// the requested location is the first token on the first line.
188    /// In these cases the next output will be the first column on the line and
189    /// make it possible to insert indention. The newline was inserted
190    /// implicitly when at the beginning of the file.
191    ///
192    /// @param Tok                 Token where to move to.
193    /// @param RequireStartOfLine  Whether the next line depends on being in the
194    ///                            first column, such as a directive.
195    ///
196    /// @return Whether column adjustments are necessary.
197    bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
198      PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
199      unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
200      bool IsFirstInFile =
201          Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1;
202      return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile;
203    }
204  
205    /// Move to the line of the provided source location. Returns true if a new
206    /// line was inserted.
207    bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
208      PresumedLoc PLoc = SM.getPresumedLoc(Loc);
209      unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
210      return MoveToLine(TargetLine, RequireStartOfLine);
211    }
212    bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
213  
214    bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
215                     const Token &Tok) {
216      return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
217    }
218    void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
219                       unsigned ExtraLen=0);
220    bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
221    void HandleNewlinesInToken(const char *TokStr, unsigned Len);
222  
223    /// MacroDefined - This hook is called whenever a macro definition is seen.
224    void MacroDefined(const Token &MacroNameTok,
225                      const MacroDirective *MD) override;
226  
227    /// MacroUndefined - This hook is called whenever a macro #undef is seen.
228    void MacroUndefined(const Token &MacroNameTok,
229                        const MacroDefinition &MD,
230                        const MacroDirective *Undef) override;
231  
232    void BeginModule(const Module *M);
233    void EndModule(const Module *M);
234  };
235  }  // end anonymous namespace
236  
237  void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
238                                               const char *Extra,
239                                               unsigned ExtraLen) {
240    startNewLineIfNeeded();
241  
242    // Emit #line directives or GNU line markers depending on what mode we're in.
243    if (UseLineDirectives) {
244      *OS << "#line" << ' ' << LineNo << ' ' << '"';
245      OS->write_escaped(CurFilename);
246      *OS << '"';
247    } else {
248      *OS << '#' << ' ' << LineNo << ' ' << '"';
249      OS->write_escaped(CurFilename);
250      *OS << '"';
251  
252      if (ExtraLen)
253        OS->write(Extra, ExtraLen);
254  
255      if (FileType == SrcMgr::C_System)
256        OS->write(" 3", 2);
257      else if (FileType == SrcMgr::C_ExternCSystem)
258        OS->write(" 3 4", 4);
259    }
260    *OS << '\n';
261  }
262  
263  /// MoveToLine - Move the output to the source line specified by the location
264  /// object.  We can do this by emitting some number of \n's, or be emitting a
265  /// #line directive.  This returns false if already at the specified line, true
266  /// if some newlines were emitted.
267  bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
268                                            bool RequireStartOfLine) {
269    // If it is required to start a new line or finish the current, insert
270    // vertical whitespace now and take it into account when moving to the
271    // expected line.
272    bool StartedNewLine = false;
273    if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
274        EmittedDirectiveOnThisLine) {
275      *OS << '\n';
276      StartedNewLine = true;
277      CurLine += 1;
278      EmittedTokensOnThisLine = false;
279      EmittedDirectiveOnThisLine = false;
280    }
281  
282    // If this line is "close enough" to the original line, just print newlines,
283    // otherwise print a #line directive.
284    if (CurLine == LineNo) {
285      // Nothing to do if we are already on the correct line.
286    } else if (MinimizeWhitespace && DisableLineMarkers) {
287      // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
288    } else if (!StartedNewLine && LineNo - CurLine == 1) {
289      // Printing a single line has priority over printing a #line directive, even
290      // when minimizing whitespace which otherwise would print #line directives
291      // for every single line.
292      *OS << '\n';
293      StartedNewLine = true;
294    } else if (!DisableLineMarkers) {
295      if (LineNo - CurLine <= 8) {
296        const char *NewLines = "\n\n\n\n\n\n\n\n";
297        OS->write(NewLines, LineNo - CurLine);
298      } else {
299        // Emit a #line or line marker.
300        WriteLineInfo(LineNo, nullptr, 0);
301      }
302      StartedNewLine = true;
303    } else if (EmittedTokensOnThisLine) {
304      // If we are not on the correct line and don't need to be line-correct,
305      // at least ensure we start on a new line.
306      *OS << '\n';
307      StartedNewLine = true;
308    }
309  
310    if (StartedNewLine) {
311      EmittedTokensOnThisLine = false;
312      EmittedDirectiveOnThisLine = false;
313    }
314  
315    CurLine = LineNo;
316    return StartedNewLine;
317  }
318  
319  void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
320    if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
321      *OS << '\n';
322      EmittedTokensOnThisLine = false;
323      EmittedDirectiveOnThisLine = false;
324    }
325  }
326  
327  /// FileChanged - Whenever the preprocessor enters or exits a #include file
328  /// it invokes this handler.  Update our conception of the current source
329  /// position.
330  void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
331                                             FileChangeReason Reason,
332                                         SrcMgr::CharacteristicKind NewFileType,
333                                         FileID PrevFID) {
334    // Unless we are exiting a #include, make sure to skip ahead to the line the
335    // #include directive was at.
336    SourceManager &SourceMgr = SM;
337  
338    PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
339    if (UserLoc.isInvalid())
340      return;
341  
342    unsigned NewLine = UserLoc.getLine();
343  
344    if (Reason == PPCallbacks::EnterFile) {
345      SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
346      if (IncludeLoc.isValid())
347        MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false);
348    } else if (Reason == PPCallbacks::SystemHeaderPragma) {
349      // GCC emits the # directive for this directive on the line AFTER the
350      // directive and emits a bunch of spaces that aren't needed. This is because
351      // otherwise we will emit a line marker for THIS line, which requires an
352      // extra blank line after the directive to avoid making all following lines
353      // off by one. We can do better by simply incrementing NewLine here.
354      NewLine += 1;
355    }
356  
357    CurLine = NewLine;
358  
359    // In KeepSystemIncludes mode, redirect OS as needed.
360    if (KeepSystemIncludes && (isSystem(FileType) != isSystem(NewFileType)))
361      OS = isSystem(FileType) ? OrigOS : NullOS.get();
362  
363    CurFilename.clear();
364    CurFilename += UserLoc.getFilename();
365    FileType = NewFileType;
366  
367    if (DisableLineMarkers) {
368      if (!MinimizeWhitespace)
369        startNewLineIfNeeded();
370      return;
371    }
372  
373    if (!Initialized) {
374      WriteLineInfo(CurLine);
375      Initialized = true;
376    }
377  
378    // Do not emit an enter marker for the main file (which we expect is the first
379    // entered file). This matches gcc, and improves compatibility with some tools
380    // which track the # line markers as a way to determine when the preprocessed
381    // output is in the context of the main file.
382    if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
383      IsFirstFileEntered = true;
384      return;
385    }
386  
387    switch (Reason) {
388    case PPCallbacks::EnterFile:
389      WriteLineInfo(CurLine, " 1", 2);
390      break;
391    case PPCallbacks::ExitFile:
392      WriteLineInfo(CurLine, " 2", 2);
393      break;
394    case PPCallbacks::SystemHeaderPragma:
395    case PPCallbacks::RenameFile:
396      WriteLineInfo(CurLine);
397      break;
398    }
399  }
400  
401  void PrintPPOutputPPCallbacks::InclusionDirective(
402      SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
403      bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
404      StringRef SearchPath, StringRef RelativePath, const Module *Imported,
405      SrcMgr::CharacteristicKind FileType) {
406    // In -dI mode, dump #include directives prior to dumping their content or
407    // interpretation. Similar for -fkeep-system-includes.
408    if (DumpIncludeDirectives || (KeepSystemIncludes && isSystem(FileType))) {
409      MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
410      const std::string TokenText = PP.getSpelling(IncludeTok);
411      assert(!TokenText.empty());
412      *OS << "#" << TokenText << " "
413          << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
414          << " /* clang -E "
415          << (DumpIncludeDirectives ? "-dI" : "-fkeep-system-includes")
416          << " */";
417      setEmittedDirectiveOnThisLine();
418    }
419  
420    // When preprocessing, turn implicit imports into module import pragmas.
421    if (Imported) {
422      switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
423      case tok::pp_include:
424      case tok::pp_import:
425      case tok::pp_include_next:
426        MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
427        *OS << "#pragma clang module import "
428            << Imported->getFullModuleName(true)
429            << " /* clang -E: implicit import for "
430            << "#" << PP.getSpelling(IncludeTok) << " "
431            << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
432            << " */";
433        setEmittedDirectiveOnThisLine();
434        break;
435  
436      case tok::pp___include_macros:
437        // #__include_macros has no effect on a user of a preprocessed source
438        // file; the only effect is on preprocessing.
439        //
440        // FIXME: That's not *quite* true: it causes the module in question to
441        // be loaded, which can affect downstream diagnostics.
442        break;
443  
444      default:
445        llvm_unreachable("unknown include directive kind");
446        break;
447      }
448    }
449  }
450  
451  /// Handle entering the scope of a module during a module compilation.
452  void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
453    startNewLineIfNeeded();
454    *OS << "#pragma clang module begin " << M->getFullModuleName(true);
455    setEmittedDirectiveOnThisLine();
456  }
457  
458  /// Handle leaving the scope of a module during a module compilation.
459  void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
460    startNewLineIfNeeded();
461    *OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
462    setEmittedDirectiveOnThisLine();
463  }
464  
465  /// Ident - Handle #ident directives when read by the preprocessor.
466  ///
467  void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
468    MoveToLine(Loc, /*RequireStartOfLine=*/true);
469  
470    OS->write("#ident ", strlen("#ident "));
471    OS->write(S.begin(), S.size());
472    setEmittedTokensOnThisLine();
473  }
474  
475  /// MacroDefined - This hook is called whenever a macro definition is seen.
476  void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
477                                              const MacroDirective *MD) {
478    const MacroInfo *MI = MD->getMacroInfo();
479    // Print out macro definitions in -dD mode and when we have -fdirectives-only
480    // for C++20 header units.
481    if ((!DumpDefines && !DirectivesOnly) ||
482        // Ignore __FILE__ etc.
483        MI->isBuiltinMacro())
484      return;
485  
486    SourceLocation DefLoc = MI->getDefinitionLoc();
487    if (DirectivesOnly && !MI->isUsed()) {
488      SourceManager &SM = PP.getSourceManager();
489      if (SM.isWrittenInBuiltinFile(DefLoc) ||
490          SM.isWrittenInCommandLineFile(DefLoc))
491        return;
492    }
493    MoveToLine(DefLoc, /*RequireStartOfLine=*/true);
494    PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
495    setEmittedDirectiveOnThisLine();
496  }
497  
498  void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
499                                                const MacroDefinition &MD,
500                                                const MacroDirective *Undef) {
501    // Print out macro definitions in -dD mode and when we have -fdirectives-only
502    // for C++20 header units.
503    if (!DumpDefines && !DirectivesOnly)
504      return;
505  
506    MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
507    *OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
508    setEmittedDirectiveOnThisLine();
509  }
510  
511  static void outputPrintable(raw_ostream *OS, StringRef Str) {
512    for (unsigned char Char : Str) {
513      if (isPrintable(Char) && Char != '\\' && Char != '"')
514        *OS << (char)Char;
515      else // Output anything hard as an octal escape.
516        *OS << '\\'
517            << (char)('0' + ((Char >> 6) & 7))
518            << (char)('0' + ((Char >> 3) & 7))
519            << (char)('0' + ((Char >> 0) & 7));
520    }
521  }
522  
523  void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
524                                               StringRef Namespace,
525                                               PragmaMessageKind Kind,
526                                               StringRef Str) {
527    MoveToLine(Loc, /*RequireStartOfLine=*/true);
528    *OS << "#pragma ";
529    if (!Namespace.empty())
530      *OS << Namespace << ' ';
531    switch (Kind) {
532      case PMK_Message:
533        *OS << "message(\"";
534        break;
535      case PMK_Warning:
536        *OS << "warning \"";
537        break;
538      case PMK_Error:
539        *OS << "error \"";
540        break;
541    }
542  
543    outputPrintable(OS, Str);
544    *OS << '"';
545    if (Kind == PMK_Message)
546      *OS << ')';
547    setEmittedDirectiveOnThisLine();
548  }
549  
550  void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
551                                             StringRef DebugType) {
552    MoveToLine(Loc, /*RequireStartOfLine=*/true);
553  
554    *OS << "#pragma clang __debug ";
555    *OS << DebugType;
556  
557    setEmittedDirectiveOnThisLine();
558  }
559  
560  void PrintPPOutputPPCallbacks::
561  PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
562    MoveToLine(Loc, /*RequireStartOfLine=*/true);
563    *OS << "#pragma " << Namespace << " diagnostic push";
564    setEmittedDirectiveOnThisLine();
565  }
566  
567  void PrintPPOutputPPCallbacks::
568  PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
569    MoveToLine(Loc, /*RequireStartOfLine=*/true);
570    *OS << "#pragma " << Namespace << " diagnostic pop";
571    setEmittedDirectiveOnThisLine();
572  }
573  
574  void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
575                                                  StringRef Namespace,
576                                                  diag::Severity Map,
577                                                  StringRef Str) {
578    MoveToLine(Loc, /*RequireStartOfLine=*/true);
579    *OS << "#pragma " << Namespace << " diagnostic ";
580    switch (Map) {
581    case diag::Severity::Remark:
582      *OS << "remark";
583      break;
584    case diag::Severity::Warning:
585      *OS << "warning";
586      break;
587    case diag::Severity::Error:
588      *OS << "error";
589      break;
590    case diag::Severity::Ignored:
591      *OS << "ignored";
592      break;
593    case diag::Severity::Fatal:
594      *OS << "fatal";
595      break;
596    }
597    *OS << " \"" << Str << '"';
598    setEmittedDirectiveOnThisLine();
599  }
600  
601  void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
602                                               PragmaWarningSpecifier WarningSpec,
603                                               ArrayRef<int> Ids) {
604    MoveToLine(Loc, /*RequireStartOfLine=*/true);
605  
606    *OS << "#pragma warning(";
607    switch(WarningSpec) {
608      case PWS_Default:  *OS << "default"; break;
609      case PWS_Disable:  *OS << "disable"; break;
610      case PWS_Error:    *OS << "error"; break;
611      case PWS_Once:     *OS << "once"; break;
612      case PWS_Suppress: *OS << "suppress"; break;
613      case PWS_Level1:   *OS << '1'; break;
614      case PWS_Level2:   *OS << '2'; break;
615      case PWS_Level3:   *OS << '3'; break;
616      case PWS_Level4:   *OS << '4'; break;
617    }
618    *OS << ':';
619  
620    for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
621      *OS << ' ' << *I;
622    *OS << ')';
623    setEmittedDirectiveOnThisLine();
624  }
625  
626  void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
627                                                   int Level) {
628    MoveToLine(Loc, /*RequireStartOfLine=*/true);
629    *OS << "#pragma warning(push";
630    if (Level >= 0)
631      *OS << ", " << Level;
632    *OS << ')';
633    setEmittedDirectiveOnThisLine();
634  }
635  
636  void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
637    MoveToLine(Loc, /*RequireStartOfLine=*/true);
638    *OS << "#pragma warning(pop)";
639    setEmittedDirectiveOnThisLine();
640  }
641  
642  void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
643                                                       StringRef Str) {
644    MoveToLine(Loc, /*RequireStartOfLine=*/true);
645    *OS << "#pragma character_execution_set(push";
646    if (!Str.empty())
647      *OS << ", " << Str;
648    *OS << ')';
649    setEmittedDirectiveOnThisLine();
650  }
651  
652  void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
653    MoveToLine(Loc, /*RequireStartOfLine=*/true);
654    *OS << "#pragma character_execution_set(pop)";
655    setEmittedDirectiveOnThisLine();
656  }
657  
658  void PrintPPOutputPPCallbacks::
659  PragmaAssumeNonNullBegin(SourceLocation Loc) {
660    MoveToLine(Loc, /*RequireStartOfLine=*/true);
661    *OS << "#pragma clang assume_nonnull begin";
662    setEmittedDirectiveOnThisLine();
663  }
664  
665  void PrintPPOutputPPCallbacks::
666  PragmaAssumeNonNullEnd(SourceLocation Loc) {
667    MoveToLine(Loc, /*RequireStartOfLine=*/true);
668    *OS << "#pragma clang assume_nonnull end";
669    setEmittedDirectiveOnThisLine();
670  }
671  
672  void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
673                                                           bool RequireSpace,
674                                                           bool RequireSameLine) {
675    // These tokens are not expanded to anything and don't need whitespace before
676    // them.
677    if (Tok.is(tok::eof) ||
678        (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
679         !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end) &&
680         !Tok.is(tok::annot_repl_input_end)))
681      return;
682  
683    // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
684    if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
685        MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
686      if (MinimizeWhitespace) {
687        // Avoid interpreting hash as a directive under -fpreprocessed.
688        if (Tok.is(tok::hash))
689          *OS << ' ';
690      } else {
691        // Print out space characters so that the first token on a line is
692        // indented for easy reading.
693        unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
694  
695        // The first token on a line can have a column number of 1, yet still
696        // expect leading white space, if a macro expansion in column 1 starts
697        // with an empty macro argument, or an empty nested macro expansion. In
698        // this case, move the token to column 2.
699        if (ColNo == 1 && Tok.hasLeadingSpace())
700          ColNo = 2;
701  
702        // This hack prevents stuff like:
703        // #define HASH #
704        // HASH define foo bar
705        // From having the # character end up at column 1, which makes it so it
706        // is not handled as a #define next time through the preprocessor if in
707        // -fpreprocessed mode.
708        if (ColNo <= 1 && Tok.is(tok::hash))
709          *OS << ' ';
710  
711        // Otherwise, indent the appropriate number of spaces.
712        for (; ColNo > 1; --ColNo)
713          *OS << ' ';
714      }
715    } else {
716      // Insert whitespace between the previous and next token if either
717      // - The caller requires it
718      // - The input had whitespace between them and we are not in
719      //   whitespace-minimization mode
720      // - The whitespace is necessary to keep the tokens apart and there is not
721      //   already a newline between them
722      if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
723          ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
724           AvoidConcat(PrevPrevTok, PrevTok, Tok)))
725        *OS << ' ';
726    }
727  
728    PrevPrevTok = PrevTok;
729    PrevTok = Tok;
730  }
731  
732  void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
733                                                       unsigned Len) {
734    unsigned NumNewlines = 0;
735    for (; Len; --Len, ++TokStr) {
736      if (*TokStr != '\n' &&
737          *TokStr != '\r')
738        continue;
739  
740      ++NumNewlines;
741  
742      // If we have \n\r or \r\n, skip both and count as one line.
743      if (Len != 1 &&
744          (TokStr[1] == '\n' || TokStr[1] == '\r') &&
745          TokStr[0] != TokStr[1]) {
746        ++TokStr;
747        --Len;
748      }
749    }
750  
751    if (NumNewlines == 0) return;
752  
753    CurLine += NumNewlines;
754  }
755  
756  
757  namespace {
758  struct UnknownPragmaHandler : public PragmaHandler {
759    const char *Prefix;
760    PrintPPOutputPPCallbacks *Callbacks;
761  
762    // Set to true if tokens should be expanded
763    bool ShouldExpandTokens;
764  
765    UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
766                         bool RequireTokenExpansion)
767        : Prefix(prefix), Callbacks(callbacks),
768          ShouldExpandTokens(RequireTokenExpansion) {}
769    void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
770                      Token &PragmaTok) override {
771      // Figure out what line we went to and insert the appropriate number of
772      // newline characters.
773      Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
774      Callbacks->OS->write(Prefix, strlen(Prefix));
775      Callbacks->setEmittedTokensOnThisLine();
776  
777      if (ShouldExpandTokens) {
778        // The first token does not have expanded macros. Expand them, if
779        // required.
780        auto Toks = std::make_unique<Token[]>(1);
781        Toks[0] = PragmaTok;
782        PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
783                            /*DisableMacroExpansion=*/false,
784                            /*IsReinject=*/false);
785        PP.Lex(PragmaTok);
786      }
787  
788      // Read and print all of the pragma tokens.
789      bool IsFirst = true;
790      while (PragmaTok.isNot(tok::eod)) {
791        Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst,
792                                             /*RequireSameLine=*/true);
793        IsFirst = false;
794        std::string TokSpell = PP.getSpelling(PragmaTok);
795        Callbacks->OS->write(&TokSpell[0], TokSpell.size());
796        Callbacks->setEmittedTokensOnThisLine();
797  
798        if (ShouldExpandTokens)
799          PP.Lex(PragmaTok);
800        else
801          PP.LexUnexpandedToken(PragmaTok);
802      }
803      Callbacks->setEmittedDirectiveOnThisLine();
804    }
805  };
806  } // end anonymous namespace
807  
808  
809  static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
810                                      PrintPPOutputPPCallbacks *Callbacks) {
811    bool DropComments = PP.getLangOpts().TraditionalCPP &&
812                        !PP.getCommentRetentionState();
813  
814    bool IsStartOfLine = false;
815    char Buffer[256];
816    while (true) {
817      // Two lines joined with line continuation ('\' as last character on the
818      // line) must be emitted as one line even though Tok.getLine() returns two
819      // different values. In this situation Tok.isAtStartOfLine() is false even
820      // though it may be the first token on the lexical line. When
821      // dropping/skipping a token that is at the start of a line, propagate the
822      // start-of-line-ness to the next token to not append it to the previous
823      // line.
824      IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
825  
826      Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
827                                           /*RequireSameLine=*/!IsStartOfLine);
828  
829      if (DropComments && Tok.is(tok::comment)) {
830        // Skip comments. Normally the preprocessor does not generate
831        // tok::comment nodes at all when not keeping comments, but under
832        // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
833        PP.Lex(Tok);
834        continue;
835      } else if (Tok.is(tok::annot_repl_input_end)) {
836        PP.Lex(Tok);
837        continue;
838      } else if (Tok.is(tok::eod)) {
839        // Don't print end of directive tokens, since they are typically newlines
840        // that mess up our line tracking. These come from unknown pre-processor
841        // directives or hash-prefixed comments in standalone assembly files.
842        PP.Lex(Tok);
843        // FIXME: The token on the next line after #include should have
844        // Tok.isAtStartOfLine() set.
845        IsStartOfLine = true;
846        continue;
847      } else if (Tok.is(tok::annot_module_include)) {
848        // PrintPPOutputPPCallbacks::InclusionDirective handles producing
849        // appropriate output here. Ignore this token entirely.
850        PP.Lex(Tok);
851        IsStartOfLine = true;
852        continue;
853      } else if (Tok.is(tok::annot_module_begin)) {
854        // FIXME: We retrieve this token after the FileChanged callback, and
855        // retrieve the module_end token before the FileChanged callback, so
856        // we render this within the file and render the module end outside the
857        // file, but this is backwards from the token locations: the module_begin
858        // token is at the include location (outside the file) and the module_end
859        // token is at the EOF location (within the file).
860        Callbacks->BeginModule(
861            reinterpret_cast<Module *>(Tok.getAnnotationValue()));
862        PP.Lex(Tok);
863        IsStartOfLine = true;
864        continue;
865      } else if (Tok.is(tok::annot_module_end)) {
866        Callbacks->EndModule(
867            reinterpret_cast<Module *>(Tok.getAnnotationValue()));
868        PP.Lex(Tok);
869        IsStartOfLine = true;
870        continue;
871      } else if (Tok.is(tok::annot_header_unit)) {
872        // This is a header-name that has been (effectively) converted into a
873        // module-name.
874        // FIXME: The module name could contain non-identifier module name
875        // components. We don't have a good way to round-trip those.
876        Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
877        std::string Name = M->getFullModuleName();
878        Callbacks->OS->write(Name.data(), Name.size());
879        Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
880      } else if (Tok.isAnnotation()) {
881        // Ignore annotation tokens created by pragmas - the pragmas themselves
882        // will be reproduced in the preprocessed output.
883        PP.Lex(Tok);
884        continue;
885      } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
886        *Callbacks->OS << II->getName();
887      } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
888                 Tok.getLiteralData()) {
889        Callbacks->OS->write(Tok.getLiteralData(), Tok.getLength());
890      } else if (Tok.getLength() < std::size(Buffer)) {
891        const char *TokPtr = Buffer;
892        unsigned Len = PP.getSpelling(Tok, TokPtr);
893        Callbacks->OS->write(TokPtr, Len);
894  
895        // Tokens that can contain embedded newlines need to adjust our current
896        // line number.
897        // FIXME: The token may end with a newline in which case
898        // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
899        // wrong.
900        if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
901          Callbacks->HandleNewlinesInToken(TokPtr, Len);
902        if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
903            TokPtr[1] == '/') {
904          // It's a line comment;
905          // Ensure that we don't concatenate anything behind it.
906          Callbacks->setEmittedDirectiveOnThisLine();
907        }
908      } else {
909        std::string S = PP.getSpelling(Tok);
910        Callbacks->OS->write(S.data(), S.size());
911  
912        // Tokens that can contain embedded newlines need to adjust our current
913        // line number.
914        if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
915          Callbacks->HandleNewlinesInToken(S.data(), S.size());
916        if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
917          // It's a line comment;
918          // Ensure that we don't concatenate anything behind it.
919          Callbacks->setEmittedDirectiveOnThisLine();
920        }
921      }
922      Callbacks->setEmittedTokensOnThisLine();
923      IsStartOfLine = false;
924  
925      if (Tok.is(tok::eof)) break;
926  
927      PP.Lex(Tok);
928    }
929  }
930  
931  typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
932  static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
933    return LHS->first->getName().compare(RHS->first->getName());
934  }
935  
936  static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
937    // Ignore unknown pragmas.
938    PP.IgnorePragmas();
939  
940    // -dM mode just scans and ignores all tokens in the files, then dumps out
941    // the macro table at the end.
942    PP.EnterMainSourceFile();
943  
944    Token Tok;
945    do PP.Lex(Tok);
946    while (Tok.isNot(tok::eof));
947  
948    SmallVector<id_macro_pair, 128> MacrosByID;
949    for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
950         I != E; ++I) {
951      auto *MD = I->second.getLatest();
952      if (MD && MD->isDefined())
953        MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
954    }
955    llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
956  
957    for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
958      MacroInfo &MI = *MacrosByID[i].second;
959      // Ignore computed macros like __LINE__ and friends.
960      if (MI.isBuiltinMacro()) continue;
961  
962      PrintMacroDefinition(*MacrosByID[i].first, MI, PP, OS);
963      *OS << '\n';
964    }
965  }
966  
967  /// DoPrintPreprocessedInput - This implements -E mode.
968  ///
969  void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
970                                       const PreprocessorOutputOptions &Opts) {
971    // Show macros with no output is handled specially.
972    if (!Opts.ShowCPP) {
973      assert(Opts.ShowMacros && "Not yet implemented!");
974      DoPrintMacros(PP, OS);
975      return;
976    }
977  
978    // Inform the preprocessor whether we want it to retain comments or not, due
979    // to -C or -CC.
980    PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
981  
982    PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
983        PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
984        Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
985        Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
986  
987    // Expand macros in pragmas with -fms-extensions.  The assumption is that
988    // the majority of pragmas in such a file will be Microsoft pragmas.
989    // Remember the handlers we will add so that we can remove them later.
990    std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
991        new UnknownPragmaHandler(
992            "#pragma", Callbacks,
993            /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
994  
995    std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
996        "#pragma GCC", Callbacks,
997        /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
998  
999    std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
1000        "#pragma clang", Callbacks,
1001        /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
1002  
1003    PP.AddPragmaHandler(MicrosoftExtHandler.get());
1004    PP.AddPragmaHandler("GCC", GCCHandler.get());
1005    PP.AddPragmaHandler("clang", ClangHandler.get());
1006  
1007    // The tokens after pragma omp need to be expanded.
1008    //
1009    //  OpenMP [2.1, Directive format]
1010    //  Preprocessing tokens following the #pragma omp are subject to macro
1011    //  replacement.
1012    std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
1013        new UnknownPragmaHandler("#pragma omp", Callbacks,
1014                                 /*RequireTokenExpansion=*/true));
1015    PP.AddPragmaHandler("omp", OpenMPHandler.get());
1016  
1017    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
1018  
1019    // After we have configured the preprocessor, enter the main file.
1020    PP.EnterMainSourceFile();
1021    if (Opts.DirectivesOnly)
1022      PP.SetMacroExpansionOnlyInDirectives();
1023  
1024    // Consume all of the tokens that come from the predefines buffer.  Those
1025    // should not be emitted into the output and are guaranteed to be at the
1026    // start.
1027    const SourceManager &SourceMgr = PP.getSourceManager();
1028    Token Tok;
1029    do {
1030      PP.Lex(Tok);
1031      if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
1032        break;
1033  
1034      PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
1035      if (PLoc.isInvalid())
1036        break;
1037  
1038      if (strcmp(PLoc.getFilename(), "<built-in>"))
1039        break;
1040    } while (true);
1041  
1042    // Read all the preprocessed tokens, printing them out to the stream.
1043    PrintPreprocessedTokens(PP, Tok, Callbacks);
1044    *OS << '\n';
1045  
1046    // Remove the handlers we just added to leave the preprocessor in a sane state
1047    // so that it can be reused (for example by a clang::Parser instance).
1048    PP.RemovePragmaHandler(MicrosoftExtHandler.get());
1049    PP.RemovePragmaHandler("GCC", GCCHandler.get());
1050    PP.RemovePragmaHandler("clang", ClangHandler.get());
1051    PP.RemovePragmaHandler("omp", OpenMPHandler.get());
1052  }
1053