xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
10b57cec5SDimitry Andric //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This code rewrites include invocations into their expansions.  This gives you
100b57cec5SDimitry Andric // a file with all included files merged into it.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h"
150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
160b57cec5SDimitry Andric #include "clang/Frontend/PreprocessorOutputOptions.h"
170b57cec5SDimitry Andric #include "clang/Lex/HeaderSearch.h"
180b57cec5SDimitry Andric #include "clang/Lex/Pragma.h"
190b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
210b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric using namespace clang;
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace {
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric class InclusionRewriter : public PPCallbacks {
290b57cec5SDimitry Andric   /// Information about which #includes were actually performed,
300b57cec5SDimitry Andric   /// created by preprocessor callbacks.
310b57cec5SDimitry Andric   struct IncludedFile {
320b57cec5SDimitry Andric     FileID Id;
330b57cec5SDimitry Andric     SrcMgr::CharacteristicKind FileType;
340b57cec5SDimitry Andric     const DirectoryLookup *DirLookup;
350b57cec5SDimitry Andric     IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType,
360b57cec5SDimitry Andric                  const DirectoryLookup *DirLookup)
370b57cec5SDimitry Andric         : Id(Id), FileType(FileType), DirLookup(DirLookup) {}
380b57cec5SDimitry Andric   };
390b57cec5SDimitry Andric   Preprocessor &PP; ///< Used to find inclusion directives.
400b57cec5SDimitry Andric   SourceManager &SM; ///< Used to read and manage source files.
410b57cec5SDimitry Andric   raw_ostream &OS; ///< The destination stream for rewritten contents.
420b57cec5SDimitry Andric   StringRef MainEOL; ///< The line ending marker to use.
43*e8d8bef9SDimitry Andric   llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines.
440b57cec5SDimitry Andric   bool ShowLineMarkers; ///< Show #line markers.
450b57cec5SDimitry Andric   bool UseLineDirectives; ///< Use of line directives or line markers.
460b57cec5SDimitry Andric   /// Tracks where inclusions that change the file are found.
47*e8d8bef9SDimitry Andric   std::map<SourceLocation, IncludedFile> FileIncludes;
480b57cec5SDimitry Andric   /// Tracks where inclusions that import modules are found.
49*e8d8bef9SDimitry Andric   std::map<SourceLocation, const Module *> ModuleIncludes;
500b57cec5SDimitry Andric   /// Tracks where inclusions that enter modules (in a module build) are found.
51*e8d8bef9SDimitry Andric   std::map<SourceLocation, const Module *> ModuleEntryIncludes;
52a7dea167SDimitry Andric   /// Tracks where #if and #elif directives get evaluated and whether to true.
53*e8d8bef9SDimitry Andric   std::map<SourceLocation, bool> IfConditions;
540b57cec5SDimitry Andric   /// Used transitively for building up the FileIncludes mapping over the
550b57cec5SDimitry Andric   /// various \c PPCallbacks callbacks.
560b57cec5SDimitry Andric   SourceLocation LastInclusionLocation;
570b57cec5SDimitry Andric public:
580b57cec5SDimitry Andric   InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
590b57cec5SDimitry Andric                     bool UseLineDirectives);
600b57cec5SDimitry Andric   void Process(FileID FileId, SrcMgr::CharacteristicKind FileType,
610b57cec5SDimitry Andric                const DirectoryLookup *DirLookup);
62*e8d8bef9SDimitry Andric   void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) {
630b57cec5SDimitry Andric     PredefinesBuffer = Buf;
640b57cec5SDimitry Andric   }
650b57cec5SDimitry Andric   void detectMainFileEOL();
660b57cec5SDimitry Andric   void handleModuleBegin(Token &Tok) {
670b57cec5SDimitry Andric     assert(Tok.getKind() == tok::annot_module_begin);
68*e8d8bef9SDimitry Andric     ModuleEntryIncludes.insert(
69*e8d8bef9SDimitry Andric         {Tok.getLocation(), (Module *)Tok.getAnnotationValue()});
700b57cec5SDimitry Andric   }
710b57cec5SDimitry Andric private:
720b57cec5SDimitry Andric   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
730b57cec5SDimitry Andric                    SrcMgr::CharacteristicKind FileType,
740b57cec5SDimitry Andric                    FileID PrevFID) override;
75a7dea167SDimitry Andric   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
760b57cec5SDimitry Andric                    SrcMgr::CharacteristicKind FileType) override;
770b57cec5SDimitry Andric   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
780b57cec5SDimitry Andric                           StringRef FileName, bool IsAngled,
790b57cec5SDimitry Andric                           CharSourceRange FilenameRange, const FileEntry *File,
800b57cec5SDimitry Andric                           StringRef SearchPath, StringRef RelativePath,
810b57cec5SDimitry Andric                           const Module *Imported,
820b57cec5SDimitry Andric                           SrcMgr::CharacteristicKind FileType) override;
83a7dea167SDimitry Andric   void If(SourceLocation Loc, SourceRange ConditionRange,
84a7dea167SDimitry Andric           ConditionValueKind ConditionValue) override;
85a7dea167SDimitry Andric   void Elif(SourceLocation Loc, SourceRange ConditionRange,
86a7dea167SDimitry Andric             ConditionValueKind ConditionValue, SourceLocation IfLoc) override;
870b57cec5SDimitry Andric   void WriteLineInfo(StringRef Filename, int Line,
880b57cec5SDimitry Andric                      SrcMgr::CharacteristicKind FileType,
890b57cec5SDimitry Andric                      StringRef Extra = StringRef());
900b57cec5SDimitry Andric   void WriteImplicitModuleImport(const Module *Mod);
91*e8d8bef9SDimitry Andric   void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom,
92*e8d8bef9SDimitry Andric                          unsigned WriteTo, StringRef EOL, int &lines,
930b57cec5SDimitry Andric                          bool EnsureNewline);
940b57cec5SDimitry Andric   void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
95*e8d8bef9SDimitry Andric                            const MemoryBufferRef &FromFile, StringRef EOL,
960b57cec5SDimitry Andric                            unsigned &NextToWrite, int &Lines);
970b57cec5SDimitry Andric   const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
980b57cec5SDimitry Andric   const Module *FindModuleAtLocation(SourceLocation Loc) const;
990b57cec5SDimitry Andric   const Module *FindEnteredModule(SourceLocation Loc) const;
100a7dea167SDimitry Andric   bool IsIfAtLocationTrue(SourceLocation Loc) const;
1010b57cec5SDimitry Andric   StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
1020b57cec5SDimitry Andric };
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric }  // end anonymous namespace
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
1070b57cec5SDimitry Andric InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
1080b57cec5SDimitry Andric                                      bool ShowLineMarkers,
1090b57cec5SDimitry Andric                                      bool UseLineDirectives)
1100b57cec5SDimitry Andric     : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
111*e8d8bef9SDimitry Andric       ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives),
1120b57cec5SDimitry Andric       LastInclusionLocation(SourceLocation()) {}
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric /// Write appropriate line information as either #line directives or GNU line
1150b57cec5SDimitry Andric /// markers depending on what mode we're in, including the \p Filename and
1160b57cec5SDimitry Andric /// \p Line we are located at, using the specified \p EOL line separator, and
1170b57cec5SDimitry Andric /// any \p Extra context specifiers in GNU line directives.
1180b57cec5SDimitry Andric void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
1190b57cec5SDimitry Andric                                       SrcMgr::CharacteristicKind FileType,
1200b57cec5SDimitry Andric                                       StringRef Extra) {
1210b57cec5SDimitry Andric   if (!ShowLineMarkers)
1220b57cec5SDimitry Andric     return;
1230b57cec5SDimitry Andric   if (UseLineDirectives) {
1240b57cec5SDimitry Andric     OS << "#line" << ' ' << Line << ' ' << '"';
1250b57cec5SDimitry Andric     OS.write_escaped(Filename);
1260b57cec5SDimitry Andric     OS << '"';
1270b57cec5SDimitry Andric   } else {
1280b57cec5SDimitry Andric     // Use GNU linemarkers as described here:
1290b57cec5SDimitry Andric     // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
1300b57cec5SDimitry Andric     OS << '#' << ' ' << Line << ' ' << '"';
1310b57cec5SDimitry Andric     OS.write_escaped(Filename);
1320b57cec5SDimitry Andric     OS << '"';
1330b57cec5SDimitry Andric     if (!Extra.empty())
1340b57cec5SDimitry Andric       OS << Extra;
1350b57cec5SDimitry Andric     if (FileType == SrcMgr::C_System)
1360b57cec5SDimitry Andric       // "`3' This indicates that the following text comes from a system header
1370b57cec5SDimitry Andric       // file, so certain warnings should be suppressed."
1380b57cec5SDimitry Andric       OS << " 3";
1390b57cec5SDimitry Andric     else if (FileType == SrcMgr::C_ExternCSystem)
1400b57cec5SDimitry Andric       // as above for `3', plus "`4' This indicates that the following text
1410b57cec5SDimitry Andric       // should be treated as being wrapped in an implicit extern "C" block."
1420b57cec5SDimitry Andric       OS << " 3 4";
1430b57cec5SDimitry Andric   }
1440b57cec5SDimitry Andric   OS << MainEOL;
1450b57cec5SDimitry Andric }
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
1480b57cec5SDimitry Andric   OS << "#pragma clang module import " << Mod->getFullModuleName(true)
1490b57cec5SDimitry Andric      << " /* clang -frewrite-includes: implicit import */" << MainEOL;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric /// FileChanged - Whenever the preprocessor enters or exits a #include file
1530b57cec5SDimitry Andric /// it invokes this handler.
1540b57cec5SDimitry Andric void InclusionRewriter::FileChanged(SourceLocation Loc,
1550b57cec5SDimitry Andric                                     FileChangeReason Reason,
1560b57cec5SDimitry Andric                                     SrcMgr::CharacteristicKind NewFileType,
1570b57cec5SDimitry Andric                                     FileID) {
1580b57cec5SDimitry Andric   if (Reason != EnterFile)
1590b57cec5SDimitry Andric     return;
1600b57cec5SDimitry Andric   if (LastInclusionLocation.isInvalid())
1610b57cec5SDimitry Andric     // we didn't reach this file (eg: the main file) via an inclusion directive
1620b57cec5SDimitry Andric     return;
1630b57cec5SDimitry Andric   FileID Id = FullSourceLoc(Loc, SM).getFileID();
1640b57cec5SDimitry Andric   auto P = FileIncludes.insert(
165*e8d8bef9SDimitry Andric       std::make_pair(LastInclusionLocation,
1660b57cec5SDimitry Andric                      IncludedFile(Id, NewFileType, PP.GetCurDirLookup())));
1670b57cec5SDimitry Andric   (void)P;
1680b57cec5SDimitry Andric   assert(P.second && "Unexpected revisitation of the same include directive");
1690b57cec5SDimitry Andric   LastInclusionLocation = SourceLocation();
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric /// Called whenever an inclusion is skipped due to canonical header protection
1730b57cec5SDimitry Andric /// macros.
174a7dea167SDimitry Andric void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
1750b57cec5SDimitry Andric                                     const Token & /*FilenameTok*/,
1760b57cec5SDimitry Andric                                     SrcMgr::CharacteristicKind /*FileType*/) {
1770b57cec5SDimitry Andric   assert(LastInclusionLocation.isValid() &&
1780b57cec5SDimitry Andric          "A file, that wasn't found via an inclusion directive, was skipped");
1790b57cec5SDimitry Andric   LastInclusionLocation = SourceLocation();
1800b57cec5SDimitry Andric }
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric /// This should be called whenever the preprocessor encounters include
1830b57cec5SDimitry Andric /// directives. It does not say whether the file has been included, but it
1840b57cec5SDimitry Andric /// provides more information about the directive (hash location instead
1850b57cec5SDimitry Andric /// of location inside the included file). It is assumed that the matching
1860b57cec5SDimitry Andric /// FileChanged() or FileSkipped() is called after this (or neither is
1870b57cec5SDimitry Andric /// called if this #include results in an error or does not textually include
1880b57cec5SDimitry Andric /// anything).
1890b57cec5SDimitry Andric void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
1900b57cec5SDimitry Andric                                            const Token &/*IncludeTok*/,
1910b57cec5SDimitry Andric                                            StringRef /*FileName*/,
1920b57cec5SDimitry Andric                                            bool /*IsAngled*/,
1930b57cec5SDimitry Andric                                            CharSourceRange /*FilenameRange*/,
1940b57cec5SDimitry Andric                                            const FileEntry * /*File*/,
1950b57cec5SDimitry Andric                                            StringRef /*SearchPath*/,
1960b57cec5SDimitry Andric                                            StringRef /*RelativePath*/,
1970b57cec5SDimitry Andric                                            const Module *Imported,
1980b57cec5SDimitry Andric                                            SrcMgr::CharacteristicKind FileType){
1990b57cec5SDimitry Andric   if (Imported) {
200*e8d8bef9SDimitry Andric     auto P = ModuleIncludes.insert(std::make_pair(HashLoc, Imported));
2010b57cec5SDimitry Andric     (void)P;
2020b57cec5SDimitry Andric     assert(P.second && "Unexpected revisitation of the same include directive");
2030b57cec5SDimitry Andric   } else
2040b57cec5SDimitry Andric     LastInclusionLocation = HashLoc;
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric 
207a7dea167SDimitry Andric void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange,
208a7dea167SDimitry Andric                            ConditionValueKind ConditionValue) {
209*e8d8bef9SDimitry Andric   auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
210a7dea167SDimitry Andric   (void)P;
211a7dea167SDimitry Andric   assert(P.second && "Unexpected revisitation of the same if directive");
212a7dea167SDimitry Andric }
213a7dea167SDimitry Andric 
214a7dea167SDimitry Andric void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange,
215a7dea167SDimitry Andric                              ConditionValueKind ConditionValue,
216a7dea167SDimitry Andric                              SourceLocation IfLoc) {
217*e8d8bef9SDimitry Andric   auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
218a7dea167SDimitry Andric   (void)P;
219a7dea167SDimitry Andric   assert(P.second && "Unexpected revisitation of the same elif directive");
220a7dea167SDimitry Andric }
221a7dea167SDimitry Andric 
2220b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2230b57cec5SDimitry Andric /// an inclusion directive) in the map of inclusion information, FileChanges.
2240b57cec5SDimitry Andric const InclusionRewriter::IncludedFile *
2250b57cec5SDimitry Andric InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
226*e8d8bef9SDimitry Andric   const auto I = FileIncludes.find(Loc);
2270b57cec5SDimitry Andric   if (I != FileIncludes.end())
2280b57cec5SDimitry Andric     return &I->second;
2290b57cec5SDimitry Andric   return nullptr;
2300b57cec5SDimitry Andric }
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2330b57cec5SDimitry Andric /// an inclusion directive) in the map of module inclusion information.
2340b57cec5SDimitry Andric const Module *
2350b57cec5SDimitry Andric InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
236*e8d8bef9SDimitry Andric   const auto I = ModuleIncludes.find(Loc);
2370b57cec5SDimitry Andric   if (I != ModuleIncludes.end())
2380b57cec5SDimitry Andric     return I->second;
2390b57cec5SDimitry Andric   return nullptr;
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2430b57cec5SDimitry Andric /// an inclusion directive) in the map of module entry information.
2440b57cec5SDimitry Andric const Module *
2450b57cec5SDimitry Andric InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {
246*e8d8bef9SDimitry Andric   const auto I = ModuleEntryIncludes.find(Loc);
2470b57cec5SDimitry Andric   if (I != ModuleEntryIncludes.end())
2480b57cec5SDimitry Andric     return I->second;
2490b57cec5SDimitry Andric   return nullptr;
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric 
252a7dea167SDimitry Andric bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
253*e8d8bef9SDimitry Andric   const auto I = IfConditions.find(Loc);
254a7dea167SDimitry Andric   if (I != IfConditions.end())
255a7dea167SDimitry Andric     return I->second;
256a7dea167SDimitry Andric   return false;
257a7dea167SDimitry Andric }
258a7dea167SDimitry Andric 
2590b57cec5SDimitry Andric /// Detect the likely line ending style of \p FromFile by examining the first
2600b57cec5SDimitry Andric /// newline found within it.
261*e8d8bef9SDimitry Andric static StringRef DetectEOL(const MemoryBufferRef &FromFile) {
2620b57cec5SDimitry Andric   // Detect what line endings the file uses, so that added content does not mix
2630b57cec5SDimitry Andric   // the style. We need to check for "\r\n" first because "\n\r" will match
2640b57cec5SDimitry Andric   // "\r\n\r\n".
2650b57cec5SDimitry Andric   const char *Pos = strchr(FromFile.getBufferStart(), '\n');
2660b57cec5SDimitry Andric   if (!Pos)
2670b57cec5SDimitry Andric     return "\n";
2680b57cec5SDimitry Andric   if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
2690b57cec5SDimitry Andric     return "\r\n";
2700b57cec5SDimitry Andric   if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
2710b57cec5SDimitry Andric     return "\n\r";
2720b57cec5SDimitry Andric   return "\n";
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric void InclusionRewriter::detectMainFileEOL() {
276*e8d8bef9SDimitry Andric   Optional<MemoryBufferRef> FromFile = *SM.getBufferOrNone(SM.getMainFileID());
277*e8d8bef9SDimitry Andric   assert(FromFile);
278*e8d8bef9SDimitry Andric   if (!FromFile)
2790b57cec5SDimitry Andric     return; // Should never happen, but whatever.
280*e8d8bef9SDimitry Andric   MainEOL = DetectEOL(*FromFile);
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
2840b57cec5SDimitry Andric /// \p WriteTo - 1.
285*e8d8bef9SDimitry Andric void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile,
2860b57cec5SDimitry Andric                                           unsigned &WriteFrom, unsigned WriteTo,
2870b57cec5SDimitry Andric                                           StringRef LocalEOL, int &Line,
2880b57cec5SDimitry Andric                                           bool EnsureNewline) {
2890b57cec5SDimitry Andric   if (WriteTo <= WriteFrom)
2900b57cec5SDimitry Andric     return;
291*e8d8bef9SDimitry Andric   if (FromFile == PredefinesBuffer) {
2920b57cec5SDimitry Andric     // Ignore the #defines of the predefines buffer.
2930b57cec5SDimitry Andric     WriteFrom = WriteTo;
2940b57cec5SDimitry Andric     return;
2950b57cec5SDimitry Andric   }
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric   // If we would output half of a line ending, advance one character to output
2980b57cec5SDimitry Andric   // the whole line ending.  All buffers are null terminated, so looking ahead
2990b57cec5SDimitry Andric   // one byte is safe.
3000b57cec5SDimitry Andric   if (LocalEOL.size() == 2 &&
3010b57cec5SDimitry Andric       LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
3020b57cec5SDimitry Andric       LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
3030b57cec5SDimitry Andric     WriteTo++;
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric   StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
3060b57cec5SDimitry Andric                         WriteTo - WriteFrom);
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric   if (MainEOL == LocalEOL) {
3090b57cec5SDimitry Andric     OS << TextToWrite;
3100b57cec5SDimitry Andric     // count lines manually, it's faster than getPresumedLoc()
3110b57cec5SDimitry Andric     Line += TextToWrite.count(LocalEOL);
3120b57cec5SDimitry Andric     if (EnsureNewline && !TextToWrite.endswith(LocalEOL))
3130b57cec5SDimitry Andric       OS << MainEOL;
3140b57cec5SDimitry Andric   } else {
3150b57cec5SDimitry Andric     // Output the file one line at a time, rewriting the line endings as we go.
3160b57cec5SDimitry Andric     StringRef Rest = TextToWrite;
3170b57cec5SDimitry Andric     while (!Rest.empty()) {
3180b57cec5SDimitry Andric       StringRef LineText;
3190b57cec5SDimitry Andric       std::tie(LineText, Rest) = Rest.split(LocalEOL);
3200b57cec5SDimitry Andric       OS << LineText;
3210b57cec5SDimitry Andric       Line++;
3220b57cec5SDimitry Andric       if (!Rest.empty())
3230b57cec5SDimitry Andric         OS << MainEOL;
3240b57cec5SDimitry Andric     }
3250b57cec5SDimitry Andric     if (TextToWrite.endswith(LocalEOL) || EnsureNewline)
3260b57cec5SDimitry Andric       OS << MainEOL;
3270b57cec5SDimitry Andric   }
3280b57cec5SDimitry Andric   WriteFrom = WriteTo;
3290b57cec5SDimitry Andric }
3300b57cec5SDimitry Andric 
3310b57cec5SDimitry Andric /// Print characters from \p FromFile starting at \p NextToWrite up until the
3320b57cec5SDimitry Andric /// inclusion directive at \p StartToken, then print out the inclusion
3330b57cec5SDimitry Andric /// inclusion directive disabled by a #if directive, updating \p NextToWrite
3340b57cec5SDimitry Andric /// and \p Line to track the number of source lines visited and the progress
3350b57cec5SDimitry Andric /// through the \p FromFile buffer.
3360b57cec5SDimitry Andric void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
3370b57cec5SDimitry Andric                                             const Token &StartToken,
338*e8d8bef9SDimitry Andric                                             const MemoryBufferRef &FromFile,
3390b57cec5SDimitry Andric                                             StringRef LocalEOL,
3400b57cec5SDimitry Andric                                             unsigned &NextToWrite, int &Line) {
3410b57cec5SDimitry Andric   OutputContentUpTo(FromFile, NextToWrite,
3420b57cec5SDimitry Andric                     SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
3430b57cec5SDimitry Andric                     false);
3440b57cec5SDimitry Andric   Token DirectiveToken;
3450b57cec5SDimitry Andric   do {
3460b57cec5SDimitry Andric     DirectiveLex.LexFromRawLexer(DirectiveToken);
3470b57cec5SDimitry Andric   } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
348*e8d8bef9SDimitry Andric   if (FromFile == PredefinesBuffer) {
3490b57cec5SDimitry Andric     // OutputContentUpTo() would not output anything anyway.
3500b57cec5SDimitry Andric     return;
3510b57cec5SDimitry Andric   }
3520b57cec5SDimitry Andric   OS << "#if 0 /* expanded by -frewrite-includes */" << MainEOL;
3530b57cec5SDimitry Andric   OutputContentUpTo(FromFile, NextToWrite,
3540b57cec5SDimitry Andric                     SM.getFileOffset(DirectiveToken.getLocation()) +
3550b57cec5SDimitry Andric                         DirectiveToken.getLength(),
3560b57cec5SDimitry Andric                     LocalEOL, Line, true);
3570b57cec5SDimitry Andric   OS << "#endif /* expanded by -frewrite-includes */" << MainEOL;
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric /// Find the next identifier in the pragma directive specified by \p RawToken.
3610b57cec5SDimitry Andric StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
3620b57cec5SDimitry Andric                                                 Token &RawToken) {
3630b57cec5SDimitry Andric   RawLex.LexFromRawLexer(RawToken);
3640b57cec5SDimitry Andric   if (RawToken.is(tok::raw_identifier))
3650b57cec5SDimitry Andric     PP.LookUpIdentifierInfo(RawToken);
3660b57cec5SDimitry Andric   if (RawToken.is(tok::identifier))
3670b57cec5SDimitry Andric     return RawToken.getIdentifierInfo()->getName();
3680b57cec5SDimitry Andric   return StringRef();
3690b57cec5SDimitry Andric }
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
3720b57cec5SDimitry Andric /// and including content of included files recursively.
3730b57cec5SDimitry Andric void InclusionRewriter::Process(FileID FileId,
3740b57cec5SDimitry Andric                                 SrcMgr::CharacteristicKind FileType,
3750b57cec5SDimitry Andric                                 const DirectoryLookup *DirLookup) {
376*e8d8bef9SDimitry Andric   MemoryBufferRef FromFile;
377*e8d8bef9SDimitry Andric   {
378*e8d8bef9SDimitry Andric     auto B = SM.getBufferOrNone(FileId);
379*e8d8bef9SDimitry Andric     assert(B && "Attempting to process invalid inclusion");
380*e8d8bef9SDimitry Andric     if (B)
381*e8d8bef9SDimitry Andric       FromFile = *B;
382*e8d8bef9SDimitry Andric   }
3830b57cec5SDimitry Andric   StringRef FileName = FromFile.getBufferIdentifier();
384*e8d8bef9SDimitry Andric   Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
3850b57cec5SDimitry Andric   RawLex.SetCommentRetentionState(false);
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   StringRef LocalEOL = DetectEOL(FromFile);
3880b57cec5SDimitry Andric 
3890b57cec5SDimitry Andric   // Per the GNU docs: "1" indicates entering a new file.
3900b57cec5SDimitry Andric   if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
3910b57cec5SDimitry Andric     WriteLineInfo(FileName, 1, FileType, "");
3920b57cec5SDimitry Andric   else
3930b57cec5SDimitry Andric     WriteLineInfo(FileName, 1, FileType, " 1");
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric   if (SM.getFileIDSize(FileId) == 0)
3960b57cec5SDimitry Andric     return;
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric   // The next byte to be copied from the source file, which may be non-zero if
3990b57cec5SDimitry Andric   // the lexer handled a BOM.
4000b57cec5SDimitry Andric   unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
4010b57cec5SDimitry Andric   assert(SM.getLineNumber(FileId, NextToWrite) == 1);
4020b57cec5SDimitry Andric   int Line = 1; // The current input file line number.
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric   Token RawToken;
4050b57cec5SDimitry Andric   RawLex.LexFromRawLexer(RawToken);
4060b57cec5SDimitry Andric 
4070b57cec5SDimitry Andric   // TODO: Consider adding a switch that strips possibly unimportant content,
4080b57cec5SDimitry Andric   // such as comments, to reduce the size of repro files.
4090b57cec5SDimitry Andric   while (RawToken.isNot(tok::eof)) {
4100b57cec5SDimitry Andric     if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
4110b57cec5SDimitry Andric       RawLex.setParsingPreprocessorDirective(true);
4120b57cec5SDimitry Andric       Token HashToken = RawToken;
4130b57cec5SDimitry Andric       RawLex.LexFromRawLexer(RawToken);
4140b57cec5SDimitry Andric       if (RawToken.is(tok::raw_identifier))
4150b57cec5SDimitry Andric         PP.LookUpIdentifierInfo(RawToken);
4160b57cec5SDimitry Andric       if (RawToken.getIdentifierInfo() != nullptr) {
4170b57cec5SDimitry Andric         switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
4180b57cec5SDimitry Andric           case tok::pp_include:
4190b57cec5SDimitry Andric           case tok::pp_include_next:
4200b57cec5SDimitry Andric           case tok::pp_import: {
4210b57cec5SDimitry Andric             CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL, NextToWrite,
4220b57cec5SDimitry Andric               Line);
4230b57cec5SDimitry Andric             if (FileId != PP.getPredefinesFileID())
4240b57cec5SDimitry Andric               WriteLineInfo(FileName, Line - 1, FileType, "");
4250b57cec5SDimitry Andric             StringRef LineInfoExtra;
4260b57cec5SDimitry Andric             SourceLocation Loc = HashToken.getLocation();
4270b57cec5SDimitry Andric             if (const Module *Mod = FindModuleAtLocation(Loc))
4280b57cec5SDimitry Andric               WriteImplicitModuleImport(Mod);
4290b57cec5SDimitry Andric             else if (const IncludedFile *Inc = FindIncludeAtLocation(Loc)) {
4300b57cec5SDimitry Andric               const Module *Mod = FindEnteredModule(Loc);
4310b57cec5SDimitry Andric               if (Mod)
4320b57cec5SDimitry Andric                 OS << "#pragma clang module begin "
4330b57cec5SDimitry Andric                    << Mod->getFullModuleName(true) << "\n";
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric               // Include and recursively process the file.
4360b57cec5SDimitry Andric               Process(Inc->Id, Inc->FileType, Inc->DirLookup);
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric               if (Mod)
4390b57cec5SDimitry Andric                 OS << "#pragma clang module end /*"
4400b57cec5SDimitry Andric                    << Mod->getFullModuleName(true) << "*/\n";
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric               // Add line marker to indicate we're returning from an included
4430b57cec5SDimitry Andric               // file.
4440b57cec5SDimitry Andric               LineInfoExtra = " 2";
4450b57cec5SDimitry Andric             }
4460b57cec5SDimitry Andric             // fix up lineinfo (since commented out directive changed line
4470b57cec5SDimitry Andric             // numbers) for inclusions that were skipped due to header guards
4480b57cec5SDimitry Andric             WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
4490b57cec5SDimitry Andric             break;
4500b57cec5SDimitry Andric           }
4510b57cec5SDimitry Andric           case tok::pp_pragma: {
4520b57cec5SDimitry Andric             StringRef Identifier = NextIdentifierName(RawLex, RawToken);
4530b57cec5SDimitry Andric             if (Identifier == "clang" || Identifier == "GCC") {
4540b57cec5SDimitry Andric               if (NextIdentifierName(RawLex, RawToken) == "system_header") {
4550b57cec5SDimitry Andric                 // keep the directive in, commented out
4560b57cec5SDimitry Andric                 CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
4570b57cec5SDimitry Andric                   NextToWrite, Line);
4580b57cec5SDimitry Andric                 // update our own type
4590b57cec5SDimitry Andric                 FileType = SM.getFileCharacteristic(RawToken.getLocation());
4600b57cec5SDimitry Andric                 WriteLineInfo(FileName, Line, FileType);
4610b57cec5SDimitry Andric               }
4620b57cec5SDimitry Andric             } else if (Identifier == "once") {
4630b57cec5SDimitry Andric               // keep the directive in, commented out
4640b57cec5SDimitry Andric               CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
4650b57cec5SDimitry Andric                 NextToWrite, Line);
4660b57cec5SDimitry Andric               WriteLineInfo(FileName, Line, FileType);
4670b57cec5SDimitry Andric             }
4680b57cec5SDimitry Andric             break;
4690b57cec5SDimitry Andric           }
4700b57cec5SDimitry Andric           case tok::pp_if:
4710b57cec5SDimitry Andric           case tok::pp_elif: {
4720b57cec5SDimitry Andric             bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
4730b57cec5SDimitry Andric                          tok::pp_elif);
474a7dea167SDimitry Andric             bool isTrue = IsIfAtLocationTrue(RawToken.getLocation());
4750b57cec5SDimitry Andric             OutputContentUpTo(FromFile, NextToWrite,
476a7dea167SDimitry Andric                               SM.getFileOffset(HashToken.getLocation()),
477a7dea167SDimitry Andric                               LocalEOL, Line, /*EnsureNewline=*/true);
478a7dea167SDimitry Andric             do {
479a7dea167SDimitry Andric               RawLex.LexFromRawLexer(RawToken);
480a7dea167SDimitry Andric             } while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof));
481a7dea167SDimitry Andric             // We need to disable the old condition, but that is tricky.
482a7dea167SDimitry Andric             // Trying to comment it out can easily lead to comment nesting.
483a7dea167SDimitry Andric             // So instead make the condition harmless by making it enclose
484a7dea167SDimitry Andric             // an empty block. Moreover, put it itself inside an #if 0 block
485a7dea167SDimitry Andric             // to disable it from getting evaluated (e.g. __has_include_next
486a7dea167SDimitry Andric             // warns if used from the primary source file).
487a7dea167SDimitry Andric             OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL;
4880b57cec5SDimitry Andric             if (elif) {
489a7dea167SDimitry Andric               OS << "#if 0" << MainEOL;
490a7dea167SDimitry Andric             }
4910b57cec5SDimitry Andric             OutputContentUpTo(FromFile, NextToWrite,
4920b57cec5SDimitry Andric                               SM.getFileOffset(RawToken.getLocation()) +
4930b57cec5SDimitry Andric                                   RawToken.getLength(),
4940b57cec5SDimitry Andric                               LocalEOL, Line, /*EnsureNewline=*/true);
495a7dea167SDimitry Andric             // Close the empty block and the disabling block.
496a7dea167SDimitry Andric             OS << "#endif" << MainEOL;
497a7dea167SDimitry Andric             OS << "#endif /* disabled by -frewrite-includes */" << MainEOL;
498a7dea167SDimitry Andric             OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0")
499a7dea167SDimitry Andric                << " /* evaluated by -frewrite-includes */" << MainEOL;
5000b57cec5SDimitry Andric             WriteLineInfo(FileName, Line, FileType);
5010b57cec5SDimitry Andric             break;
5020b57cec5SDimitry Andric           }
5030b57cec5SDimitry Andric           case tok::pp_endif:
5040b57cec5SDimitry Andric           case tok::pp_else: {
5050b57cec5SDimitry Andric             // We surround every #include by #if 0 to comment it out, but that
5060b57cec5SDimitry Andric             // changes line numbers. These are fixed up right after that, but
5070b57cec5SDimitry Andric             // the whole #include could be inside a preprocessor conditional
5080b57cec5SDimitry Andric             // that is not processed. So it is necessary to fix the line
5090b57cec5SDimitry Andric             // numbers on the next line after each #else/#endif as well.
5100b57cec5SDimitry Andric             RawLex.SetKeepWhitespaceMode(true);
5110b57cec5SDimitry Andric             do {
5120b57cec5SDimitry Andric               RawLex.LexFromRawLexer(RawToken);
5130b57cec5SDimitry Andric             } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
5140b57cec5SDimitry Andric             OutputContentUpTo(FromFile, NextToWrite,
5150b57cec5SDimitry Andric                               SM.getFileOffset(RawToken.getLocation()) +
5160b57cec5SDimitry Andric                                   RawToken.getLength(),
5170b57cec5SDimitry Andric                               LocalEOL, Line, /*EnsureNewline=*/ true);
5180b57cec5SDimitry Andric             WriteLineInfo(FileName, Line, FileType);
5190b57cec5SDimitry Andric             RawLex.SetKeepWhitespaceMode(false);
5200b57cec5SDimitry Andric             break;
5210b57cec5SDimitry Andric           }
5220b57cec5SDimitry Andric           default:
5230b57cec5SDimitry Andric             break;
5240b57cec5SDimitry Andric         }
5250b57cec5SDimitry Andric       }
5260b57cec5SDimitry Andric       RawLex.setParsingPreprocessorDirective(false);
5270b57cec5SDimitry Andric     }
5280b57cec5SDimitry Andric     RawLex.LexFromRawLexer(RawToken);
5290b57cec5SDimitry Andric   }
5300b57cec5SDimitry Andric   OutputContentUpTo(FromFile, NextToWrite,
5310b57cec5SDimitry Andric                     SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
5320b57cec5SDimitry Andric                     Line, /*EnsureNewline=*/true);
5330b57cec5SDimitry Andric }
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric /// InclusionRewriterInInput - Implement -frewrite-includes mode.
5360b57cec5SDimitry Andric void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
5370b57cec5SDimitry Andric                                    const PreprocessorOutputOptions &Opts) {
5380b57cec5SDimitry Andric   SourceManager &SM = PP.getSourceManager();
5390b57cec5SDimitry Andric   InclusionRewriter *Rewrite = new InclusionRewriter(
5400b57cec5SDimitry Andric       PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
5410b57cec5SDimitry Andric   Rewrite->detectMainFileEOL();
5420b57cec5SDimitry Andric 
5430b57cec5SDimitry Andric   PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
5440b57cec5SDimitry Andric   PP.IgnorePragmas();
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric   // First let the preprocessor process the entire file and call callbacks.
5470b57cec5SDimitry Andric   // Callbacks will record which #include's were actually performed.
5480b57cec5SDimitry Andric   PP.EnterMainSourceFile();
5490b57cec5SDimitry Andric   Token Tok;
5500b57cec5SDimitry Andric   // Only preprocessor directives matter here, so disable macro expansion
5510b57cec5SDimitry Andric   // everywhere else as an optimization.
5520b57cec5SDimitry Andric   // TODO: It would be even faster if the preprocessor could be switched
5530b57cec5SDimitry Andric   // to a mode where it would parse only preprocessor directives and comments,
5540b57cec5SDimitry Andric   // nothing else matters for parsing or processing.
5550b57cec5SDimitry Andric   PP.SetMacroExpansionOnlyInDirectives();
5560b57cec5SDimitry Andric   do {
5570b57cec5SDimitry Andric     PP.Lex(Tok);
5580b57cec5SDimitry Andric     if (Tok.is(tok::annot_module_begin))
5590b57cec5SDimitry Andric       Rewrite->handleModuleBegin(Tok);
5600b57cec5SDimitry Andric   } while (Tok.isNot(tok::eof));
561*e8d8bef9SDimitry Andric   Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID()));
5620b57cec5SDimitry Andric   Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User, nullptr);
5630b57cec5SDimitry Andric   Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User, nullptr);
5640b57cec5SDimitry Andric   OS->flush();
5650b57cec5SDimitry Andric }
566