10b57cec5SDimitry Andric //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This code rewrites include invocations into their expansions. This gives you
100b57cec5SDimitry Andric // a file with all included files merged into it.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h"
150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
160b57cec5SDimitry Andric #include "clang/Frontend/PreprocessorOutputOptions.h"
170b57cec5SDimitry Andric #include "clang/Lex/Pragma.h"
180b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
200b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
21bdd1243dSDimitry Andric #include <optional>
220b57cec5SDimitry Andric
230b57cec5SDimitry Andric using namespace clang;
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric namespace {
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric class InclusionRewriter : public PPCallbacks {
290b57cec5SDimitry Andric /// Information about which #includes were actually performed,
300b57cec5SDimitry Andric /// created by preprocessor callbacks.
310b57cec5SDimitry Andric struct IncludedFile {
320b57cec5SDimitry Andric FileID Id;
330b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType;
IncludedFile__anon7638e4020111::InclusionRewriter::IncludedFile3404eeddc0SDimitry Andric IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType)
3504eeddc0SDimitry Andric : Id(Id), FileType(FileType) {}
360b57cec5SDimitry Andric };
370b57cec5SDimitry Andric Preprocessor &PP; ///< Used to find inclusion directives.
380b57cec5SDimitry Andric SourceManager &SM; ///< Used to read and manage source files.
390b57cec5SDimitry Andric raw_ostream &OS; ///< The destination stream for rewritten contents.
400b57cec5SDimitry Andric StringRef MainEOL; ///< The line ending marker to use.
41e8d8bef9SDimitry Andric llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines.
420b57cec5SDimitry Andric bool ShowLineMarkers; ///< Show #line markers.
430b57cec5SDimitry Andric bool UseLineDirectives; ///< Use of line directives or line markers.
440b57cec5SDimitry Andric /// Tracks where inclusions that change the file are found.
45e8d8bef9SDimitry Andric std::map<SourceLocation, IncludedFile> FileIncludes;
460b57cec5SDimitry Andric /// Tracks where inclusions that import modules are found.
47e8d8bef9SDimitry Andric std::map<SourceLocation, const Module *> ModuleIncludes;
480b57cec5SDimitry Andric /// Tracks where inclusions that enter modules (in a module build) are found.
49e8d8bef9SDimitry Andric std::map<SourceLocation, const Module *> ModuleEntryIncludes;
50a7dea167SDimitry Andric /// Tracks where #if and #elif directives get evaluated and whether to true.
51e8d8bef9SDimitry Andric std::map<SourceLocation, bool> IfConditions;
520b57cec5SDimitry Andric /// Used transitively for building up the FileIncludes mapping over the
530b57cec5SDimitry Andric /// various \c PPCallbacks callbacks.
540b57cec5SDimitry Andric SourceLocation LastInclusionLocation;
550b57cec5SDimitry Andric public:
560b57cec5SDimitry Andric InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,
570b57cec5SDimitry Andric bool UseLineDirectives);
5804eeddc0SDimitry Andric void Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
setPredefinesBuffer(const llvm::MemoryBufferRef & Buf)59e8d8bef9SDimitry Andric void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) {
600b57cec5SDimitry Andric PredefinesBuffer = Buf;
610b57cec5SDimitry Andric }
620b57cec5SDimitry Andric void detectMainFileEOL();
handleModuleBegin(Token & Tok)630b57cec5SDimitry Andric void handleModuleBegin(Token &Tok) {
640b57cec5SDimitry Andric assert(Tok.getKind() == tok::annot_module_begin);
65e8d8bef9SDimitry Andric ModuleEntryIncludes.insert(
66e8d8bef9SDimitry Andric {Tok.getLocation(), (Module *)Tok.getAnnotationValue()});
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric private:
690b57cec5SDimitry Andric void FileChanged(SourceLocation Loc, FileChangeReason Reason,
700b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType,
710b57cec5SDimitry Andric FileID PrevFID) override;
72a7dea167SDimitry Andric void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
730b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType) override;
740b57cec5SDimitry Andric void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
750b57cec5SDimitry Andric StringRef FileName, bool IsAngled,
7681ad6265SDimitry Andric CharSourceRange FilenameRange,
77bdd1243dSDimitry Andric OptionalFileEntryRef File, StringRef SearchPath,
78*0fca6ea1SDimitry Andric StringRef RelativePath, const Module *SuggestedModule,
79*0fca6ea1SDimitry Andric bool ModuleImported,
800b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType) override;
81a7dea167SDimitry Andric void If(SourceLocation Loc, SourceRange ConditionRange,
82a7dea167SDimitry Andric ConditionValueKind ConditionValue) override;
83a7dea167SDimitry Andric void Elif(SourceLocation Loc, SourceRange ConditionRange,
84a7dea167SDimitry Andric ConditionValueKind ConditionValue, SourceLocation IfLoc) override;
850b57cec5SDimitry Andric void WriteLineInfo(StringRef Filename, int Line,
860b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType,
870b57cec5SDimitry Andric StringRef Extra = StringRef());
880b57cec5SDimitry Andric void WriteImplicitModuleImport(const Module *Mod);
89e8d8bef9SDimitry Andric void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom,
90e8d8bef9SDimitry Andric unsigned WriteTo, StringRef EOL, int &lines,
910b57cec5SDimitry Andric bool EnsureNewline);
920b57cec5SDimitry Andric void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
93e8d8bef9SDimitry Andric const MemoryBufferRef &FromFile, StringRef EOL,
945f757f3fSDimitry Andric unsigned &NextToWrite, int &Lines,
955f757f3fSDimitry Andric const IncludedFile *Inc = nullptr);
960b57cec5SDimitry Andric const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;
975f757f3fSDimitry Andric StringRef getIncludedFileName(const IncludedFile *Inc) const;
980b57cec5SDimitry Andric const Module *FindModuleAtLocation(SourceLocation Loc) const;
990b57cec5SDimitry Andric const Module *FindEnteredModule(SourceLocation Loc) const;
100a7dea167SDimitry Andric bool IsIfAtLocationTrue(SourceLocation Loc) const;
1010b57cec5SDimitry Andric StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
1020b57cec5SDimitry Andric };
1030b57cec5SDimitry Andric
1040b57cec5SDimitry Andric } // end anonymous namespace
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
InclusionRewriter(Preprocessor & PP,raw_ostream & OS,bool ShowLineMarkers,bool UseLineDirectives)1070b57cec5SDimitry Andric InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
1080b57cec5SDimitry Andric bool ShowLineMarkers,
1090b57cec5SDimitry Andric bool UseLineDirectives)
1100b57cec5SDimitry Andric : PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),
111e8d8bef9SDimitry Andric ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives),
1120b57cec5SDimitry Andric LastInclusionLocation(SourceLocation()) {}
1130b57cec5SDimitry Andric
1140b57cec5SDimitry Andric /// Write appropriate line information as either #line directives or GNU line
1150b57cec5SDimitry Andric /// markers depending on what mode we're in, including the \p Filename and
1160b57cec5SDimitry Andric /// \p Line we are located at, using the specified \p EOL line separator, and
1170b57cec5SDimitry Andric /// any \p Extra context specifiers in GNU line directives.
WriteLineInfo(StringRef Filename,int Line,SrcMgr::CharacteristicKind FileType,StringRef Extra)1180b57cec5SDimitry Andric void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,
1190b57cec5SDimitry Andric SrcMgr::CharacteristicKind FileType,
1200b57cec5SDimitry Andric StringRef Extra) {
1210b57cec5SDimitry Andric if (!ShowLineMarkers)
1220b57cec5SDimitry Andric return;
1230b57cec5SDimitry Andric if (UseLineDirectives) {
1240b57cec5SDimitry Andric OS << "#line" << ' ' << Line << ' ' << '"';
1250b57cec5SDimitry Andric OS.write_escaped(Filename);
1260b57cec5SDimitry Andric OS << '"';
1270b57cec5SDimitry Andric } else {
1280b57cec5SDimitry Andric // Use GNU linemarkers as described here:
1290b57cec5SDimitry Andric // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
1300b57cec5SDimitry Andric OS << '#' << ' ' << Line << ' ' << '"';
1310b57cec5SDimitry Andric OS.write_escaped(Filename);
1320b57cec5SDimitry Andric OS << '"';
1330b57cec5SDimitry Andric if (!Extra.empty())
1340b57cec5SDimitry Andric OS << Extra;
1350b57cec5SDimitry Andric if (FileType == SrcMgr::C_System)
1360b57cec5SDimitry Andric // "`3' This indicates that the following text comes from a system header
1370b57cec5SDimitry Andric // file, so certain warnings should be suppressed."
1380b57cec5SDimitry Andric OS << " 3";
1390b57cec5SDimitry Andric else if (FileType == SrcMgr::C_ExternCSystem)
1400b57cec5SDimitry Andric // as above for `3', plus "`4' This indicates that the following text
1410b57cec5SDimitry Andric // should be treated as being wrapped in an implicit extern "C" block."
1420b57cec5SDimitry Andric OS << " 3 4";
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric OS << MainEOL;
1450b57cec5SDimitry Andric }
1460b57cec5SDimitry Andric
WriteImplicitModuleImport(const Module * Mod)1470b57cec5SDimitry Andric void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {
1480b57cec5SDimitry Andric OS << "#pragma clang module import " << Mod->getFullModuleName(true)
1490b57cec5SDimitry Andric << " /* clang -frewrite-includes: implicit import */" << MainEOL;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric
1520b57cec5SDimitry Andric /// FileChanged - Whenever the preprocessor enters or exits a #include file
1530b57cec5SDimitry Andric /// it invokes this handler.
FileChanged(SourceLocation Loc,FileChangeReason Reason,SrcMgr::CharacteristicKind NewFileType,FileID)1540b57cec5SDimitry Andric void InclusionRewriter::FileChanged(SourceLocation Loc,
1550b57cec5SDimitry Andric FileChangeReason Reason,
1560b57cec5SDimitry Andric SrcMgr::CharacteristicKind NewFileType,
1570b57cec5SDimitry Andric FileID) {
1580b57cec5SDimitry Andric if (Reason != EnterFile)
1590b57cec5SDimitry Andric return;
1600b57cec5SDimitry Andric if (LastInclusionLocation.isInvalid())
1610b57cec5SDimitry Andric // we didn't reach this file (eg: the main file) via an inclusion directive
1620b57cec5SDimitry Andric return;
1630b57cec5SDimitry Andric FileID Id = FullSourceLoc(Loc, SM).getFileID();
1640b57cec5SDimitry Andric auto P = FileIncludes.insert(
16504eeddc0SDimitry Andric std::make_pair(LastInclusionLocation, IncludedFile(Id, NewFileType)));
1660b57cec5SDimitry Andric (void)P;
1670b57cec5SDimitry Andric assert(P.second && "Unexpected revisitation of the same include directive");
1680b57cec5SDimitry Andric LastInclusionLocation = SourceLocation();
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric
1710b57cec5SDimitry Andric /// Called whenever an inclusion is skipped due to canonical header protection
1720b57cec5SDimitry Andric /// macros.
FileSkipped(const FileEntryRef &,const Token &,SrcMgr::CharacteristicKind)173a7dea167SDimitry Andric void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
1740b57cec5SDimitry Andric const Token & /*FilenameTok*/,
1750b57cec5SDimitry Andric SrcMgr::CharacteristicKind /*FileType*/) {
1760b57cec5SDimitry Andric assert(LastInclusionLocation.isValid() &&
1770b57cec5SDimitry Andric "A file, that wasn't found via an inclusion directive, was skipped");
1780b57cec5SDimitry Andric LastInclusionLocation = SourceLocation();
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
1810b57cec5SDimitry Andric /// This should be called whenever the preprocessor encounters include
1820b57cec5SDimitry Andric /// directives. It does not say whether the file has been included, but it
1830b57cec5SDimitry Andric /// provides more information about the directive (hash location instead
1840b57cec5SDimitry Andric /// of location inside the included file). It is assumed that the matching
1850b57cec5SDimitry Andric /// FileChanged() or FileSkipped() is called after this (or neither is
1860b57cec5SDimitry Andric /// called if this #include results in an error or does not textually include
1870b57cec5SDimitry Andric /// anything).
InclusionDirective(SourceLocation HashLoc,const Token &,StringRef,bool,CharSourceRange,OptionalFileEntryRef,StringRef,StringRef,const Module * SuggestedModule,bool ModuleImported,SrcMgr::CharacteristicKind FileType)188bdd1243dSDimitry Andric void InclusionRewriter::InclusionDirective(
189bdd1243dSDimitry Andric SourceLocation HashLoc, const Token & /*IncludeTok*/,
190bdd1243dSDimitry Andric StringRef /*FileName*/, bool /*IsAngled*/,
191bdd1243dSDimitry Andric CharSourceRange /*FilenameRange*/, OptionalFileEntryRef /*File*/,
192bdd1243dSDimitry Andric StringRef /*SearchPath*/, StringRef /*RelativePath*/,
193*0fca6ea1SDimitry Andric const Module *SuggestedModule, bool ModuleImported,
194*0fca6ea1SDimitry Andric SrcMgr::CharacteristicKind FileType) {
195*0fca6ea1SDimitry Andric if (ModuleImported) {
196*0fca6ea1SDimitry Andric auto P = ModuleIncludes.insert(std::make_pair(HashLoc, SuggestedModule));
1970b57cec5SDimitry Andric (void)P;
1980b57cec5SDimitry Andric assert(P.second && "Unexpected revisitation of the same include directive");
1990b57cec5SDimitry Andric } else
2000b57cec5SDimitry Andric LastInclusionLocation = HashLoc;
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric
If(SourceLocation Loc,SourceRange ConditionRange,ConditionValueKind ConditionValue)203a7dea167SDimitry Andric void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange,
204a7dea167SDimitry Andric ConditionValueKind ConditionValue) {
205e8d8bef9SDimitry Andric auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
206a7dea167SDimitry Andric (void)P;
207a7dea167SDimitry Andric assert(P.second && "Unexpected revisitation of the same if directive");
208a7dea167SDimitry Andric }
209a7dea167SDimitry Andric
Elif(SourceLocation Loc,SourceRange ConditionRange,ConditionValueKind ConditionValue,SourceLocation IfLoc)210a7dea167SDimitry Andric void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange,
211a7dea167SDimitry Andric ConditionValueKind ConditionValue,
212a7dea167SDimitry Andric SourceLocation IfLoc) {
213e8d8bef9SDimitry Andric auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));
214a7dea167SDimitry Andric (void)P;
215a7dea167SDimitry Andric assert(P.second && "Unexpected revisitation of the same elif directive");
216a7dea167SDimitry Andric }
217a7dea167SDimitry Andric
2180b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2190b57cec5SDimitry Andric /// an inclusion directive) in the map of inclusion information, FileChanges.
2200b57cec5SDimitry Andric const InclusionRewriter::IncludedFile *
FindIncludeAtLocation(SourceLocation Loc) const2210b57cec5SDimitry Andric InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {
222e8d8bef9SDimitry Andric const auto I = FileIncludes.find(Loc);
2230b57cec5SDimitry Andric if (I != FileIncludes.end())
2240b57cec5SDimitry Andric return &I->second;
2250b57cec5SDimitry Andric return nullptr;
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2290b57cec5SDimitry Andric /// an inclusion directive) in the map of module inclusion information.
2300b57cec5SDimitry Andric const Module *
FindModuleAtLocation(SourceLocation Loc) const2310b57cec5SDimitry Andric InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {
232e8d8bef9SDimitry Andric const auto I = ModuleIncludes.find(Loc);
2330b57cec5SDimitry Andric if (I != ModuleIncludes.end())
2340b57cec5SDimitry Andric return I->second;
2350b57cec5SDimitry Andric return nullptr;
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric /// Simple lookup for a SourceLocation (specifically one denoting the hash in
2390b57cec5SDimitry Andric /// an inclusion directive) in the map of module entry information.
2400b57cec5SDimitry Andric const Module *
FindEnteredModule(SourceLocation Loc) const2410b57cec5SDimitry Andric InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {
242e8d8bef9SDimitry Andric const auto I = ModuleEntryIncludes.find(Loc);
2430b57cec5SDimitry Andric if (I != ModuleEntryIncludes.end())
2440b57cec5SDimitry Andric return I->second;
2450b57cec5SDimitry Andric return nullptr;
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric
IsIfAtLocationTrue(SourceLocation Loc) const248a7dea167SDimitry Andric bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
249e8d8bef9SDimitry Andric const auto I = IfConditions.find(Loc);
250a7dea167SDimitry Andric if (I != IfConditions.end())
251a7dea167SDimitry Andric return I->second;
252a7dea167SDimitry Andric return false;
253a7dea167SDimitry Andric }
254a7dea167SDimitry Andric
detectMainFileEOL()2550b57cec5SDimitry Andric void InclusionRewriter::detectMainFileEOL() {
256bdd1243dSDimitry Andric std::optional<MemoryBufferRef> FromFile =
257bdd1243dSDimitry Andric *SM.getBufferOrNone(SM.getMainFileID());
258e8d8bef9SDimitry Andric assert(FromFile);
259e8d8bef9SDimitry Andric if (!FromFile)
2600b57cec5SDimitry Andric return; // Should never happen, but whatever.
26104eeddc0SDimitry Andric MainEOL = FromFile->getBuffer().detectEOL();
2620b57cec5SDimitry Andric }
2630b57cec5SDimitry Andric
2640b57cec5SDimitry Andric /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
2650b57cec5SDimitry Andric /// \p WriteTo - 1.
OutputContentUpTo(const MemoryBufferRef & FromFile,unsigned & WriteFrom,unsigned WriteTo,StringRef LocalEOL,int & Line,bool EnsureNewline)266e8d8bef9SDimitry Andric void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile,
2670b57cec5SDimitry Andric unsigned &WriteFrom, unsigned WriteTo,
2680b57cec5SDimitry Andric StringRef LocalEOL, int &Line,
2690b57cec5SDimitry Andric bool EnsureNewline) {
2700b57cec5SDimitry Andric if (WriteTo <= WriteFrom)
2710b57cec5SDimitry Andric return;
272e8d8bef9SDimitry Andric if (FromFile == PredefinesBuffer) {
2730b57cec5SDimitry Andric // Ignore the #defines of the predefines buffer.
2740b57cec5SDimitry Andric WriteFrom = WriteTo;
2750b57cec5SDimitry Andric return;
2760b57cec5SDimitry Andric }
2770b57cec5SDimitry Andric
2780b57cec5SDimitry Andric // If we would output half of a line ending, advance one character to output
2790b57cec5SDimitry Andric // the whole line ending. All buffers are null terminated, so looking ahead
2800b57cec5SDimitry Andric // one byte is safe.
2810b57cec5SDimitry Andric if (LocalEOL.size() == 2 &&
2820b57cec5SDimitry Andric LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&
2830b57cec5SDimitry Andric LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])
2840b57cec5SDimitry Andric WriteTo++;
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,
2870b57cec5SDimitry Andric WriteTo - WriteFrom);
288bdd1243dSDimitry Andric // count lines manually, it's faster than getPresumedLoc()
289bdd1243dSDimitry Andric Line += TextToWrite.count(LocalEOL);
2900b57cec5SDimitry Andric
2910b57cec5SDimitry Andric if (MainEOL == LocalEOL) {
2920b57cec5SDimitry Andric OS << TextToWrite;
2930b57cec5SDimitry Andric } else {
2940b57cec5SDimitry Andric // Output the file one line at a time, rewriting the line endings as we go.
2950b57cec5SDimitry Andric StringRef Rest = TextToWrite;
2960b57cec5SDimitry Andric while (!Rest.empty()) {
297bdd1243dSDimitry Andric // Identify and output the next line excluding an EOL sequence if present.
298bdd1243dSDimitry Andric size_t Idx = Rest.find(LocalEOL);
299bdd1243dSDimitry Andric StringRef LineText = Rest.substr(0, Idx);
3000b57cec5SDimitry Andric OS << LineText;
301bdd1243dSDimitry Andric if (Idx != StringRef::npos) {
302bdd1243dSDimitry Andric // An EOL sequence was present, output the EOL sequence for the
303bdd1243dSDimitry Andric // main source file and skip past the local EOL sequence.
3040b57cec5SDimitry Andric OS << MainEOL;
305bdd1243dSDimitry Andric Idx += LocalEOL.size();
3060b57cec5SDimitry Andric }
307bdd1243dSDimitry Andric // Strip the line just handled. If Idx is npos or matches the end of the
308bdd1243dSDimitry Andric // text, Rest will be set to an empty string and the loop will terminate.
309bdd1243dSDimitry Andric Rest = Rest.substr(Idx);
310bdd1243dSDimitry Andric }
311bdd1243dSDimitry Andric }
3125f757f3fSDimitry Andric if (EnsureNewline && !TextToWrite.ends_with(LocalEOL))
3130b57cec5SDimitry Andric OS << MainEOL;
314bdd1243dSDimitry Andric
3150b57cec5SDimitry Andric WriteFrom = WriteTo;
3160b57cec5SDimitry Andric }
3170b57cec5SDimitry Andric
3185f757f3fSDimitry Andric StringRef
getIncludedFileName(const IncludedFile * Inc) const3195f757f3fSDimitry Andric InclusionRewriter::getIncludedFileName(const IncludedFile *Inc) const {
3205f757f3fSDimitry Andric if (Inc) {
3215f757f3fSDimitry Andric auto B = SM.getBufferOrNone(Inc->Id);
3225f757f3fSDimitry Andric assert(B && "Attempting to process invalid inclusion");
3235f757f3fSDimitry Andric if (B)
3245f757f3fSDimitry Andric return llvm::sys::path::filename(B->getBufferIdentifier());
3255f757f3fSDimitry Andric }
3265f757f3fSDimitry Andric return StringRef();
3275f757f3fSDimitry Andric }
3285f757f3fSDimitry Andric
3290b57cec5SDimitry Andric /// Print characters from \p FromFile starting at \p NextToWrite up until the
3300b57cec5SDimitry Andric /// inclusion directive at \p StartToken, then print out the inclusion
3310b57cec5SDimitry Andric /// inclusion directive disabled by a #if directive, updating \p NextToWrite
3320b57cec5SDimitry Andric /// and \p Line to track the number of source lines visited and the progress
3330b57cec5SDimitry Andric /// through the \p FromFile buffer.
CommentOutDirective(Lexer & DirectiveLex,const Token & StartToken,const MemoryBufferRef & FromFile,StringRef LocalEOL,unsigned & NextToWrite,int & Line,const IncludedFile * Inc)3340b57cec5SDimitry Andric void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
3350b57cec5SDimitry Andric const Token &StartToken,
336e8d8bef9SDimitry Andric const MemoryBufferRef &FromFile,
3370b57cec5SDimitry Andric StringRef LocalEOL,
3385f757f3fSDimitry Andric unsigned &NextToWrite, int &Line,
3395f757f3fSDimitry Andric const IncludedFile *Inc) {
3400b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
3410b57cec5SDimitry Andric SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,
3420b57cec5SDimitry Andric false);
3430b57cec5SDimitry Andric Token DirectiveToken;
3440b57cec5SDimitry Andric do {
3450b57cec5SDimitry Andric DirectiveLex.LexFromRawLexer(DirectiveToken);
3460b57cec5SDimitry Andric } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
347e8d8bef9SDimitry Andric if (FromFile == PredefinesBuffer) {
3480b57cec5SDimitry Andric // OutputContentUpTo() would not output anything anyway.
3490b57cec5SDimitry Andric return;
3500b57cec5SDimitry Andric }
3515f757f3fSDimitry Andric if (Inc) {
3525f757f3fSDimitry Andric OS << "#if defined(__CLANG_REWRITTEN_INCLUDES) ";
3535f757f3fSDimitry Andric if (isSystem(Inc->FileType))
3545f757f3fSDimitry Andric OS << "|| defined(__CLANG_REWRITTEN_SYSTEM_INCLUDES) ";
3555f757f3fSDimitry Andric OS << "/* " << getIncludedFileName(Inc);
3565f757f3fSDimitry Andric } else {
3575f757f3fSDimitry Andric OS << "#if 0 /*";
3585f757f3fSDimitry Andric }
3595f757f3fSDimitry Andric OS << " expanded by -frewrite-includes */" << MainEOL;
3600b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
3610b57cec5SDimitry Andric SM.getFileOffset(DirectiveToken.getLocation()) +
3620b57cec5SDimitry Andric DirectiveToken.getLength(),
3630b57cec5SDimitry Andric LocalEOL, Line, true);
3645f757f3fSDimitry Andric OS << (Inc ? "#else /* " : "#endif /*") << getIncludedFileName(Inc)
3655f757f3fSDimitry Andric << " expanded by -frewrite-includes */" << MainEOL;
3660b57cec5SDimitry Andric }
3670b57cec5SDimitry Andric
3680b57cec5SDimitry Andric /// Find the next identifier in the pragma directive specified by \p RawToken.
NextIdentifierName(Lexer & RawLex,Token & RawToken)3690b57cec5SDimitry Andric StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
3700b57cec5SDimitry Andric Token &RawToken) {
3710b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken);
3720b57cec5SDimitry Andric if (RawToken.is(tok::raw_identifier))
3730b57cec5SDimitry Andric PP.LookUpIdentifierInfo(RawToken);
3740b57cec5SDimitry Andric if (RawToken.is(tok::identifier))
3750b57cec5SDimitry Andric return RawToken.getIdentifierInfo()->getName();
3760b57cec5SDimitry Andric return StringRef();
3770b57cec5SDimitry Andric }
3780b57cec5SDimitry Andric
3790b57cec5SDimitry Andric /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
3800b57cec5SDimitry Andric /// and including content of included files recursively.
Process(FileID FileId,SrcMgr::CharacteristicKind FileType)3810b57cec5SDimitry Andric void InclusionRewriter::Process(FileID FileId,
38204eeddc0SDimitry Andric SrcMgr::CharacteristicKind FileType) {
383e8d8bef9SDimitry Andric MemoryBufferRef FromFile;
384e8d8bef9SDimitry Andric {
385e8d8bef9SDimitry Andric auto B = SM.getBufferOrNone(FileId);
386e8d8bef9SDimitry Andric assert(B && "Attempting to process invalid inclusion");
387e8d8bef9SDimitry Andric if (B)
388e8d8bef9SDimitry Andric FromFile = *B;
389e8d8bef9SDimitry Andric }
3900b57cec5SDimitry Andric StringRef FileName = FromFile.getBufferIdentifier();
391e8d8bef9SDimitry Andric Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
3920b57cec5SDimitry Andric RawLex.SetCommentRetentionState(false);
3930b57cec5SDimitry Andric
39404eeddc0SDimitry Andric StringRef LocalEOL = FromFile.getBuffer().detectEOL();
3950b57cec5SDimitry Andric
3960b57cec5SDimitry Andric // Per the GNU docs: "1" indicates entering a new file.
3970b57cec5SDimitry Andric if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
3980b57cec5SDimitry Andric WriteLineInfo(FileName, 1, FileType, "");
3990b57cec5SDimitry Andric else
4000b57cec5SDimitry Andric WriteLineInfo(FileName, 1, FileType, " 1");
4010b57cec5SDimitry Andric
4020b57cec5SDimitry Andric if (SM.getFileIDSize(FileId) == 0)
4030b57cec5SDimitry Andric return;
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric // The next byte to be copied from the source file, which may be non-zero if
4060b57cec5SDimitry Andric // the lexer handled a BOM.
4070b57cec5SDimitry Andric unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
4080b57cec5SDimitry Andric assert(SM.getLineNumber(FileId, NextToWrite) == 1);
4090b57cec5SDimitry Andric int Line = 1; // The current input file line number.
4100b57cec5SDimitry Andric
4110b57cec5SDimitry Andric Token RawToken;
4120b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken);
4130b57cec5SDimitry Andric
4140b57cec5SDimitry Andric // TODO: Consider adding a switch that strips possibly unimportant content,
4150b57cec5SDimitry Andric // such as comments, to reduce the size of repro files.
4160b57cec5SDimitry Andric while (RawToken.isNot(tok::eof)) {
4170b57cec5SDimitry Andric if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
4180b57cec5SDimitry Andric RawLex.setParsingPreprocessorDirective(true);
4190b57cec5SDimitry Andric Token HashToken = RawToken;
4200b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken);
4210b57cec5SDimitry Andric if (RawToken.is(tok::raw_identifier))
4220b57cec5SDimitry Andric PP.LookUpIdentifierInfo(RawToken);
4230b57cec5SDimitry Andric if (RawToken.getIdentifierInfo() != nullptr) {
4240b57cec5SDimitry Andric switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
4250b57cec5SDimitry Andric case tok::pp_include:
4260b57cec5SDimitry Andric case tok::pp_include_next:
4270b57cec5SDimitry Andric case tok::pp_import: {
4285f757f3fSDimitry Andric SourceLocation Loc = HashToken.getLocation();
4295f757f3fSDimitry Andric const IncludedFile *Inc = FindIncludeAtLocation(Loc);
4305f757f3fSDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
4315f757f3fSDimitry Andric NextToWrite, Line, Inc);
4320b57cec5SDimitry Andric if (FileId != PP.getPredefinesFileID())
4330b57cec5SDimitry Andric WriteLineInfo(FileName, Line - 1, FileType, "");
4340b57cec5SDimitry Andric StringRef LineInfoExtra;
4350b57cec5SDimitry Andric if (const Module *Mod = FindModuleAtLocation(Loc))
4360b57cec5SDimitry Andric WriteImplicitModuleImport(Mod);
4375f757f3fSDimitry Andric else if (Inc) {
4380b57cec5SDimitry Andric const Module *Mod = FindEnteredModule(Loc);
4390b57cec5SDimitry Andric if (Mod)
4400b57cec5SDimitry Andric OS << "#pragma clang module begin "
4410b57cec5SDimitry Andric << Mod->getFullModuleName(true) << "\n";
4420b57cec5SDimitry Andric
4430b57cec5SDimitry Andric // Include and recursively process the file.
44404eeddc0SDimitry Andric Process(Inc->Id, Inc->FileType);
4450b57cec5SDimitry Andric
4460b57cec5SDimitry Andric if (Mod)
4470b57cec5SDimitry Andric OS << "#pragma clang module end /*"
4480b57cec5SDimitry Andric << Mod->getFullModuleName(true) << "*/\n";
4495f757f3fSDimitry Andric // There's no #include, therefore no #if, for -include files.
4505f757f3fSDimitry Andric if (FromFile != PredefinesBuffer) {
4515f757f3fSDimitry Andric OS << "#endif /* " << getIncludedFileName(Inc)
4525f757f3fSDimitry Andric << " expanded by -frewrite-includes */" << LocalEOL;
4535f757f3fSDimitry Andric }
4540b57cec5SDimitry Andric
4550b57cec5SDimitry Andric // Add line marker to indicate we're returning from an included
4560b57cec5SDimitry Andric // file.
4570b57cec5SDimitry Andric LineInfoExtra = " 2";
4580b57cec5SDimitry Andric }
4590b57cec5SDimitry Andric // fix up lineinfo (since commented out directive changed line
4600b57cec5SDimitry Andric // numbers) for inclusions that were skipped due to header guards
4610b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType, LineInfoExtra);
4620b57cec5SDimitry Andric break;
4630b57cec5SDimitry Andric }
4640b57cec5SDimitry Andric case tok::pp_pragma: {
4650b57cec5SDimitry Andric StringRef Identifier = NextIdentifierName(RawLex, RawToken);
4660b57cec5SDimitry Andric if (Identifier == "clang" || Identifier == "GCC") {
4670b57cec5SDimitry Andric if (NextIdentifierName(RawLex, RawToken) == "system_header") {
4680b57cec5SDimitry Andric // keep the directive in, commented out
4690b57cec5SDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
4700b57cec5SDimitry Andric NextToWrite, Line);
4710b57cec5SDimitry Andric // update our own type
4720b57cec5SDimitry Andric FileType = SM.getFileCharacteristic(RawToken.getLocation());
4730b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType);
4740b57cec5SDimitry Andric }
4750b57cec5SDimitry Andric } else if (Identifier == "once") {
4760b57cec5SDimitry Andric // keep the directive in, commented out
4770b57cec5SDimitry Andric CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,
4780b57cec5SDimitry Andric NextToWrite, Line);
4790b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType);
4800b57cec5SDimitry Andric }
4810b57cec5SDimitry Andric break;
4820b57cec5SDimitry Andric }
4830b57cec5SDimitry Andric case tok::pp_if:
4840b57cec5SDimitry Andric case tok::pp_elif: {
4850b57cec5SDimitry Andric bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
4860b57cec5SDimitry Andric tok::pp_elif);
487a7dea167SDimitry Andric bool isTrue = IsIfAtLocationTrue(RawToken.getLocation());
4880b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
489a7dea167SDimitry Andric SM.getFileOffset(HashToken.getLocation()),
490a7dea167SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/true);
491a7dea167SDimitry Andric do {
492a7dea167SDimitry Andric RawLex.LexFromRawLexer(RawToken);
493a7dea167SDimitry Andric } while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof));
494a7dea167SDimitry Andric // We need to disable the old condition, but that is tricky.
495a7dea167SDimitry Andric // Trying to comment it out can easily lead to comment nesting.
496a7dea167SDimitry Andric // So instead make the condition harmless by making it enclose
497a7dea167SDimitry Andric // an empty block. Moreover, put it itself inside an #if 0 block
498a7dea167SDimitry Andric // to disable it from getting evaluated (e.g. __has_include_next
499a7dea167SDimitry Andric // warns if used from the primary source file).
500a7dea167SDimitry Andric OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL;
5010b57cec5SDimitry Andric if (elif) {
502a7dea167SDimitry Andric OS << "#if 0" << MainEOL;
503a7dea167SDimitry Andric }
5040b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
5050b57cec5SDimitry Andric SM.getFileOffset(RawToken.getLocation()) +
5060b57cec5SDimitry Andric RawToken.getLength(),
5070b57cec5SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/true);
508a7dea167SDimitry Andric // Close the empty block and the disabling block.
509a7dea167SDimitry Andric OS << "#endif" << MainEOL;
510a7dea167SDimitry Andric OS << "#endif /* disabled by -frewrite-includes */" << MainEOL;
511a7dea167SDimitry Andric OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0")
512a7dea167SDimitry Andric << " /* evaluated by -frewrite-includes */" << MainEOL;
5130b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType);
5140b57cec5SDimitry Andric break;
5150b57cec5SDimitry Andric }
5160b57cec5SDimitry Andric case tok::pp_endif:
5170b57cec5SDimitry Andric case tok::pp_else: {
5180b57cec5SDimitry Andric // We surround every #include by #if 0 to comment it out, but that
5190b57cec5SDimitry Andric // changes line numbers. These are fixed up right after that, but
5200b57cec5SDimitry Andric // the whole #include could be inside a preprocessor conditional
5210b57cec5SDimitry Andric // that is not processed. So it is necessary to fix the line
5220b57cec5SDimitry Andric // numbers on the next line after each #else/#endif as well.
5230b57cec5SDimitry Andric RawLex.SetKeepWhitespaceMode(true);
5240b57cec5SDimitry Andric do {
5250b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken);
5260b57cec5SDimitry Andric } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
5270b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
5280b57cec5SDimitry Andric SM.getFileOffset(RawToken.getLocation()) +
5290b57cec5SDimitry Andric RawToken.getLength(),
5300b57cec5SDimitry Andric LocalEOL, Line, /*EnsureNewline=*/ true);
5310b57cec5SDimitry Andric WriteLineInfo(FileName, Line, FileType);
5320b57cec5SDimitry Andric RawLex.SetKeepWhitespaceMode(false);
5330b57cec5SDimitry Andric break;
5340b57cec5SDimitry Andric }
5350b57cec5SDimitry Andric default:
5360b57cec5SDimitry Andric break;
5370b57cec5SDimitry Andric }
5380b57cec5SDimitry Andric }
5390b57cec5SDimitry Andric RawLex.setParsingPreprocessorDirective(false);
5400b57cec5SDimitry Andric }
5410b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawToken);
5420b57cec5SDimitry Andric }
5430b57cec5SDimitry Andric OutputContentUpTo(FromFile, NextToWrite,
5440b57cec5SDimitry Andric SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,
5450b57cec5SDimitry Andric Line, /*EnsureNewline=*/true);
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric
5480b57cec5SDimitry Andric /// InclusionRewriterInInput - Implement -frewrite-includes mode.
RewriteIncludesInInput(Preprocessor & PP,raw_ostream * OS,const PreprocessorOutputOptions & Opts)5490b57cec5SDimitry Andric void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
5500b57cec5SDimitry Andric const PreprocessorOutputOptions &Opts) {
5510b57cec5SDimitry Andric SourceManager &SM = PP.getSourceManager();
5520b57cec5SDimitry Andric InclusionRewriter *Rewrite = new InclusionRewriter(
5530b57cec5SDimitry Andric PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);
5540b57cec5SDimitry Andric Rewrite->detectMainFileEOL();
5550b57cec5SDimitry Andric
5560b57cec5SDimitry Andric PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));
5570b57cec5SDimitry Andric PP.IgnorePragmas();
5580b57cec5SDimitry Andric
5590b57cec5SDimitry Andric // First let the preprocessor process the entire file and call callbacks.
5600b57cec5SDimitry Andric // Callbacks will record which #include's were actually performed.
5610b57cec5SDimitry Andric PP.EnterMainSourceFile();
5620b57cec5SDimitry Andric Token Tok;
5630b57cec5SDimitry Andric // Only preprocessor directives matter here, so disable macro expansion
5640b57cec5SDimitry Andric // everywhere else as an optimization.
5650b57cec5SDimitry Andric // TODO: It would be even faster if the preprocessor could be switched
5660b57cec5SDimitry Andric // to a mode where it would parse only preprocessor directives and comments,
5670b57cec5SDimitry Andric // nothing else matters for parsing or processing.
5680b57cec5SDimitry Andric PP.SetMacroExpansionOnlyInDirectives();
5690b57cec5SDimitry Andric do {
5700b57cec5SDimitry Andric PP.Lex(Tok);
5710b57cec5SDimitry Andric if (Tok.is(tok::annot_module_begin))
5720b57cec5SDimitry Andric Rewrite->handleModuleBegin(Tok);
5730b57cec5SDimitry Andric } while (Tok.isNot(tok::eof));
574e8d8bef9SDimitry Andric Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID()));
57504eeddc0SDimitry Andric Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
57604eeddc0SDimitry Andric Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
5770b57cec5SDimitry Andric OS->flush();
5780b57cec5SDimitry Andric }
579