1*0b57cec5SDimitry Andric //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This code rewrites macro invocations into their expansions. This gives you 10*0b57cec5SDimitry Andric // a macro expanded file that retains comments and #includes. 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h" 15*0b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h" 16*0b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h" 17*0b57cec5SDimitry Andric #include "clang/Rewrite/Core/Rewriter.h" 18*0b57cec5SDimitry Andric #include "llvm/Support/Path.h" 19*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 20*0b57cec5SDimitry Andric #include <cstdio> 21*0b57cec5SDimitry Andric #include <memory> 22*0b57cec5SDimitry Andric 23*0b57cec5SDimitry Andric using namespace clang; 24*0b57cec5SDimitry Andric 25*0b57cec5SDimitry Andric /// isSameToken - Return true if the two specified tokens start have the same 26*0b57cec5SDimitry Andric /// content. 27*0b57cec5SDimitry Andric static bool isSameToken(Token &RawTok, Token &PPTok) { 28*0b57cec5SDimitry Andric // If two tokens have the same kind and the same identifier info, they are 29*0b57cec5SDimitry Andric // obviously the same. 30*0b57cec5SDimitry Andric if (PPTok.getKind() == RawTok.getKind() && 31*0b57cec5SDimitry Andric PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo()) 32*0b57cec5SDimitry Andric return true; 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric // Otherwise, if they are different but have the same identifier info, they 35*0b57cec5SDimitry Andric // are also considered to be the same. This allows keywords and raw lexed 36*0b57cec5SDimitry Andric // identifiers with the same name to be treated the same. 37*0b57cec5SDimitry Andric if (PPTok.getIdentifierInfo() && 38*0b57cec5SDimitry Andric PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo()) 39*0b57cec5SDimitry Andric return true; 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric return false; 42*0b57cec5SDimitry Andric } 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric /// GetNextRawTok - Return the next raw token in the stream, skipping over 46*0b57cec5SDimitry Andric /// comments if ReturnComment is false. 47*0b57cec5SDimitry Andric static const Token &GetNextRawTok(const std::vector<Token> &RawTokens, 48*0b57cec5SDimitry Andric unsigned &CurTok, bool ReturnComment) { 49*0b57cec5SDimitry Andric assert(CurTok < RawTokens.size() && "Overran eof!"); 50*0b57cec5SDimitry Andric 51*0b57cec5SDimitry Andric // If the client doesn't want comments and we have one, skip it. 52*0b57cec5SDimitry Andric if (!ReturnComment && RawTokens[CurTok].is(tok::comment)) 53*0b57cec5SDimitry Andric ++CurTok; 54*0b57cec5SDimitry Andric 55*0b57cec5SDimitry Andric return RawTokens[CurTok++]; 56*0b57cec5SDimitry Andric } 57*0b57cec5SDimitry Andric 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into 60*0b57cec5SDimitry Andric /// the specified vector. 61*0b57cec5SDimitry Andric static void LexRawTokensFromMainFile(Preprocessor &PP, 62*0b57cec5SDimitry Andric std::vector<Token> &RawTokens) { 63*0b57cec5SDimitry Andric SourceManager &SM = PP.getSourceManager(); 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric // Create a lexer to lex all the tokens of the main file in raw mode. Even 66*0b57cec5SDimitry Andric // though it is in raw mode, it will not return comments. 67*0b57cec5SDimitry Andric const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID()); 68*0b57cec5SDimitry Andric Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts()); 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric // Switch on comment lexing because we really do want them. 71*0b57cec5SDimitry Andric RawLex.SetCommentRetentionState(true); 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric Token RawTok; 74*0b57cec5SDimitry Andric do { 75*0b57cec5SDimitry Andric RawLex.LexFromRawLexer(RawTok); 76*0b57cec5SDimitry Andric 77*0b57cec5SDimitry Andric // If we have an identifier with no identifier info for our raw token, look 78*0b57cec5SDimitry Andric // up the identifier info. This is important for equality comparison of 79*0b57cec5SDimitry Andric // identifier tokens. 80*0b57cec5SDimitry Andric if (RawTok.is(tok::raw_identifier)) 81*0b57cec5SDimitry Andric PP.LookUpIdentifierInfo(RawTok); 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric RawTokens.push_back(RawTok); 84*0b57cec5SDimitry Andric } while (RawTok.isNot(tok::eof)); 85*0b57cec5SDimitry Andric } 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric 88*0b57cec5SDimitry Andric /// RewriteMacrosInInput - Implement -rewrite-macros mode. 89*0b57cec5SDimitry Andric void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) { 90*0b57cec5SDimitry Andric SourceManager &SM = PP.getSourceManager(); 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric Rewriter Rewrite; 93*0b57cec5SDimitry Andric Rewrite.setSourceMgr(SM, PP.getLangOpts()); 94*0b57cec5SDimitry Andric RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID()); 95*0b57cec5SDimitry Andric 96*0b57cec5SDimitry Andric std::vector<Token> RawTokens; 97*0b57cec5SDimitry Andric LexRawTokensFromMainFile(PP, RawTokens); 98*0b57cec5SDimitry Andric unsigned CurRawTok = 0; 99*0b57cec5SDimitry Andric Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false); 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric 102*0b57cec5SDimitry Andric // Get the first preprocessing token. 103*0b57cec5SDimitry Andric PP.EnterMainSourceFile(); 104*0b57cec5SDimitry Andric Token PPTok; 105*0b57cec5SDimitry Andric PP.Lex(PPTok); 106*0b57cec5SDimitry Andric 107*0b57cec5SDimitry Andric // Preprocess the input file in parallel with raw lexing the main file. Ignore 108*0b57cec5SDimitry Andric // all tokens that are preprocessed from a file other than the main file (e.g. 109*0b57cec5SDimitry Andric // a header). If we see tokens that are in the preprocessed file but not the 110*0b57cec5SDimitry Andric // lexed file, we have a macro expansion. If we see tokens in the lexed file 111*0b57cec5SDimitry Andric // that aren't in the preprocessed view, we have macros that expand to no 112*0b57cec5SDimitry Andric // tokens, or macro arguments etc. 113*0b57cec5SDimitry Andric while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) { 114*0b57cec5SDimitry Andric SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation()); 115*0b57cec5SDimitry Andric 116*0b57cec5SDimitry Andric // If PPTok is from a different source file, ignore it. 117*0b57cec5SDimitry Andric if (!SM.isWrittenInMainFile(PPLoc)) { 118*0b57cec5SDimitry Andric PP.Lex(PPTok); 119*0b57cec5SDimitry Andric continue; 120*0b57cec5SDimitry Andric } 121*0b57cec5SDimitry Andric 122*0b57cec5SDimitry Andric // If the raw file hits a preprocessor directive, they will be extra tokens 123*0b57cec5SDimitry Andric // in the raw file that don't exist in the preprocsesed file. However, we 124*0b57cec5SDimitry Andric // choose to preserve them in the output file and otherwise handle them 125*0b57cec5SDimitry Andric // specially. 126*0b57cec5SDimitry Andric if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) { 127*0b57cec5SDimitry Andric // If this is a #warning directive or #pragma mark (GNU extensions), 128*0b57cec5SDimitry Andric // comment the line out. 129*0b57cec5SDimitry Andric if (RawTokens[CurRawTok].is(tok::identifier)) { 130*0b57cec5SDimitry Andric const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo(); 131*0b57cec5SDimitry Andric if (II->getName() == "warning") { 132*0b57cec5SDimitry Andric // Comment out #warning. 133*0b57cec5SDimitry Andric RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//"); 134*0b57cec5SDimitry Andric } else if (II->getName() == "pragma" && 135*0b57cec5SDimitry Andric RawTokens[CurRawTok+1].is(tok::identifier) && 136*0b57cec5SDimitry Andric (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() == 137*0b57cec5SDimitry Andric "mark")) { 138*0b57cec5SDimitry Andric // Comment out #pragma mark. 139*0b57cec5SDimitry Andric RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//"); 140*0b57cec5SDimitry Andric } 141*0b57cec5SDimitry Andric } 142*0b57cec5SDimitry Andric 143*0b57cec5SDimitry Andric // Otherwise, if this is a #include or some other directive, just leave it 144*0b57cec5SDimitry Andric // in the file by skipping over the line. 145*0b57cec5SDimitry Andric RawTok = GetNextRawTok(RawTokens, CurRawTok, false); 146*0b57cec5SDimitry Andric while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof)) 147*0b57cec5SDimitry Andric RawTok = GetNextRawTok(RawTokens, CurRawTok, false); 148*0b57cec5SDimitry Andric continue; 149*0b57cec5SDimitry Andric } 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric // Okay, both tokens are from the same file. Get their offsets from the 152*0b57cec5SDimitry Andric // start of the file. 153*0b57cec5SDimitry Andric unsigned PPOffs = SM.getFileOffset(PPLoc); 154*0b57cec5SDimitry Andric unsigned RawOffs = SM.getFileOffset(RawTok.getLocation()); 155*0b57cec5SDimitry Andric 156*0b57cec5SDimitry Andric // If the offsets are the same and the token kind is the same, ignore them. 157*0b57cec5SDimitry Andric if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) { 158*0b57cec5SDimitry Andric RawTok = GetNextRawTok(RawTokens, CurRawTok, false); 159*0b57cec5SDimitry Andric PP.Lex(PPTok); 160*0b57cec5SDimitry Andric continue; 161*0b57cec5SDimitry Andric } 162*0b57cec5SDimitry Andric 163*0b57cec5SDimitry Andric // If the PP token is farther along than the raw token, something was 164*0b57cec5SDimitry Andric // deleted. Comment out the raw token. 165*0b57cec5SDimitry Andric if (RawOffs <= PPOffs) { 166*0b57cec5SDimitry Andric // Comment out a whole run of tokens instead of bracketing each one with 167*0b57cec5SDimitry Andric // comments. Add a leading space if RawTok didn't have one. 168*0b57cec5SDimitry Andric bool HasSpace = RawTok.hasLeadingSpace(); 169*0b57cec5SDimitry Andric RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]); 170*0b57cec5SDimitry Andric unsigned EndPos; 171*0b57cec5SDimitry Andric 172*0b57cec5SDimitry Andric do { 173*0b57cec5SDimitry Andric EndPos = RawOffs+RawTok.getLength(); 174*0b57cec5SDimitry Andric 175*0b57cec5SDimitry Andric RawTok = GetNextRawTok(RawTokens, CurRawTok, true); 176*0b57cec5SDimitry Andric RawOffs = SM.getFileOffset(RawTok.getLocation()); 177*0b57cec5SDimitry Andric 178*0b57cec5SDimitry Andric if (RawTok.is(tok::comment)) { 179*0b57cec5SDimitry Andric // Skip past the comment. 180*0b57cec5SDimitry Andric RawTok = GetNextRawTok(RawTokens, CurRawTok, false); 181*0b57cec5SDimitry Andric break; 182*0b57cec5SDimitry Andric } 183*0b57cec5SDimitry Andric 184*0b57cec5SDimitry Andric } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() && 185*0b57cec5SDimitry Andric (PPOffs != RawOffs || !isSameToken(RawTok, PPTok))); 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric RB.InsertTextBefore(EndPos, "*/"); 188*0b57cec5SDimitry Andric continue; 189*0b57cec5SDimitry Andric } 190*0b57cec5SDimitry Andric 191*0b57cec5SDimitry Andric // Otherwise, there was a replacement an expansion. Insert the new token 192*0b57cec5SDimitry Andric // in the output buffer. Insert the whole run of new tokens at once to get 193*0b57cec5SDimitry Andric // them in the right order. 194*0b57cec5SDimitry Andric unsigned InsertPos = PPOffs; 195*0b57cec5SDimitry Andric std::string Expansion; 196*0b57cec5SDimitry Andric while (PPOffs < RawOffs) { 197*0b57cec5SDimitry Andric Expansion += ' ' + PP.getSpelling(PPTok); 198*0b57cec5SDimitry Andric PP.Lex(PPTok); 199*0b57cec5SDimitry Andric PPLoc = SM.getExpansionLoc(PPTok.getLocation()); 200*0b57cec5SDimitry Andric PPOffs = SM.getFileOffset(PPLoc); 201*0b57cec5SDimitry Andric } 202*0b57cec5SDimitry Andric Expansion += ' '; 203*0b57cec5SDimitry Andric RB.InsertTextBefore(InsertPos, Expansion); 204*0b57cec5SDimitry Andric } 205*0b57cec5SDimitry Andric 206*0b57cec5SDimitry Andric // Get the buffer corresponding to MainFileID. If we haven't changed it, then 207*0b57cec5SDimitry Andric // we are done. 208*0b57cec5SDimitry Andric if (const RewriteBuffer *RewriteBuf = 209*0b57cec5SDimitry Andric Rewrite.getRewriteBufferFor(SM.getMainFileID())) { 210*0b57cec5SDimitry Andric //printf("Changed:\n"); 211*0b57cec5SDimitry Andric *OS << std::string(RewriteBuf->begin(), RewriteBuf->end()); 212*0b57cec5SDimitry Andric } else { 213*0b57cec5SDimitry Andric fprintf(stderr, "No changes\n"); 214*0b57cec5SDimitry Andric } 215*0b57cec5SDimitry Andric OS->flush(); 216*0b57cec5SDimitry Andric } 217