xref: /freebsd/contrib/llvm-project/clang/lib/Frontend/Rewrite/RewriteMacros.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric // This code rewrites macro invocations into their expansions.  This gives you
10*0b57cec5SDimitry Andric // a macro expanded file that retains comments and #includes.
11*0b57cec5SDimitry Andric //
12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h"
15*0b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
16*0b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
17*0b57cec5SDimitry Andric #include "clang/Rewrite/Core/Rewriter.h"
18*0b57cec5SDimitry Andric #include "llvm/Support/Path.h"
19*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
20*0b57cec5SDimitry Andric #include <cstdio>
21*0b57cec5SDimitry Andric #include <memory>
22*0b57cec5SDimitry Andric 
23*0b57cec5SDimitry Andric using namespace clang;
24*0b57cec5SDimitry Andric 
25*0b57cec5SDimitry Andric /// isSameToken - Return true if the two specified tokens start have the same
26*0b57cec5SDimitry Andric /// content.
27*0b57cec5SDimitry Andric static bool isSameToken(Token &RawTok, Token &PPTok) {
28*0b57cec5SDimitry Andric   // If two tokens have the same kind and the same identifier info, they are
29*0b57cec5SDimitry Andric   // obviously the same.
30*0b57cec5SDimitry Andric   if (PPTok.getKind() == RawTok.getKind() &&
31*0b57cec5SDimitry Andric       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
32*0b57cec5SDimitry Andric     return true;
33*0b57cec5SDimitry Andric 
34*0b57cec5SDimitry Andric   // Otherwise, if they are different but have the same identifier info, they
35*0b57cec5SDimitry Andric   // are also considered to be the same.  This allows keywords and raw lexed
36*0b57cec5SDimitry Andric   // identifiers with the same name to be treated the same.
37*0b57cec5SDimitry Andric   if (PPTok.getIdentifierInfo() &&
38*0b57cec5SDimitry Andric       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
39*0b57cec5SDimitry Andric     return true;
40*0b57cec5SDimitry Andric 
41*0b57cec5SDimitry Andric   return false;
42*0b57cec5SDimitry Andric }
43*0b57cec5SDimitry Andric 
44*0b57cec5SDimitry Andric 
45*0b57cec5SDimitry Andric /// GetNextRawTok - Return the next raw token in the stream, skipping over
46*0b57cec5SDimitry Andric /// comments if ReturnComment is false.
47*0b57cec5SDimitry Andric static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
48*0b57cec5SDimitry Andric                                   unsigned &CurTok, bool ReturnComment) {
49*0b57cec5SDimitry Andric   assert(CurTok < RawTokens.size() && "Overran eof!");
50*0b57cec5SDimitry Andric 
51*0b57cec5SDimitry Andric   // If the client doesn't want comments and we have one, skip it.
52*0b57cec5SDimitry Andric   if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
53*0b57cec5SDimitry Andric     ++CurTok;
54*0b57cec5SDimitry Andric 
55*0b57cec5SDimitry Andric   return RawTokens[CurTok++];
56*0b57cec5SDimitry Andric }
57*0b57cec5SDimitry Andric 
58*0b57cec5SDimitry Andric 
59*0b57cec5SDimitry Andric /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
60*0b57cec5SDimitry Andric /// the specified vector.
61*0b57cec5SDimitry Andric static void LexRawTokensFromMainFile(Preprocessor &PP,
62*0b57cec5SDimitry Andric                                      std::vector<Token> &RawTokens) {
63*0b57cec5SDimitry Andric   SourceManager &SM = PP.getSourceManager();
64*0b57cec5SDimitry Andric 
65*0b57cec5SDimitry Andric   // Create a lexer to lex all the tokens of the main file in raw mode.  Even
66*0b57cec5SDimitry Andric   // though it is in raw mode, it will not return comments.
67*0b57cec5SDimitry Andric   const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
68*0b57cec5SDimitry Andric   Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
69*0b57cec5SDimitry Andric 
70*0b57cec5SDimitry Andric   // Switch on comment lexing because we really do want them.
71*0b57cec5SDimitry Andric   RawLex.SetCommentRetentionState(true);
72*0b57cec5SDimitry Andric 
73*0b57cec5SDimitry Andric   Token RawTok;
74*0b57cec5SDimitry Andric   do {
75*0b57cec5SDimitry Andric     RawLex.LexFromRawLexer(RawTok);
76*0b57cec5SDimitry Andric 
77*0b57cec5SDimitry Andric     // If we have an identifier with no identifier info for our raw token, look
78*0b57cec5SDimitry Andric     // up the identifier info.  This is important for equality comparison of
79*0b57cec5SDimitry Andric     // identifier tokens.
80*0b57cec5SDimitry Andric     if (RawTok.is(tok::raw_identifier))
81*0b57cec5SDimitry Andric       PP.LookUpIdentifierInfo(RawTok);
82*0b57cec5SDimitry Andric 
83*0b57cec5SDimitry Andric     RawTokens.push_back(RawTok);
84*0b57cec5SDimitry Andric   } while (RawTok.isNot(tok::eof));
85*0b57cec5SDimitry Andric }
86*0b57cec5SDimitry Andric 
87*0b57cec5SDimitry Andric 
88*0b57cec5SDimitry Andric /// RewriteMacrosInInput - Implement -rewrite-macros mode.
89*0b57cec5SDimitry Andric void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
90*0b57cec5SDimitry Andric   SourceManager &SM = PP.getSourceManager();
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric   Rewriter Rewrite;
93*0b57cec5SDimitry Andric   Rewrite.setSourceMgr(SM, PP.getLangOpts());
94*0b57cec5SDimitry Andric   RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
95*0b57cec5SDimitry Andric 
96*0b57cec5SDimitry Andric   std::vector<Token> RawTokens;
97*0b57cec5SDimitry Andric   LexRawTokensFromMainFile(PP, RawTokens);
98*0b57cec5SDimitry Andric   unsigned CurRawTok = 0;
99*0b57cec5SDimitry Andric   Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
100*0b57cec5SDimitry Andric 
101*0b57cec5SDimitry Andric 
102*0b57cec5SDimitry Andric   // Get the first preprocessing token.
103*0b57cec5SDimitry Andric   PP.EnterMainSourceFile();
104*0b57cec5SDimitry Andric   Token PPTok;
105*0b57cec5SDimitry Andric   PP.Lex(PPTok);
106*0b57cec5SDimitry Andric 
107*0b57cec5SDimitry Andric   // Preprocess the input file in parallel with raw lexing the main file. Ignore
108*0b57cec5SDimitry Andric   // all tokens that are preprocessed from a file other than the main file (e.g.
109*0b57cec5SDimitry Andric   // a header).  If we see tokens that are in the preprocessed file but not the
110*0b57cec5SDimitry Andric   // lexed file, we have a macro expansion.  If we see tokens in the lexed file
111*0b57cec5SDimitry Andric   // that aren't in the preprocessed view, we have macros that expand to no
112*0b57cec5SDimitry Andric   // tokens, or macro arguments etc.
113*0b57cec5SDimitry Andric   while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
114*0b57cec5SDimitry Andric     SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
115*0b57cec5SDimitry Andric 
116*0b57cec5SDimitry Andric     // If PPTok is from a different source file, ignore it.
117*0b57cec5SDimitry Andric     if (!SM.isWrittenInMainFile(PPLoc)) {
118*0b57cec5SDimitry Andric       PP.Lex(PPTok);
119*0b57cec5SDimitry Andric       continue;
120*0b57cec5SDimitry Andric     }
121*0b57cec5SDimitry Andric 
122*0b57cec5SDimitry Andric     // If the raw file hits a preprocessor directive, they will be extra tokens
123*0b57cec5SDimitry Andric     // in the raw file that don't exist in the preprocsesed file.  However, we
124*0b57cec5SDimitry Andric     // choose to preserve them in the output file and otherwise handle them
125*0b57cec5SDimitry Andric     // specially.
126*0b57cec5SDimitry Andric     if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
127*0b57cec5SDimitry Andric       // If this is a #warning directive or #pragma mark (GNU extensions),
128*0b57cec5SDimitry Andric       // comment the line out.
129*0b57cec5SDimitry Andric       if (RawTokens[CurRawTok].is(tok::identifier)) {
130*0b57cec5SDimitry Andric         const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
131*0b57cec5SDimitry Andric         if (II->getName() == "warning") {
132*0b57cec5SDimitry Andric           // Comment out #warning.
133*0b57cec5SDimitry Andric           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
134*0b57cec5SDimitry Andric         } else if (II->getName() == "pragma" &&
135*0b57cec5SDimitry Andric                    RawTokens[CurRawTok+1].is(tok::identifier) &&
136*0b57cec5SDimitry Andric                    (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
137*0b57cec5SDimitry Andric                     "mark")) {
138*0b57cec5SDimitry Andric           // Comment out #pragma mark.
139*0b57cec5SDimitry Andric           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
140*0b57cec5SDimitry Andric         }
141*0b57cec5SDimitry Andric       }
142*0b57cec5SDimitry Andric 
143*0b57cec5SDimitry Andric       // Otherwise, if this is a #include or some other directive, just leave it
144*0b57cec5SDimitry Andric       // in the file by skipping over the line.
145*0b57cec5SDimitry Andric       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
146*0b57cec5SDimitry Andric       while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
147*0b57cec5SDimitry Andric         RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
148*0b57cec5SDimitry Andric       continue;
149*0b57cec5SDimitry Andric     }
150*0b57cec5SDimitry Andric 
151*0b57cec5SDimitry Andric     // Okay, both tokens are from the same file.  Get their offsets from the
152*0b57cec5SDimitry Andric     // start of the file.
153*0b57cec5SDimitry Andric     unsigned PPOffs = SM.getFileOffset(PPLoc);
154*0b57cec5SDimitry Andric     unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
155*0b57cec5SDimitry Andric 
156*0b57cec5SDimitry Andric     // If the offsets are the same and the token kind is the same, ignore them.
157*0b57cec5SDimitry Andric     if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
158*0b57cec5SDimitry Andric       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
159*0b57cec5SDimitry Andric       PP.Lex(PPTok);
160*0b57cec5SDimitry Andric       continue;
161*0b57cec5SDimitry Andric     }
162*0b57cec5SDimitry Andric 
163*0b57cec5SDimitry Andric     // If the PP token is farther along than the raw token, something was
164*0b57cec5SDimitry Andric     // deleted.  Comment out the raw token.
165*0b57cec5SDimitry Andric     if (RawOffs <= PPOffs) {
166*0b57cec5SDimitry Andric       // Comment out a whole run of tokens instead of bracketing each one with
167*0b57cec5SDimitry Andric       // comments.  Add a leading space if RawTok didn't have one.
168*0b57cec5SDimitry Andric       bool HasSpace = RawTok.hasLeadingSpace();
169*0b57cec5SDimitry Andric       RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
170*0b57cec5SDimitry Andric       unsigned EndPos;
171*0b57cec5SDimitry Andric 
172*0b57cec5SDimitry Andric       do {
173*0b57cec5SDimitry Andric         EndPos = RawOffs+RawTok.getLength();
174*0b57cec5SDimitry Andric 
175*0b57cec5SDimitry Andric         RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
176*0b57cec5SDimitry Andric         RawOffs = SM.getFileOffset(RawTok.getLocation());
177*0b57cec5SDimitry Andric 
178*0b57cec5SDimitry Andric         if (RawTok.is(tok::comment)) {
179*0b57cec5SDimitry Andric           // Skip past the comment.
180*0b57cec5SDimitry Andric           RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
181*0b57cec5SDimitry Andric           break;
182*0b57cec5SDimitry Andric         }
183*0b57cec5SDimitry Andric 
184*0b57cec5SDimitry Andric       } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
185*0b57cec5SDimitry Andric                (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
186*0b57cec5SDimitry Andric 
187*0b57cec5SDimitry Andric       RB.InsertTextBefore(EndPos, "*/");
188*0b57cec5SDimitry Andric       continue;
189*0b57cec5SDimitry Andric     }
190*0b57cec5SDimitry Andric 
191*0b57cec5SDimitry Andric     // Otherwise, there was a replacement an expansion.  Insert the new token
192*0b57cec5SDimitry Andric     // in the output buffer.  Insert the whole run of new tokens at once to get
193*0b57cec5SDimitry Andric     // them in the right order.
194*0b57cec5SDimitry Andric     unsigned InsertPos = PPOffs;
195*0b57cec5SDimitry Andric     std::string Expansion;
196*0b57cec5SDimitry Andric     while (PPOffs < RawOffs) {
197*0b57cec5SDimitry Andric       Expansion += ' ' + PP.getSpelling(PPTok);
198*0b57cec5SDimitry Andric       PP.Lex(PPTok);
199*0b57cec5SDimitry Andric       PPLoc = SM.getExpansionLoc(PPTok.getLocation());
200*0b57cec5SDimitry Andric       PPOffs = SM.getFileOffset(PPLoc);
201*0b57cec5SDimitry Andric     }
202*0b57cec5SDimitry Andric     Expansion += ' ';
203*0b57cec5SDimitry Andric     RB.InsertTextBefore(InsertPos, Expansion);
204*0b57cec5SDimitry Andric   }
205*0b57cec5SDimitry Andric 
206*0b57cec5SDimitry Andric   // Get the buffer corresponding to MainFileID.  If we haven't changed it, then
207*0b57cec5SDimitry Andric   // we are done.
208*0b57cec5SDimitry Andric   if (const RewriteBuffer *RewriteBuf =
209*0b57cec5SDimitry Andric       Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
210*0b57cec5SDimitry Andric     //printf("Changed:\n");
211*0b57cec5SDimitry Andric     *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
212*0b57cec5SDimitry Andric   } else {
213*0b57cec5SDimitry Andric     fprintf(stderr, "No changes\n");
214*0b57cec5SDimitry Andric   }
215*0b57cec5SDimitry Andric   OS->flush();
216*0b57cec5SDimitry Andric }
217