1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "clang/Analysis/MacroExpansionContext.h"
10 #include "llvm/Support/Debug.h"
11 #include <optional>
12
13 #define DEBUG_TYPE "macro-expansion-context"
14
// Forward declaration; the definition is near the end of this file. It is
// declared up front so that the debug output inside
// MacroExpansionRangeRecorder::MacroExpands can call it.
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
                          clang::Token Tok);
17
18 namespace clang {
19 namespace detail {
20 class MacroExpansionRangeRecorder : public PPCallbacks {
21 const Preprocessor &PP;
22 SourceManager &SM;
23 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
24
25 public:
MacroExpansionRangeRecorder(const Preprocessor & PP,SourceManager & SM,MacroExpansionContext::ExpansionRangeMap & ExpansionRanges)26 explicit MacroExpansionRangeRecorder(
27 const Preprocessor &PP, SourceManager &SM,
28 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
29 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
30
MacroExpands(const Token & MacroName,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)31 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
32 SourceRange Range, const MacroArgs *Args) override {
33 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
34 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
35 return;
36
37 SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
38 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
39
40 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
41 // If the range is empty, use the length of the macro.
42 if (Range.getBegin() == Range.getEnd())
43 return SM.getExpansionLoc(
44 MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
45
46 // Include the last character.
47 return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
48 }();
49
50 (void)PP;
51 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
52 dumpTokenInto(PP, llvm::dbgs(), MacroName);
53 llvm::dbgs()
54 << "' with length " << MacroName.getLength() << " at ";
55 MacroNameBegin.print(llvm::dbgs(), SM);
56 llvm::dbgs() << ", expansion end at ";
57 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
58
59 // If the expansion range is empty, use the identifier of the macro as a
60 // range.
61 MacroExpansionContext::ExpansionRangeMap::iterator It;
62 bool Inserted;
63 std::tie(It, Inserted) =
64 ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
65 if (Inserted) {
66 LLVM_DEBUG(llvm::dbgs() << "maps ";
67 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
68 It->getSecond().print(llvm::dbgs(), SM);
69 llvm::dbgs() << '\n';);
70 } else {
71 if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
72 It->getSecond() = ExpansionEnd;
73 LLVM_DEBUG(
74 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
75 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
76 llvm::dbgs() << '\n';);
77 }
78 }
79 }
80 };
81 } // namespace detail
82 } // namespace clang
83
84 using namespace clang;
85
MacroExpansionContext(const LangOptions & LangOpts)86 MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
87 : LangOpts(LangOpts) {}
88
registerForPreprocessor(Preprocessor & NewPP)89 void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
90 PP = &NewPP;
91 SM = &NewPP.getSourceManager();
92
93 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
94 PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
95 *PP, *SM, ExpansionRanges));
96 // Same applies here.
97 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
98 }
99
100 std::optional<StringRef>
getExpandedText(SourceLocation MacroExpansionLoc) const101 MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
102 if (MacroExpansionLoc.isMacroID())
103 return std::nullopt;
104
105 // If there was no macro expansion at that location, return std::nullopt.
106 if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end())
107 return std::nullopt;
108
109 // There was macro expansion, but resulted in no tokens, return empty string.
110 const auto It = ExpandedTokens.find_as(MacroExpansionLoc);
111 if (It == ExpandedTokens.end())
112 return StringRef{""};
113
114 // Otherwise we have the actual token sequence as string.
115 return It->getSecond().str();
116 }
117
118 std::optional<StringRef>
getOriginalText(SourceLocation MacroExpansionLoc) const119 MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
120 if (MacroExpansionLoc.isMacroID())
121 return std::nullopt;
122
123 const auto It = ExpansionRanges.find_as(MacroExpansionLoc);
124 if (It == ExpansionRanges.end())
125 return std::nullopt;
126
127 assert(It->getFirst() != It->getSecond() &&
128 "Every macro expansion must cover a non-empty range.");
129
130 return Lexer::getSourceText(
131 CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM,
132 LangOpts);
133 }
134
dumpExpansionRanges() const135 void MacroExpansionContext::dumpExpansionRanges() const {
136 dumpExpansionRangesToStream(llvm::dbgs());
137 }
dumpExpandedTexts() const138 void MacroExpansionContext::dumpExpandedTexts() const {
139 dumpExpandedTextsToStream(llvm::dbgs());
140 }
141
dumpExpansionRangesToStream(raw_ostream & OS) const142 void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
143 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
144 LocalExpansionRanges.reserve(ExpansionRanges.size());
145 for (const auto &Record : ExpansionRanges)
146 LocalExpansionRanges.emplace_back(
147 std::make_pair(Record.getFirst(), Record.getSecond()));
148 llvm::sort(LocalExpansionRanges);
149
150 OS << "\n=============== ExpansionRanges ===============\n";
151 for (const auto &Record : LocalExpansionRanges) {
152 OS << "> ";
153 Record.first.print(OS, *SM);
154 OS << ", ";
155 Record.second.print(OS, *SM);
156 OS << '\n';
157 }
158 }
159
dumpExpandedTextsToStream(raw_ostream & OS) const160 void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
161 std::vector<std::pair<SourceLocation, MacroExpansionText>>
162 LocalExpandedTokens;
163 LocalExpandedTokens.reserve(ExpandedTokens.size());
164 for (const auto &Record : ExpandedTokens)
165 LocalExpandedTokens.emplace_back(
166 std::make_pair(Record.getFirst(), Record.getSecond()));
167 llvm::sort(LocalExpandedTokens);
168
169 OS << "\n=============== ExpandedTokens ===============\n";
170 for (const auto &Record : LocalExpandedTokens) {
171 OS << "> ";
172 Record.first.print(OS, *SM);
173 OS << " -> '" << Record.second << "'\n";
174 }
175 }
176
dumpTokenInto(const Preprocessor & PP,raw_ostream & OS,Token Tok)177 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
178 assert(Tok.isNot(tok::raw_identifier));
179
180 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
181 if (Tok.isAnnotation())
182 return;
183
184 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
185 // FIXME: For now, we don't respect whitespaces between macro expanded
186 // tokens. We just emit a space after every identifier to produce a valid
187 // code for `int a ;` like expansions.
188 // ^-^-- Space after the 'int' and 'a' identifiers.
189 OS << II->getName() << ' ';
190 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
191 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
192 } else {
193 char Tmp[256];
194 if (Tok.getLength() < sizeof(Tmp)) {
195 const char *TokPtr = Tmp;
196 // FIXME: Might use a different overload for cleaner callsite.
197 unsigned Len = PP.getSpelling(Tok, TokPtr);
198 OS.write(TokPtr, Len);
199 } else {
200 OS << "<too long token>";
201 }
202 }
203 }
204
onTokenLexed(const Token & Tok)205 void MacroExpansionContext::onTokenLexed(const Token &Tok) {
206 SourceLocation SLoc = Tok.getLocation();
207 if (SLoc.isFileID())
208 return;
209
210 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
211 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
212 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
213
214 // Remove spelling location.
215 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc);
216
217 MacroExpansionText TokenAsString;
218 llvm::raw_svector_ostream OS(TokenAsString);
219
220 // FIXME: Prepend newlines and space to produce the exact same output as the
221 // preprocessor would for this token.
222
223 dumpTokenInto(*PP, OS, Tok);
224
225 ExpansionMap::iterator It;
226 bool Inserted;
227 std::tie(It, Inserted) =
228 ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
229 if (!Inserted)
230 It->getSecond().append(TokenAsString);
231 }
232
233