1 //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/MacroExpansionContext.h" 10 #include "llvm/Support/Debug.h" 11 #include <optional> 12 13 #define DEBUG_TYPE "macro-expansion-context" 14 15 static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS, 16 clang::Token Tok); 17 18 namespace clang { 19 namespace detail { 20 class MacroExpansionRangeRecorder : public PPCallbacks { 21 const Preprocessor &PP; 22 SourceManager &SM; 23 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; 24 25 public: 26 explicit MacroExpansionRangeRecorder( 27 const Preprocessor &PP, SourceManager &SM, 28 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) 29 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} 30 31 void MacroExpands(const Token &MacroName, const MacroDefinition &MD, 32 SourceRange Range, const MacroArgs *Args) override { 33 // Ignore annotation tokens like: _Pragma("pack(push, 1)") 34 if (MacroName.getIdentifierInfo()->getName() == "_Pragma") 35 return; 36 37 SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation()); 38 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); 39 40 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { 41 // If the range is empty, use the length of the macro. 42 if (Range.getBegin() == Range.getEnd()) 43 return SM.getExpansionLoc( 44 MacroName.getLocation().getLocWithOffset(MacroName.getLength())); 45 46 // Include the last character. 47 return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1); 48 }(); 49 50 (void)PP; 51 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '"; 52 dumpTokenInto(PP, llvm::dbgs(), MacroName); 53 llvm::dbgs() 54 << "' with length " << MacroName.getLength() << " at "; 55 MacroNameBegin.print(llvm::dbgs(), SM); 56 llvm::dbgs() << ", expansion end at "; 57 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); 58 59 // If the expansion range is empty, use the identifier of the macro as a 60 // range. 61 MacroExpansionContext::ExpansionRangeMap::iterator It; 62 bool Inserted; 63 std::tie(It, Inserted) = 64 ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd); 65 if (Inserted) { 66 LLVM_DEBUG(llvm::dbgs() << "maps "; 67 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to "; 68 It->getSecond().print(llvm::dbgs(), SM); 69 llvm::dbgs() << '\n';); 70 } else { 71 if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) { 72 It->getSecond() = ExpansionEnd; 73 LLVM_DEBUG( 74 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM); 75 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM); 76 llvm::dbgs() << '\n';); 77 } 78 } 79 } 80 }; 81 } // namespace detail 82 } // namespace clang 83 84 using namespace clang; 85 86 MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) 87 : LangOpts(LangOpts) {} 88 89 void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { 90 PP = &NewPP; 91 SM = &NewPP.getSourceManager(); 92 93 // Make sure that the Preprocessor does not outlive the MacroExpansionContext. 94 PP->addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>( 95 *PP, *SM, ExpansionRanges)); 96 // Same applies here. 97 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); 98 } 99 100 std::optional<StringRef> 101 MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { 102 if (MacroExpansionLoc.isMacroID()) 103 return std::nullopt; 104 105 // If there was no macro expansion at that location, return std::nullopt. 106 if (ExpansionRanges.find_as(MacroExpansionLoc) == ExpansionRanges.end()) 107 return std::nullopt; 108 109 // There was macro expansion, but resulted in no tokens, return empty string. 110 const auto It = ExpandedTokens.find_as(MacroExpansionLoc); 111 if (It == ExpandedTokens.end()) 112 return StringRef{""}; 113 114 // Otherwise we have the actual token sequence as string. 115 return It->getSecond().str(); 116 } 117 118 std::optional<StringRef> 119 MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { 120 if (MacroExpansionLoc.isMacroID()) 121 return std::nullopt; 122 123 const auto It = ExpansionRanges.find_as(MacroExpansionLoc); 124 if (It == ExpansionRanges.end()) 125 return std::nullopt; 126 127 assert(It->getFirst() != It->getSecond() && 128 "Every macro expansion must cover a non-empty range."); 129 130 return Lexer::getSourceText( 131 CharSourceRange::getCharRange(It->getFirst(), It->getSecond()), *SM, 132 LangOpts); 133 } 134 135 void MacroExpansionContext::dumpExpansionRanges() const { 136 dumpExpansionRangesToStream(llvm::dbgs()); 137 } 138 void MacroExpansionContext::dumpExpandedTexts() const { 139 dumpExpandedTextsToStream(llvm::dbgs()); 140 } 141 142 void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { 143 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; 144 LocalExpansionRanges.reserve(ExpansionRanges.size()); 145 for (const auto &Record : ExpansionRanges) 146 LocalExpansionRanges.emplace_back( 147 std::make_pair(Record.getFirst(), Record.getSecond())); 148 llvm::sort(LocalExpansionRanges); 149 150 OS << "\n=============== ExpansionRanges ===============\n"; 151 for (const auto &Record : LocalExpansionRanges) { 152 OS << "> "; 153 Record.first.print(OS, *SM); 154 OS << ", "; 155 Record.second.print(OS, *SM); 156 OS << '\n'; 157 } 158 } 159 160 void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { 161 std::vector<std::pair<SourceLocation, MacroExpansionText>> 162 LocalExpandedTokens; 163 LocalExpandedTokens.reserve(ExpandedTokens.size()); 164 for (const auto &Record : ExpandedTokens) 165 LocalExpandedTokens.emplace_back( 166 std::make_pair(Record.getFirst(), Record.getSecond())); 167 llvm::sort(LocalExpandedTokens); 168 169 OS << "\n=============== ExpandedTokens ===============\n"; 170 for (const auto &Record : LocalExpandedTokens) { 171 OS << "> "; 172 Record.first.print(OS, *SM); 173 OS << " -> '" << Record.second << "'\n"; 174 } 175 } 176 177 static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { 178 assert(Tok.isNot(tok::raw_identifier)); 179 180 // Ignore annotation tokens like: _Pragma("pack(push, 1)") 181 if (Tok.isAnnotation()) 182 return; 183 184 if (IdentifierInfo *II = Tok.getIdentifierInfo()) { 185 // FIXME: For now, we don't respect whitespaces between macro expanded 186 // tokens. We just emit a space after every identifier to produce a valid 187 // code for `int a ;` like expansions. 188 // ^-^-- Space after the 'int' and 'a' identifiers. 189 OS << II->getName() << ' '; 190 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { 191 OS << StringRef(Tok.getLiteralData(), Tok.getLength()); 192 } else { 193 char Tmp[256]; 194 if (Tok.getLength() < sizeof(Tmp)) { 195 const char *TokPtr = Tmp; 196 // FIXME: Might use a different overload for cleaner callsite. 197 unsigned Len = PP.getSpelling(Tok, TokPtr); 198 OS.write(TokPtr, Len); 199 } else { 200 OS << "<too long token>"; 201 } 202 } 203 } 204 205 void MacroExpansionContext::onTokenLexed(const Token &Tok) { 206 SourceLocation SLoc = Tok.getLocation(); 207 if (SLoc.isFileID()) 208 return; 209 210 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '"; 211 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at "; 212 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); 213 214 // Remove spelling location. 215 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(SLoc); 216 217 MacroExpansionText TokenAsString; 218 llvm::raw_svector_ostream OS(TokenAsString); 219 220 // FIXME: Prepend newlines and space to produce the exact same output as the 221 // preprocessor would for this token. 222 223 dumpTokenInto(*PP, OS, Tok); 224 225 ExpansionMap::iterator It; 226 bool Inserted; 227 std::tie(It, Inserted) = 228 ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString)); 229 if (!Inserted) 230 It->getSecond().append(TokenAsString); 231 } 232 233