1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of MacroExpander, which handles macro 11 /// configuration and expansion while formatting. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "Macros.h" 16 17 #include "Encoding.h" 18 #include "FormatToken.h" 19 #include "FormatTokenLexer.h" 20 #include "clang/Basic/TokenKinds.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/HeaderSearch.h" 23 #include "clang/Lex/Lexer.h" 24 #include "clang/Lex/PreprocessorOptions.h" 25 #include "llvm/ADT/StringSet.h" 26 #include "llvm/Support/ErrorHandling.h" 27 28 namespace clang { 29 namespace format { 30 31 struct MacroExpander::Definition { 32 StringRef Name; 33 SmallVector<FormatToken *, 8> Params; 34 SmallVector<FormatToken *, 8> Body; 35 36 // Map from each argument's name to its position in the argument list. 37 // With "M(x, y) x + y": 38 // x -> 0 39 // y -> 1 40 llvm::StringMap<size_t> ArgMap; 41 42 bool ObjectLike = true; 43 }; 44 45 class MacroExpander::DefinitionParser { 46 public: 47 DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { 48 assert(!Tokens.empty()); 49 Current = Tokens[0]; 50 } 51 52 // Parse the token stream and return the corresponding Definition object. 53 // Returns an empty definition object with a null-Name on error. 54 MacroExpander::Definition parse() { 55 if (Current->isNot(tok::identifier)) 56 return {}; 57 Def.Name = Current->TokenText; 58 nextToken(); 59 if (Current->is(tok::l_paren)) { 60 Def.ObjectLike = false; 61 if (!parseParams()) 62 return {}; 63 } 64 if (!parseExpansion()) 65 return {}; 66 67 return Def; 68 } 69 70 private: 71 bool parseParams() { 72 assert(Current->is(tok::l_paren)); 73 nextToken(); 74 while (Current->is(tok::identifier)) { 75 Def.Params.push_back(Current); 76 Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; 77 nextToken(); 78 if (Current->isNot(tok::comma)) 79 break; 80 nextToken(); 81 } 82 if (Current->isNot(tok::r_paren)) 83 return false; 84 nextToken(); 85 return true; 86 } 87 88 bool parseExpansion() { 89 if (!Current->isOneOf(tok::equal, tok::eof)) 90 return false; 91 if (Current->is(tok::equal)) 92 nextToken(); 93 parseTail(); 94 return true; 95 } 96 97 void parseTail() { 98 while (Current->isNot(tok::eof)) { 99 Def.Body.push_back(Current); 100 nextToken(); 101 } 102 Def.Body.push_back(Current); 103 } 104 105 void nextToken() { 106 if (Pos + 1 < Tokens.size()) 107 ++Pos; 108 Current = Tokens[Pos]; 109 Current->Finalized = true; 110 } 111 112 size_t Pos = 0; 113 FormatToken *Current = nullptr; 114 Definition Def; 115 ArrayRef<FormatToken *> Tokens; 116 }; 117 118 MacroExpander::MacroExpander( 119 const std::vector<std::string> &Macros, SourceManager &SourceMgr, 120 const FormatStyle &Style, 121 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 122 IdentifierTable &IdentTable) 123 : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), 124 IdentTable(IdentTable) { 125 for (const std::string &Macro : Macros) 126 parseDefinition(Macro); 127 } 128 129 MacroExpander::~MacroExpander() = default; 130 131 void MacroExpander::parseDefinition(const std::string &Macro) { 132 Buffers.push_back( 133 llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>")); 134 FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef()); 135 FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, 136 Allocator, IdentTable); 137 const auto Tokens = Lex.lex(); 138 if (!Tokens.empty()) { 139 DefinitionParser Parser(Tokens); 140 auto Definition = Parser.parse(); 141 if (Definition.ObjectLike) { 142 ObjectLike[Definition.Name] = std::move(Definition); 143 } else { 144 FunctionLike[Definition.Name][Definition.Params.size()] = 145 std::move(Definition); 146 } 147 } 148 } 149 150 bool MacroExpander::defined(StringRef Name) const { 151 return FunctionLike.contains(Name) || ObjectLike.contains(Name); 152 } 153 154 bool MacroExpander::objectLike(StringRef Name) const { 155 return ObjectLike.contains(Name); 156 } 157 158 bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { 159 auto it = FunctionLike.find(Name); 160 return it != FunctionLike.end() && it->second.contains(Arity); 161 } 162 163 SmallVector<FormatToken *, 8> 164 MacroExpander::expand(FormatToken *ID, 165 std::optional<ArgsList> OptionalArgs) const { 166 if (OptionalArgs) 167 assert(hasArity(ID->TokenText, OptionalArgs->size())); 168 else 169 assert(objectLike(ID->TokenText)); 170 const Definition &Def = OptionalArgs 171 ? FunctionLike.find(ID->TokenText) 172 ->second.find(OptionalArgs.value().size()) 173 ->second 174 : ObjectLike.find(ID->TokenText)->second; 175 ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); 176 SmallVector<FormatToken *, 8> Result; 177 // Expand each argument at most once. 178 llvm::StringSet<> ExpandedArgs; 179 180 // Adds the given token to Result. 181 auto pushToken = [&](FormatToken *Tok) { 182 Tok->MacroCtx->ExpandedFrom.push_back(ID); 183 Result.push_back(Tok); 184 }; 185 186 // If Tok references a parameter, adds the corresponding argument to Result. 187 // Returns false if Tok does not reference a parameter. 188 auto expandArgument = [&](FormatToken *Tok) -> bool { 189 // If the current token references a parameter, expand the corresponding 190 // argument. 191 if (Tok->isNot(tok::identifier)) 192 return false; 193 if (!ExpandedArgs.insert(Tok->TokenText).second) 194 return false; 195 auto I = Def.ArgMap.find(Tok->TokenText); 196 if (I == Def.ArgMap.end()) 197 return false; 198 // If there are fewer arguments than referenced parameters, treat the 199 // parameter as empty. 200 // FIXME: Potentially fully abort the expansion instead. 201 if (I->getValue() >= Args.size()) 202 return true; 203 for (FormatToken *Arg : Args[I->getValue()]) { 204 // A token can be part of a macro argument at multiple levels. 205 // For example, with "ID(x) x": 206 // in ID(ID(x)), 'x' is expanded first as argument to the inner 207 // ID, then again as argument to the outer ID. We keep the macro 208 // role the token had from the inner expansion. 209 if (!Arg->MacroCtx) 210 Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); 211 pushToken(Arg); 212 } 213 return true; 214 }; 215 216 // Expand the definition into Result. 217 for (FormatToken *Tok : Def.Body) { 218 if (expandArgument(Tok)) 219 continue; 220 // Create a copy of the tokens from the macro body, i.e. were not provided 221 // by user code. 222 FormatToken *New = new (Allocator.Allocate()) FormatToken; 223 New->copyFrom(*Tok); 224 assert(!New->MacroCtx); 225 // Tokens that are not part of the user code are not formatted. 226 New->MacroCtx = MacroExpansion(MR_Hidden); 227 pushToken(New); 228 } 229 assert(!Result.empty() && Result.back()->is(tok::eof)); 230 if (Result.size() > 1) { 231 ++Result[0]->MacroCtx->StartOfExpansion; 232 ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; 233 } else { 234 // If the macro expansion is empty, mark the start and end. 235 Result[0]->MacroCtx->StartOfExpansion = 1; 236 Result[0]->MacroCtx->EndOfExpansion = 1; 237 } 238 return Result; 239 } 240 241 } // namespace format 242 } // namespace clang 243