xref: /freebsd/contrib/llvm-project/clang/lib/Format/MacroExpander.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of MacroExpander, which handles macro
11 /// configuration and expansion while formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Macros.h"
16 
17 #include "Encoding.h"
18 #include "FormatToken.h"
19 #include "FormatTokenLexer.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "clang/Format/Format.h"
22 #include "clang/Lex/HeaderSearch.h"
23 #include "clang/Lex/Lexer.h"
24 #include "clang/Lex/PreprocessorOptions.h"
25 #include "llvm/ADT/StringSet.h"
26 #include "llvm/Support/ErrorHandling.h"
27 
28 namespace clang {
29 namespace format {
30 
31 struct MacroExpander::Definition {
32   StringRef Name;
33   SmallVector<FormatToken *, 8> Params;
34   SmallVector<FormatToken *, 8> Body;
35 
36   // Map from each argument's name to its position in the argument list.
37   // With "M(x, y) x + y":
38   //   x -> 0
39   //   y -> 1
40   llvm::StringMap<size_t> ArgMap;
41 
42   bool ObjectLike = true;
43 };
44 
45 class MacroExpander::DefinitionParser {
46 public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)47   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
48     assert(!Tokens.empty());
49     Current = Tokens[0];
50   }
51 
52   // Parse the token stream and return the corresponding Definition object.
53   // Returns an empty definition object with a null-Name on error.
parse()54   MacroExpander::Definition parse() {
55     if (Current->isNot(tok::identifier))
56       return {};
57     Def.Name = Current->TokenText;
58     nextToken();
59     if (Current->is(tok::l_paren)) {
60       Def.ObjectLike = false;
61       if (!parseParams())
62         return {};
63     }
64     if (!parseExpansion())
65       return {};
66 
67     return Def;
68   }
69 
70 private:
parseParams()71   bool parseParams() {
72     assert(Current->is(tok::l_paren));
73     nextToken();
74     while (Current->is(tok::identifier)) {
75       Def.Params.push_back(Current);
76       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
77       nextToken();
78       if (Current->isNot(tok::comma))
79         break;
80       nextToken();
81     }
82     if (Current->isNot(tok::r_paren))
83       return false;
84     nextToken();
85     return true;
86   }
87 
parseExpansion()88   bool parseExpansion() {
89     if (!Current->isOneOf(tok::equal, tok::eof))
90       return false;
91     if (Current->is(tok::equal))
92       nextToken();
93     parseTail();
94     return true;
95   }
96 
parseTail()97   void parseTail() {
98     while (Current->isNot(tok::eof)) {
99       Def.Body.push_back(Current);
100       nextToken();
101     }
102     Def.Body.push_back(Current);
103   }
104 
nextToken()105   void nextToken() {
106     if (Pos + 1 < Tokens.size())
107       ++Pos;
108     Current = Tokens[Pos];
109     Current->Finalized = true;
110   }
111 
112   size_t Pos = 0;
113   FormatToken *Current = nullptr;
114   Definition Def;
115   ArrayRef<FormatToken *> Tokens;
116 };
117 
MacroExpander(const std::vector<std::string> & Macros,SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)118 MacroExpander::MacroExpander(
119     const std::vector<std::string> &Macros, SourceManager &SourceMgr,
120     const FormatStyle &Style,
121     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
122     IdentifierTable &IdentTable)
123     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
124       IdentTable(IdentTable) {
125   for (const std::string &Macro : Macros)
126     parseDefinition(Macro);
127 }
128 
// Defaulted here, in the file where MacroExpander::Definition is a complete
// type. NOTE(review): presumably needed because the class stores Definition
// objects in its containers -- confirm against Macros.h.
MacroExpander::~MacroExpander() = default;
130 
parseDefinition(const std::string & Macro)131 void MacroExpander::parseDefinition(const std::string &Macro) {
132   Buffers.push_back(
133       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
134   FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
135   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
136                        Allocator, IdentTable);
137   const auto Tokens = Lex.lex();
138   if (!Tokens.empty()) {
139     DefinitionParser Parser(Tokens);
140     auto Definition = Parser.parse();
141     if (Definition.ObjectLike) {
142       ObjectLike[Definition.Name] = std::move(Definition);
143     } else {
144       FunctionLike[Definition.Name][Definition.Params.size()] =
145           std::move(Definition);
146     }
147   }
148 }
149 
defined(StringRef Name) const150 bool MacroExpander::defined(StringRef Name) const {
151   return FunctionLike.contains(Name) || ObjectLike.contains(Name);
152 }
153 
objectLike(StringRef Name) const154 bool MacroExpander::objectLike(StringRef Name) const {
155   return ObjectLike.contains(Name);
156 }
157 
hasArity(StringRef Name,unsigned Arity) const158 bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
159   auto it = FunctionLike.find(Name);
160   return it != FunctionLike.end() && it->second.contains(Arity);
161 }
162 
163 SmallVector<FormatToken *, 8>
expand(FormatToken * ID,std::optional<ArgsList> OptionalArgs) const164 MacroExpander::expand(FormatToken *ID,
165                       std::optional<ArgsList> OptionalArgs) const {
166   if (OptionalArgs)
167     assert(hasArity(ID->TokenText, OptionalArgs->size()));
168   else
169     assert(objectLike(ID->TokenText));
170   const Definition &Def = OptionalArgs
171                               ? FunctionLike.find(ID->TokenText)
172                                     ->second.find(OptionalArgs.value().size())
173                                     ->second
174                               : ObjectLike.find(ID->TokenText)->second;
175   ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
176   SmallVector<FormatToken *, 8> Result;
177   // Expand each argument at most once.
178   llvm::StringSet<> ExpandedArgs;
179 
180   // Adds the given token to Result.
181   auto pushToken = [&](FormatToken *Tok) {
182     Tok->MacroCtx->ExpandedFrom.push_back(ID);
183     Result.push_back(Tok);
184   };
185 
186   // If Tok references a parameter, adds the corresponding argument to Result.
187   // Returns false if Tok does not reference a parameter.
188   auto expandArgument = [&](FormatToken *Tok) -> bool {
189     // If the current token references a parameter, expand the corresponding
190     // argument.
191     if (Tok->isNot(tok::identifier))
192       return false;
193     if (!ExpandedArgs.insert(Tok->TokenText).second)
194       return false;
195     auto I = Def.ArgMap.find(Tok->TokenText);
196     if (I == Def.ArgMap.end())
197       return false;
198     // If there are fewer arguments than referenced parameters, treat the
199     // parameter as empty.
200     // FIXME: Potentially fully abort the expansion instead.
201     if (I->getValue() >= Args.size())
202       return true;
203     for (FormatToken *Arg : Args[I->getValue()]) {
204       // A token can be part of a macro argument at multiple levels.
205       // For example, with "ID(x) x":
206       // in ID(ID(x)), 'x' is expanded first as argument to the inner
207       // ID, then again as argument to the outer ID. We keep the macro
208       // role the token had from the inner expansion.
209       if (!Arg->MacroCtx)
210         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
211       pushToken(Arg);
212     }
213     return true;
214   };
215 
216   // Expand the definition into Result.
217   for (FormatToken *Tok : Def.Body) {
218     if (expandArgument(Tok))
219       continue;
220     // Create a copy of the tokens from the macro body, i.e. were not provided
221     // by user code.
222     FormatToken *New = new (Allocator.Allocate()) FormatToken;
223     New->copyFrom(*Tok);
224     assert(!New->MacroCtx);
225     // Tokens that are not part of the user code are not formatted.
226     New->MacroCtx = MacroExpansion(MR_Hidden);
227     pushToken(New);
228   }
229   assert(!Result.empty() && Result.back()->is(tok::eof));
230   if (Result.size() > 1) {
231     ++Result[0]->MacroCtx->StartOfExpansion;
232     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
233   } else {
234     // If the macro expansion is empty, mark the start and end.
235     Result[0]->MacroCtx->StartOfExpansion = 1;
236     Result[0]->MacroCtx->EndOfExpansion = 1;
237   }
238   return Result;
239 }
240 
241 } // namespace format
242 } // namespace clang
243