1e8d8bef9SDimitry Andric //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric ///
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric /// This file contains the implementation of MacroExpander, which handles macro
11e8d8bef9SDimitry Andric /// configuration and expansion while formatting.
12e8d8bef9SDimitry Andric ///
13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
14e8d8bef9SDimitry Andric
15e8d8bef9SDimitry Andric #include "Macros.h"
16e8d8bef9SDimitry Andric
17e8d8bef9SDimitry Andric #include "Encoding.h"
18e8d8bef9SDimitry Andric #include "FormatToken.h"
19e8d8bef9SDimitry Andric #include "FormatTokenLexer.h"
20e8d8bef9SDimitry Andric #include "clang/Basic/TokenKinds.h"
21e8d8bef9SDimitry Andric #include "clang/Format/Format.h"
22e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearch.h"
23e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearchOptions.h"
24e8d8bef9SDimitry Andric #include "clang/Lex/Lexer.h"
25e8d8bef9SDimitry Andric #include "clang/Lex/ModuleLoader.h"
26e8d8bef9SDimitry Andric #include "clang/Lex/Preprocessor.h"
27e8d8bef9SDimitry Andric #include "clang/Lex/PreprocessorOptions.h"
28e8d8bef9SDimitry Andric #include "llvm/ADT/StringSet.h"
29e8d8bef9SDimitry Andric #include "llvm/Support/ErrorHandling.h"
30e8d8bef9SDimitry Andric
31e8d8bef9SDimitry Andric namespace clang {
32e8d8bef9SDimitry Andric namespace format {
33e8d8bef9SDimitry Andric
34e8d8bef9SDimitry Andric struct MacroExpander::Definition {
35e8d8bef9SDimitry Andric StringRef Name;
36e8d8bef9SDimitry Andric SmallVector<FormatToken *, 8> Params;
37e8d8bef9SDimitry Andric SmallVector<FormatToken *, 8> Body;
38e8d8bef9SDimitry Andric
39e8d8bef9SDimitry Andric // Map from each argument's name to its position in the argument list.
40e8d8bef9SDimitry Andric // With "M(x, y) x + y":
41e8d8bef9SDimitry Andric // x -> 0
42e8d8bef9SDimitry Andric // y -> 1
43e8d8bef9SDimitry Andric llvm::StringMap<size_t> ArgMap;
44e8d8bef9SDimitry Andric
45e8d8bef9SDimitry Andric bool ObjectLike = true;
46e8d8bef9SDimitry Andric };
47e8d8bef9SDimitry Andric
48e8d8bef9SDimitry Andric class MacroExpander::DefinitionParser {
49e8d8bef9SDimitry Andric public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)50e8d8bef9SDimitry Andric DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51e8d8bef9SDimitry Andric assert(!Tokens.empty());
52e8d8bef9SDimitry Andric Current = Tokens[0];
53e8d8bef9SDimitry Andric }
54e8d8bef9SDimitry Andric
55349cc55cSDimitry Andric // Parse the token stream and return the corresponding Definition object.
56e8d8bef9SDimitry Andric // Returns an empty definition object with a null-Name on error.
parse()57e8d8bef9SDimitry Andric MacroExpander::Definition parse() {
585f757f3fSDimitry Andric if (Current->isNot(tok::identifier))
59e8d8bef9SDimitry Andric return {};
60e8d8bef9SDimitry Andric Def.Name = Current->TokenText;
61e8d8bef9SDimitry Andric nextToken();
62e8d8bef9SDimitry Andric if (Current->is(tok::l_paren)) {
63e8d8bef9SDimitry Andric Def.ObjectLike = false;
64e8d8bef9SDimitry Andric if (!parseParams())
65e8d8bef9SDimitry Andric return {};
66e8d8bef9SDimitry Andric }
67e8d8bef9SDimitry Andric if (!parseExpansion())
68e8d8bef9SDimitry Andric return {};
69e8d8bef9SDimitry Andric
70e8d8bef9SDimitry Andric return Def;
71e8d8bef9SDimitry Andric }
72e8d8bef9SDimitry Andric
73e8d8bef9SDimitry Andric private:
parseParams()74e8d8bef9SDimitry Andric bool parseParams() {
75e8d8bef9SDimitry Andric assert(Current->is(tok::l_paren));
76e8d8bef9SDimitry Andric nextToken();
77e8d8bef9SDimitry Andric while (Current->is(tok::identifier)) {
78e8d8bef9SDimitry Andric Def.Params.push_back(Current);
79e8d8bef9SDimitry Andric Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80e8d8bef9SDimitry Andric nextToken();
81e8d8bef9SDimitry Andric if (Current->isNot(tok::comma))
82e8d8bef9SDimitry Andric break;
83e8d8bef9SDimitry Andric nextToken();
84e8d8bef9SDimitry Andric }
85e8d8bef9SDimitry Andric if (Current->isNot(tok::r_paren))
86e8d8bef9SDimitry Andric return false;
87e8d8bef9SDimitry Andric nextToken();
88e8d8bef9SDimitry Andric return true;
89e8d8bef9SDimitry Andric }
90e8d8bef9SDimitry Andric
parseExpansion()91e8d8bef9SDimitry Andric bool parseExpansion() {
92e8d8bef9SDimitry Andric if (!Current->isOneOf(tok::equal, tok::eof))
93e8d8bef9SDimitry Andric return false;
94e8d8bef9SDimitry Andric if (Current->is(tok::equal))
95e8d8bef9SDimitry Andric nextToken();
96e8d8bef9SDimitry Andric parseTail();
97e8d8bef9SDimitry Andric return true;
98e8d8bef9SDimitry Andric }
99e8d8bef9SDimitry Andric
parseTail()100e8d8bef9SDimitry Andric void parseTail() {
101e8d8bef9SDimitry Andric while (Current->isNot(tok::eof)) {
102e8d8bef9SDimitry Andric Def.Body.push_back(Current);
103e8d8bef9SDimitry Andric nextToken();
104e8d8bef9SDimitry Andric }
105e8d8bef9SDimitry Andric Def.Body.push_back(Current);
106e8d8bef9SDimitry Andric }
107e8d8bef9SDimitry Andric
nextToken()108e8d8bef9SDimitry Andric void nextToken() {
109e8d8bef9SDimitry Andric if (Pos + 1 < Tokens.size())
110e8d8bef9SDimitry Andric ++Pos;
111e8d8bef9SDimitry Andric Current = Tokens[Pos];
112e8d8bef9SDimitry Andric Current->Finalized = true;
113e8d8bef9SDimitry Andric }
114e8d8bef9SDimitry Andric
115e8d8bef9SDimitry Andric size_t Pos = 0;
116e8d8bef9SDimitry Andric FormatToken *Current = nullptr;
117e8d8bef9SDimitry Andric Definition Def;
118e8d8bef9SDimitry Andric ArrayRef<FormatToken *> Tokens;
119e8d8bef9SDimitry Andric };
120e8d8bef9SDimitry Andric
MacroExpander(const std::vector<std::string> & Macros,SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)121e8d8bef9SDimitry Andric MacroExpander::MacroExpander(
122*0fca6ea1SDimitry Andric const std::vector<std::string> &Macros, SourceManager &SourceMgr,
123e8d8bef9SDimitry Andric const FormatStyle &Style,
124e8d8bef9SDimitry Andric llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125e8d8bef9SDimitry Andric IdentifierTable &IdentTable)
126e8d8bef9SDimitry Andric : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127e8d8bef9SDimitry Andric IdentTable(IdentTable) {
12881ad6265SDimitry Andric for (const std::string &Macro : Macros)
129e8d8bef9SDimitry Andric parseDefinition(Macro);
130e8d8bef9SDimitry Andric }
131e8d8bef9SDimitry Andric
// The destructor is defaulted here, in the file that also defines
// MacroExpander::Definition.
// NOTE(review): presumably it is out of line so that members storing
// Definition objects are destroyed where that type is complete - confirm
// against the class declaration in Macros.h.
132e8d8bef9SDimitry Andric MacroExpander::~MacroExpander() = default;
133e8d8bef9SDimitry Andric
parseDefinition(const std::string & Macro)134e8d8bef9SDimitry Andric void MacroExpander::parseDefinition(const std::string &Macro) {
135e8d8bef9SDimitry Andric Buffers.push_back(
136e8d8bef9SDimitry Andric llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137*0fca6ea1SDimitry Andric FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138e8d8bef9SDimitry Andric FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139e8d8bef9SDimitry Andric Allocator, IdentTable);
140e8d8bef9SDimitry Andric const auto Tokens = Lex.lex();
141e8d8bef9SDimitry Andric if (!Tokens.empty()) {
142e8d8bef9SDimitry Andric DefinitionParser Parser(Tokens);
143e8d8bef9SDimitry Andric auto Definition = Parser.parse();
14406c3fb27SDimitry Andric if (Definition.ObjectLike) {
14506c3fb27SDimitry Andric ObjectLike[Definition.Name] = std::move(Definition);
14606c3fb27SDimitry Andric } else {
14706c3fb27SDimitry Andric FunctionLike[Definition.Name][Definition.Params.size()] =
14806c3fb27SDimitry Andric std::move(Definition);
14906c3fb27SDimitry Andric }
150e8d8bef9SDimitry Andric }
151e8d8bef9SDimitry Andric }
152e8d8bef9SDimitry Andric
defined(StringRef Name) const153*0fca6ea1SDimitry Andric bool MacroExpander::defined(StringRef Name) const {
15406c3fb27SDimitry Andric return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric
objectLike(StringRef Name) const157*0fca6ea1SDimitry Andric bool MacroExpander::objectLike(StringRef Name) const {
15806c3fb27SDimitry Andric return ObjectLike.contains(Name);
159e8d8bef9SDimitry Andric }
160e8d8bef9SDimitry Andric
hasArity(StringRef Name,unsigned Arity) const161*0fca6ea1SDimitry Andric bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
16206c3fb27SDimitry Andric auto it = FunctionLike.find(Name);
16306c3fb27SDimitry Andric return it != FunctionLike.end() && it->second.contains(Arity);
16406c3fb27SDimitry Andric }
165e8d8bef9SDimitry Andric
166*0fca6ea1SDimitry Andric SmallVector<FormatToken *, 8>
expand(FormatToken * ID,std::optional<ArgsList> OptionalArgs) const16706c3fb27SDimitry Andric MacroExpander::expand(FormatToken *ID,
16806c3fb27SDimitry Andric std::optional<ArgsList> OptionalArgs) const {
16906c3fb27SDimitry Andric if (OptionalArgs)
17006c3fb27SDimitry Andric assert(hasArity(ID->TokenText, OptionalArgs->size()));
17106c3fb27SDimitry Andric else
17206c3fb27SDimitry Andric assert(objectLike(ID->TokenText));
17306c3fb27SDimitry Andric const Definition &Def = OptionalArgs
17406c3fb27SDimitry Andric ? FunctionLike.find(ID->TokenText)
17506c3fb27SDimitry Andric ->second.find(OptionalArgs.value().size())
17606c3fb27SDimitry Andric ->second
17706c3fb27SDimitry Andric : ObjectLike.find(ID->TokenText)->second;
17806c3fb27SDimitry Andric ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
17906c3fb27SDimitry Andric SmallVector<FormatToken *, 8> Result;
180e8d8bef9SDimitry Andric // Expand each argument at most once.
181e8d8bef9SDimitry Andric llvm::StringSet<> ExpandedArgs;
182e8d8bef9SDimitry Andric
183e8d8bef9SDimitry Andric // Adds the given token to Result.
184e8d8bef9SDimitry Andric auto pushToken = [&](FormatToken *Tok) {
185e8d8bef9SDimitry Andric Tok->MacroCtx->ExpandedFrom.push_back(ID);
186e8d8bef9SDimitry Andric Result.push_back(Tok);
187e8d8bef9SDimitry Andric };
188e8d8bef9SDimitry Andric
189e8d8bef9SDimitry Andric // If Tok references a parameter, adds the corresponding argument to Result.
190e8d8bef9SDimitry Andric // Returns false if Tok does not reference a parameter.
191e8d8bef9SDimitry Andric auto expandArgument = [&](FormatToken *Tok) -> bool {
192e8d8bef9SDimitry Andric // If the current token references a parameter, expand the corresponding
193e8d8bef9SDimitry Andric // argument.
1945f757f3fSDimitry Andric if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
195e8d8bef9SDimitry Andric return false;
196e8d8bef9SDimitry Andric ExpandedArgs.insert(Tok->TokenText);
197e8d8bef9SDimitry Andric auto I = Def.ArgMap.find(Tok->TokenText);
198e8d8bef9SDimitry Andric if (I == Def.ArgMap.end())
199e8d8bef9SDimitry Andric return false;
200e8d8bef9SDimitry Andric // If there are fewer arguments than referenced parameters, treat the
201e8d8bef9SDimitry Andric // parameter as empty.
202e8d8bef9SDimitry Andric // FIXME: Potentially fully abort the expansion instead.
203e8d8bef9SDimitry Andric if (I->getValue() >= Args.size())
204e8d8bef9SDimitry Andric return true;
205e8d8bef9SDimitry Andric for (FormatToken *Arg : Args[I->getValue()]) {
206e8d8bef9SDimitry Andric // A token can be part of a macro argument at multiple levels.
207e8d8bef9SDimitry Andric // For example, with "ID(x) x":
208e8d8bef9SDimitry Andric // in ID(ID(x)), 'x' is expanded first as argument to the inner
209e8d8bef9SDimitry Andric // ID, then again as argument to the outer ID. We keep the macro
210e8d8bef9SDimitry Andric // role the token had from the inner expansion.
211e8d8bef9SDimitry Andric if (!Arg->MacroCtx)
212e8d8bef9SDimitry Andric Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
213e8d8bef9SDimitry Andric pushToken(Arg);
214e8d8bef9SDimitry Andric }
215e8d8bef9SDimitry Andric return true;
216e8d8bef9SDimitry Andric };
217e8d8bef9SDimitry Andric
218e8d8bef9SDimitry Andric // Expand the definition into Result.
219e8d8bef9SDimitry Andric for (FormatToken *Tok : Def.Body) {
220e8d8bef9SDimitry Andric if (expandArgument(Tok))
221e8d8bef9SDimitry Andric continue;
222e8d8bef9SDimitry Andric // Create a copy of the tokens from the macro body, i.e. were not provided
223e8d8bef9SDimitry Andric // by user code.
224e8d8bef9SDimitry Andric FormatToken *New = new (Allocator.Allocate()) FormatToken;
225e8d8bef9SDimitry Andric New->copyFrom(*Tok);
226e8d8bef9SDimitry Andric assert(!New->MacroCtx);
227e8d8bef9SDimitry Andric // Tokens that are not part of the user code are not formatted.
228e8d8bef9SDimitry Andric New->MacroCtx = MacroExpansion(MR_Hidden);
229e8d8bef9SDimitry Andric pushToken(New);
230e8d8bef9SDimitry Andric }
231e8d8bef9SDimitry Andric assert(Result.size() >= 1 && Result.back()->is(tok::eof));
232e8d8bef9SDimitry Andric if (Result.size() > 1) {
233e8d8bef9SDimitry Andric ++Result[0]->MacroCtx->StartOfExpansion;
234e8d8bef9SDimitry Andric ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
235e8d8bef9SDimitry Andric }
236e8d8bef9SDimitry Andric return Result;
237e8d8bef9SDimitry Andric }
238e8d8bef9SDimitry Andric
239e8d8bef9SDimitry Andric } // namespace format
240e8d8bef9SDimitry Andric } // namespace clang
241